/* This program will read a text file and make an alphabetical list of all the distinct words in that file, together with the number of times that each word occurs. A "word" is defined as a sequence of letters. Other characters in the input file are discarded. All letters are converted to lower case. The data will be output to another file. The user will be asked to input the names of the input and output files. The program will end if either file can't be opened. If the output file already exists, it will be overwritten without warning. This version of the program stores the words in a binary sort tree. Other versions (wordlist.cc and wordlist_list.cc) use an array or a linked list. */ #include #include #include using namespace std; /* A TreeNode is a node in a binary sort tree that holds data about * one word that was found in the file. */ struct TreeNode { string word; // A word, with all letters converted to lower case. int frequency; // Number of times that the word occurred in the file. TreeNode *left; // Pointer to left subtree, which contains words // that precede this word alphabetically. TreeNode *right; // Pointer to right subtree, which contains words // that follow this word alphabetically. }; /* Read a "word", meaning a sequence of letters, from an input stream. * Any non-letter characters are skipped before reading the word. If * no word can be read (because of end-of-stream, for example), then * an empty string is returned. */ string readWord(istream &in) { string word; // the word read from the input stream char ch; // one char read from the input stream while (in && !isalpha(in.peek())) { // skip past non-letter character in.get(ch); } word = ""; while (in && isalpha(in.peek())) { in.get(ch); word += ch; } return word; } /* Convert any upper-case characters in a string to lower case. * Any non-letter characters in the string are left unchanged. */ void makeLowerCase(string &str) { int top = str.length(); for (int i = 0; i < top; i++) str[i] = tolower(str[i]); } /* Add a word to the binary sort tree. If the word already exists * in the tree, then the associated frequency count is incremented. * Otherwise, a new node is created to hold the word and is added * to the tree. */ void insert(TreeNode* &sorttree, string word) { if (sorttree == NULL) { // Create a new node to hold the word, with frequency eaqul // to 1, since this is the first time the word has been seen. sorttree = new TreeNode; sorttree->word = word; sorttree->frequency = 1; sorttree->left = NULL; sorttree->right = NULL; } else if (word == sorttree->word) { // Found word already in tree; add 1 to the frequency count. sorttree->frequency++; } else if (word < sorttree->word) { // Add word to left subtree. insert(sorttree->left, word); } else { // Add word to right subtree. insert(sorttree->right, word); } } /* Read all the "words" from an input stream. All words are converted * to lower case, and add the words to a binary sort tree. */ void makeWordList(istream &in, TreeNode* &tree) { string word; word = readWord(in); while (word != "") { makeLowerCase(word); insert(tree,word); word = readWord(in); } } /* Returns the number of nodes in a tree. */ int countNodes(TreeNode *tree) { if (tree == NULL) return 0; else return 1 + countNodes(tree->left) + countNodes(tree->right); } /* Print the data from the binary sort tree of words, using an * in-order traversal so that the words will appear in alphabetical order. */ void printWordTree(ostream &out, TreeNode *root) { if (root != NULL) { printWordTree(out, root->left); out << " " << root->word << " (" << root->frequency << ")\n"; printWordTree(out, root->right); } } int main() { string inFileName, outFileName; // file names, input by user ifstream in; // stream for reading from input file ofstream out; // stream for writing the output TreeNode *wordTree; // Pointer to a binary sort tree that holds the words. int wordCount; // the number of distinct words; this is the // number of spaces used in the wordList and // wordFreq arrays cout << "This program will make a list of all the distinct words\n"; cout << "in a specified input file, in alphabetical order, along\n"; cout << "with the number of times each word occurs. The output\n"; cout << "will be written to a specified file.\n\n"; // Open the files. cout << "What is the name of the input file? "; cin >> inFileName; in.open(inFileName.c_str()); if (!in) { cout << "Sorry, can't open file \"" << inFileName << "\" for input.\n"; exit(1); } cout << "What is the name of the output file? "; cin >> outFileName; out.open(outFileName.c_str()); if (!out) { cout << "Sorry, can't open file \"" << outFileName << "\" for output.\n"; exit(1); } // Make the list of words, then count the number of words found. wordTree = NULL; // Start with empty tree. makeWordList(in,wordTree); wordCount = countNodes(wordTree); // Output the word data. out << wordCount << " distinct words found in file \"" << inFileName << "\":\n\n"; printWordTree(out,wordTree); // Tell user what happened. if (wordCount == 0) { cout << "Warning: no words found in file \"" << inFileName << "\".\n\n"; } else { cout << wordCount << " distinct words found in file \"" << inFileName << "\".\n"; cout << "Output in file \"" << outFileName << "\".\n"; if (!out) cout << "Output might not be complete; some error occurred on output!\n\n"; } } // end main()