/* This program will read a text file and make an alphabetical list of all the distinct words in that file, together with the number of times that each word occurs. A "word" is defined as a sequence of letters. Other characters in the input file are discarded. All letters are converted to lower case. The data will be output to another file. The user will be asked to input the names of the input and output files. The program will end if either file can't be opened. If the output file already exists, it will be overwritten without warning. This version of the program stores the words in a sorted linked list. Other versions (wordlist.cc and wordlist_list.cc) use an array or a binary sort tree. */ #include #include #include using namespace std; /* A ListNode is a node in a sorted linked list that holds data about * one word that was found in the file. */ struct ListNode { string word; // A word, with all letters converted to lower case. int frequency; // Number of times that the word occurred in the file. ListNode *next; // Pointer to next nodein the list. }; /* Read a "word", meaning a sequence of letters, from an input stream. * Any non-letter characters are skipped before reading the word. If * no word can be read (because of end-of-stream, for example), then * an empty string is returned. */ string readWord(istream &in) { string word; // the word read from the input stream char ch; // one char read from the input stream while (in && !isalpha(in.peek())) { // skip past non-letter character in.get(ch); } word = ""; while (in && isalpha(in.peek())) { in.get(ch); word += ch; } return word; } /* Convert any upper-case characters in a string to lower case. * Any non-letter characters in the string are left unchanged. */ void makeLowerCase(string &str) { int top = str.length(); for (int i = 0; i < top; i++) str[i] = tolower(str[i]); } /* Add a word to the sorted linked list. If the word already exists * in the list, then the associated frequency count is incremented. * Otherwise, a new node is created to hold the word and is added * to the list. */ void insert(ListNode* &sortedList, string word) { if (sortedList == NULL || word < sortedList->word) { // List is empty, or the new word comes before first word in list. // Create a new node to hold the word, with frequency equal // to 1, since this is the first time the word has been seen, // and add it to the front of the list. ListNode *newNode = new ListNode; newNode->word = word; newNode->frequency = 1; newNode->next = sortedList; sortedList = newNode; } else { // The word comes somewhere later in the list ListNode *runner = sortedList->next; ListNode *previous = sortedList; while (runner != NULL && word > runner->word) { previous = runner; runner = runner->next; } if (runner != NULL && word == runner->word) { // Found the word already in the list -- add 1 to the frequency. runner->frequency++; } else { // Insert a new node between previous and runner ListNode *newNode = new ListNode; newNode->word = word; newNode->frequency = 1; newNode->next = runner; previous->next = newNode; } } } /* Read all the "words" from an input stream. All words are converted * to lower case, and add the words to a binary sort tree. */ void makeWordList(istream &in, ListNode* &tree) { string word; word = readWord(in); while (word != "") { makeLowerCase(word); insert(tree,word); word = readWord(in); } } /* Returns the number of nodes in a List. */ int countNodes(ListNode *list) { int ct = 0; for (ListNode *runner = list; runner; runner = runner->next) { ct++; } return ct; } /* Print the data from the sorted linked list. */ void printWordList(ostream &out, ListNode *list) { while (list) { // Note: OK to change list, since it's passed by value. out << " " << list->word << " (" << list->frequency << ")\n"; list = list->next; } } int main() { string inFileName, outFileName; // file names, input by user ifstream in; // stream for reading from input file ofstream out; // stream for writing the output ListNode *wordList; // Pointer to a sorted linked list that holds the words. int wordCount; // the number of distinct words; this is the // number of spaces used in the wordList and // wordFreq arrays cout << "This program will make a list of all the distinct words\n"; cout << "in a specified input file, in alphabetical order, along\n"; cout << "with the number of times each word occurs. The output\n"; cout << "will be written to a specified file.\n\n"; // Open the files. cout << "What is the name of the input file? "; cin >> inFileName; in.open(inFileName.c_str()); if (!in) { cout << "Sorry, can't open file \"" << inFileName << "\" for input.\n"; exit(1); } cout << "What is the name of the output file? "; cin >> outFileName; out.open(outFileName.c_str()); if (!out) { cout << "Sorry, can't open file \"" << outFileName << "\" for output.\n"; exit(1); } // Make the list of words, then count the number of words found. wordList = NULL; // Start with empty tree. makeWordList(in,wordList); wordCount = countNodes(wordList); // Output the word data. out << wordCount << " distinct words found in file \"" << inFileName << "\":\n\n"; printWordList(out,wordList); // Tell user what happened. if (wordCount == 0) { cout << "Warning: no words found in file \"" << inFileName << "\".\n\n"; } else { cout << wordCount << " distinct words found in file \"" << inFileName << "\".\n"; cout << "Output in file \"" << outFileName << "\".\n"; if (!out) cout << "Output might not be complete; some error occurred on output!\n\n"; } } // end main()