/*
##########
#
# wCount.cpp
# version 1.00
# MDJ 1999-05-18
#
# Words Count:
#   Counts the numbers of individual
#   words in a specified text
#   file.
#
# Copyright (c) 1999 by
# M. David Johnson
# BDS Software
# P.O. Box 485
# Glenview, IL 60025-0485
# Phone: 847-998-1656
# Fax: 847-657-8359
# Email: mdjohnson@worldnet.att.net
# Web: http://www.bds-soft.com
# All Rights Reserved
#
##########
*/

/*
 * Overview
 *
 * Usage:  wCount <filename>
 *
 * The input file is split into words by parseWord(). Each distinct
 * word is kept in a singly linked list that is held in ascending
 * strcmp() order between a dummy head node (empty string) and a dummy
 * tail node (18 bytes of 0xFF), so an insertion never has to special-
 * case either end of the list. When the input is exhausted, one line
 * per distinct word ("<word> <count>") is written to "wReport.txt".
 */

// header files
// NOTE(review): the header names were missing from the #include
// directives; these are the standard headers the code below uses.
#include <stdio.h>              // FILE, fopen, fgetc, fprintf, fclose, ferror
#include <stdlib.h>             // exit
#include <string.h>             // strcmp, strcpy, memset
#include <iostream>             // cout
#include <new>                  // bad_alloc

using std::bad_alloc;
using std::cout;

void zeroWH();
void parseWord();

// Longest word that is stored; longer words are truncated to this length
static const unsigned long MAX_WORD_LEN = 18;

// Global Variable declarations
FILE *fp;                       // file pointer
char ch;                        // character holder
char word[MAX_WORD_LEN + 1];    // word holder
unsigned long cct = 0;          // no. of characters in the word holder
                                //   (reset for every word by zeroWH)
unsigned long wct = 0;          // run-of-file word counter

struct node {                   // linked list word info storage
    char word[MAX_WORD_LEN + 1];    // max. word length = 18 + "\0"
    unsigned long count;        // no. of occurrences of word
    node *pNext;                // pointer to next node
};

node *pHNode;                   // pointer to head node
node *pTNode;                   // pointer to tail node
node *pCNode;                   // pointer to current node
node *pPNode;                   // pointer to previous node
node *pNNode;                   // pointer to new node

/*
##########
#
# newNode
#
# Allocate a list node and fill it in.
#
# text  = word to store; must not be
#         longer than MAX_WORD_LEN
# count = initial occurrence count
# pNext = node that follows the new one
#
# Returns the new node. Prints an error
# and exits the program if the
# allocation fails.
#
##########
*/

static node *newNode(const char *text, unsigned long count, node *pNext)
{
    node *pNode = NULL;

    try {
        pNode = new node;
    }
    catch (const bad_alloc &) {
        cout << "\n\nERROR: Allocation failure\n\n";
        exit(1);
    }

    strcpy(pNode->word, text);
    pNode->count = count;
    pNode->pNext = pNext;
    return pNode;
}

/*
##########
#
# initList
#
# Build the empty word list: a dummy
# head node followed by a dummy tail
# node.
#
##########
*/

static void initList()
{
    // The tail sentinel is 18 bytes of 0xFF. strcmp() compares bytes
    // as unsigned char, so it sorts after every word parseWord() can
    // produce (those contain only 7-bit characters).
    char sentinel[MAX_WORD_LEN + 1];
    memset(sentinel, 0xFF, MAX_WORD_LEN);
    sentinel[MAX_WORD_LEN] = 0x00;

    pTNode = newNode(sentinel, 0, NULL);    // dummy tail node
    pHNode = newNode("", 0, pTNode);        // dummy head node

    pCNode = pTNode;                        // current
    pPNode = pHNode;                        // previous
}

/*
##########
#
# tallyWord
#
# Record one occurrence of the word in
# the global word holder: increment the
# count of its node, or insert a new
# node (count 1) at its sorted position.
#
# The word holder must not be empty.
#
##########
*/

static void tallyWord()
{
    // Walk forward to the first node that is not less than the word.
    // The head node holds the empty string, which is always less than
    // a non-empty word, so the walk starts just past it.
    pPNode = pHNode;
    pCNode = pHNode->pNext;
    while (strcmp(pCNode->word, word) < 0) {
        if (pCNode->pNext == NULL) {
            // Defensive: the tail sentinel should stop the walk first
            cout << "\n\nERROR: Stepped past End-of-List\n\n";
            exit(1);
        }
        pPNode = pCNode;                    // increment pointers
        pCNode = pCNode->pNext;
    }

    if (strcmp(pCNode->word, word) == 0) {
        pCNode->count++;                    // increment listed word count
        return;
    }

    pNNode = newNode(word, 1, pCNode);      // new node before current node
    pPNode->pNext = pNNode;                 // point from previous node
}

/*
##########
#
# main
#
# argv[1] = name of the text file
#           to count
#
# Returns 0 on success. Exits with
# status 1 if the filename is missing,
# a file cannot be opened, read or
# written, or memory runs out.
#
##########
*/

int main(int argc, char *argv[])
{
    // Check for command line entry of filename
    if (argc != 2) {
        cout << "\n\nERROR: Filename required on command line!\n\n";
        exit(1);
    }

    // Open file for counting
    fp = fopen(argv[1], "r");
    if (fp == NULL) {
        cout << "\n\nERROR: Could not open " << argv[1] << "!\n\n";
        exit(1);
    }

    // Initialize the linked list
    initList();

    // Do parsing and counting
    for (;;) {
        zeroWH();                           // clear the word holder
        parseWord();                        // parse out a word
        if (word[0] == 0x00) {
            break;                          // no word left: end of input
        }
        tallyWord();                        // process the parsed word
    }

    // An empty word holder means EOF or a read failure; tell them apart
    if (ferror(fp) != 0) {
        cout << "\n\nERROR: Could not read " << argv[1] << "!\n\n";
        exit(1);
    }

    // Close the file and open the report file
    fclose(fp);
    FILE *fpR;
    const char *rFilename = "wReport.txt";
    fpR = fopen(rFilename, "w");
    if (fpR == NULL) {
        cout << "\n\nError: Could not open " << rFilename << "\n\n";
        exit(1);
    }

    // Output the report to the report file. Only the nodes strictly
    // between the dummy head and the dummy tail are real words, so an
    // input without any words produces an empty report.
    for (pCNode = pHNode->pNext; pCNode != pTNode; pCNode = pCNode->pNext) {
        fprintf(fpR, "%s %lu\n", pCNode->word, pCNode->count);
    }
    cout << "\n\nCheck: total words = " << wct;

    // Close the report file and indicate completion
    bool reportOk = (ferror(fpR) == 0);
    if (fclose(fpR) != 0) {
        reportOk = false;                   // the final flush failed
    }
    if (!reportOk) {
        cout << "\n\nError: Could not write " << rFilename << "\n\n";
        exit(1);
    }
    cout << "\nReport Complete\n";

    // Free memory
    pPNode = pHNode;                        // point to head of list
    while (pPNode != NULL) {
        pCNode = pPNode->pNext;             // remember the next node
        delete pPNode;
        pPNode = pCNode;
    }
    pHNode = NULL;                          // nothing is left to point at
    pTNode = NULL;
    pCNode = NULL;
    pNNode = NULL;

    cout << "\nRun Complete\n\n";
    return(0);
}

/*
##########
#
# zeroWH
# MDJ 1999-05-15
#
# Zero the global word holder
# and the global character count
#
##########
*/

void zeroWH()
{
    memset(word, 0, sizeof word);
    cct = 0;
}

/*
##########
#
# isWordChar
#
# Returns true if c is one of the
# characters that may be part of a
# word (see parseWord).
#
##########
*/

static bool isWordChar(int c)
{
    return (c == '\'') || (c == '-')
        || ((c >= '0') && (c <= '9'))
        || ((c >= 'A') && (c <= 'Z'))
        || ((c >= 'a') && (c <= 'z'));
}

/*
##########
#
# endWord
#
# Count the word that has just been
# completed in the word holder and
# print a progress line after every
# 1000th word.
#
##########
*/

static void endWord()
{
    wct++;                                  // increment word counter
    if ((wct % 1000) == 0) {
        cout << wct << word << "\n";        // progress counter
    }
}

/*
##########
#
# parseWord
# MDJ 1999-05-15
#
# Parse a word from the document
#
# Characters:
#   ASCII 39     = '
#   ASCII 45     = -
#   ASCII 48-57  = 0-9
#   ASCII 65-90  = A-Z
#   ASCII 97-122 = a-z
#
# All other ASCII values
#   = whitespace
#
# The word holder must have been
# cleared with zeroWH() first. On
# return it holds the next word,
# converted to lower case, or it is
# still empty if the input ended (EOF
# or read error) before another word
# was found.
#
# Only the first 18 characters of a
# word are kept; the rest of an
# over-long word is read and dropped.
# A word that ends at EOF without
# trailing whitespace is still
# counted.
#
##########
*/

void parseWord()
{
    for (;;) {
        // Keep fgetc()'s int result so EOF stays distinguishable from
        // every real character, and so a read error (which also yields
        // EOF) ends the loop instead of spinning forever.
        int c = fgetc(fp);                  // get a character
        if (c == EOF) {
            if (cct > 0) {
                endWord();                  // the input ended inside a word
            }
            return;
        }
        ch = (char) c;

        // Add characters to word holder
        if (isWordChar(c)) {
            if ((c >= 'A') && (c <= 'Z')) {
                ch = (char) (c - 'A' + 'a');    // convert to lower case
            }
            if (cct < MAX_WORD_LEN) {       // never write past the holder
                word[cct] = ch;
                cct++;
            }
            continue;
        }

        // Skip leading whitespace
        if (cct == 0) {
            continue;
        }

        // End-of-word at whitespace
        endWord();
        return;
    }
}

/*
##########
#
# end of wCount.cpp
#
##########
*/