/*
##########
#
# cWCount.cpp
# version 1.00
# MDJ 1999-05-15
#
# Common Words Count:
#   Counts the number of common
#   words in a specified text
#   file.
#
# Common words = a, an, and, as, be, but, by, for, from,
#   he, her, him, his, I, in, is, it, me, my, not, of,
#   our, out, shall, she, that, the, their, them, they,
#   to, unto, up, upon, us, was, we, were, with, you.
#
# Copyright (c) 1999 by
#   M. David Johnson
#   BDS Software
#   P.O. Box 485
#   Glenview, IL 60025-0485
#   Phone: 847-998-1656
#   Fax: 847-657-8359
#   Email: mdjohnson@worldnet.att.net
#   Web: http://www.bds-soft.com
#   All Rights Reserved
#
##########
*/

// header files
// (C stdio for FILE/fopen/fgetc/fprintf, stdlib for exit, string for
//  strcmp/strcpy/memset, and iostream for console messages via cout)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>

using std::cout;

void zeroWH();
void parseWord();

// Size of the word buffers, including the terminating NUL
enum { WORD_BUF_SIZE = 128 };

// One entry of the common-word table
struct CommonWord {
    const char   *word;     // lower-case spelling matched against the parser output
    const char   *label;    // label written to the report for this word
    unsigned long count;    // number of occurrences seen so far
};

// Global Variable declarations
FILE *fp;                               // file pointer
char ch;                                // character holder (last character read)
char word[WORD_BUF_SIZE];               // word holder
char maxWord[WORD_BUF_SIZE] = "a";      // maximum length word holder
unsigned long cct = 0;                  // characters stored in the current word
unsigned long wct = 0;                  // run-of-file word counter
unsigned long maxWordLen = 0;           // maximum word length


/*
##########
#
# main
#
# Usage: cWCount <filename>
#
# Reads the named text file word by word, counts the
# occurrences of each common word, and writes the counts,
# the total number of words, and the longest word found
# to the file cWReport.txt in the current directory.
#
# Returns 0 on success; exits with status 1 if the
# command line is wrong, a file cannot be opened, or an
# I/O error occurs.
#
##########
*/

int main(int argc, char *argv[])
{
    // Check for command line entry of filename
    if (argc != 2) {
        cout << "\n\nERROR: Filename required on command line!\n\n";
        exit(1);
    }

    // Open file for counting
    fp = fopen(argv[1], "r");
    if (fp == NULL) {
        cout << "\n\nERROR: Could not open " << argv[1] << "!\n\n";
        exit(1);
    }

    // Common Word Counters declarations and initialization.
    // Table order is the order of the lines in the report.
    CommonWord table[] = {
        { "a",     " a",     0 }, { "an",    " an",    0 },
        { "and",   " and",   0 }, { "as",    " as",    0 },
        { "be",    " be",    0 }, { "but",   " but",   0 },
        { "by",    " by",    0 }, { "for",   " for",   0 },
        { "from",  " from",  0 }, { "he",    " he",    0 },
        { "her",   " her",   0 }, { "him",   " him",   0 },
        { "his",   " his",   0 }, { "i",     " I",     0 },
        { "in",    " in",    0 }, { "is",    " is",    0 },
        { "it",    " it",    0 }, { "me",    " me",    0 },
        { "my",    " my",    0 }, { "not",   " not",   0 },
        { "of",    " of",    0 }, { "our",   " our",   0 },
        { "out",   " out",   0 }, { "shall", "shall",  0 },
        { "she",   " she",   0 }, { "that",  " that",  0 },
        { "the",   " the",   0 }, { "their", "their",  0 },
        { "them",  " them",  0 }, { "they",  " they",  0 },
        { "to",    " to",    0 }, { "unto",  " unto",  0 },
        { "up",    " up",    0 }, { "upon",  " upon",  0 },
        { "us",    " us",    0 }, { "was",   " was",   0 },
        { "we",    " we",    0 }, { "were",  " were",  0 },
        { "with",  " with",  0 }, { "you",   " you",   0 }
    };
    const size_t tableLen = sizeof table / sizeof table[0];

    // Do parsing and counting
    for (;;) {
        zeroWH();           // clear the word holder
        parseWord();        // parse out a word (empty at end of input)

        // The table holds distinct words, so at most one entry matches
        for (size_t i = 0; i < tableLen; i++) {
            if (strcmp(word, table[i].word) == 0) {
                table[i].count++;
                break;
            }
        }

        // Stop at end of file, and also on a read error: feof() alone
        // never becomes true after an error, which would loop forever.
        if (feof(fp) != 0 || ferror(fp) != 0) {
            break;
        }
    }

    if (ferror(fp) != 0) {
        cout << "\n\nERROR: Could not read " << argv[1] << "!\n\n";
        fclose(fp);
        exit(1);
    }

    // Close the file and open the report file
    fclose(fp);
    FILE *fpR;
    const char *rFilename = "cWReport.txt";
    fpR = fopen(rFilename, "w");
    if (fpR == NULL) {
        cout << "\n\nError: Could not open " << rFilename << "\n\n";
        exit(1);
    }

    // Output the report to the report file.
    // The counters are unsigned long, so the conversion must be %lu.
    for (size_t i = 0; i < tableLen; i++) {
        fprintf(fpR, "%s = %lu \n", table[i].label, table[i].count);
    }
    fprintf(fpR, "\n");     // blank line after the last common word

    // wct already equals the number of words parsed (parseWord counts a
    // final word that ends at end-of-file), so it is reported as is.
    fprintf(fpR, " total words = %lu \n\n", wct);
    fprintf(fpR, "max word length = %lu \n\n", maxWordLen);
    fprintf(fpR, "max word = %s\n", maxWord);

    // Close the report file and indicate completion.
    // fclose flushes buffered output, so a failure here means the
    // report may be incomplete.
    if (fclose(fpR) != 0) {
        cout << "\n\nError: Could not write " << rFilename << "\n\n";
        exit(1);
    }
    cout << "\nReport Complete\n\n";

    return(0);
}


/*
##########
#
# zeroWH
# MDJ 1999-05-15
#
# Zero the global word holder
# and the global character count
#
##########
*/

void zeroWH()
{
    memset(word, 0, sizeof word);
    cct = 0;
}


/*
##########
#
# isWordChar
#
# Return true if c is one of the characters that may
# appear in a word (see parseWord), false otherwise.
#
##########
*/

static bool isWordChar(int c)
{
    return (c == '\'') || (c == '-') ||
           ((c >= '0') && (c <= '9')) ||
           ((c >= 'A') && (c <= 'Z')) ||
           ((c >= 'a') && (c <= 'z'));
}


/*
##########
#
# finishWord
#
# Account for the word currently in the word holder:
# remember it if it is the longest so far, increment the
# word counter, and print a progress line to the console
# every 10000 words.
#
##########
*/

static void finishWord()
{
    if (cct > maxWordLen) {
        maxWordLen = cct;           // new max word length
        strcpy(maxWord, word);      // safe: word is NUL-terminated and
                                    // both buffers are WORD_BUF_SIZE bytes
    }

    wct++;                          // increment word counter
    if ((wct % 10000) == 0) {
        cout << wct << "\n";        // progress counter
    }
}


/*
##########
#
# parseWord
# MDJ 1999-05-15
#
# Parse a word from the document
#
# Characters:
#   ASCII 39     = '
#   ASCII 45     = -
#   ASCII 48-57  = 0-9
#   ASCII 65-90  = A-Z
#   ASCII 97-122 = a-z
#
# All other ASCII values
#   = whitespace
#
# Reads from the global file pointer fp, skipping leading
# whitespace, and stores the next word, converted to lower
# case, in the global word holder. The word holder must
# have been cleared with zeroWH() first. Returns when the
# word is terminated by whitespace, by end-of-file, or by
# a read error; the word holder is left empty if no word
# character was found.
#
# Words longer than WORD_BUF_SIZE - 1 characters are
# truncated to that length; the excess characters are
# consumed and discarded.
#
##########
*/

void parseWord()
{
    for (;;) {
        // fgetc returns an int so that EOF is distinguishable from
        // every valid character value
        int c = fgetc(fp);
        ch = (char)c;

        if (c == EOF) {
            // End of file or read error. A word still being collected
            // is complete, so count it before returning.
            if (cct > 0) {
                finishWord();
            }
            break;
        }

        // Add characters to word holder
        if (isWordChar(c)) {
            if ((c >= 'A') && (c <= 'Z')) {
                c += 'a' - 'A';     // convert to lower case
            }
            // Always leave room for the terminating NUL
            if (cct < WORD_BUF_SIZE - 1) {
                word[cct] = (char)c;
                cct++;
            }
            continue;
        }

        // Skip leading whitespace
        if (cct == 0) {
            continue;
        }

        // End-of-word at whitespace
        finishWord();
        break;
    }
}


/*
##########
#
# end of cWCount.cpp
#
##########
*/