Changeset 1633 for trunk/gsdl
- Timestamp: 2000-10-31T10:07:22+13:00 (24 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/phind/host/phindcgi.cpp
r1629 r1633 69 69 bool &XMLmode); 70 70 71 void print_expansions(char *cgi_script, char *collection, bool XMLmode, 72 TextData &textdata, vector <unsigned long> dlist,71 void print_expansions(char *cgi_script, char *collection, bool XMLmode, UCArray body, 72 TextData &textdata, vector <unsigned long> elist, 73 73 unsigned long first, unsigned long last); 74 74 … … 91 91 vector <unsigned long> &docnum, 92 92 vector <unsigned long> &docfrq); 93 94 void split_phrase(UCArray word, UCArray body, UCArray &prefix, UCArray &suffix); 95 bool phrase_match(UCArray text, UCArray::iterator &here, UCArray::iterator end); 93 96 94 97 void get_document_all_data(TextData &docdata, unsigned long docNum, … … 203 206 << "\" end=\"" << last_e << "\">" << endl; 204 207 205 print_expansions(argv[0], collection, XMLmode, textdata, el, first_e, last_e); 206 208 print_expansions(argv[0], collection, XMLmode, word, textdata, el, first_e, last_e); 207 209 208 210 cout << "</expansionlist>" << endl; … … 217 219 } 218 220 219 cout << "<p><table ><tr><th align=left>Phrase</th><th>freq</th><th>docs</th></tr>" << endl;220 print_expansions(argv[0], collection, XMLmode, textdata, el, first_e, last_e);221 cout << "<p><table border=0><tr><th align=left>Phrase</th><th>freq</th><th>docs</th></tr>" << endl; 222 print_expansions(argv[0], collection, XMLmode, word, textdata, el, first_e, last_e); 221 223 cout << "</table>" << endl; 222 224 … … 304 306 // Print a list of expansions 305 307 // 306 // Given the textData and a list of phrase numbers, 307 // print out each of the words.308 309 void print_expansions(char *cgi_script, char *collection, bool XMLmode, 310 TextData &textdata, vector <unsigned long> dlist,308 // Given the textData and a list of phrase numbers, print out each of the 309 // expansions. 
310 311 void print_expansions(char *cgi_script, char *collection, bool XMLmode, UCArray body, 312 TextData &textdata, vector <unsigned long> elist, 311 313 unsigned long first, unsigned long last) { 312 314 313 315 UCArray word; 314 316 unsigned long phrase, tf, df, ef; 317 318 UCArray suffix, prefix; 315 319 316 320 for (unsigned long e = first; e < last; e++) { 317 321 318 phrase = dlist[e];322 phrase = elist[e]; 319 323 get_phrase_freq_data(textdata, phrase, word, tf, ef, df); 324 325 split_phrase(word, body, prefix, suffix); 320 326 321 327 if (XMLmode) { 322 328 cout << "<expansion num=\"" << e 323 329 << "\" id=\"" << phrase 330 << "\" prefix=\"" << prefix 331 << "\" suffix=\"" << suffix 324 332 << "\" text=\"" << word 325 333 << "\" tf=\"" << tf 326 334 << "\" df=\"" << df << "\"/>" << endl; 327 335 } else { 328 cout << "<tr valign=top><td><a href='" << cgi_script << "?c=" << collection 329 << "&n=" << phrase << "'>" << word << "</a>" 330 << "</td><td>" << tf << "</td><td>" << df << "</td></tr>" 331 << endl; 336 cout << "<tr valign=top><td align=right><a href='" << cgi_script 337 << "?c=" << collection << "&n=" << phrase << "'>" << prefix << "</a></td>" 338 << "<td align=center><a href='" << cgi_script 339 << "?c=" << collection << "&n=" << phrase << "'>" << body << "</a></td>" 340 << "<td align=left><a href='" << cgi_script 341 << "?c=" << collection << "&n=" << phrase << "'>" << suffix << "</a></td>" 342 << "<td>" << tf << "</td><td>" << df << "</td></tr>" << endl; 332 343 } 333 344 } … … 374 385 // The phrase is stored in textData as record phrase. 
375 386 // We retrieve: 376 // word - the text o dthe phrase387 // word - the text of the phrase 377 388 // tf - the total frequency of the phrase 378 389 // ef - the expansion frequency of the phrase … … 648 659 } 649 660 661 // d: the last document number 662 else if (key[0] == 'd') { 663 last_d = toLongInt(value); 664 } 665 666 // e: the last expansion number 667 else if (key[0] == 'e') { 668 last_e = toLongInt(value); 669 } 670 671 // f: the first document number 672 else if (key[0] == 'f') { 673 first_d = toLongInt(value); 674 } 675 676 // g: the first expansion number 677 else if (key[0] == 'g') { 678 first_e = toLongInt(value); 679 } 680 681 // x: XML mode 682 else if (key[0] == 'x') { 683 XMLmode = true; 684 } 685 650 686 // n: the phrase number 651 687 else if (key[0] == 'n') { … … 656 692 else if (key[0] == 'p') { 657 693 toUCArray(value, phrasetext); 658 }659 660 // d: the last document number661 else if (key[0] == 'd') {662 last_d = toLongInt(value);663 }664 665 // e: the last expansion number666 else if (key[0] == 'e') {667 last_e = toLongInt(value);668 }669 670 // f: the first document number671 else if (key[0] == 'f') {672 first_d = toLongInt(value);673 }674 675 // g: the first expansion number676 else if (key[0] == 'g') {677 first_e = toLongInt(value);678 }679 680 // x: XML mode681 else if (key[0] == 'x') {682 XMLmode = true;683 694 } 684 695 … … 747 758 748 759 760 // split an expansion into prefix and suffix 761 762 void split_phrase(UCArray word, UCArray body, UCArray &prefix, UCArray &suffix) { 763 764 prefix.clear(); 765 suffix.clear(); 766 767 bool readingPrefix = true; 768 UCArray::iterator here = word.begin(); 769 UCArray::iterator end = word.end(); 770 771 while (here != end) { 772 773 // if we've not read all the prefix, add the next char to the prefix 774 if (readingPrefix) { 775 if (phrase_match(body, here, end)) { 776 readingPrefix = false; 777 // trim whitespace from end of prefix & start of suffix 778 if (!prefix.empty()) { 779 
prefix.pop_back(); 780 } 781 while (*here == ' ') { 782 here++; 783 } 784 } else { 785 prefix.push_back(*here); 786 here++; 787 } 788 } 789 // if we've finished with the prefix, update the suffix 790 else { 791 suffix.push_back(*here); 792 here++; 793 } 794 } 795 } 796 797 // phrase_match 798 // 799 // compare two strings, one represented as an UCArray, the other as two 800 // UCArray iterators. 801 // 802 // Return true if the UCArray is the same as the phrase the iterator points 803 // too for the length of the UCArray. 804 805 bool phrase_match(UCArray text, UCArray::iterator &here, UCArray::iterator end) { 806 807 UCArray::iterator one_here = text.begin(); 808 UCArray::iterator one_end = text.end(); 809 UCArray::iterator two_here = here; 810 811 // iterate over the length of the first string, comparing each element to 812 // the corresponding element in the second string. 813 while (one_here != one_end) { 814 if (*one_here != *two_here) { 815 return false; 816 } 817 one_here++; 818 two_here++; 819 } 820 821 here = two_here; 822 return true; 823 } 824 825 749 826 // Convert from text_t format 750 827 //
Note: See TracChangeset for help on using the changeset viewer.