Changeset 1828
- Timestamp:
- 2001-01-11T10:09:17+13:00 (23 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/phind/host/phindcgi.cpp
r1809 r1828 66 66 unsigned long &phrasenumber, UCArray &phrasetext, 67 67 unsigned long &first_e, unsigned long &last_e, 68 unsigned long &first_l, unsigned long &last_l, 68 69 unsigned long &first_d, unsigned long &last_d, 69 70 bool &XMLmode); … … 72 73 TextData &textdata, vector <unsigned long> elist, 73 74 unsigned long first, unsigned long last); 75 76 void print_thesaurus_links(char *cgi_script, char *collection, 77 bool XMLmode, UCArray body, TextData &textdata, 78 vector <unsigned long> &linkdest, 79 vector <UCArray> &linktype, 80 unsigned long first, unsigned long last); 74 81 75 82 void print_documents(bool XMLmode, char *basepath, char *cgi_script, … … 86 93 87 94 void get_phrase_all_data(TextData &textdata, unsigned long phrase, 88 UCArray &word, unsigned long &tf, 89 unsigned long &ef, unsigned long &df, 90 vector <unsigned long> &el, 95 UCArray &word, 96 unsigned long &tf, unsigned long &ef, 97 unsigned long &lf, unsigned long &df, 98 vector <unsigned long> &el, 99 vector <unsigned long> &linkdest, 100 vector <UCArray> &linktype, 91 101 vector <unsigned long> &docnum, 92 102 vector <unsigned long> &docfrq); … … 113 123 114 124 // the frequency and occurances of the phrase 115 unsigned long tf, ef, df; 116 vector <unsigned long> el, docNums, docfreq; 125 unsigned long tf; 126 vector <unsigned long> el, linkdest, docNums, docfreq; 127 vector <UCArray> linktype; 117 128 118 129 // the number of occurances to display 119 unsigned long first_e, last_e, count_e, first_d, last_d, count_d; 130 unsigned long ef, first_e, last_e, count_e, 131 lf, first_l, last_l, count_l, 132 df, first_d, last_d, count_d; 120 133 121 134 // are we in XML mode (as opposed to HTML mode) … … 134 147 text_tmap param; 135 148 get_cgi_parameters(collection, phrase, word, 136 first_e, last_e, first_ d, last_d, XMLmode);149 first_e, last_e, first_l, last_l, first_d, last_d, XMLmode); 137 150 138 151 if (collection == NULL) { … … 170 183 FatalError (1, "Couldn't load text information for \"%s\"", filename); 171 184 } 172 get_phrase_all_data(textdata, phrase, word, tf, ef, df, el, docNums, docfreq); 185 get_phrase_all_data(textdata, phrase, word, tf, ef, lf, df, el, 186 linkdest, linktype, docNums, docfreq); 173 187 174 188 … … 179 193 << "\" text=\"" << word 180 194 << "\" tf=\"" << tf 195 << "\" ef=\"" << ef 181 196 << "\" df=\"" << df 182 << "\" ef=\"" << ef197 << "\" lf=\"" << lf 183 198 << "\">" << endl; 184 199 } else { … … 190 205 << tf << " times in " << df << " documents" << endl; 191 206 } 207 208 209 // Output the thesaurus links 210 if ((lf > 0) && (first_l < last_l)) { 211 212 // figure out the number of phrases to output 213 if (last_l > lf) { 214 last_l = lf; 215 } 216 count_l = last_l - first_l; 217 218 if (XMLmode) { 219 cout << "<thesauruslist length=\"" << lf 220 << "\" start=\"" << first_l 221 << "\" end=\"" << last_l << "\">" << endl; 222 print_thesaurus_links(argv[0], collection, XMLmode, word, textdata, 223 linkdest, linktype, first_l, last_l); 224 cout << "</thesauruslist>" << endl; 225 } 226 227 // output links as HTML 228 else { 229 if (count_l == lf) { 230 cout << "<p><b> " << count_l << " thesaurus links</b>" << endl; 231 } else { 232 cout << "<p><b>" << count_l << " of " << lf << " thesaurus links</b>" << endl; 233 } 234 235 cout << "<p><table border=1><tr><th>type</th><th>topic</th><th>freq</th><th>docs</th></tr>" << endl; 236 print_thesaurus_links(argv[0], collection, XMLmode, word, textdata, 237 linkdest, linktype, first_l, last_l); 238 239 cout << "</table>" << endl; 240 241 if (last_l < lf) { 242 if ((last_l + 10) < lf) { 243 cout << "<br><a href='" << argv[0] 244 << "?c=" << collection 245 << "&n=" << phrase 246 << "&e=" << first_e 247 << "&f=" << last_e 248 << "&h=" << first_d 249 << "&i=" << last_d 250 << "&k=" << first_l 251 << "&l=" << (last_l + 10) 252 << "'>Get more thesaurus links</a>" 253 << endl; 254 } 255 cout << "<br><a href='" << argv[0] 256 << "?c=" << collection 257 << "&n=" << phrase 258 << "&e=" << first_e 259 << "&f=" << last_e 260 << "&h=" << first_d 261 << "&i=" << last_d 262 << "&k=" << first_l 263 << "&l=" << lf 264 << "'>Get every thesaurus link</a>" 265 << endl; 266 } 267 } 268 269 } 192 270 193 271 // Output the expansions … … 219 297 } 220 298 221 cout << "<p><table border= 0><tr><th align=left>Phrase</th><th>freq</th><th>docs</th></tr>" << endl;299 cout << "<p><table border=1><tr><th colspan=3>phrase</th><th>freq</th><th>docs</th></tr>" << endl; 222 300 print_expansions(argv[0], collection, XMLmode, word, textdata, el, first_e, last_e); 223 301 cout << "</table>" << endl; 224 302 225 if (last_e < el.size()) { 303 if (last_e < ef) { 304 if ((last_e + 10) < ef) { 305 cout << "<br><a href='" << argv[0] 306 << "?c=" << collection 307 << "&n=" << phrase 308 << "&e=" << first_e 309 << "&f=" << (last_e + 10) 310 << "&h=" << first_d 311 << "&i=" << last_d 312 << "&k=" << first_l 313 << "&l=" << last_l 314 << "'>Get more expansions</a>" 315 << endl; 316 } 226 317 cout << "<br><a href='" << argv[0] 227 << "?c=" << collection << "&n=" << phrase 228 << "&e=" << (last_e + 10) << "&d=" << last_d 229 << "&g=" << first_e << "&f=" << first_d 230 << "'>Get more phrases</a>" 231 << endl 232 << "<br><a href='" << argv[0] 233 << "?c=" << collection << "&n=" << phrase 234 << "&e=" << el.size() << "&d=" << last_d 235 << "&g=" << first_e << "&f=" << first_d 236 << "'>Get every phrase</a>" 318 << "?c=" << collection 319 << "&n=" << phrase 320 << "&e=" << first_e 321 << "&f=" << ef 322 << "&h=" << first_d 323 << "&i=" << last_d 324 << "&k=" << first_l 325 << "&l=" << last_l 326 << "'>Get every expansion</a>" 237 327 << endl; 238 328 } … … 270 360 } 271 361 272 cout << "<p><table ><tr><th align=left>Document</th><th>freq</th></tr>" << endl;362 cout << "<p><table border=1><tr><th align=left>document</th><th>freq</th></tr>" << endl; 273 363 print_documents(XMLmode, basepath, "library", collection, 274 364 docNums, docfreq, first_d, last_d); 275 365 cout << "</table>" << endl; 276 366 277 if (last_d < docNums.size()) { 367 if (last_d < df) { 368 if ((last_d + 10) < df) { 369 cout << "<br><a href='" << argv[0] 370 << "?c=" << collection 371 << "&n=" << phrase 372 << "&e=" << first_e 373 << "&f=" << last_e 374 << "&h=" << first_d 375 << "&i=" << (last_d + 10) 376 << "&k=" << first_l 377 << "&l=" << last_l 378 << "'>Get more documents</a>" << endl; 379 } 278 380 cout << "<br><a href='" << argv[0] 279 << "?c=" << collection << "&n=" << phrase 280 << "&e=" << last_e << "&d=" << (last_d + 10) 281 << "&g=" << first_e << "&f=" << first_d 282 << "'>Get more documents</a>" << endl 283 << "<br><a href='" << argv[0] 284 << "?c=" << collection << "&n=" << phrase 285 << "&g=" << first_e 286 << "&e=" << last_e 287 << "&f=" << first_d 288 << "&d=" << docNums.size() 381 << "?c=" << collection 382 << "&n=" << phrase 383 << "&e=" << first_e 384 << "&f=" << last_e 385 << "&h=" << first_d 386 << "&i=" << df 387 << "&k=" << first_l 388 << "&l=" << last_l 289 389 << "'>Get every document</a>" << endl; 290 390 } … … 328 428 // body is always the same as the text of the phrase, so no need to send it 329 429 cout << "<expansion num=\"" << e 330 << "\" id=\"" << phrase 331 << "\" prefix=\"" << prefix 332 << "\" suffix=\"" << suffix 430 << "\" id=\"" << phrase 333 431 << "\" tf=\"" << tf 334 << "\" df=\"" << df << "\"/>" << endl; 432 << "\" df=\"" << df; 433 if (!prefix.empty()) { 434 cout << "\" prefix=\"" << prefix; 435 } 436 if (!suffix.empty()) { 437 cout << "\" suffix=\"" << suffix; 438 } 439 cout << "\"/>" << endl; 335 440 } else { 336 441 cout << "<tr valign=top><td align=right><a href='" << cgi_script … … 344 449 } 345 450 } 451 452 void print_thesaurus_links(char *cgi_script, char *collection, 453 bool XMLmode, UCArray body, TextData &textdata, 454 vector <unsigned long> &linkdest, 455 vector <UCArray> &linktype, 456 unsigned long first, unsigned long last) { 457 458 // information describing each link in the list 459 unsigned long phrase, tf, ef, df; 460 UCArray type, text, newbody, suffix, prefix; 461 462 for (unsigned long l = first; l < last; l++) { 463 464 // get the phrase data 465 phrase = linkdest[l]; 466 type = linktype[l]; 467 get_phrase_freq_data(textdata, phrase, text, tf, ef, df); 468 // split_phrase(text, newbody, prefix, suffix); 469 470 if (XMLmode) { 471 cout << "<thesaurus num=\"" << l 472 << "\" id=\"" << phrase 473 << "\" tf=\"" << tf 474 << "\" df=\"" << df 475 << "\" type=\"" << type 476 << "\" text=\"" << text 477 << "\"/>" << endl; 478 } else { 479 cout << "<tr valign=top><td>" << type << "</td><td>" 480 << "<a href='" << cgi_script << "?c=" << collection 481 << "&n=" << phrase << "'>" << text << "</a>" 482 << "</td><td>" << tf << "</td><td>" << df << "</td></tr>" << endl; 483 } 484 } 485 } 486 346 487 347 488 void print_documents(bool XMLmode, char *basepath, char *cgi_script, char *collection, … … 400 541 // Look the word up in the textData 401 542 if (!GetDocText (textdata, docLevel, phrase, text)) { 402 FatalError (1, "Error while trying to get document%u", phrase);543 FatalError (1, "Error while trying to get phrase %u", phrase); 403 544 } 404 545 … … 439 580 // The phrase is stored in textData as record phrase. 440 581 // We retrieve: 441 // word - the text o dthe phrase582 // word - the text of the phrase 442 583 // tf - the total frequency of the phrase 443 584 // ef - the expansion frequency of the phrase 585 // lf - the thesaurus link frequency of the phrase 444 586 // df - the document frequency of the phrase 445 587 // el - the list of phrases that are expansions of phrase 588 // ll - the list of phrases that are thesaurus links 446 589 // dl - the list of documents that contain phrase 447 590 448 591 void get_phrase_all_data(TextData &textdata, unsigned long phrase, 449 UCArray &word, unsigned long &tf, 450 unsigned long &ef, unsigned long &df, 451 vector <unsigned long> &el, 592 UCArray &word, 593 unsigned long &tf, unsigned long &ef, 594 unsigned long &lf, unsigned long &df, 595 vector <unsigned long> &el, 596 vector <unsigned long> &linkdest, 597 vector <UCArray> &linktype, 452 598 vector <unsigned long> &docnum, 453 599 vector <unsigned long> &docfrq) { … … 464 610 UCArray::iterator next = text.begin(); 465 611 while (*next++ != ':'); 612 613 // ignore training cariage returns 614 while (text.back() == '\n') { 615 text.pop_back(); 616 } 466 617 467 618 // Get the word … … 504 655 } 505 656 } 506 el.push_back(e);507 657 508 658 // Get document list & the document frequency list 509 while (text.back() == '\n') {510 text.pop_back();511 }512 text.push_back(';');513 text.push_back(':');514 659 docnum.clear(); 515 660 docfrq.clear(); … … 535 680 } 536 681 } 682 683 // Get thesaurus link frequency & link list 684 text.push_back(':'); 685 text.push_back(':'); 686 687 // link frequency 688 lf = 0; 689 for (next++; *next != ':'; next++) { 690 lf *= 10; 691 lf += (*next - '0'); 692 } 693 694 // two lists of link data 695 linkdest.clear(); 696 linktype.clear(); 697 698 UCArray thistype; 699 thistype.clear(); 700 bool typedone = false; 701 unsigned long l = 0; 702 for (next++; *next != ':'; next++) { 703 704 if (!typedone) { 705 // first read the link type, a charactor string 706 if (*next == ',') { 707 typedone = true; 708 } else { 709 thistype.push_back(*next); 710 } 711 } else { 712 // having read the link type, read the list of link destinations 713 if (*next == ',') { 714 linkdest.push_back(l); 715 linktype.push_back(thistype); 716 l = 0; 717 } else if (*next == ';') { 718 linkdest.push_back(l); 719 linktype.push_back(thistype); 720 l = 0; 721 thistype.clear(); 722 typedone = false; 723 } else { 724 l *= 10; 725 l += (*next - '0'); 726 } 727 } 728 } 537 729 } 538 730 … … 608 800 unsigned long &phrasenumber, UCArray &phrasetext, 609 801 unsigned long &first_e, unsigned long &last_e, 802 unsigned long &first_l, unsigned long &last_l, 610 803 unsigned long &first_d, unsigned long &last_d, 611 804 bool &XMLmode) { … … 617 810 first_e = 0; 618 811 last_e = 10; 812 first_l = 0; 813 last_l = 10; 619 814 first_d = 0; 620 815 last_d = 10; … … 659 854 } 660 855 661 // d: the last document number 662 else if (key[0] == 'd') { 856 // e: the first expansion number 857 else if (key[0] == 'e') { 858 first_e = toLongInt(value); 859 } 860 861 // f: the last expansion number 862 else if (key[0] == 'f') { 863 last_e = toLongInt(value); 864 } 865 866 // h: the first document number 867 else if (key[0] == 'h') { 868 first_d = toLongInt(value); 869 } 870 871 // i: the last document number 872 else if (key[0] == 'i') { 663 873 last_d = toLongInt(value); 664 874 } 665 875 666 // e: the last expansion number 667 else if (key[0] == 'e') { 668 last_e = toLongInt(value); 669 } 670 671 // f: the first document number 672 else if (key[0] == 'f') { 673 first_d = toLongInt(value); 674 } 675 676 // g: the first expansion number 677 else if (key[0] == 'g') { 678 first_e = toLongInt(value); 679 } 680 681 // x: XML mode 682 else if (key[0] == 'x') { 683 XMLmode = true; 876 // k: the first thesaurus list number 877 else if (key[0] == 'k') { 878 first_l = toLongInt(value); 879 } 880 881 // l: the last thesaurus list number 882 else if (key[0] == 'l') { 883 last_l = toLongInt(value); 684 884 } 685 885 … … 692 892 else if (key[0] == 'p') { 693 893 toUCArray(value, phrasetext); 894 } 895 896 // x: XML mode 897 else if (key[0] == 'x') { 898 XMLmode = true; 694 899 } 695 900
Note:
See TracChangeset
for help on using the changeset viewer.