Changeset 1619
- Timestamp:
- 2000-10-27T09:23:55+13:00 (24 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/phind/host/phindcgi.cpp
r1603 r1619 1 // phindcgi.cpp 2 3 // The program itself reads request for phind data from STDIN, 4 // looks up the phrase's charatoristics in the mgpp files, and 5 // reports output to STDOUT. 6 1 /********************************************************************** 2 * 3 * phindcgi.cpp -- cgi program to serve phind phrase hierarchies 4 * 5 * Copyright 2000 Gordon Paynter 6 * Copyright 2000 The New Zealand Digital Library Project 7 * 8 * 9 * A component of the Greenstone digital library software 10 * from the New Zealand Digital Library Project at the 11 * University of Waikato, New Zealand. 12 * 13 * This program is free software; you can redistribute it and/or modify 14 * it under the terms of the GNU General Public License as published by 15 * the Free Software Foundation; either version 2 of the License, or 16 * (at your option) any later version. 17 * 18 * This program is distributed in the hope that it will be useful, 19 * but WITHOUT ANY WARRANTY; without even the implied warranty of 20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 * GNU General Public License for more details. 22 * 23 * You should have received a copy of the GNU General Public License 24 * along with this program; if not, write to the Free Software 25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 26 * 27 *********************************************************************/ 28 29 /* 30 * phindcgi.cpp 31 * 32 * The program itself reads request for a phrase's data from the 33 * QUERY_STRING variable, looks up the phrase (if necessary) in the MGPP 34 * pword database, then looks up the phrase's charatoristics in the MGPP 35 * pdata database, and reports output to STDOUT ar crude HTML or XML. 36 * 37 */ 7 38 8 39 … … 15 46 #include <vector.h> 16 47 #include <algo.h> 17 18 48 19 49 // Include MGPP functionality. … … 36 66 unsigned long &phrasenumber, UCArray &phrasetext, 37 67 unsigned long &first_e, unsigned long &last_e, 38 unsigned long &first_d, unsigned long &last_d); 39 40 void find_phrase_number_from_word(char *basepath, UCArray &query, DocNumArray &result); 41 42 void print_word_tf_df(char *cgi_script, char *collection, 68 unsigned long &first_d, unsigned long &last_d, 69 bool &XMLmode); 70 71 void print_expansions(char *cgi_script, char *collection, bool XMLmode, 43 72 TextData &textdata, vector <unsigned long> dlist, 44 73 unsigned long first, unsigned long last); 45 74 46 void print_document_df(char *basepath, char *cgi_script, char *collection, 47 vector <unsigned long> docNums, 48 vector <unsigned long> docFreq, 49 unsigned long first, unsigned long last); 50 75 void print_documents(bool XMLmode, char *basepath, char *cgi_script, 76 char *collection, 77 vector <unsigned long> docNums, 78 vector <unsigned long> docFreq, 79 unsigned long first, unsigned long last); 80 81 void find_phrase_number_from_word(char *basepath, UCArray &query, DocNumArray &result); 82 51 83 void get_phrase_freq_data(TextData &textdata, unsigned long phrase, 52 84 UCArray &word, unsigned long &tf, … … 57 89 unsigned long &ef, unsigned long &df, 58 90 vector <unsigned long> &el, 59 vector <unsigned long> &docnum, vector <unsigned long> &docfrq); 91 vector <unsigned long> &docnum, 92 vector <unsigned long> &docfrq); 60 93 61 94 void get_document_all_data(TextData &docdata, unsigned long docNum, … … 81 114 82 115 // the number of occurances to display 83 unsigned long first_e, last_e, first_d, last_d; 84 116 unsigned long first_e, last_e, count_e, first_d, last_d, count_d; 117 118 // are we in XML mode (as opposed to HTML mode) 119 bool XMLmode = false; 120 85 121 // Read the gsdlsite.cfg file 86 122 char *gsdlhome = NULL; … … 94 130 char *collection; 95 131 text_tmap param; 96 get_cgi_parameters(collection, phrase, word, first_e, last_e, first_d, last_d); 132 get_cgi_parameters(collection, phrase, word, 133 first_e, last_e, first_d, last_d, XMLmode); 97 134 98 135 if (collection == NULL) { … … 133 170 134 171 135 // Output the HTML page 136 cout << "Content-type: text/html" << endl << endl 137 << "<html><head><title>" << word << "</title></head>" << endl 138 << "<body><center>" << endl 139 << "<p><h1>" << word << "</h1>" << endl 140 << "<p><b>"<< word << "</b> occurs " << tf << " times in " << df << " documents" << endl; 172 // Output the header 173 if (XMLmode) { 174 cout << "Content-type: text/plain" << endl << endl 175 << "<phinddata id=\"" << phrase 176 << "\" text=\"" << word 177 << "\" df=\"" << df 178 << "\" ef=\"" << ef 179 << "\">" << endl; 180 } else { 181 cout << "Content-type: text/html" << endl << endl 182 << "<html><head><title>" << word << "</title></head>" << endl 183 << "<body><center>" << endl 184 << "<p><h1>" << word << "</h1>" << endl 185 << "<p><b>"<< word << "</b> occurs " 186 << tf << " times in " << df << " documents" << endl; 187 } 141 188 142 189 // Output the expansions … … 147 194 last_e = el.size(); 148 195 } 149 150 if (last_e == el.size()) { 151 cout << "<p><b> " << last_e << " expansions</b>" << endl; 152 } else { 153 cout << "<p><b>" << last_e << " of " << ef << " expansions</b>" << endl; 154 } 155 156 cout << "<p><table><tr><th align=left>Phrase</th><th>freq</th><th>docs</th></tr>" << endl; 157 print_word_tf_df(argv[0], collection, textdata, el, first_e, last_e); 158 cout << "</table>" << endl; 159 160 if (last_e < el.size()) { 161 cout << "<br><a href='" << argv[0] 162 << "?c=" << collection << "&n=" << phrase 163 << "&e=" << (last_e + 10) << "&d=" << last_d 164 << "'>Get more phrases</a>" 165 << endl 166 << "<br><a href='" << argv[0] 167 << "?c=" << collection << "&n=" << phrase 168 << "&e=0&d=" << last_d 169 << "'>Get every phrase</a>" 170 << endl; 196 count_e = last_e - first_e; 197 198 // output expansions as XML 199 if (XMLmode) { 200 cout << "<expansionlist length=\"" << ef 201 << "\" start=\"" << first_e 202 << "\" end=\"" << last_e << "\">" << endl; 203 204 print_expansions(argv[0], collection, XMLmode, textdata, el, first_e, last_e); 205 206 207 cout << "</expansionlist>" << endl; 208 } 209 210 // output expansions as HTML 211 else { 212 if (count_e == el.size()) { 213 cout << "<p><b> " << count_e << " expansions</b>" << endl; 214 } else { 215 cout << "<p><b>" << count_e << " of " << ef << " expansions</b>" << endl; 216 } 217 218 cout << "<p><table><tr><th align=left>Phrase</th><th>freq</th><th>docs</th></tr>" << endl; 219 print_expansions(argv[0], collection, XMLmode, textdata, el, first_e, last_e); 220 cout << "</table>" << endl; 221 222 if (last_e < el.size()) { 223 cout << "<br><a href='" << argv[0] 224 << "?c=" << collection << "&n=" << phrase 225 << "&e=" << (last_e + 10) << "&d=" << last_d 226 << "&g=" << first_e << "&f=" << first_d 227 << "'>Get more phrases</a>" 228 << endl 229 << "<br><a href='" << argv[0] 230 << "?c=" << collection << "&n=" << phrase 231 << "&e=0&d=" << last_d 232 << "&g=" << first_e << "&f=" << first_d 233 << "'>Get every phrase</a>" 234 << endl; 235 } 171 236 } 172 237 } … … 179 244 last_d = docNums.size(); 180 245 } 181 182 if (last_d == docNums.size()) { 183 cout << "<p><b> " << last_d << " documents</b>" << endl; 184 } else { 185 cout << "<p><b>" << last_d << " of " << df << " documents</b>" << endl; 186 } 187 188 cout << "<p><table><tr><th align=left>Document</th><th>freq</th></tr>" << endl; 189 print_document_df(basepath, "library", collection, docNums, docfreq, first_d, last_d); 190 cout << "</table>" << endl; 191 192 if (last_d < docNums.size()) { 193 cout << "<br><a href='" << argv[0] 194 << "?c=" << collection << "&n=" << phrase 195 << "&e=" << last_e << "&d=" << (last_d + 10) 196 << "'>Get more documents</a>" << endl 197 << "<br><a href='" << argv[0] 198 << "?c=" << collection << "&n=" << phrase 199 << "&e=" << last_e 200 << "&d=0'>Get every document</a>" << endl; 201 } 202 203 246 count_d = last_d - first_d; 247 248 // output document list as XML 249 if (XMLmode) { 250 cout << "<documentlist length=\"" << df 251 << "\" start=\"" << first_d 252 << "\" end=\"" << last_d << "\">" << endl; 253 254 print_documents(XMLmode, basepath, "library", collection, 255 docNums, docfreq, first_d, last_d); 256 257 cout << "</documentlist>" << endl; 258 } 259 260 // output document list as HTML 261 else { 262 263 if (count_d == docNums.size()) { 264 cout << "<p><b> " << count_d << " documents</b>" << endl; 265 } else { 266 cout << "<p><b>" << count_d << " of " << df << " documents</b>" << endl; 267 } 268 269 cout << "<p><table><tr><th align=left>Document</th><th>freq</th></tr>" << endl; 270 print_documents(XMLmode, basepath, "library", collection, 271 docNums, docfreq, first_d, last_d); 272 cout << "</table>" << endl; 273 274 if (last_d < docNums.size()) { 275 cout << "<br><a href='" << argv[0] 276 << "?c=" << collection << "&n=" << phrase 277 << "&e=" << last_e << "&d=" << (last_d + 10) 278 << "&g=" << first_e << "&f=" << first_d 279 << "'>Get more documents</a>" << endl 280 << "<br><a href='" << argv[0] 281 << "?c=" << collection << "&n=" << phrase 282 << "&e=" << last_e 283 << "&g=" << first_e << "&f=" << first_d 284 << "&d=0'>Get every document</a>" << endl; 285 } 286 } 287 } 288 289 // Close the document 290 if (XMLmode) { 291 cout << "</phinddata>" << endl; 292 } else { 204 293 cout << "</center></body></html>" << endl; 205 294 } … … 215 304 // print out each of the words. 216 305 217 void print_ word_tf_df(char *cgi_script, char *collection,306 void print_expansions(char *cgi_script, char *collection, bool XMLmode, 218 307 TextData &textdata, vector <unsigned long> dlist, 219 308 unsigned long first, unsigned long last) { … … 227 316 get_phrase_freq_data(textdata, phrase, word, tf, ef, df); 228 317 229 230 cout << "<tr valign=top><td><a href='" << cgi_script << "?c=" << collection 231 << "&n=" << phrase << "'>" << word << "</a>" 232 << "</td><td>" << tf << "</td><td>" << df << "</td></tr>" 233 << endl; 234 } 235 } 236 237 void print_document_df(char *basepath, char *cgi_script, char *collection, 238 vector <unsigned long> docNums, vector <unsigned long> docFreq, 239 unsigned long first, unsigned long last) { 318 if (XMLmode) { 319 cout << "<expansion num=\"" << e 320 << "\" id=\"" << phrase 321 << "\" text=\"" << word 322 << "\" tf=\"" << tf 323 << "\" df=\"" << df << "\"/>" << endl; 324 } else { 325 cout << "<tr valign=top><td><a href='" << cgi_script << "?c=" << collection 326 << "&n=" << phrase << "'>" << word << "</a>" 327 << "</td><td>" << tf << "</td><td>" << df << "</td></tr>" 328 << endl; 329 } 330 } 331 } 332 333 void print_documents(bool XMLmode, char *basepath, char *cgi_script, char *collection, 334 vector <unsigned long> docNums, vector <unsigned long> docFreq, 335 unsigned long first, unsigned long last) { 240 336 241 337 // Create a TextData object to read the document data … … 255 351 get_document_all_data(docdata, doc, title, hash); 256 352 257 cout << "<tr valign=top><td><a href='" << cgi_script << "?c=" << collection 258 << "&a=d&d=" << hash << "'>" << title << "</a>" 259 << "</td><td>" << freq << "</td></tr>" 260 << endl; 353 if (XMLmode) { 354 cout << "<document num=\"" << d 355 << "\" hash=\"" << hash 356 << "\" freq=\"" << freq 357 << "\" title=\"" << title << "\"/>" << endl; 358 } else { 359 cout << "<tr valign=top><td><a href='" << cgi_script << "?c=" << collection 360 << "&a=d&d=" << hash << "'>" << title << "</a>" 361 << "</td><td>" << freq << "</td></tr>" 362 << endl; 363 } 261 364 } 262 365 } … … 390 493 391 494 // Get document list & the document frequency list 495 while (text.back() == '\n') { 496 text.pop_back(); 497 } 498 text.push_back(';'); 392 499 text.push_back(':'); 393 500 docnum.clear(); … … 432 539 // Look the word up in the textData 433 540 if (!GetDocText (docdata, docLevel, docNum, text)) { 434 FatalError (1, "Error while trying to get phrase%u", docNum);541 FatalError (1, "Error while trying to get document %u", docNum); 435 542 } 436 543 … … 439 546 while (*next++ != '\t'); 440 547 441 // Get the title548 // Get the document OID (hash) 442 549 hash.clear(); 443 550 for (; *next != '\t'; next++) { … … 446 553 447 554 // Get the title 448 text.push_back('\ t');555 text.push_back('\n'); 449 556 title.clear(); 450 for (next++; *next != '\ t'; next++) {557 for (next++; *next != '\n'; next++) { 451 558 title.push_back(*next); 452 559 } … … 487 594 unsigned long &phrasenumber, UCArray &phrasetext, 488 595 unsigned long &first_e, unsigned long &last_e, 489 unsigned long &first_d, unsigned long &last_d) { 596 unsigned long &first_d, unsigned long &last_d, 597 bool &XMLmode) { 490 598 491 599 … … 538 646 539 647 // n: the phrase number 540 if (key[0] == 'n') {648 else if (key[0] == 'n') { 541 649 phrasenumber = toLongInt(value); 542 650 } … … 547 655 } 548 656 657 // d: the last document number 658 else if (key[0] == 'd') { 659 last_d = toLongInt(value); 660 } 661 549 662 // e: the last expansion number 550 if (key[0] == 'e') {663 else if (key[0] == 'e') { 551 664 last_e = toLongInt(value); 552 665 } 553 666 554 // d: the last document number 555 if (key[0] == 'd') { 556 last_d = toLongInt(value); 667 // f: the first document number 668 else if (key[0] == 'f') { 669 first_d = toLongInt(value); 670 } 671 672 // g: the first expansion number 673 else if (key[0] == 'g') { 674 first_e = toLongInt(value); 675 } 676 677 // x: XML mode 678 else if (key[0] == 'x') { 679 XMLmode = true; 557 680 } 558 681
Note:
See TracChangeset
for help on using the changeset viewer.