Context Navigation

← Previous Changeset
Next Changeset →

Changeset 1619

Timestamp:

2000-10-27T09:23:55+13:00 (24 years ago)

Author:

paynter

Message:

Added an XML output mode. Fixed a bug reading the last item of the
document list. A few other fixes & some tidying.

File:

: 1 edited

trunk/gsdl/src/phind/host/phindcgi.cpp (modified) (19 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/gsdl/src/phind/host/phindcgi.cpp

-              r1603
+              r1619
+// phindcgi.cpp
+// The program itself reads requests for phind data from STDIN,
+// looks up the phrase's characteristics in the mgpp files, and
+// reports output to STDOUT.
+/**********************************************************************
+ *
+ * phindcgi.cpp -- cgi program to serve phind phrase hierarchies
+ *
+ * Copyright 2000 Gordon Paynter
+ * Copyright 2000 The New Zealand Digital Library Project
+ *
+ *
+ * A component of the Greenstone digital library software
+ * from the New Zealand Digital Library Project at the
+ * University of Waikato, New Zealand.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *********************************************************************/
+/*
+ * phindcgi.cpp
+ *
+ * The program itself reads a request for a phrase's data from the
+ * QUERY_STRING variable, looks up the phrase (if necessary) in the MGPP
+ * pword database, then looks up the phrase's characteristics in the MGPP
+ * pdata database, and reports output to STDOUT as crude HTML or XML.
+ *
+ */
 …
 #include <vector.h>
 #include <algo.h>
 // Include MGPP functionality.
 …
             unsigned long &phrasenumber, UCArray &phrasetext,
             unsigned long &first_e, unsigned long &last_e,
+            unsigned long &first_d, unsigned long &last_d);
+void find_phrase_number_from_word(char *basepath, UCArray &query, DocNumArray &result);
+void print_word_tf_df(char *cgi_script, char *collection,
+            unsigned long &first_d, unsigned long &last_d,
+            bool &XMLmode);
+void print_expansions(char *cgi_script, char *collection, bool XMLmode,
               TextData &textdata, vector <unsigned long> dlist,
               unsigned long first, unsigned long last);
+void print_document_df(char *basepath, char *cgi_script, char *collection,
+               vector <unsigned long> docNums,
+               vector <unsigned long> docFreq,
+               unsigned long first, unsigned long last);
+void print_documents(bool XMLmode, char *basepath, char *cgi_script,
+             char *collection,
+             vector <unsigned long> docNums,
+             vector <unsigned long> docFreq,
+             unsigned long first, unsigned long last);
+void find_phrase_number_from_word(char *basepath, UCArray &query, DocNumArray &result);
 void get_phrase_freq_data(TextData &textdata, unsigned long phrase,
               UCArray &word, unsigned long &tf,
 …
              unsigned long &ef, unsigned long &df,
              vector <unsigned long> &el,
+             vector <unsigned long> &docnum, vector <unsigned long> &docfrq);
+             vector <unsigned long> &docnum,
+             vector <unsigned long> &docfrq);
 void get_document_all_data(TextData &docdata, unsigned long docNum,
 …
   // the number of occurrences to display
+  unsigned long first_e, last_e, first_d, last_d;
+  unsigned long first_e, last_e, count_e, first_d, last_d, count_d;
+  // are we in XML mode (as opposed to HTML mode)
+  bool XMLmode = false;
   // Read the gsdlsite.cfg file
   char *gsdlhome = NULL;
 …
   char *collection;
   text_tmap param;
+  get_cgi_parameters(collection, phrase, word, first_e, last_e, first_d, last_d);
+  get_cgi_parameters(collection, phrase, word,
+             first_e, last_e, first_d, last_d, XMLmode);
   if (collection == NULL) {
 …
+  // Output the HTML page
+  cout << "Content-type: text/html" << endl << endl
+       << "<html><head><title>" << word << "</title></head>" << endl
+       << "<body><center>" << endl
+       << "<p><h1>" << word << "</h1>" << endl
+       << "<p><b>"<< word << "</b> occurs " << tf << " times in " << df << " documents" << endl;
+  // Output the header
+  if (XMLmode) {
+    cout << "Content-type: text/plain" << endl << endl
+     << "<phinddata id=\"" << phrase
+     << "\" text=\"" << word
+     << "\" df=\"" << df
+     << "\" ef=\"" << ef
+     << "\">" << endl;
+  } else {
+    cout << "Content-type: text/html" << endl << endl
+     << "<html><head><title>" << word << "</title></head>" << endl
+     << "<body><center>" << endl
+     << "<p><h1>" << word << "</h1>" << endl
+     << "<p><b>"<< word << "</b> occurs "
+     << tf << " times in " << df << " documents" << endl;
+  }
   // Output the expansions
 …
       last_e = el.size();
+    }
+    if (last_e == el.size()) {
+      cout << "<p><b> " << last_e << " expansions</b>" << endl;
+    } else {
+      cout << "<p><b>" << last_e << " of " << ef << " expansions</b>" << endl;
+    }
+    cout << "<p><table><tr><th align=left>Phrase</th><th>freq</th><th>docs</th></tr>" << endl;
+    print_word_tf_df(argv[0], collection, textdata, el, first_e, last_e);
+    cout << "</table>" << endl;
+    if (last_e < el.size()) {
+      cout << "<br><a href='" << argv[0]
+       << "?c=" << collection << "&n=" << phrase
+       << "&e=" << (last_e + 10) << "&d=" << last_d
+       << "'>Get more phrases</a>"
+       << endl
+       << "<br><a href='" << argv[0]
+       << "?c=" << collection << "&n=" << phrase
+       << "&e=0&d=" << last_d
+       << "'>Get every phrase</a>"
+       << endl;
+    count_e = last_e - first_e;
+    // output expansions as XML
+    if (XMLmode) {
+      cout << "<expansionlist length=\"" << ef
+       << "\" start=\"" << first_e
+       << "\" end=\"" << last_e << "\">" << endl;
+      print_expansions(argv[0], collection, XMLmode, textdata, el, first_e, last_e);
+      cout << "</expansionlist>" << endl;
+    }
+    // output expansions as HTML
+    else {
+      if (count_e == el.size()) {
+    cout << "<p><b> " << count_e << " expansions</b>" << endl;
+      } else {
+    cout << "<p><b>" << count_e << " of " << ef << " expansions</b>" << endl;
+      }
+      cout << "<p><table><tr><th align=left>Phrase</th><th>freq</th><th>docs</th></tr>" << endl;
+      print_expansions(argv[0], collection, XMLmode, textdata, el, first_e, last_e);
+      cout << "</table>" << endl;
+      if (last_e < el.size()) {
+    cout << "<br><a href='" << argv[0]
+         << "?c=" << collection << "&n=" << phrase
+         << "&e=" << (last_e + 10) << "&d=" << last_d
+         << "&g=" << first_e << "&f=" << first_d
+         << "'>Get more phrases</a>"
+         << endl
+         << "<br><a href='" << argv[0]
+         << "?c=" << collection << "&n=" << phrase
+         << "&e=0&d=" << last_d
+         << "&g=" << first_e << "&f=" << first_d
+         << "'>Get every phrase</a>"
+         << endl;
+      }
+    }
+  }
 …
       last_d = docNums.size();
+    }
+    if (last_d == docNums.size()) {
+      cout << "<p><b> " << last_d << " documents</b>" << endl;
+    } else {
+      cout << "<p><b>" << last_d << " of " << df << " documents</b>" << endl;
+    }
+    cout << "<p><table><tr><th align=left>Document</th><th>freq</th></tr>" << endl;
+    print_document_df(basepath, "library", collection, docNums, docfreq, first_d, last_d);
+    cout << "</table>" << endl;
+    if (last_d < docNums.size()) {
+      cout << "<br><a href='" << argv[0]
+       << "?c=" << collection << "&n=" << phrase
+       << "&e=" << last_e << "&d=" << (last_d + 10)
+       << "'>Get more documents</a>" << endl
+       << "<br><a href='" << argv[0]
+       << "?c=" << collection << "&n=" << phrase
+       << "&e=" << last_e
+       << "&d=0'>Get every document</a>" << endl;
+    }
+    count_d = last_d - first_d;
+    // output document list as XML
+    if (XMLmode) {
+      cout << "<documentlist length=\"" << df
+       << "\" start=\"" << first_d
+       << "\" end=\"" << last_d << "\">" << endl;
+      print_documents(XMLmode, basepath, "library", collection,
+              docNums, docfreq, first_d, last_d);
+      cout << "</documentlist>" << endl;
+    }
+    // output document list as HTML
+    else {
+      if (count_d == docNums.size()) {
+    cout << "<p><b> " << count_d << " documents</b>" << endl;
+      } else {
+    cout << "<p><b>" << count_d << " of " << df << " documents</b>" << endl;
+      }
+      cout << "<p><table><tr><th align=left>Document</th><th>freq</th></tr>" << endl;
+      print_documents(XMLmode, basepath, "library", collection,
+              docNums, docfreq, first_d, last_d);
+      cout << "</table>" << endl;
+      if (last_d < docNums.size()) {
+    cout << "<br><a href='" << argv[0]
+         << "?c=" << collection << "&n=" << phrase
+         << "&e=" << last_e << "&d=" << (last_d + 10)
+         << "&g=" << first_e << "&f=" << first_d
+         << "'>Get more documents</a>" << endl
+         << "<br><a href='" << argv[0]
+         << "?c=" << collection << "&n=" << phrase
+         << "&e=" << last_e
+         << "&g=" << first_e << "&f=" << first_d
+         << "&d=0'>Get every document</a>" << endl;
+      }
+    }
+  }
+  // Close the document
+  if (XMLmode) {
+    cout << "</phinddata>" << endl;
+  } else {
     cout << "</center></body></html>" << endl;
+  }
 …
 // print out each of the words.
 void print_word_tf_df(char *cgi_script, char *collection,
+void print_expansions(char *cgi_script, char *collection, bool XMLmode,
               TextData &textdata, vector <unsigned long> dlist,
               unsigned long first, unsigned long last) {
 …
     get_phrase_freq_data(textdata, phrase, word, tf, ef, df);
+    cout << "<tr valign=top><td><a href='" << cgi_script << "?c=" << collection
+     << "&n=" << phrase << "'>" << word << "</a>"
+     << "</td><td>" << tf << "</td><td>" << df << "</td></tr>"
+     << endl;
+  }
+}
+void print_document_df(char *basepath, char *cgi_script, char *collection,
+               vector <unsigned long> docNums, vector <unsigned long> docFreq,
+               unsigned long first, unsigned long last) {
+    if (XMLmode) {
+      cout << "<expansion num=\"" << e
+       << "\" id=\"" << phrase
+       << "\" text=\"" << word
+       << "\" tf=\"" << tf
+       << "\" df=\"" << df << "\"/>" << endl;
+    } else {
+      cout << "<tr valign=top><td><a href='" << cgi_script << "?c=" << collection
+       << "&n=" << phrase << "'>" << word << "</a>"
+       << "</td><td>" << tf << "</td><td>" << df << "</td></tr>"
+       << endl;
+    }
+  }
+}
+void print_documents(bool XMLmode, char *basepath, char *cgi_script, char *collection,
+             vector <unsigned long> docNums, vector <unsigned long> docFreq,
+             unsigned long first, unsigned long last) {
   // Create a TextData object to read the document data
 …
     get_document_all_data(docdata, doc, title, hash);
+    cout << "<tr valign=top><td><a href='" << cgi_script << "?c=" << collection
+     << "&a=d&d=" << hash << "'>" << title << "</a>"
+     << "</td><td>" << freq << "</td></tr>"
+     << endl;
+    if (XMLmode) {
+      cout << "<document num=\"" << d
+       << "\" hash=\"" << hash
+       << "\" freq=\"" << freq
+       << "\" title=\"" << title << "\"/>" << endl;
+    } else {
+      cout << "<tr valign=top><td><a href='" << cgi_script << "?c=" << collection
+       << "&a=d&d=" << hash << "'>" << title << "</a>"
+       << "</td><td>" << freq << "</td></tr>"
+       << endl;
+    }
+  }
+}
 …
   // Get document list & the document frequency list
+  while (text.back() == '\n') {
+    text.pop_back();
+  }
+  text.push_back(';');
   text.push_back(':');
   docnum.clear();
 …
   // Look the word up in the textData
   if (!GetDocText (docdata, docLevel, docNum, text)) {
     FatalError (1, "Error while trying to get phrase %u", docNum);
+    FatalError (1, "Error while trying to get document %u", docNum);
+  }
 …
   while (*next++ != '\t');
   // Get the title
+  // Get the document OID (hash)
   hash.clear();
   for (; *next != '\t'; next++) {
 …
   // Get the title
   text.push_back('\t');
+  text.push_back('\n');
   title.clear();
   for (next++; *next != '\t'; next++) {
+  for (next++; *next != '\n'; next++) {
     title.push_back(*next);
+  }
 …
             unsigned long &phrasenumber, UCArray &phrasetext,
             unsigned long &first_e, unsigned long &last_e,
+            unsigned long &first_d, unsigned long &last_d) {
+            unsigned long &first_d, unsigned long &last_d,
+            bool &XMLmode) {
 …
       // n: the phrase number
       if (key[0] == 'n') {
+      else if (key[0] == 'n') {
     phrasenumber = toLongInt(value);
+      }
 …
+      }
+      // d: the last document number
+      else if (key[0] == 'd') {
+    last_d = toLongInt(value);
+      }
       // e: the last expansion number
       if (key[0] == 'e') {
+      else if (key[0] == 'e') {
     last_e = toLongInt(value);
+      }
+      // d: the last document number
+      if (key[0] == 'd') {
+    last_d = toLongInt(value);
+      // f: the first document number
+      else if (key[0] == 'f') {
+    first_d = toLongInt(value);
+      }
+      // g: the first expansion number
+      else if (key[0] == 'g') {
+    first_e = toLongInt(value);
+      }
+      // x: XML mode
+      else if (key[0] == 'x') {
+    XMLmode = true;
+      }

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 1619

Legend:

trunk/gsdl/src/phind/host/phindcgi.cpp

Download in other formats: