Context Navigation

← Previous Change
Next Change →

Changeset 1633 for trunk/gsdl

Timestamp:

2000-10-31T10:07:22+13:00 (24 years ago)

Author:

paynter

Message:

Split expansion phrases into a prefix, body, and suffix (the old phind
used to do this). This information is used to format the HTML output.

File:

: 1 edited

trunk/gsdl/src/phind/host/phindcgi.cpp (modified) (9 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/gsdl/src/phind/host/phindcgi.cpp

-              r1629
+              r1633
             bool &XMLmode);
 void print_expansions(char *cgi_script, char *collection, bool XMLmode,
               TextData &textdata, vector <unsigned long> dlist,
+void print_expansions(char *cgi_script, char *collection, bool XMLmode, UCArray body,
+              TextData &textdata, vector <unsigned long> elist,
               unsigned long first, unsigned long last);
 …
              vector <unsigned long> &docnum,
              vector <unsigned long> &docfrq);
+void split_phrase(UCArray word, UCArray body, UCArray &prefix, UCArray &suffix);
+bool phrase_match(UCArray text, UCArray::iterator &here, UCArray::iterator end);
 void get_document_all_data(TextData &docdata, unsigned long docNum,
 …
        << "\" end=\"" << last_e << "\">" << endl;
+      print_expansions(argv[0], collection, XMLmode, textdata, el, first_e, last_e);
+      print_expansions(argv[0], collection, XMLmode, word, textdata, el, first_e, last_e);
       cout << "</expansionlist>" << endl;
 …
+      }
       cout << "<p><table><tr><th align=left>Phrase</th><th>freq</th><th>docs</th></tr>" << endl;
       print_expansions(argv[0], collection, XMLmode, textdata, el, first_e, last_e);
+      cout << "<p><table border=0><tr><th align=left>Phrase</th><th>freq</th><th>docs</th></tr>" << endl;
+      print_expansions(argv[0], collection, XMLmode, word, textdata, el, first_e, last_e);
       cout << "</table>" << endl;
 …
 // Print a list of expansions
 //
 // Given the textData and a list of phrase numbers,
 // print out each of the words.
 void print_expansions(char *cgi_script, char *collection, bool XMLmode,
               TextData &textdata, vector <unsigned long> dlist,
+// Given the textData and a list of phrase numbers, print out each of the
+// expansions.
+void print_expansions(char *cgi_script, char *collection, bool XMLmode, UCArray body,
+              TextData &textdata, vector <unsigned long> elist,
               unsigned long first, unsigned long last) {
   UCArray word;
   unsigned long phrase, tf, df, ef;
+  UCArray suffix, prefix;
   for (unsigned long e = first; e < last; e++) {
     phrase = dlist[e];
+    phrase = elist[e];
     get_phrase_freq_data(textdata, phrase, word, tf, ef, df);
+    split_phrase(word, body, prefix, suffix);
     if (XMLmode) {
       cout << "<expansion num=\"" << e
        << "\" id=\"" << phrase
+       << "\" prefix=\"" << prefix
+       << "\" suffix=\"" << suffix
        << "\" text=\"" << word
        << "\" tf=\"" << tf
        << "\" df=\"" << df << "\"/>" << endl;
     } else {
+      cout << "<tr valign=top><td><a href='" << cgi_script << "?c=" << collection
+       << "&n=" << phrase << "'>" << word << "</a>"
+       << "</td><td>" << tf << "</td><td>" << df << "</td></tr>"
+       << endl;
+      cout << "<tr valign=top><td align=right><a href='" << cgi_script
+       << "?c=" << collection << "&n=" << phrase << "'>" << prefix << "</a></td>"
+       << "<td align=center><a href='" << cgi_script
+       << "?c=" << collection << "&n=" << phrase << "'>" << body << "</a></td>"
+       << "<td align=left><a href='" << cgi_script
+       << "?c=" << collection << "&n=" << phrase << "'>" << suffix << "</a></td>"
+       << "<td>" << tf << "</td><td>" << df << "</td></tr>" << endl;
+    }
+  }
 …
 // The phrase is stored in textData as record phrase.
 // We retrieve:
 //   word - the text od the phrase
+//   word - the text of the phrase
 //   tf - the total frequency of the phrase
 //   ef - the expansion frequency of the phrase
 …
+      }
+      // d: the last document number
+      else if (key[0] == 'd') {
+    last_d = toLongInt(value);
+      }
+      // e: the last expansion number
+      else if (key[0] == 'e') {
+    last_e = toLongInt(value);
+      }
+      // f: the first document number
+      else if (key[0] == 'f') {
+    first_d = toLongInt(value);
+      }
+      // g: the first expansion number
+      else if (key[0] == 'g') {
+    first_e = toLongInt(value);
+      }
+      // x: XML mode
+      else if (key[0] == 'x') {
+    XMLmode = true;
+      }
       // n: the phrase number
       else if (key[0] == 'n') {
 …
       else if (key[0] == 'p') {
     toUCArray(value, phrasetext);
+      }
-      // d: the last document number
-      else if (key[0] == 'd') {
-    last_d = toLongInt(value);
+      }
-      // e: the last expansion number
-      else if (key[0] == 'e') {
-    last_e = toLongInt(value);
+      }
-      // f: the first document number
-      else if (key[0] == 'f') {
-    first_d = toLongInt(value);
+      }
-      // g: the first expansion number
-      else if (key[0] == 'g') {
-    first_e = toLongInt(value);
+      }
-      // x: XML mode
-      else if (key[0] == 'x') {
-    XMLmode = true;
+      }
 …
+// split an expansion into prefix and suffix
+void split_phrase(UCArray word, UCArray body, UCArray &prefix, UCArray &suffix) {
+  prefix.clear();
+  suffix.clear();
+  bool readingPrefix = true;
+  UCArray::iterator here = word.begin();
+  UCArray::iterator end = word.end();
+  while (here != end) {
+    // if we've not read all the prefix, add the next char to the prefix
+    if (readingPrefix) {
+      if (phrase_match(body, here, end)) {
+    readingPrefix = false;
+    // trim whitespace from end of prefix & start of suffix
+    if (!prefix.empty()) {
+      prefix.pop_back();
+    }
+    while (*here == ' ') {
+      here++;
+    }
+      } else {
+    prefix.push_back(*here);
+    here++;
+      }
+    }
+    // if we've finished with the prefix, update the suffix
+    else {
+      suffix.push_back(*here);
+      here++;
+    }
+  }
+}
+// phrase_match
+//
+// compare two strings, one represented as an UCArray, the other as two
+// UCArray iterators.
+//
+// Return true if the UCArray is the same as the phrase the iterator points
+// too for the length of the UCArray.
+bool phrase_match(UCArray text, UCArray::iterator &here, UCArray::iterator end) {
+  UCArray::iterator one_here = text.begin();
+  UCArray::iterator one_end  = text.end();
+  UCArray::iterator two_here = here;
+  // iterate over the length of the first string, comparing each element to
+  // the corresponding element in the second string.
+  while (one_here != one_end) {
+    if (*one_here != *two_here) {
+      return false;
+    }
+    one_here++;
+    two_here++;
+  }
+  here = two_here;
+  return true;
+}
 // Convert from text_t format
 //

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 1633 for trunk/gsdl

Legend:

trunk/gsdl/src/phind/host/phindcgi.cpp

Download in other formats: