Changeset 2146


Ignore:
Timestamp:
2001-03-09T16:56:35+13:00 (23 years ago)
Author:
sjboddie
Message:

Fixed a bug that was preventing phrase searching from working on
document level indexes

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/colservr/phrasesearch.cpp

    r2139 r2146  
    7878}
    7979
     // Recursively collects document numbers for OID and everything beneath it.
     //
     // Looks up the record for OID in gdbm. If that record has hastxt equal to
     // 1 and a non-empty docnum field, the docnum is appended to docnum_list.
     // The function then calls itself on every identifier listed in the
     // record contains field, so docnum_list receives the numbers in
     // depth-first order: the record itself first, then each child subtree in
     // the order the children are listed.
     //
     // gdbm         database queried through getinfo
     // OID          identifier of the record to start from (passed by value)
     // docnum_list  output; matching document numbers are appended, existing
     //              entries are left untouched
     //
     // Returns silently, adding nothing for this record, when the lookup fails.
     //
     // NOTE(review): there is no guard against a record whose contains field
     // leads back to itself; such data would recurse without end -- confirm
     // the database builder never produces it.
     80static void get_all_docnums (gdbmclass &gdbm, text_t OID, vector<int> &docnum_list) {
     81
     82  infodbclass OID_info;
     83 
     84  // fetch the database record for OID; give up if it cannot be read
     85  if (!gdbm.getinfo (OID, OID_info)) return;
     86  if (OID_info["hastxt"] == "1" && !OID_info["docnum"].empty()) {
     87    docnum_list.push_back (OID_info["docnum"].getint());
     88  }
     89
     90  // build the list of child identifiers from the contains field:
     // entries are separated by semicolons, empty entries are dropped, and
     // every double-quote character is expanded to OID (presumably the
     // database shorthand for the parent identifier -- TODO confirm against
     // the database format). A record with no contains field has no children.
     91  if (OID_info["contains"].empty()) return;
     92  text_tarray contains; text_t tmptext;
     93  text_t::iterator contains_here = OID_info["contains"].begin();
     94  text_t::iterator contains_end = OID_info["contains"].end();
     95  while (contains_here != contains_end) {
     96    if (*contains_here == '"') tmptext += OID;
     97    else if (*contains_here == ';') {
     98      if (!tmptext.empty()) contains.push_back (tmptext);
     99      tmptext.clear();
     100    } else tmptext.push_back(*contains_here);
     101    contains_here++;
     102  }
     103  if (!tmptext.empty()) contains.push_back (tmptext);
     104
     105  text_tarray::const_iterator here = contains.begin();
     106  text_tarray::const_iterator end = contains.end();
     107  while (here != end) {
     108    get_all_docnums (gdbm, *here, docnum_list);
     109    here ++;
     110  }
     111}
    80112
    81113bool doc_phrase_search (unsigned char *doc, int doclen,
     
    102134  while (doc_here <= doc_end) {
    103135    first = true;
    104    
     136
    105137    // there will be at least one member of phrase (see above)
    106138    termfreqclassarray::const_iterator phrase_here = phrase.begin();
     
    126158  return false;
    127159}
    128 
    129160
    130161// looks for the stemmed phrase in the metadata or text associated with
     
    139170            const termfreqclassarray &phrase,
    140171            int docnum) {
    141   // disect the long index to find out where the text should come from
    142   text_t level, gran;
    143   text_t::const_iterator longindex_here = longindex.begin();
    144   text_t::const_iterator longindex_end = longindex.end();
    145   longindex_here = getdelimitstr (longindex_here, longindex_end, ':', level);
    146   longindex_here = getdelimitstr (longindex_here, longindex_end, ':', gran);
    147 
    148   if (gran.empty()) return false;
    149  
    150   // note that we're treating indexes with granularity of 'all' (i.e. text,Title,Creator)
    151   // as if they were simply 'text' indexes
    152   if (gran == "text" || gran == "all" || findword(gran.begin(),gran.end(),"text")) {
    153     char *doc = NULL;
    154     int doclen = 0;
    155  
    156     // get text from mg.
    157     if (!mgsearch.mgdocument (index, subcollection, language, collection,
    158                   docnum, doc, doclen)) return false;
    159     return doc_phrase_search ((unsigned char *)doc, doclen, phrase);
    160   }
    161172
    162173  // get OID
    163   char *metadata = NULL;
    164   text_t::size_type metadata_len = 0;
    165174  infodbclass docnum_info;
    166   infodbclass OID_info;
    167  
    168175  if (!gdbm.getinfo (docnum, docnum_info)) return false;
    169176  text_t &OID = docnum_info["section"];
    170177  if (OID.empty()) return false;
     178
     179  // disect the long index to find out where the text should come from
     180  text_t gran, type;
     181  text_t::const_iterator longindex_here = longindex.begin();
     182  text_t::const_iterator longindex_end = longindex.end();
     183  longindex_here = getdelimitstr (longindex_here, longindex_end, ':', gran);
     184  longindex_here = getdelimitstr (longindex_here, longindex_end, ':', type);
     185
     186  if (gran.empty()) return false;
     187 
     188  // note that we're treating indexes of type 'all' (i.e. text,Title,Creator)
     189  // or other composite indexes that contain "text" as if they were simply 'text' indexes
     190  if (type == "text" || type == "all" || findword(type.begin(),type.end(),"text")) {
     191    char *doc = NULL;
     192    int doclen = 0;
     193 
     194    // get text from mg.
     195    if (gran == "document") {
     196
     197      // if this is a document level index (which should only happen if
     198      // there are no matching indexes with a finer granularity -- see
     199      // mgqueryfilterclass::mg_parse_query_params) then we must do the
     200      // phrase search on the entire document (i.e. all the sections)
     201      // -- this is going to make a slow process even slower
     202      vector<int> docnum_list; text_t fulldoc;
     203      get_all_docnums (gdbm, OID, docnum_list);
     204      vector<int>::const_iterator this_docnum = docnum_list.begin();
     205      vector<int>::const_iterator end_docnum = docnum_list.end();
     206      while (this_docnum != end_docnum) {
     207    if (mgsearch.mgdocument (index, subcollection, language, collection,
     208                 *this_docnum, doc, doclen)) {
     209      fulldoc.appendcstr (doc);
     210    }
     211    this_docnum ++;
     212      }
     213      doc = fulldoc.getcstr();
     214      doclen = fulldoc.size();
     215      bool rv = doc_phrase_search ((unsigned char *)doc, doclen, phrase);
     216      delete doc;
     217      return rv;
     218
     219    } else {
     220
     221      if (!mgsearch.mgdocument (index, subcollection, language, collection,
     222                docnum, doc, doclen)) return false;
     223      return doc_phrase_search ((unsigned char *)doc, doclen, phrase);
     224    }
     225  }
     226
     227  char *metadata = NULL;
     228  text_t::size_type metadata_len = 0;
     229  infodbclass OID_info;
    171230 
    172231  // get field
     
    174233
    175234  bool result = false;
    176   text_tarray *tarr_ptr = OID_info.getmultinfo (gran);
     235  text_tarray *tarr_ptr = OID_info.getmultinfo (type);
    177236  if (tarr_ptr != NULL ) {
    178237    text_tarray::const_iterator subvalue_here = (*tarr_ptr).begin();
Note: See TracChangeset for help on using the changeset viewer.