Changeset 1662 for trunk/gsdl


Ignore:
Timestamp:
2000-11-09T15:18:58+13:00 (24 years ago)
Author:
nzdl
Message:

Fixed another small bug in the way phrase searching works and made it
always use a boolean and query when phrase searching. It also now forces
the search onto the index with the finest granularity when it detects
a phrase search is being done. I'm not entirely convinced that this is
best, as doing a document-level phrase search can now return a whole
bunch of section-level documents.

Location:
trunk/gsdl/src/colservr
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/colservr/mgqueryfilter.cpp

    r1324 r1662  
    2222 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
    2323 *
    24  * $Id$
    25  *
    2624 *********************************************************************/
    27 
    28 /*
    29    $Log$
    30    Revision 1.1  2000/08/02 00:58:27  kjm18
    31    mgpp incorporated. the old mgsearchclass and queryfilterclass are changed.
    32    Have a base searchclass, from which mgsearchclass and mgppsearchclass inherit.
    33    Have a base queryfilterclass, from which mgqueryfilterclass and
    34    mgppqueryfilterclass inherit. librarymain in recpt should choose the appropriate
    35    type (mg vs mgpp) for each collection.
    36 
    37    Revision 1.21  1999/11/25 02:21:13  sjboddie
    38    fixed bug in phrasematch stuff
    39 
    40    Revision 1.20  1999/11/01 22:06:06  sjboddie
    41    Added filter option to remove documents not matching a phrase match.
    42    This used to be done in the receptionist.
    43 
    44    Revision 1.19  1999/10/19 03:23:40  davidb
    45    Collection building support through web pages
    46    and internal and external link handling for collection documents
    47 
    48    Revision 1.18  1999/09/22 03:43:18  sjboddie
    49    Endresults queryfilter option may now take '-1' for 'all'
    50 
    51    Revision 1.17  1999/09/21 12:01:07  sjboddie
    52    added Maxdocs queryfilter option (which may be -1 for 'all')
    53 
    54    Revision 1.16  1999/09/07 04:57:24  sjboddie
    55    added gpl notice
    56 
    57    Revision 1.15  1999/08/31 22:47:09  rjmcnab
    58    Added matchmode option for some and all.
    59 
    60    Revision 1.14  1999/07/16 03:42:21  sjboddie
    61    changed isApprox
    62 
    63    Revision 1.13  1999/07/16 00:17:06  sjboddie
    64    got using phrasesearch for post-processing
    65 
    66    Revision 1.12  1999/07/09 02:19:43  rjmcnab
    67    Fixed a couple of compiler conflicts
    68 
    69    Revision 1.11  1999/07/08 20:49:44  rjmcnab
    70    Added result_num to the ResultDocInto_t structure.
    71 
    72    Revision 1.10  1999/07/07 06:19:46  rjmcnab
    73    Added ability to combine two or more independent queries.
    74 
    75    Revision 1.9  1999/07/01 09:29:20  rjmcnab
    76    Changes for better reporting of number documents which match a query. Changes
    77    should still work as before with older versions of mg.
    78 
    79    Revision 1.8  1999/07/01 03:59:54  rjmcnab
    80    reduced MAXDOCS to 200 (more reasonable ???). I also added a virtual
    81    method for post-processing the query.
    82 
    83    Revision 1.7  1999/06/30 04:04:13  rjmcnab
    84    made stemming functions available from mgsearch and made the stems
    85    for the query terms available in queryinfo
    86 
    87    Revision 1.6  1999/06/29 22:06:23  rjmcnab
    88    Added a couple of fields to queryinfo to handle a special version
    89    of mg.
    90 
    91    Revision 1.5  1999/06/27 22:08:48  sjboddie
    92    now check for defaultindex, defaultsubcollection, and defaultlanguage
    93    entries in config files
    94 
    95    Revision 1.4  1999/06/16 02:03:25  sjboddie
    96    fixed bug in isApprox and set MAXDOCS to always be 500
    97 
    98    Revision 1.3  1999/04/19 23:56:09  rjmcnab
    99    Finished the gdbm metadata stuff
    100 
    101    Revision 1.2  1999/04/12 03:45:03  rjmcnab
    102    Finished the query filter.
    103 
    104    Revision 1.1  1999/04/06 22:22:09  rjmcnab
    105    Initial revision.
    106 
    107  */
    108 
    10925
    11026#include "mgqueryfilter.h"
     
    273189  while (query_here != query_end) {
    274190    queryresultsclass thisqueryresults;
    275    
     191
    276192    if (!mgsearchptr->search(*query_here, thisqueryresults)) {
    277193      // most likely a system problem
     
    401317  vector<queryparamclass> queryfilterparams;
    402318  parse_query_params (request, queryfilterparams, startresults,
    403               endresults, phrasematch, logout); 
    404  
     319              endresults, phrasematch, logout);
     320  // do any mg specific diddling with query parameters that may be required
     321  mg_parse_query_params (request, queryfilterparams, startresults,
     322             endresults, phrasematch, logout);
     323
     324
    405325  // do query
    406326  queryresultsclass queryresults;
     
    419339    vector<int>::iterator docorder_end = queryresults.docs.docorder.end();
    420340
     341    // documents containing matching phrases will be sorted to the top so
     342    // we can break out once we're past those that match the PhraseMatch
     343    // option -- "all_phrases" = return only those documents containing all
     344    //                       phrases in query string
     345    //           "some_phrases" = return only those documents containing
     346    //                            at least 1 of the phrases in query string
     347    //           "all_docs" = return all documents regardless
     348    if (num_phrases > 0) {
     349      int numdocs = 0;
     350      while (docorder_here != docorder_end) {
     351    docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
     352   
     353    if (((phrasematch == "all_phrases") && ((*docset_here).second.num_phrase_match < num_phrases)) ||
     354        ((phrasematch == "some_phrases") && ((*docset_here).second.num_phrase_match < 1))) {
     355      queryresults.docs_matched = numdocs;
     356      break;
     357    }
     358    numdocs ++;
     359    docorder_here ++;
     360      }
     361    }
     362
    421363    if (endresults == -1) endresults = MAXNUMDOCS;
     364    docorder_here = queryresults.docs.docorder.begin();
    422365    while (docorder_here != docorder_end) {
    423       if (resultnum > endresults) break;
     366      if (resultnum > endresults || resultnum > queryresults.docs_matched) break;
    424367     
    425368      // translate the document number
     
    431374      } else {
    432375    docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
    433    
    434     // documents containing matching phrases will be sorted to the top so
    435     // we can break out once we're past those that match the PhraseMatch
    436     // option -- "all_phrases" = return only those documents containing all
    437     //                       phrases in query string
    438     //           "some_phrases" = return only those documents containing
    439     //                            at least 1 of the phrases in the document
    440     //           "all_docs" = return all documents regardless
    441     if (num_phrases > 0) {
    442       if ((phrasematch == "all_phrases") && ((*docset_here).second.num_phrase_match < num_phrases)) {
    443         queryresults.docs_matched = response.docInfo.size();
    444         break;
    445       }
    446       if ((phrasematch == "some_phrases") && ((*docset_here).second.num_phrase_match < 1)) {
    447         queryresults.docs_matched = response.docInfo.size();
    448         break;
    449       }
    450     }
    451376
    452377    // see if there is a result for this number,
     
    508433}
    509434
     // Adjusts each parsed query in query_params when it looks like a phrase
     // search (its querystring contains more than one double quote): search_type
     // is set to 0 and, where indexmap has one, the index is switched to the
     // paragraph- or section-level index of the same type. The request,
     // startresults, endresults, phrasematch and logout parameters are unused
     // (their names are commented out in the signature).
     435void mgqueryfilterclass::mg_parse_query_params (const FilterRequest_t &/*request*/,
     436                        vector<queryparamclass> &query_params,
     437                        int &/*startresults*/, int &/*endresults*/,
     438                        text_t &/*phrasematch*/, ostream &/*logout*/) {
     439
     440  //  outconvertclass text_t2ascii;
     441 
     442  vector<queryparamclass>::iterator query_here = query_params.begin();
     443  vector<queryparamclass>::iterator query_end = query_params.end();
     444  while (query_here != query_end) {
     445
     446    // if we're doing a phrase search we want to maximise hits by making it a boolean
     447    // search on the index with the finest granularity
     448    // we're deciding it's a phrase search based on if the querystring
     449    // contains at least 2 double quotes (not very scientific but
     450    // then neither is the rest of the mg phrase searching functionality :-)
     451    if (countchar ((*query_here).querystring.begin(), (*query_here).querystring.end(), '"') > 1) {
     452      (*query_here).search_type = 0;
     453
     454      // Get the long version of the index and test to see if any indexes with
     455      // finer granularity exist. Indexes must be the same type (i.e. same metadata
     456      // or "text").
     457      text_t longindex; text_tarray splitindex;
     458      indexmap.to2from ((*query_here).index, longindex);
     459      splitchar (longindex.begin(), longindex.end(), ':', splitindex);
          // NOTE(review): splitindex[0] and [1] are read without checking how many
          // pieces splitchar produced -- this assumes longindex is always of the
          // form "granularity:indextype"; confirm indexmap.to2from cannot fail here.
     460      text_t &granularity = splitindex[0];
     461      text_t &indextype = splitindex[1];
     462      bool found = false;
     463      // currently supported granularity options are "document", "section" and "paragraph"
     464      if (granularity == "document" || granularity == "section") {
     465    text_t shortindex;
          // prefer the paragraph-level index when indexmap has one for this index type
     466    if (indexmap.fromexists ("paragraph:" + indextype)) {
     467      //      logout << text_t2ascii << "changing index from " << longindex << " to " << ("paragraph:" + indextype) << "\n";
     468      indexmap.from2to ("paragraph:" + indextype, shortindex);
     469      (*query_here).index = shortindex;
     470      found = true;
     471    }
          // otherwise, for a document-level query only, fall back to the
          // section-level index of the same type if it exists
     472    if (!found && granularity == "document" && indexmap.fromexists ("section:" + indextype)) {
     473      //      logout << text_t2ascii << "changing index from " << longindex << " to " << ("section:" + indextype) << "\n";
     474      indexmap.from2to ("section:" + indextype, shortindex);
     475      (*query_here).index = shortindex;
     476    }
     477      }
     478    }
     479
     480    query_here ++;
     481  }
     482}
     483   
  • trunk/gsdl/src/colservr/mgqueryfilter.h

    r1324 r1662  
    6969                 docresultsclass &docs);
    7070
     // mg-specific adjustment of the already-parsed query parameters
     // (defined in mgqueryfilter.cpp).
     71  void mg_parse_query_params (const FilterRequest_t &request,
     72                  vector<queryparamclass> &query_params,
     73                  int &startresults, int &endresults,
     74                  text_t &phrasematch, ostream &logout);
     75
    7176 
    7277public:
Note: See TracChangeset for help on using the changeset viewer.