Changeset 2134


Ignore:
Timestamp:
2001-03-08T14:11:48+13:00 (23 years ago)
Author:
sjboddie
Message:

mg phrase searching now always sets maxdocs to -1 (all). This guarantees that
a phrase search will hit every document that contains the phrase, but it also
means that bad phrase searches (like "and the") will take a very long time,
especially on a large collection.

Also added a bit of a hack to handle program number indexes for various
BBC collections.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/colservr/mgqueryfilter.cpp

    r1721 r2134  
    446446  while (query_here != query_end) {
    447447
    448     // if we're doing a phrase search we want to maximise hits by making it a boolean
    449     // search on the index with the finest granularity
     448    // if we're doing a phrase search we want to maximise hits by making it
     449    // a boolean search on the index with the finest granularity - we'll
     450    // also set maxdocs to "all" (realizing that this will cause searches
     451    // like "and the" on a large collection to take a very very long time).
     452
    450453    // we're deciding it's a phrase search based on if the querystring
    451454    // contains at least 2 double quotes (not very scientific but
     
    453456    if (countchar ((*query_here).querystring.begin(), (*query_here).querystring.end(), '"') > 1) {
    454457      (*query_here).search_type = 0;
     458
     459      // set maxdocs to "all"
     460      (*query_here).maxdocs = -1;
    455461
    456462      // Get the long version of the index and test to see if any indexes with
     
    467473    text_t shortindex;
    468474    if (indexmap.fromexists ("paragraph:" + indextype)) {
    469       //      logout << text_t2ascii << "changing index from " << longindex << " to " << ("paragraph:" + indextype) << "\n";
    470475      indexmap.from2to ("paragraph:" + indextype, shortindex);
    471476      (*query_here).index = shortindex;
     
    473478    }
    474479    if (!found && granularity == "document" && indexmap.fromexists ("section:" + indextype)) {
    475       //      logout << text_t2ascii << "changing index from " << longindex << " to " << ("section:" + indextype) << "\n";
    476480      indexmap.from2to ("section:" + indextype, shortindex);
    477481      (*query_here).index = shortindex;
     
    480484    }
    481485
     486#ifdef GSDL_BBC_COLLECTION
     487    // This is a special hack for the BBC collection's ProgNumber index
     488
     489    // if we're searching a ProgNumber index we want to:
     490    // 1. Remove all non-alphanumeric characters from the query string
     491    // 2. Make it a boolean search
     492    // 3. Turn off case-folding
     493    text_t longindex; text_tarray splitindex;
     494    indexmap.to2from ((*query_here).index, longindex);
     495    splitchar (longindex.begin(), longindex.end(), ':', splitindex);
     496    text_t &indextype = splitindex[1];
     497    if (indextype == "ProgNumber") {
     498      (*query_here).search_type = 0;
     499      (*query_here).casefolding = 0;
     500      text_t new_querystring;
     501      text_t::const_iterator here = (*query_here).querystring.begin();
     502      text_t::const_iterator end = (*query_here).querystring.end();
     503      while (here != end) {
     504        if ((*here >= 'a' && *here <= 'z') || (*here >= 'A' && *here <= 'Z') ||
     505            (*here >= '0' && *here <= '9')) {
     506          new_querystring.push_back (*here);
     507        }
     508        here ++;
     509      }
     510      (*query_here).querystring = new_querystring;
     511    }
     512#endif
     513
    482514    query_here ++;
    483515  }
Note: See TracChangeset for help on using the changeset viewer.