Changeset 20602 for gsdl/trunk


Ignore:
Timestamp:
2009-09-14T15:17:01+12:00 (15 years ago)
Author:
kjdon
Message:

get_plain_query_terms: a first pass now removes the field markup (TI:(...) for lucene, [...]:TI for mgpp) and, for lucene, the AND, OR and NOT operators; a second pass then removes term modifiers etc.

Location:
gsdl/trunk/runtime-src/src/recpt
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/runtime-src/src/recpt/querytools.cpp

    r20481 r20602  
    334334// turn query string into terms separated by spaces.
    335335// still working on this...
    336 text_t get_plain_query_terms(const text_t &querystring) {
    337 
     336text_t get_plain_query_terms(const text_t &querystring, const text_t &arg_ct) {
    338337  text_t::const_iterator here = querystring.begin();
    339338  text_t::const_iterator end = querystring.end();
     339
     340  // let's look for [] and () first - these are a pain.
     341  text_t::const_iterator bracket;
     342  text_t query_no_brackets = "";
     343
     344  // mgpp brackets: [xxx]:TI
     345  if (findchar(here, end, '[') != end) {
     346    while ((bracket = findchar(here, end, '[')) != end) {
     347      // get the first bit
     348      query_no_brackets += substr(here, bracket);
     349      bracket++;
     350      here = bracket;
     351      // get the end bracket
     352      bracket = findchar(here, end, ']');
     353      query_no_brackets += substr(here, bracket);
     354      // skip the :TI bits
     355      while (*bracket != ' ' && bracket != end) { bracket++;}
     356      here = bracket;
     357    }
     358    if (here != end) {
     359      query_no_brackets += substr(here,end);
     360    }
     361  } else if (findchar(here, end, '(') != end) {
     362    // lucene brackets TI:(xxx)
     363    while ((bracket = findchar(here, end, '(')) != end) {
     364      // back up the field name
     365      text_t::const_iterator old_bracket = bracket;
     366      while (*bracket != ' ' && bracket != here) {
     367    --bracket;
     368      }
     369      if (bracket != here) {
     370    // get the first bit
     371    query_no_brackets += substr(here, bracket+1);
     372      }
     373      here = old_bracket +1;
     374      // get the end bracket
     375      bracket = findchar(here, end, ')');
     376      query_no_brackets += substr(here, bracket);
     377      if (bracket != end) {
     378    here = bracket+1;
     379      }
     380    }
     381    if (here != end) {
     382      query_no_brackets += substr(here,end);
     383    }
     384  } else {
     385    // was no brackets
     386    query_no_brackets = querystring;
     387  }
     388 
     389 
     390  if (arg_ct == "2") { // lucene
     391    // look for AND OR NOT and remove
     392    here = query_no_brackets.begin();
     393    end = query_no_brackets.end();
     394    text_tlist terms;
     395    splitword(here, end, "AND", terms);
     396    joinchar(terms, ' ', query_no_brackets);
     397    here = query_no_brackets.begin();
     398    end = query_no_brackets.end();
     399    splitword(here, end, "OR", terms);
     400    joinchar(terms, ' ', query_no_brackets);
     401    here = query_no_brackets.begin();
     402    end = query_no_brackets.end();
     403    splitword(here, end, "NOT", terms);
     404    joinchar(terms, ' ', query_no_brackets);
     405   
     406  }
    340407  text_t terms = "";
    341408  bool space = false;
     409  here = query_no_brackets.begin();
     410  end = query_no_brackets.end();
     411 
    342412  while (here != end) {
    343413    if (*here ==  '#' || *here == '/') {
     
    348418      }
    349419      if (here == end) break;
    350     }
    351     if (*here == '[') {
    352       // get the text out and ignore the :TI after
    353420    }
    354421    if (is_unicode_letdig(*here)) {
  • gsdl/trunk/runtime-src/src/recpt/querytools.h

    r20481 r20602  
    4949
    5050// get a plain version of the query string - terms, separated by space
    51 text_t get_plain_query_terms(const text_t &querystring);
     51text_t get_plain_query_terms(const text_t &querystring, const text_t &arg_ct);
    5252
    5353// search history tool
Note: See TracChangeset for help on using the changeset viewer.