Changeset 11765


Ignore:
Timestamp:
2006-05-03T16:06:16+12:00 (18 years ago)
Author:
kjdon
Message:

Made mgpp query parsing better for plain searching. It now handles boolean operators and NEAR/WITHIN as part of the query. Also, for plain advanced searching, the default operator is now always OR.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/recpt/querytools.cpp

    r11004 r11765  
    4646
    4747  option.name = "MatchMode";
    48   option.value = (args.getintarg("t")) ? "some" : "all";
     48  // mgpp in advanced mode, always use some query
     49  if (args.getintarg("ct") !=0 && args.getintarg("b") == 1) {
     50    option.value = "some";
     51  } else {
     52    option.value = (args.getintarg("t")) ? "some" : "all";
     53  }
    4954  request.filterOptions.push_back (option);
    5055
     
    285290}
    286291
    287 // some query form parsing functions for use with mgpp
     292// some query form parsing functions for use with mgpp & lucene
    288293
    289294void parse_reg_query_form(text_t &querystring, cgiargsclass &args)
     
    606611}
    607612
    608 void format_field_info(text_t &querystring, cgiargsclass &args) {
    609  
     613void format_field_info_lucene(text_t &querystring, cgiargsclass &args) {
    610614  text_t tag = args["fqf"];
    611615  if (tag == "ZZ") tag = ""; // ZZ is a special tag meaning no tag (all fields)
    612 
    613   int argct = args.getintarg("ct");
    614   bool mgpp = (argct == 1);
    615   bool lucene = (argct == 2);
    616 
    617   if (mgpp && tag == "") {
    618     return; // no field specifier: do nothing
    619   }
    620  
     616  int type = 2; //lucene
    621617  int argt = args.getintarg("t");// t=0 -and, t=1 - or
    622618  int argb = args.getintarg("b"); // b=0 simple, b=1 advanced
    623619
    624   bool simple_AND_search = (argb==0 && argt==0);
    625   bool simple_OR_search = (argb==0 && argt==1);
    626 
    627   if (mgpp && simple_AND_search) {
    628     // mgpp, simple AND search, tag the whole query string
    629     add_field_info(querystring, tag, argct);
     620  // lucene simple OR - the string stays as is, but may need field tag
     621  if (argb==0 && argt == 1) {
     622    // just tag the entire thing
     623    if (tag != "") {
     624      add_field_info(querystring, tag, type);
     625    }
    630626    return;
    631627  }
    632   // resulting mgpp case - we need to tag each individual term or phrase
    633   // TODO - allow AND. OR in query string and don't tag these words
    634 
    635   if (lucene && (simple_OR_search || argb == 1)) {
    636       // OR search or advanced search (here we assume that the user has added their term mods - don't need to add term mods
    637     if (tag != "") {
    638       // tag the whole string
    639       add_field_info(querystring, tag, argct);
    640     }
    641     return;
    642   }
    643  
    644 
    645   // if we have got here, we need to add in combiners (lucene) or
    646   // we need to tag each individual word (mgpp OR search - mgpp can't do OR inside a field)
    647 
    648   text_t combine = ((lucene)? "+" : "");
    649  
    650   text_t processed_querystring = "";
    651   text_t queryelement = "";
    652  
    653628  bool in_phrase = false;
     629 
     630  text_t queryelem = "";
     631  text_t finalquery = "";
     632 
     633  // only add in + for simple AND search
     634  text_t combine = ((argb==0)? "+" : "");
     635
     636  // for lucene, we need to change & to && and | to || if advanced search
     637  // we need to tag the entire string, if we have a field
     638  // if we are simple and search, then we put && in between words
     639 
    654640  text_t::const_iterator here = querystring.begin();
    655641  text_t::const_iterator end = querystring.end();
    656642  while (here != end) {
    657     if (is_unicode_letdig(*here) || is_special_character(argct, *here)) {
    658       queryelement.push_back(*here);
     643    if (is_unicode_letdig(*here) || is_special_character(type, *here)) {
     644      queryelem.push_back(*here);
    659645    }
    660646
    661647    // Detect phrase starts/finishes
    662648    else if (*here == '"') {
    663       queryelement.push_back(*here);
     649      queryelem.push_back(*here);
    664650      if (in_phrase == false) in_phrase = true;
    665651      else {
    666     if (mgpp) {add_field_info(queryelement, tag, argct);}
    667     processed_querystring += combine + queryelement;
    668     queryelement.clear();
     652    finalquery += combine + queryelem;
     653    queryelem.clear();
    669654    in_phrase = false;
    670655      }
     
    673658    // Found word boundary, in a phrase
    674659    else if (in_phrase) {
    675       queryelement.push_back(*here);
     660      queryelem.push_back(*here);
    676661    }
    677662    // Word boundary, but not in a phrase
    678663    else {
    679       if (!queryelement.empty()) {
    680     if (mgpp) {add_field_info(queryelement, tag, argct);}
    681     processed_querystring += combine + queryelement;
    682     queryelement.clear();
    683       }
    684       processed_querystring.push_back(*here);
     664      if (*here == '&') {
     665    queryelem.push_back('&');
     666    queryelem.push_back('&');
     667      } else if (*here == '|') {
     668    queryelem.push_back('|');
     669    queryelem.push_back('|');
     670      } else {
     671    if (!queryelem.empty()) {
     672      finalquery += combine + queryelem;
     673      queryelem.clear();
     674    }
     675    finalquery.push_back(*here);
     676      }
    685677    }
    686678
     
    689681
    690682  // Get last element
    691   if (!queryelement.empty()) {
    692     if (mgpp) {add_field_info(queryelement, tag, argct);}
    693     processed_querystring += combine + queryelement;
    694   }
    695 
    696   querystring = processed_querystring;
     683  if (!queryelem.empty()) {
     684    finalquery += combine + queryelem;
     685  }
     686
     687  add_field_info(finalquery, tag, type);
     688  querystring = finalquery;
     689 cerr << "final query = "<<finalquery<<endl;
     690}
     691
     692void format_field_info_mgpp(text_t &querystring, cgiargsclass &args) {
     693 
     694  text_t tag = args["fqf"];
     695  if (tag == "ZZ") tag = ""; // ZZ is a special tag meaning no tag (all fields)
    697696 
    698   if (lucene) {
    699     // tag the whole query string
    700     add_field_info(querystring, tag, argct);
    701   }
    702 }
    703 
    704 
     697  int argt = args.getintarg("t");// t=0 -and, t=1 - or
     698  int argb = args.getintarg("b"); // b=0 simple, b=1 advanced
     699
     700  if (tag == "" && argb ==1) {
     701    return; // no field specifier, advanced mode, the query stays as written
     702  }
     703
     704  int type = 1; // mgpp
     705
     706  bool simple_and = (argb==0 && argt==0);
     707  text_t finalquery = "";
     708  text_t fieldpart ="";
     709  text_t queryelem = "";
     710  bool in_phrase = false;
     711  bool in_field = false;
     712
     713  text_t::const_iterator here = querystring.begin();
     714  text_t::const_iterator end = querystring.end();
     715  while (here != end) {
     716    if (is_unicode_letdig(*here)  || *here == '&' || is_special_character(type, *here)) {
     717      queryelem.push_back(*here);
     718    }
     719    else if (*here == '|') {
     720      in_field = false;
     721    }
     722    else if (*here == '!' || *here == '(' || *here == ')') {
     723      if (!in_phrase) { // ignore these if in_phrase
     724    // output field, then output operator
     725    in_field = false;
     726    if (!queryelem.empty()) {
     727      if (!simple_and && !fieldpart.empty()) {
     728        add_field_info(fieldpart, tag, type);
     729        finalquery += fieldpart;
     730        finalquery.push_back(' ');
     731        fieldpart.clear();
     732      }
     733      fieldpart += queryelem;
     734    }
     735    if (!fieldpart.empty()) {
     736      add_field_info(fieldpart, tag, type);
     737      finalquery += fieldpart;
     738      finalquery.push_back(' ');
     739    }
     740    fieldpart.clear();
     741    queryelem.clear();
     742    finalquery.push_back(*here);
     743    finalquery.push_back(' ');
     744      }
     745    }
     746    else if (*here == '"') {
     747      queryelem.push_back(*here);
     748      if (in_phrase == false) in_phrase = true;
     749      else {
     750    in_phrase = false;
     751      }
     752    }
     753
     754    // Found word boundary, in a phrase
     755    else if (in_phrase) {
     756      queryelem.push_back(*here);
     757    }
     758    // Found a word boundary
     759    else {
     760      if (!queryelem.empty()) {
     761    if (queryelem == "&") {
     762      in_field = true;
     763      queryelem.clear();
     764    }
     765    else if (starts_with(queryelem, "NEAR") || starts_with(queryelem, "WITHIN")) {
     766     
     767      if (argb==1) {
     768        // simple search, these not allowed
     769        in_field = true;
     770        fieldpart += queryelem;
     771        fieldpart.push_back(' ');
     772      }
     773      queryelem.clear();
     774     
     775    }
     776    else {
     777      if (!simple_and && !in_field) {
     778        if (!fieldpart.empty()) {
     779          add_field_info(fieldpart, tag, type);
     780          finalquery += fieldpart;
     781          finalquery.push_back(' ');
     782          fieldpart.clear();
     783        }
     784      }
     785     
     786      fieldpart += queryelem;
     787      fieldpart.push_back(' ');
     788      queryelem.clear();
     789    }
     790      }
     791    }
     792    ++here;
     793  }
     794  // at the end
     795  if (!queryelem.empty()) {
     796    if (!simple_and && !in_field && !fieldpart.empty()) {
     797      add_field_info(fieldpart, tag, type);
     798      finalquery += fieldpart;
     799      finalquery.push_back(' ');
     800      fieldpart.clear();
     801    }
     802    fieldpart += queryelem;
     803  }
     804  if (!fieldpart.empty()) {
     805    add_field_info(fieldpart, tag, type);
     806    finalquery += fieldpart;
     807    fieldpart.clear();
     808    finalquery.push_back(' ');
     809  }
     810
     811  querystring  = finalquery;
     812  cerr << "final query = "<<finalquery<<endl;
     813}
     814
     815void format_field_info(text_t &querystring, cgiargsclass &args) {
     816  int argct = args.getintarg("ct");
     817  if (argct == 1) {
     818    format_field_info_mgpp(querystring, args);
     819  } else if (argct == 2) {
     820    format_field_info_lucene(querystring, args);
     821  }
     822}
     823
Note: See TracChangeset for help on using the changeset viewer.