Changeset 6584 for trunk/gsdl/src


Ignore:
Timestamp:
2004-01-22T14:17:30+13:00 (20 years ago)
Author:
kjdon
Message:

Fiddled around with segmenting for Chinese text. Haven't changed how the
segmentation is done, or what character ranges are used.
But when it's done is now controlled by collect.cfg. There is a new
option, separate_cjk (values true or false, default false). Segmentation
is only done if this is set to true. This is passed as a global option to
all plugins by the import.pl script, so the user just needs to add it
once to the config file, not as an option to all plugins.
The queryaction uses this option too to determine whether or not to segment
the query.

Location:
trunk/gsdl/src
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/colservr/collectserver.cpp

    r5868 r6584  
    101101      collectinfo.searchTypes = cfgline;
    102102    }
    103 
     103    else if (key == "separate_cjk") {
     104      if (value == "true") collectinfo.isSegmented = true;
     105      else collectinfo.isSegmented = false;
     106    }
    104107    // What have we set in our collect.cfg file :  document or collection ?
    105108    else if (key == "authenticate") collectinfo.authenticate = value;
  • trunk/gsdl/src/recpt/comtypes.cpp

    r5024 r6584  
    5353  isPublic=true;
    5454  isBeta=false;
     55  isSegmented=false;
    5556  languages.erase(languages.begin(), languages.end());
    5657  ccsCols.erase(ccsCols.begin(), ccsCols.end());
  • trunk/gsdl/src/recpt/comtypes.h

    r5024 r6584  
    9090   bool isPublic;
    9191   bool isBeta;
     92  bool isSegmented;
    9293   unsigned long buildDate;
    9394   text_tarray ccsCols;    // empty if collection does not use cross-collection searching
  • trunk/gsdl/src/recpt/documentaction.cpp

    r5917 r6584  
    10081008 
    10091009  if (!args["q"].empty() && args.getintarg("hl")) {
     1010
     1011    ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, collection, logout);
     1012    bool segment = false;
     1013    if (cinfo != NULL) {
     1014      segment = cinfo->isSegmented;
     1015    }
    10101016    FilterRequest_t request;
    10111017    comerror_t err;
    10121018    request.filterResultOptions = FRmatchTerms;
    10131019    text_t formattedstring = args["q"];
    1014     format_querystring (formattedstring, args.getintarg("b"));
     1020    format_querystring (formattedstring, args.getintarg("b"), segment);
    10151021    set_queryfilter_options (request, formattedstring, args);
    10161022    collectproto->filter (args["c"], request, queryresponse, err, logout);
  • trunk/gsdl/src/recpt/queryaction.cpp

    r5762 r6584  
    632632
    633633// sets the selection box macros _hselection_, _jselection_, _nselection_ _gselection_, fqfselection_
    634 void queryaction::set_option_macro (const text_t &macroname, text_t current_value, bool display_single, bool add_js_update,
    635                     const FilterOption_t &option, displayclass &disp) {
     634void queryaction::set_option_macro (const text_t &macroname,
     635                    text_t current_value,
     636                    bool display_single,
     637                    bool add_js_update,
     638                    const FilterOption_t &option,
     639                    displayclass &disp) {
    636640 
    637641  if (option.validValues.empty()) return;
     
    10821086  isapprox isApprox = Exact;
    10831087
     1088  // what to do about segmentation for multiple colls??
     1089  bool segment = false;
    10841090  text_t formattedstring = "";
    1085   get_formatted_query_string(formattedstring, args, disp, logout);
     1091  get_formatted_query_string(formattedstring, segment, args, disp, logout);
    10861092
    10871093  if (formattedstring.empty()) {
     
    12881294  }
    12891295   
     1296  bool segment = cinfo->isSegmented;
    12901297  browserclass *bptr = browsers->getbrowser (browsertype);
    12911298
     
    13071314  request.filterResultOptions = FROID | FRmetadata | FRtermFreq;
    13081315  text_t formattedstring = "";
    1309   get_formatted_query_string(formattedstring, args, disp, logout);
     1316  get_formatted_query_string(formattedstring, segment, args, disp, logout);
    13101317
    13111318
     
    13541361// also adds dates if appropriate in text search
    13551362void queryaction::get_formatted_query_string (text_t &formattedstring,
     1363                          bool segment,
    13561364                          cgiargsclass &args,
    13571365                          displayclass &disp,
     
    13601368    formattedstring = args["q"];
    13611369    // remove & | ! for simple search, insert spaces for chinese
    1362     format_querystring (formattedstring, args.getintarg("b"));
     1370    format_querystring (formattedstring, args.getintarg("b"), segment);
    13631371    if (args["ct"]=="1") { // mgpp - we need to add in the field info
    13641372      format_field_info(formattedstring, args["fqf"]);
  • trunk/gsdl/src/recpt/queryaction.h

    r4937 r6584  
    7373                int numDocs, isapprox isApprox);
    7474
    75   void get_formatted_query_string (text_t &formattedstring, cgiargsclass &args,
     75  void get_formatted_query_string (text_t &formattedstring, bool segment,
     76                   cgiargsclass &args,
    7677                   displayclass &disp, ostream &logout);
    7778  void define_query_interface(displayclass &disp, cgiargsclass &args,
  • trunk/gsdl/src/recpt/querytools.cpp

    r4757 r6584  
    154154}
    155155
    156 void format_querystring (text_t &querystring, int querymode) {
     156void format_querystring (text_t &querystring, int querymode, bool segment) {
    157157  text_t formattedstring;
    158158
     159  if (querymode == 1 && !segment) return;
     160 
    159161  text_t::const_iterator here = querystring.begin();
    160162  text_t::const_iterator end = querystring.end();
     
    171173                 *here == '!' || *here == '&')) {
    172174      formattedstring.push_back(' ');
    173     } else {
     175    } else if (segment) {
    174176      if ((*here >= 0x4e00 && *here <= 0x9fa5) ||
    175177      (*here >= 0xf900 && *here <= 0xfa2d)) {
     
    184186    space = false;
    185187      }
     188   
     189    } else {
     190      formattedstring.push_back (*here);
    186191    }
    187192    here ++;
  • trunk/gsdl/src/recpt/querytools.h

    r4757 r6584  
    4040void set_more_queryfilter_options (FilterRequest_t &request, cgiargsclass &args);
    4141
    42 void format_querystring (text_t &querystring, int querymode);
     42void format_querystring (text_t &querystring, int querymode, bool segment);
    4343
    4444void add_dates(text_t &querystring, int startdate, int enddate,
Note: See TracChangeset for help on using the changeset viewer.