root/gsdl/trunk/runtime-src/src/recpt/querytools.cpp @ 18459

Revision 18459, 24.4 KB (checked in by davidb, 11 years ago)

A space is being added to the end of an mgpp query when it is formatted. This used to cause the mgpp code to crash. Have fixed the mgpp code to be more tolerant, and marked the point in querytools.cpp where I believe the space is being unnecessarily added (but haven't removed the line in case there is some other reason why the space is needed).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * querytools.cpp --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "querytools.h"
27#include <ctype.h>
28#include "unitool.h" // for is_unicode_letdig
29
30// sets the ct, qt, qto arguments
31void set_query_type_args(ColInfoResponse_t *cinfo, cgiargsclass &args) {
32
33  if (args["ct"].empty()) {
34    text_t build_type = cinfo->buildType;
35    if (build_type == "mgpp") {
36      args["ct"] = "1";
37    } else if (build_type == "lucene") {
38      args["ct"] = "2";
39    } else {
40      args["ct"] = "0";
41    }
42  }
43  text_t arg_ct = args["ct"];
44  if (arg_ct == "0") {
45    // mg
46    args["qt"] = "0";
47    args["qto"] = "0";
48    return;
49  }
50
51  if (!args["qt"].empty() && !args["qto"].empty()) {
52    return;
53  }
54 
55  text_tmap::iterator check = cinfo->format.find("SearchTypes");
56  text_t search_types;
57  if(check != cinfo->format.end() && !(*check).second.empty()){
58    search_types = (*check).second;
59  } else {
60    // assume plain,form
61    if (args["qto"].empty()) args["qto"] = "3";
62    if (args["qt"].empty()) {
63      int arg_qto = args.getintarg("qto");
64      if (arg_qto == 2) {
65    args["qt"] = "1";
66      } else {
67    args["qt"] = "0";
68      }
69    }
70    return;
71  }
72 
73 
74  if (args["qto"].empty()) {
75    unsigned int type = 0;
76    if (findword(search_types.begin(), search_types.end(), "form") != search_types.end()) {
77      type |= 2;
78    }
79    if (findword(search_types.begin(), search_types.end(), "plain") != search_types.end()) {
80      type |= 1;
81    }
82    args.setintarg("qto", type);
83  }
84 
85  if (args["qt"].empty()) {
86    int arg_qto = args.getintarg("qto");
87    if (arg_qto == 2 || (arg_qto == 3 && starts_with(search_types, "form"))) {
88      args["qt"] = "1";
89    } else {
90      args["qt"] = "0";
91    }
92  }
93}
94
95// sets the ks, ss, afs (casesupport, stemsupport, accentfoldsupport) args
96void set_stem_index_args(ColInfoResponse_t *cinfo, cgiargsclass &args) {
97  int stemIndexes = cinfo->stemIndexes;
98
99  if (stemIndexes & SIcasefold) {
100    args["ks"] = 1;
101  }
102  if (stemIndexes & SIstem) {
103    args["ss"] = 1;
104  }
105  if (stemIndexes & SIaccentfold) {
106    args["afs"] = 1;
107  }
108
109}
110
111// request.filterResultOptions and request.fields (if required) should
112// be set from the calling code
113void set_queryfilter_options (FilterRequest_t &request,
114                  const text_t &querystring,
115                  cgiargsclass &args) {
116
117  request.filterName = "QueryFilter";
118
119  OptionValue_t option;
120
121  option.name = "Term";
122  option.value = querystring;
123  request.filterOptions.push_back (option);
124
125  option.name = "QueryType";
126  option.value = (args.getintarg("t")) ? "ranked" : "boolean";
127  request.filterOptions.push_back (option);
128
129  option.name = "MatchMode";
130  // mgpp in advanced mode, always use some query
131  if (args.getintarg("ct") == 1 && args.getintarg("b") == 1) {
132    option.value = "some";
133  } else {
134    option.value = (args.getintarg("t")) ? "some" : "all";
135  }
136  request.filterOptions.push_back (option);
137
138  option.name = "Casefold";
139  option.value = (args.getintarg("k")) ? "true" : "false";
140  request.filterOptions.push_back (option);
141
142  option.name = "Stem";
143  option.value = (args.getintarg("s")) ? "true" : "false";
144  request.filterOptions.push_back (option);
145
146  option.name = "AccentFold";
147  option.value = (args.getintarg("af")) ? "true" : "false";
148  request.filterOptions.push_back (option);
149 
150  if (!args["h"].empty()) {
151    option.name = "Index";
152    option.value = args["h"];
153    request.filterOptions.push_back (option);
154  }
155
156  if (!args["j"].empty()) {
157    option.name = "Subcollection";
158    option.value = args["j"];
159    request.filterOptions.push_back (option);
160  }
161
162  if (!args["n"].empty()) {
163    option.name = "Language";
164    option.value = args["n"];
165    request.filterOptions.push_back (option);
166  }
167 
168  if (!args["g"].empty()) { // granularity for mgpp
169    option.name = "Level";
170    option.value = args["g"];
171    request.filterOptions.push_back (option);
172  }
173
174  if (!args["fs"].empty()) { // filter string for lucene
175    option.name = "FilterString";
176    option.value = args["fs"];
177    request.filterOptions.push_back (option);
178  }
179
180  if (!args["sf"].empty()) { // sort field for lucene
181    option.name = "SortField";
182    option.value = args["sf"];
183    request.filterOptions.push_back (option);
184  }
185
186  if (!args["fuzziness"].empty() && args["fuzziness"] != "100") { // fuzziness value for lucene
187    option.name = "Fuzziness";
188    option.value = (text_t) "0." + args["fuzziness"];
189    request.filterOptions.push_back (option);
190  }
191
192  set_more_queryfilter_options (request, args);
193}
194
195void set_queryfilter_options (FilterRequest_t &request,
196                  const text_t &querystring1,
197                  const text_t &querystring2, cgiargsclass &args) {
198
199  set_queryfilter_options (request, querystring1, args);
200
201  // fill in the second query if needed
202  if (!args["cq2"].empty()) {
203    OptionValue_t option;
204
205    option.name = "CombineQuery";
206    option.value = args["cq2"];
207    request.filterOptions.push_back (option);
208   
209    option.name = "Term";
210    option.value = querystring2;
211    request.filterOptions.push_back (option);
212   
213    option.name = "QueryType";
214    option.value = (args.getintarg("t")) ? "ranked" : "boolean";
215    request.filterOptions.push_back (option);
216
217    option.name = "Casefold";
218    option.value = (args.getintarg("k")) ? "true" : "false";
219    request.filterOptions.push_back (option);
220
221    option.name = "Stem";
222    option.value = (args.getintarg("s")) ? "true" : "false";
223    request.filterOptions.push_back (option);
224
225    option.name = "AccentFold";
226    option.value = (args.getintarg("af")) ? "true" : "false";
227    request.filterOptions.push_back (option);
228
229    if (!args["h2"].empty()) {
230      option.name = "Index";
231      option.value = args["h2"];
232      request.filterOptions.push_back (option);
233    }
234
235    if (!args["j2"].empty()) {
236      option.name = "Subcollection";
237      option.value = args["j2"];
238      request.filterOptions.push_back (option);
239    }
240
241    if (!args["n2"].empty()) {
242      option.name = "Language";
243      option.value = args["n2"];
244      request.filterOptions.push_back (option);
245    }
246  }
247  set_more_queryfilter_options (request, args);
248}
249
250void set_more_queryfilter_options (FilterRequest_t &request,
251                   cgiargsclass &args) {
252
253  OptionValue_t option;
254  int arg_m = args.getintarg("m");
255 
256  option.name = "Maxdocs";
257  option.value = arg_m;
258  request.filterOptions.push_back (option);
259
260  //  option.name = "StartResults";
261  //  option.value = args["r"];
262  //  request.filterOptions.push_back (option);
263
264  //  option.name = "EndResults";
265  //  int endresults = args.getintarg("o") + (args.getintarg("r") - 1);
266  //  if ((endresults > arg_m) && (arg_m != -1)) endresults = arg_m;
267  //  option.value = endresults;
268  //  request.filterOptions.push_back (option);
269}
270
// Returns true when character is one this file treats as special for the
// given indexer: '#', '/' and '*' for mgpp (indexer_type 1); '?', '*', '~'
// and '^' for lucene (indexer_type 2). Any other indexer type has none.
bool is_special_character(int indexer_type, unsigned short character) {
  switch (indexer_type) {
  case 1: // mgpp
    switch (character) {
    case '#':
    case '/':
    case '*':
      return true;
    default:
      return false;
    }
  case 2: // lucene
    switch (character) {
    case '?':
    case '*':
    case '~':
    case '^':
      return true;
    default:
      return false;
    }
  default:
    return false;
  }
}
283
284// This function removes boolean operators from simple searches, and segments
285// chinese characters if segment=true
286void format_querystring (text_t &querystring, int querymode, bool segment) {
287  text_t formattedstring;
288
289  // advanced search, no segmenting, don't need to do anything
290  if (querymode == 1 && !segment) return;
291 
292  text_t::const_iterator here = querystring.begin();
293  text_t::const_iterator end = querystring.end();
294
295  // space is used to insert spaces between Chinese
296  // characters. No space is needed before the first
297  // Chinese character.
298  bool space = false;
299
300  // want to remove ()|!& from querystring so boolean queries are just
301  // "all the words" queries (unless querymode is advanced)
302  while (here != end) {
303    if ((querymode == 0) && (*here == '(' || *here == ')' || *here == '|' ||
304                 *here == '!' || *here == '&')) {
305      formattedstring.push_back(' ');
306    } else if (segment) {
307      if ((*here >= 0x2e80 && *here <= 0xd7a3) ||
308      ( *here >= 0xf900 && *here <= 0xfa6a)) {
309    /* text_t not big enough to handle these. */
310    /*    (*here >= 0x20000 && *here <= 0x2a6d6) ||
311      (*here >= 0x2f800 && *here <= 0x2fa1d)) { */
312   
313    // CJK character
314    if (!space) formattedstring.push_back (0x200b); // zero width space
315    formattedstring.push_back (*here);
316    formattedstring.push_back (0x200b);
317    space = true;
318      } else {
319   
320    // non-Chinese character
321    formattedstring.push_back (*here);
322    space = false;
323   
324      }
325   
326    } else {
327      formattedstring.push_back (*here);
328    }
329    ++here;
330  }
331  querystring = formattedstring;
332}
333
334
335   
336
337// search history tool
338// also used for form query macros
339text_t escape_quotes(const text_t &querystring) {
340
341  text_t::const_iterator here = querystring.begin();
342  text_t::const_iterator end = querystring.end();
343 
344  text_t escquery = "";
345  while (here != end) {
346    if (*here != '\'' && *here != '\"' && *here != '\n' && *here != '\r') escquery.push_back(*here);
347    else if (*here == '\n' || *here == '\r') {
348      escquery.push_back(' ');
349    } else {
350      escquery +="\\\\";
351      escquery.push_back(*here);
352    }
353
354    ++here;
355  }
356  return escquery;
357
358}
359
360// Parses the terms into words, and adds #si if necessary
361text_t addstemcase(const text_t &terms, const text_t &stem, const text_t &fold,
362           const int indexer_type) {
363 
364  // the default stem and case are set to 0 if this is being used, so we are only adding on qualifiers if stem or fold is 1.
365  if (stem == "0" && fold == "0") {
366    return terms;
367  }
368  // this is only for mgpp collections, shouldn't be called for anything else
369  if (indexer_type != 1) {
370    return terms;
371  }
372 
373  text_t outtext;
374  text_t word;
375
376  text_t::const_iterator here = terms.begin();
377  text_t::const_iterator end = terms.end();
378
379  while (here !=end) {
380
381    if (is_unicode_letdig(*here) || is_special_character(indexer_type, *here)) {
382      // not word boundary
383      word.push_back(*here);
384      ++here;   
385    }
386    else {
387      // found word boundary   
388      if (!word.empty() ) {
389    if (starts_with(word, "NEAR") || starts_with(word, "WITHIN")) {
390      outtext += word;
391      word.clear();
392    }
393    else {
394      word += "#";
395      if (stem == "1") word += "s";
396      if (fold == "1") word += "i";
397      outtext += word;
398      word.clear();
399    }
400      }
401      // this only used in advanced form, so we leave in boolean operators
402      if (*here == '\"' || *here == '&' || *here == '|' || *here == '!' ||
403      *here == '(' || *here == ')' || is_unicode_space(*here)) {
404    outtext.push_back(*here);
405      }
406      ++here;
407    }
408  }
409   
410  // get last word
411  if (!word.empty()) {
412    word += "#";
413    if (stem == "1") word += "s";
414    if (fold == "1") word += "i";
415    word += " ";
416    outtext += word;
417  }
418  return outtext;
419}
420
421
422// some query form parsing functions for use with mgpp & lucene
423
424void parse_reg_query_form(text_t &querystring, cgiargsclass &args, bool segment)
425{
426  querystring.clear();
427
428  int argct = args.getintarg("ct");
429  int argt = args.getintarg("t");// t=0 -and, t=1 - or
430  int argb = args.getintarg("b");
431   
432  text_t combine;
433
434  // lucene uses global combine, so only need this for mgpp
435  if (argct==1) {
436    if (argt == 0) combine = "&";
437    else combine = "|";
438  }
439 
440  text_t field = args["fqf"];
441  if (field.empty()) return; // no query
442  text_tarray fields;
443  splitchar(field.begin(), field.end(), ',', fields);
444 
445  text_t value = args["fqv"];
446  if (value.empty()) return; // somethings wrong
447  text_tarray values;
448  splitchar(value.begin(), value.end(), ',', values);
449
450
451  for (int i=0; i< values.size(); ++i) {
452    if (!values[i].empty()) {
453      text_t this_value = values[i];
454      // remove operators for simple search, segments text if necessary
455      format_querystring(this_value, argb, segment);
456      // add tag info for this field (and other processing)
457      format_field_info(this_value, fields[i], argct, argt, argb);
458      // add into query string
459      if (argct == 2) {
460    // lucene
461    // we don't worry about AND/OR, cos this is done by defaultcombineoperator
462    querystring += this_value+" ";
463      } else {
464    // mgpp
465    if (!querystring.empty()) {
466      querystring += " "+ combine+ " ";
467    }
468    querystring += this_value;
469      }
470    }
471  }
472}
473
474
475void parse_adv_query_form(text_t &querystring, cgiargsclass &args, bool segment){
476  querystring.clear();
477
478  const int argct = args.getintarg("ct");
479  int argt = 0;// arg t is either not used (lucene) or used for natural/ranked (mgpp), so we set it to 0 = AND, by default
480  int argb = args.getintarg("b");
481  text_t combine;
482  if (argct==1) {
483    combine = "&";
484  }
485  else { // lucene
486    combine = "AND";
487  }
488
489  text_t field = args["fqf"];
490  if (field.empty()) return; // no query
491  text_tarray fields;
492  splitchar(field.begin(), field.end(), ',', fields);
493 
494  text_t value = args["fqv"];
495  if (value.empty()) return; // somethings wrong
496  text_tarray values;
497  splitchar(value.begin(), value.end(), ',', values);
498
499  text_t comb = args["fqc"];
500  if (comb.empty()) return; //somethings wrong
501  text_tarray combs;
502  splitchar(comb.begin(), comb.end(), ',', combs);
503
504  text_tarray stems;
505  text_tarray folds;
506  if (argct == 1) {// mgpp - lucene doesn't do stem/case
507    text_t stem = args["fqs"];
508    if (stem.empty()) return; // somethings wrong
509    splitchar(stem.begin(), stem.end(), ',', stems);
510   
511    text_t fold = args["fqk"];
512    if (fold.empty()) return; // somethings wrong
513    splitchar(fold.begin(), fold.end(), ',', folds);
514  }
515 
516  for(int i=0; i< values.size(); ++i) {
517    if (!values[i].empty()) {
518      if (i!=0) {
519    if (argct==1) {
520      if (combs[i-1]=="and") combine = "&";
521      else if (combs[i-1]=="or")combine = "|";
522      else if (combs[i-1]=="not")combine = "!";
523    }
524    else { // lucene
525      if (combs[i-1]=="and") combine = "AND";
526      else if (combs[i-1]=="or")combine = "OR";
527      else if (combs[i-1]=="not")combine = "NOT";
528    }
529      }
530      text_t this_value = values[i];
531      // remove operators for simple search, segments text if necessary
532      format_querystring(this_value, argb, segment);
533      if (argct == 1) { // mgpp only
534    this_value = addstemcase(this_value, stems[i], folds[i], argct);
535      }
536      // add tag info for this field (and other processing)
537      format_field_info(this_value, fields[i], argct, argt, argb);
538      // add into query string
539      if (!querystring.empty()) {
540    querystring += " "+ combine+ " ";
541      }
542      querystring += this_value;
543     
544    }
545  }
546}
547
548// Extended addqueryelem for Human Info project
549void addqueryelem_ex(text_t &querystring, const text_t &tag,
550             const text_t &terms, const text_t &stem,
551             const text_t &fold,
552             const text_t& combine, const text_t& word_combine) {
553
554  if (!querystring.empty()) { // have to put and/or
555    querystring += " " + combine + " ";
556  }
557  text_t outtext; outtext.reserve(512);
558  text_t word; word.reserve(100);
559  //unsigned short c;                                                           
560  text_t::const_iterator here = terms.begin();
561  text_t::const_iterator end = terms.end();
562  bool inquote = false, firstword = true;
563
564  text_t word2; word2.reserve(256);
565   
566  while (here !=end) {
567    if (is_unicode_space(*here)) {
568      if (word2 == "AND") { word2.clear(); word2.push_back(7527); word2.appendcarr("AND", 3); word2.push_back(7527); }
569      else if (word2 == "OR") { word2.clear(); word2.push_back(7527); word2.appendcarr("OR", 2); word2.push_back(7527); }
570      else if (word2 == "NOT") { word2.clear(); word2.push_back(7527); word2.appendcarr("NOT", 3); word2.push_back(7527); }
571      else if (word2 == "NEAR") { word2.clear(); word2.push_back(7527); word2.appendcarr("NEAR", 4); word2.push_back(7527); }
572      else if (word2 == "WITHIN") { word2.clear(); word2.push_back(7527); word2.appendcarr("WITHIN", 6); word2.push_back(7527); }
573      if (inquote) {
574    word2.push_back(*here);
575      }
576      word.append(word2); word2.clear();
577           
578      if (!inquote && !word.empty() ) {
579    // found word boundary   
580               
581    if (stem == "1" || fold =="1") {
582      word += "#";
583      if (stem == "1") word += "s";
584      //else word += "u";
585                   
586      if (fold == "1") word += "i";
587      //else word += "c";
588    }
589    if (firstword) {
590      firstword = false;
591    } else {
592      outtext += " " + word_combine + " ";
593    }
594    outtext += "[" + word + "]:"+tag;
595    word.clear();
596      }
597      ++here;
598    } else if (*here == '\"') {
599      word2.push_back(*here);
600      inquote = !inquote;
601      ++here;
602    } else {
603      // not word boundary
604      word2.push_back(*here);
605      ++here;   
606    }
607  }
608   
609  // get last word
610  if (!word2.empty()) {
611    if (word2 == "AND") { word2.clear(); word2.push_back(7527); word2.appendcarr("AND", 3); word2.push_back(7527); }
612    else if (word2 == "OR") { word2.clear(); word2.push_back(7527); word2.appendcarr("OR", 2); word2.push_back(7527); }
613    else if (word2 == "NOT") { word2.clear(); word2.push_back(7527); word2.appendcarr("NOT", 3); word2.push_back(7527); }
614    else if (word2 == "NEAR") { word2.clear(); word2.push_back(7527); word2.appendcarr("NEAR", 4); word2.push_back(7527); }
615    else if (word2 == "WITHIN") { word2.clear(); word2.push_back(7527); word2.appendcarr("WITHIN", 6); word2.push_back(7527); }
616    word.append(word2); word2.clear();
617       
618    if (stem == "1"|| fold == "1") {
619      word += "#";
620      if (stem == "1") word += "s";
621      //else word += "u";
622           
623      if (fold == "1") word += "i";
624      //else word += "c";
625    }
626    if (!outtext.empty()) outtext += " " + word_combine + " ";
627    outtext += "[" + word + "]:"+tag;
628  }
629  querystring += "(" + outtext + ")";
630}
631
632void add_field_info(text_t &querystring, const text_t &tag, int type) {
633
634  if (tag == "") return; // do nothing
635  if (tag == "ZZ" && type == 1) return;  // mgpp doesn't use ZZ tag internally
636  if (type == 1) { //mgpp
637    querystring = "["+querystring+"]:"+tag;
638  } else if (type == 2) { // lucene
639    querystring = tag+":("+querystring+")";
640  }
641   
642}
643
644
645void format_field_info_lucene(text_t &querystring, text_t &tag, int argt, int argb) {
646
647  int type = 2; //lucene
648
649  if (argb==0) { // simple
650    // there will be no & or | as they should have already been removed
651    // just tag the entire thing
652    if (tag != "") {
653      add_field_info(querystring, tag, type);
654    }
655    return;
656  }
657
658  // need to replace & with &&, | with ||
659  text_t::const_iterator here = querystring.begin();
660  text_t::const_iterator end = querystring.end();
661
662  text_t finalquery = "";
663  while (here != end) {
664    if (*here ==  '&') {
665      finalquery.push_back('&');
666      finalquery.push_back('&');
667      while (*(here+1) == '&') {
668    ++here;
669      }
670    }
671    else if (*here == '|') {
672      finalquery.push_back('|');
673      finalquery.push_back('|');
674      while (*(here+1) == '|') {
675    ++here;
676      }
677    }
678    else {
679      finalquery.push_back(*here);
680    }
681    ++here;
682  }
683  querystring = finalquery;
684  add_field_info(querystring, tag, type);
685}
686
687
688void format_field_info_mgpp(text_t &querystring, text_t tag, int argt, int argb) {
689
690  if (tag == "ZZ") tag = ""; // ZZ is a special tag meaning no tag (all fields)
691  if (tag == "" && argb == 1) {
692    return; // no field specifier, advanced mode, the query stays as written
693  }
694
695  int type = 1; // mgpp
696
697  bool simple_and = (argb==0 && argt==0);
698  text_t finalquery = "";
699  text_t fieldpart ="";
700  text_t queryelem = "";
701  bool in_phrase = false;
702  bool in_field = false;
703
704  text_t::const_iterator here = querystring.begin();
705  text_t::const_iterator end = querystring.end();
706  while (here != end) {
707    if (is_unicode_letdig(*here)  || *here == '&' || is_special_character(type, *here)) {
708      queryelem.push_back(*here);
709    }
710    else if (*here == '|') {
711      in_field = false;
712    }
713    else if (*here == '!' || *here == '(' || *here == ')') {
714      if (!in_phrase) { // ignore these if in_phrase
715    // output field, then output operator
716    in_field = false;
717    if (!queryelem.empty()) {
718      if (!simple_and && !fieldpart.empty()) {
719        add_field_info(fieldpart, tag, type);
720        finalquery += fieldpart;
721        finalquery.push_back(' ');
722        fieldpart.clear();
723      }
724      fieldpart += queryelem;
725    }
726    if (!fieldpart.empty()) {
727      add_field_info(fieldpart, tag, type);
728      finalquery += fieldpart;
729      finalquery.push_back(' ');
730    }
731    fieldpart.clear();
732    queryelem.clear();
733    finalquery.push_back(*here);
734    finalquery.push_back(' ');
735      }
736    }
737    else if (*here == '"') {
738      queryelem.push_back(*here);
739      if (in_phrase == false) in_phrase = true;
740      else {
741    in_phrase = false;
742      }
743    }
744
745    // Found word boundary, in a phrase
746    else if (in_phrase) {
747      queryelem.push_back(*here);
748    }
749    // Found a word boundary
750    else {
751      if (!queryelem.empty()) {
752    if (queryelem == "&") {
753      in_field = true;
754      queryelem.clear();
755    }
756    else if (starts_with(queryelem, "NEAR") || starts_with(queryelem, "WITHIN")) {
757     
758      if (argb==1) {
759        // simple search, these not allowed
760        in_field = true;
761        fieldpart += queryelem;
762        fieldpart.push_back(' ');
763      }
764      queryelem.clear();
765     
766    }
767    else {
768      if (!simple_and && !in_field) {
769        if (!fieldpart.empty()) {
770          add_field_info(fieldpart, tag, type);
771          finalquery += fieldpart;
772          finalquery.push_back(' ');
773          fieldpart.clear();
774        }
775      }
776     
777      fieldpart += queryelem;
778      fieldpart.push_back(' ');
779      queryelem.clear();
780    }
781      }
782    }
783    ++here;
784  }
785  // at the end
786  if (!queryelem.empty()) {
787    if (!simple_and && !in_field && !fieldpart.empty()) {
788      add_field_info(fieldpart, tag, type);
789      finalquery += fieldpart;
790      finalquery.push_back(' ');
791      fieldpart.clear();
792    }
793    fieldpart += queryelem;
794  }
795  if (!fieldpart.empty()) {
796    add_field_info(fieldpart, tag, type);
797    finalquery += fieldpart;
798    fieldpart.clear();
799
800    // doesn't the following just leave a dangling space at the end ?? (used to make mgpp crash)
801    // consider cutting this line
802    finalquery.push_back(' ');
803  }
804
805  querystring  = finalquery;
806}
807
808
809void format_field_info(text_t &querystring, text_t tag, int argct, int argt, int argb) { 
810  if (argct == 1) {
811    format_field_info_mgpp(querystring, tag, argt, argb);
812  } else if (argct == 2) {
813    format_field_info_lucene(querystring, tag, argt, argb);
814  }
815}
816
817void mgpp_adddateelem(text_t& querystring, const int date)
818{
819  querystring.appendcstr(" [");
820  if(date<0) {
821      querystring.appendcstr("bc");
822      querystring.appendint((date*-1));
823  }
824  else {
825    querystring.appendint(date);
826  }
827  querystring.appendcstr("]:CV");
828}
829
830void lucene_adddateelem(text_t& querystring, const int date)
831{
832  querystring.appendcstr(" CV:(");
833  if(date<0) {
834      querystring.appendcstr("bc");
835      querystring.appendint((date*-1));
836  }
837  else {
838    querystring.appendint(date);
839  }
840  querystring.appendcstr(")");
841}
842
843
844void add_dates(text_t &querystring, int startdate, int enddate,
845           int startbc, int endbc, int ct)
846{
847  if(startdate)
848    {
849      int querystringis = 0;
850      text_t::const_iterator here = querystring.begin();
851      text_t::const_iterator end = querystring.end();
852      while(here!=end)
853    {
854      if(!(isspace((*here)))){
855        here = end;
856        querystringis = 1;
857      }
858      else
859        ++here;
860    }
861      //converting BCE dates
862      if(startbc && startdate > 0)
863    {
864      startdate *= -1;
865    }
866      if(endbc && enddate > 0)
867    {
868      enddate *= -1;
869    }
870       if(enddate != 0 && enddate<startdate)
871    {
872      cout<<"enddate too small"<<endl;
873      return;
874    }
875       if(querystringis)
876     querystring.appendcstr(" AND");
877       if(!enddate)
878     {
879       if (ct==1) {
880         mgpp_adddateelem(querystring,startdate);
881       }
882       else { // lucene
883         lucene_adddateelem(querystring,startdate);
884       }
885     }
886       else{
887     int nextdate = startdate;
888     querystring.appendcstr(" (");
889     while(nextdate<=enddate)
890       {
891         if(nextdate!=0) {
892           if (ct==1) {
893         mgpp_adddateelem(querystring,nextdate);
894           }
895           else { // lucene
896         lucene_adddateelem(querystring,nextdate);
897           }
898         }
899         ++nextdate;
900       }
901     querystring.appendcstr(" )");
902       }
903    }
904
905}
Note: See TracBrowser for help on using the browser.