root/gsdl/trunk/runtime-src/src/recpt/querytools.cpp @ 17796

Revision 17796, 24.2 KB (checked in by kjdon, 12 years ago)

In lucene, if you don't specify a tag to search on then it uses the default field (text?), so for searching in the ZZ field we do need to keep the ZZ specifier, unlike for mgpp

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * querytools.cpp --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "querytools.h"
27#include <ctype.h>
28#include "unitool.h" // for is_unicode_letdig
29
30// sets the ct, qt, qto arguments
31void set_query_type_args(ColInfoResponse_t *cinfo, cgiargsclass &args) {
32
33  if (args["ct"].empty()) {
34    text_t build_type = cinfo->buildType;
35    if (build_type == "mgpp") {
36      args["ct"] = "1";
37    } else if (build_type == "lucene") {
38      args["ct"] = "2";
39    } else {
40      args["ct"] = "0";
41    }
42  }
43  text_t arg_ct = args["ct"];
44  if (arg_ct == "0") {
45    // mg
46    args["qt"] = "0";
47    args["qto"] = "0";
48    return;
49  }
50
51  if (!args["qt"].empty() && !args["qto"].empty()) {
52    return;
53  }
54 
55  text_tmap::iterator check = cinfo->format.find("SearchTypes");
56  text_t search_types;
57  if(check != cinfo->format.end() && !(*check).second.empty()){
58    search_types = (*check).second;
59  } else {
60    // assume plain,form
61    if (args["qto"].empty()) args["qto"] = "3";
62    if (args["qt"].empty()) {
63      int arg_qto = args.getintarg("qto");
64      if (arg_qto == 2) {
65    args["qt"] = "1";
66      } else {
67    args["qt"] = "0";
68      }
69    }
70    return;
71  }
72 
73 
74  if (args["qto"].empty()) {
75    unsigned int type = 0;
76    if (findword(search_types.begin(), search_types.end(), "form") != search_types.end()) {
77      type |= 2;
78    }
79    if (findword(search_types.begin(), search_types.end(), "plain") != search_types.end()) {
80      type |= 1;
81    }
82    args.setintarg("qto", type);
83  }
84 
85  if (args["qt"].empty()) {
86    int arg_qto = args.getintarg("qto");
87    if (arg_qto == 2 || (arg_qto == 3 && starts_with(search_types, "form"))) {
88      args["qt"] = "1";
89    } else {
90      args["qt"] = "0";
91    }
92  }
93}
94
95// sets the ks, ss, afs (casesupport, stemsupport, accentfoldsupport) args
96void set_stem_index_args(ColInfoResponse_t *cinfo, cgiargsclass &args) {
97  int stemIndexes = cinfo->stemIndexes;
98
99  if (stemIndexes & SIcasefold) {
100    args["ks"] = 1;
101  }
102  if (stemIndexes & SIstem) {
103    args["ss"] = 1;
104  }
105  if (stemIndexes & SIaccentfold) {
106    args["afs"] = 1;
107  }
108
109}
110
111// request.filterResultOptions and request.fields (if required) should
112// be set from the calling code
113void set_queryfilter_options (FilterRequest_t &request,
114                  const text_t &querystring,
115                  cgiargsclass &args) {
116
117  request.filterName = "QueryFilter";
118
119  OptionValue_t option;
120
121  option.name = "Term";
122  option.value = querystring;
123  request.filterOptions.push_back (option);
124
125  option.name = "QueryType";
126  option.value = (args.getintarg("t")) ? "ranked" : "boolean";
127  request.filterOptions.push_back (option);
128
129  option.name = "MatchMode";
130  // mgpp in advanced mode, always use some query
131  if (args.getintarg("ct") == 1 && args.getintarg("b") == 1) {
132    option.value = "some";
133  } else {
134    option.value = (args.getintarg("t")) ? "some" : "all";
135  }
136  request.filterOptions.push_back (option);
137
138  option.name = "Casefold";
139  option.value = (args.getintarg("k")) ? "true" : "false";
140  request.filterOptions.push_back (option);
141
142  option.name = "Stem";
143  option.value = (args.getintarg("s")) ? "true" : "false";
144  request.filterOptions.push_back (option);
145
146  option.name = "AccentFold";
147  option.value = (args.getintarg("af")) ? "true" : "false";
148  request.filterOptions.push_back (option);
149 
150  if (!args["h"].empty()) {
151    option.name = "Index";
152    option.value = args["h"];
153    request.filterOptions.push_back (option);
154  }
155
156  if (!args["j"].empty()) {
157    option.name = "Subcollection";
158    option.value = args["j"];
159    request.filterOptions.push_back (option);
160  }
161
162  if (!args["n"].empty()) {
163    option.name = "Language";
164    option.value = args["n"];
165    request.filterOptions.push_back (option);
166  }
167 
168  if (!args["g"].empty()) { // granularity for mgpp
169    option.name = "Level";
170    option.value = args["g"];
171    request.filterOptions.push_back (option);
172  }
173
174  if (!args["fs"].empty()) { // filter string for lucene
175    option.name = "FilterString";
176    option.value = args["fs"];
177    request.filterOptions.push_back (option);
178  }
179
180  if (!args["sf"].empty()) { // sort field for lucene
181    option.name = "SortField";
182    option.value = args["sf"];
183    request.filterOptions.push_back (option);
184  }
185
186  if (!args["fuzziness"].empty() && args["fuzziness"] != "100") { // fuzziness value for lucene
187    option.name = "Fuzziness";
188    option.value = (text_t) "0." + args["fuzziness"];
189    request.filterOptions.push_back (option);
190  }
191
192  set_more_queryfilter_options (request, args);
193}
194
195void set_queryfilter_options (FilterRequest_t &request,
196                  const text_t &querystring1,
197                  const text_t &querystring2, cgiargsclass &args) {
198
199  set_queryfilter_options (request, querystring1, args);
200
201  // fill in the second query if needed
202  if (!args["cq2"].empty()) {
203    OptionValue_t option;
204
205    option.name = "CombineQuery";
206    option.value = args["cq2"];
207    request.filterOptions.push_back (option);
208   
209    option.name = "Term";
210    option.value = querystring2;
211    request.filterOptions.push_back (option);
212   
213    option.name = "QueryType";
214    option.value = (args.getintarg("t")) ? "ranked" : "boolean";
215    request.filterOptions.push_back (option);
216
217    option.name = "Casefold";
218    option.value = (args.getintarg("k")) ? "true" : "false";
219    request.filterOptions.push_back (option);
220
221    option.name = "Stem";
222    option.value = (args.getintarg("s")) ? "true" : "false";
223    request.filterOptions.push_back (option);
224
225    option.name = "AccentFold";
226    option.value = (args.getintarg("af")) ? "true" : "false";
227    request.filterOptions.push_back (option);
228
229    if (!args["h2"].empty()) {
230      option.name = "Index";
231      option.value = args["h2"];
232      request.filterOptions.push_back (option);
233    }
234
235    if (!args["j2"].empty()) {
236      option.name = "Subcollection";
237      option.value = args["j2"];
238      request.filterOptions.push_back (option);
239    }
240
241    if (!args["n2"].empty()) {
242      option.name = "Language";
243      option.value = args["n2"];
244      request.filterOptions.push_back (option);
245    }
246  }
247  set_more_queryfilter_options (request, args);
248}
249
250void set_more_queryfilter_options (FilterRequest_t &request,
251                   cgiargsclass &args) {
252
253  OptionValue_t option;
254  int arg_m = args.getintarg("m");
255 
256  option.name = "Maxdocs";
257  option.value = arg_m;
258  request.filterOptions.push_back (option);
259
260  //  option.name = "StartResults";
261  //  option.value = args["r"];
262  //  request.filterOptions.push_back (option);
263
264  //  option.name = "EndResults";
265  //  int endresults = args.getintarg("o") + (args.getintarg("r") - 1);
266  //  if ((endresults > arg_m) && (arg_m != -1)) endresults = arg_m;
267  //  option.value = endresults;
268  //  request.filterOptions.push_back (option);
269}
270
// Returns true when character is one of the query-syntax characters of
// the given indexer: '#', '/' and '*' for mgpp (indexer_type 1), or
// '?', '*', '~' and '^' for lucene (indexer_type 2). Any other indexer
// type has no special characters.
bool is_special_character(int indexer_type, unsigned short character) {
  switch (indexer_type) {
  case 1: // mgpp
    return (character == '#' || character == '/' || character == '*');
  case 2: // lucene
    return (character == '?' || character == '*' || character == '~' ||
            character == '^');
  default:
    return false;
  }
}
283
284// This function removes boolean operators from simple searches, and segments
285// chinese characters if segment=true
286void format_querystring (text_t &querystring, int querymode, bool segment) {
287  text_t formattedstring;
288
289  // advanced search, no segmenting, don't need to do anything
290  if (querymode == 1 && !segment) return;
291 
292  text_t::const_iterator here = querystring.begin();
293  text_t::const_iterator end = querystring.end();
294
295  // space is used to insert spaces between Chinese
296  // characters. No space is needed before the first
297  // Chinese character.
298  bool space = false;
299
300  // want to remove ()|!& from querystring so boolean queries are just
301  // "all the words" queries (unless querymode is advanced)
302  while (here != end) {
303    if ((querymode == 0) && (*here == '(' || *here == ')' || *here == '|' ||
304                 *here == '!' || *here == '&')) {
305      formattedstring.push_back(' ');
306    } else if (segment) {
307      if ((*here >= 0x2e80 && *here <= 0xd7a3) ||
308      ( *here >= 0xf900 && *here <= 0xfa6a)) {
309    /* text_t not big enough to handle these. */
310    /*    (*here >= 0x20000 && *here <= 0x2a6d6) ||
311      (*here >= 0x2f800 && *here <= 0x2fa1d)) { */
312   
313    // CJK character
314    if (!space) formattedstring.push_back (0x200b); // zero width space
315    formattedstring.push_back (*here);
316    formattedstring.push_back (0x200b);
317    space = true;
318      } else {
319   
320    // non-Chinese character
321    formattedstring.push_back (*here);
322    space = false;
323   
324      }
325   
326    } else {
327      formattedstring.push_back (*here);
328    }
329    ++here;
330  }
331  querystring = formattedstring;
332}
333
334
335   
336
337// search history tool
338// also used for form query macros
339text_t escape_quotes(const text_t &querystring) {
340
341  text_t::const_iterator here = querystring.begin();
342  text_t::const_iterator end = querystring.end();
343 
344  text_t escquery = "";
345  while (here != end) {
346    if (*here != '\'' && *here != '\"' && *here != '\n' && *here != '\r') escquery.push_back(*here);
347    else if (*here == '\n' || *here == '\r') {
348      escquery.push_back(' ');
349    } else {
350      escquery +="\\\\";
351      escquery.push_back(*here);
352    }
353
354    ++here;
355  }
356  return escquery;
357
358}
359
360// Parses the terms into words, and adds #si if necessary
361text_t addstemcase(const text_t &terms, const text_t &stem, const text_t &fold,
362           const int indexer_type) {
363 
364  // the default stem and case are set to 0 if this is being used, so we are only adding on qualifiers if stem or fold is 1.
365  if (stem == "0" && fold == "0") {
366    return terms;
367  }
368  // this is only for mgpp collections, shouldn't be called for anything else
369  if (indexer_type != 1) {
370    return terms;
371  }
372 
373  text_t outtext;
374  text_t word;
375
376  text_t::const_iterator here = terms.begin();
377  text_t::const_iterator end = terms.end();
378
379  while (here !=end) {
380
381    if (is_unicode_letdig(*here) || is_special_character(indexer_type, *here)) {
382      // not word boundary
383      word.push_back(*here);
384      ++here;   
385    }
386    else {
387      // found word boundary   
388      if (!word.empty() ) {
389    if (starts_with(word, "NEAR") || starts_with(word, "WITHIN")) {
390      outtext += word;
391      word.clear();
392    }
393    else {
394      word += "#";
395      if (stem == "1") word += "s";
396      if (fold == "1") word += "i";
397      outtext += word;
398      word.clear();
399    }
400      }
401      // this only used in advanced form, so we leave in boolean operators
402      if (*here == '\"' || *here == '&' || *here == '|' || *here == '!' ||
403      *here == '(' || *here == ')' || is_unicode_space(*here)) {
404    outtext.push_back(*here);
405      }
406      ++here;
407    }
408  }
409   
410  // get last word
411  if (!word.empty()) {
412    word += "#";
413    if (stem == "1") word += "s";
414    if (fold == "1") word += "i";
415    word += " ";
416    outtext += word;
417  }
418  return outtext;
419}
420
421
422// some query form parsing functions for use with mgpp & lucene
423
424void parse_reg_query_form(text_t &querystring, cgiargsclass &args, bool segment)
425{
426  querystring.clear();
427
428  int argct = args.getintarg("ct");
429  int argt = args.getintarg("t");// t=0 -and, t=1 - or
430  int argb = args.getintarg("b");
431   
432  text_t combine;
433
434  // lucene uses global combine, so only need this for mgpp
435  if (argct==1) {
436    if (argt == 0) combine = "&";
437    else combine = "|";
438  }
439 
440  text_t field = args["fqf"];
441  if (field.empty()) return; // no query
442  text_tarray fields;
443  splitchar(field.begin(), field.end(), ',', fields);
444 
445  text_t value = args["fqv"];
446  if (value.empty()) return; // somethings wrong
447  text_tarray values;
448  splitchar(value.begin(), value.end(), ',', values);
449
450
451  for (int i=0; i< values.size(); ++i) {
452    if (!values[i].empty()) {
453      text_t this_value = values[i];
454      // remove operators for simple search, segments text if necessary
455      format_querystring(this_value, argb, segment);
456      // add tag info for this field (and other processing)
457      format_field_info(this_value, fields[i], argct, argt, argb);
458      // add into query string
459      if (argct == 2) {
460    // lucene
461    // we don't worry about AND/OR, cos this is done by defaultcombineoperator
462    querystring += this_value+" ";
463      } else {
464    // mgpp
465    if (!querystring.empty()) {
466      querystring += " "+ combine+ " ";
467    }
468    querystring += this_value;
469      }
470    }
471  }
472}
473
474
475void parse_adv_query_form(text_t &querystring, cgiargsclass &args, bool segment){
476  querystring.clear();
477
478  const int argct = args.getintarg("ct");
479  int argt = 0;// arg t is either not used (lucene) or used for natural/ranked (mgpp), so we set it to 0 = AND, by default
480  int argb = args.getintarg("b");
481  text_t combine;
482  if (argct==1) {
483    combine = "&";
484  }
485  else { // lucene
486    combine = "AND";
487  }
488
489  text_t field = args["fqf"];
490  if (field.empty()) return; // no query
491  text_tarray fields;
492  splitchar(field.begin(), field.end(), ',', fields);
493 
494  text_t value = args["fqv"];
495  if (value.empty()) return; // somethings wrong
496  text_tarray values;
497  splitchar(value.begin(), value.end(), ',', values);
498
499  text_t comb = args["fqc"];
500  if (comb.empty()) return; //somethings wrong
501  text_tarray combs;
502  splitchar(comb.begin(), comb.end(), ',', combs);
503
504  text_tarray stems;
505  text_tarray folds;
506  if (argct == 1) {// mgpp - lucene doesn't do stem/case
507    text_t stem = args["fqs"];
508    if (stem.empty()) return; // somethings wrong
509    splitchar(stem.begin(), stem.end(), ',', stems);
510   
511    text_t fold = args["fqk"];
512    if (fold.empty()) return; // somethings wrong
513    splitchar(fold.begin(), fold.end(), ',', folds);
514  }
515 
516  for(int i=0; i< values.size(); ++i) {
517    if (!values[i].empty()) {
518      if (i!=0) {
519    if (argct==1) {
520      if (combs[i-1]=="and") combine = "&";
521      else if (combs[i-1]=="or")combine = "|";
522      else if (combs[i-1]=="not")combine = "!";
523    }
524    else { // lucene
525      if (combs[i-1]=="and") combine = "AND";
526      else if (combs[i-1]=="or")combine = "OR";
527      else if (combs[i-1]=="not")combine = "NOT";
528    }
529      }
530      text_t this_value = values[i];
531      // remove operators for simple search, segments text if necessary
532      format_querystring(this_value, argb, segment);
533      if (argct == 1) { // mgpp only
534    this_value = addstemcase(this_value, stems[i], folds[i], argct);
535      }
536      // add tag info for this field (and other processing)
537      format_field_info(this_value, fields[i], argct, argt, argb);
538      // add into query string
539      if (!querystring.empty()) {
540    querystring += " "+ combine+ " ";
541      }
542      querystring += this_value;
543     
544    }
545  }
546}
547
548// Extended addqueryelem for Human Info project
549void addqueryelem_ex(text_t &querystring, const text_t &tag,
550             const text_t &terms, const text_t &stem,
551             const text_t &fold,
552             const text_t& combine, const text_t& word_combine) {
553
554  if (!querystring.empty()) { // have to put and/or
555    querystring += " " + combine + " ";
556  }
557  text_t outtext; outtext.reserve(512);
558  text_t word; word.reserve(100);
559  //unsigned short c;                                                           
560  text_t::const_iterator here = terms.begin();
561  text_t::const_iterator end = terms.end();
562  bool inquote = false, firstword = true;
563
564  text_t word2; word2.reserve(256);
565   
566  while (here !=end) {
567    if (is_unicode_space(*here)) {
568      if (word2 == "AND") { word2.clear(); word2.push_back(7527); word2.appendcarr("AND", 3); word2.push_back(7527); }
569      else if (word2 == "OR") { word2.clear(); word2.push_back(7527); word2.appendcarr("OR", 2); word2.push_back(7527); }
570      else if (word2 == "NOT") { word2.clear(); word2.push_back(7527); word2.appendcarr("NOT", 3); word2.push_back(7527); }
571      else if (word2 == "NEAR") { word2.clear(); word2.push_back(7527); word2.appendcarr("NEAR", 4); word2.push_back(7527); }
572      else if (word2 == "WITHIN") { word2.clear(); word2.push_back(7527); word2.appendcarr("WITHIN", 6); word2.push_back(7527); }
573      if (inquote) {
574    word2.push_back(*here);
575      }
576      word.append(word2); word2.clear();
577           
578      if (!inquote && !word.empty() ) {
579    // found word boundary   
580               
581    if (stem == "1" || fold =="1") {
582      word += "#";
583      if (stem == "1") word += "s";
584      //else word += "u";
585                   
586      if (fold == "1") word += "i";
587      //else word += "c";
588    }
589    if (firstword) {
590      firstword = false;
591    } else {
592      outtext += " " + word_combine + " ";
593    }
594    outtext += "[" + word + "]:"+tag;
595    word.clear();
596      }
597      ++here;
598    } else if (*here == '\"') {
599      word2.push_back(*here);
600      inquote = !inquote;
601      ++here;
602    } else {
603      // not word boundary
604      word2.push_back(*here);
605      ++here;   
606    }
607  }
608   
609  // get last word
610  if (!word2.empty()) {
611    if (word2 == "AND") { word2.clear(); word2.push_back(7527); word2.appendcarr("AND", 3); word2.push_back(7527); }
612    else if (word2 == "OR") { word2.clear(); word2.push_back(7527); word2.appendcarr("OR", 2); word2.push_back(7527); }
613    else if (word2 == "NOT") { word2.clear(); word2.push_back(7527); word2.appendcarr("NOT", 3); word2.push_back(7527); }
614    else if (word2 == "NEAR") { word2.clear(); word2.push_back(7527); word2.appendcarr("NEAR", 4); word2.push_back(7527); }
615    else if (word2 == "WITHIN") { word2.clear(); word2.push_back(7527); word2.appendcarr("WITHIN", 6); word2.push_back(7527); }
616    word.append(word2); word2.clear();
617       
618    if (stem == "1"|| fold == "1") {
619      word += "#";
620      if (stem == "1") word += "s";
621      //else word += "u";
622           
623      if (fold == "1") word += "i";
624      //else word += "c";
625    }
626    if (!outtext.empty()) outtext += " " + word_combine + " ";
627    outtext += "[" + word + "]:"+tag;
628  }
629  querystring += "(" + outtext + ")";
630}
631
632void add_field_info(text_t &querystring, const text_t &tag, int type) {
633
634  if (tag == "") return; // do nothing
635  if (tag == "ZZ" && type == 1) return;  // mgpp doesn't use ZZ tag internally
636  if (type == 1) { //mgpp
637    querystring = "["+querystring+"]:"+tag;
638  } else if (type == 2) { // lucene
639    querystring = tag+":("+querystring+")";
640  }
641   
642}
643
644
645void format_field_info_lucene(text_t &querystring, text_t &tag, int argt, int argb) {
646
647  int type = 2; //lucene
648
649  if (argb==0) { // simple
650    // there will be no & or | as they should have already been removed
651    // just tag the entire thing
652    if (tag != "") {
653      add_field_info(querystring, tag, type);
654    }
655    return;
656  }
657
658  // need to replace & with &&, | with ||
659  text_t::const_iterator here = querystring.begin();
660  text_t::const_iterator end = querystring.end();
661
662  text_t finalquery = "";
663  while (here != end) {
664    if (*here ==  '&') {
665      finalquery.push_back('&');
666      finalquery.push_back('&');
667      while (*(here+1) == '&') {
668    ++here;
669      }
670    }
671    else if (*here == '|') {
672      finalquery.push_back('|');
673      finalquery.push_back('|');
674      while (*(here+1) == '|') {
675    ++here;
676      }
677    }
678    else {
679      finalquery.push_back(*here);
680    }
681    ++here;
682  }
683  querystring = finalquery;
684  add_field_info(querystring, tag, type);
685}
686
687
688void format_field_info_mgpp(text_t &querystring, text_t tag, int argt, int argb) {
689
690  if (tag == "ZZ") tag = ""; // ZZ is a special tag meaning no tag (all fields)
691  if (tag == "" && argb == 1) {
692    return; // no field specifier, advanced mode, the query stays as written
693  }
694
695  int type = 1; // mgpp
696
697  bool simple_and = (argb==0 && argt==0);
698  text_t finalquery = "";
699  text_t fieldpart ="";
700  text_t queryelem = "";
701  bool in_phrase = false;
702  bool in_field = false;
703
704  text_t::const_iterator here = querystring.begin();
705  text_t::const_iterator end = querystring.end();
706  while (here != end) {
707    if (is_unicode_letdig(*here)  || *here == '&' || is_special_character(type, *here)) {
708      queryelem.push_back(*here);
709    }
710    else if (*here == '|') {
711      in_field = false;
712    }
713    else if (*here == '!' || *here == '(' || *here == ')') {
714      if (!in_phrase) { // ignore these if in_phrase
715    // output field, then output operator
716    in_field = false;
717    if (!queryelem.empty()) {
718      if (!simple_and && !fieldpart.empty()) {
719        add_field_info(fieldpart, tag, type);
720        finalquery += fieldpart;
721        finalquery.push_back(' ');
722        fieldpart.clear();
723      }
724      fieldpart += queryelem;
725    }
726    if (!fieldpart.empty()) {
727      add_field_info(fieldpart, tag, type);
728      finalquery += fieldpart;
729      finalquery.push_back(' ');
730    }
731    fieldpart.clear();
732    queryelem.clear();
733    finalquery.push_back(*here);
734    finalquery.push_back(' ');
735      }
736    }
737    else if (*here == '"') {
738      queryelem.push_back(*here);
739      if (in_phrase == false) in_phrase = true;
740      else {
741    in_phrase = false;
742      }
743    }
744
745    // Found word boundary, in a phrase
746    else if (in_phrase) {
747      queryelem.push_back(*here);
748    }
749    // Found a word boundary
750    else {
751      if (!queryelem.empty()) {
752    if (queryelem == "&") {
753      in_field = true;
754      queryelem.clear();
755    }
756    else if (starts_with(queryelem, "NEAR") || starts_with(queryelem, "WITHIN")) {
757     
758      if (argb==1) {
759        // simple search, these not allowed
760        in_field = true;
761        fieldpart += queryelem;
762        fieldpart.push_back(' ');
763      }
764      queryelem.clear();
765     
766    }
767    else {
768      if (!simple_and && !in_field) {
769        if (!fieldpart.empty()) {
770          add_field_info(fieldpart, tag, type);
771          finalquery += fieldpart;
772          finalquery.push_back(' ');
773          fieldpart.clear();
774        }
775      }
776     
777      fieldpart += queryelem;
778      fieldpart.push_back(' ');
779      queryelem.clear();
780    }
781      }
782    }
783    ++here;
784  }
785  // at the end
786  if (!queryelem.empty()) {
787    if (!simple_and && !in_field && !fieldpart.empty()) {
788      add_field_info(fieldpart, tag, type);
789      finalquery += fieldpart;
790      finalquery.push_back(' ');
791      fieldpart.clear();
792    }
793    fieldpart += queryelem;
794  }
795  if (!fieldpart.empty()) {
796    add_field_info(fieldpart, tag, type);
797    finalquery += fieldpart;
798    fieldpart.clear();
799    finalquery.push_back(' ');
800  }
801
802  querystring  = finalquery;
803}
804
805
806void format_field_info(text_t &querystring, text_t tag, int argct, int argt, int argb) { 
807  if (argct == 1) {
808    format_field_info_mgpp(querystring, tag, argt, argb);
809  } else if (argct == 2) {
810    format_field_info_lucene(querystring, tag, argt, argb);
811  }
812}
813
814void mgpp_adddateelem(text_t& querystring, const int date)
815{
816  querystring.appendcstr(" [");
817  if(date<0) {
818      querystring.appendcstr("bc");
819      querystring.appendint((date*-1));
820  }
821  else {
822    querystring.appendint(date);
823  }
824  querystring.appendcstr("]:CV");
825}
826
827void lucene_adddateelem(text_t& querystring, const int date)
828{
829  querystring.appendcstr(" CV:(");
830  if(date<0) {
831      querystring.appendcstr("bc");
832      querystring.appendint((date*-1));
833  }
834  else {
835    querystring.appendint(date);
836  }
837  querystring.appendcstr(")");
838}
839
840
841void add_dates(text_t &querystring, int startdate, int enddate,
842           int startbc, int endbc, int ct)
843{
844  if(startdate)
845    {
846      int querystringis = 0;
847      text_t::const_iterator here = querystring.begin();
848      text_t::const_iterator end = querystring.end();
849      while(here!=end)
850    {
851      if(!(isspace((*here)))){
852        here = end;
853        querystringis = 1;
854      }
855      else
856        ++here;
857    }
858      //converting BCE dates
859      if(startbc && startdate > 0)
860    {
861      startdate *= -1;
862    }
863      if(endbc && enddate > 0)
864    {
865      enddate *= -1;
866    }
867       if(enddate != 0 && enddate<startdate)
868    {
869      cout<<"enddate too small"<<endl;
870      return;
871    }
872       if(querystringis)
873     querystring.appendcstr(" AND");
874       if(!enddate)
875     {
876       if (ct==1) {
877         mgpp_adddateelem(querystring,startdate);
878       }
879       else { // lucene
880         lucene_adddateelem(querystring,startdate);
881       }
882     }
883       else{
884     int nextdate = startdate;
885     querystring.appendcstr(" (");
886     while(nextdate<=enddate)
887       {
888         if(nextdate!=0) {
889           if (ct==1) {
890         mgpp_adddateelem(querystring,nextdate);
891           }
892           else { // lucene
893         lucene_adddateelem(querystring,nextdate);
894           }
895         }
896         ++nextdate;
897       }
898     querystring.appendcstr(" )");
899       }
900    }
901
902}
Note: See TracBrowser for help on using the browser.