source: gsdl/trunk/runtime-src/src/recpt/querytools.cpp@ 18459

Last change on this file since 18459 was 18459, checked in by davidb, 15 years ago

A space is being added to the end of an mgpp query when it is formatted. This used to cause the mgpp code to crash. Have fixed the mgpp code to be more tolerant, and marked the point in querytools.cpp where I believe the space is being unnecessarily added (but haven't removed the line in case there is some other reason why the space is needed).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 24.4 KB
Line 
1/**********************************************************************
2 *
3 * querytools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "querytools.h"
27#include <ctype.h>
28#include "unitool.h" // for is_unicode_letdig
29
30// sets the ct, qt, qto arguments
31void set_query_type_args(ColInfoResponse_t *cinfo, cgiargsclass &args) {
32
33 if (args["ct"].empty()) {
34 text_t build_type = cinfo->buildType;
35 if (build_type == "mgpp") {
36 args["ct"] = "1";
37 } else if (build_type == "lucene") {
38 args["ct"] = "2";
39 } else {
40 args["ct"] = "0";
41 }
42 }
43 text_t arg_ct = args["ct"];
44 if (arg_ct == "0") {
45 // mg
46 args["qt"] = "0";
47 args["qto"] = "0";
48 return;
49 }
50
51 if (!args["qt"].empty() && !args["qto"].empty()) {
52 return;
53 }
54
55 text_tmap::iterator check = cinfo->format.find("SearchTypes");
56 text_t search_types;
57 if(check != cinfo->format.end() && !(*check).second.empty()){
58 search_types = (*check).second;
59 } else {
60 // assume plain,form
61 if (args["qto"].empty()) args["qto"] = "3";
62 if (args["qt"].empty()) {
63 int arg_qto = args.getintarg("qto");
64 if (arg_qto == 2) {
65 args["qt"] = "1";
66 } else {
67 args["qt"] = "0";
68 }
69 }
70 return;
71 }
72
73
74 if (args["qto"].empty()) {
75 unsigned int type = 0;
76 if (findword(search_types.begin(), search_types.end(), "form") != search_types.end()) {
77 type |= 2;
78 }
79 if (findword(search_types.begin(), search_types.end(), "plain") != search_types.end()) {
80 type |= 1;
81 }
82 args.setintarg("qto", type);
83 }
84
85 if (args["qt"].empty()) {
86 int arg_qto = args.getintarg("qto");
87 if (arg_qto == 2 || (arg_qto == 3 && starts_with(search_types, "form"))) {
88 args["qt"] = "1";
89 } else {
90 args["qt"] = "0";
91 }
92 }
93}
94
95// sets the ks, ss, afs (casesupport, stemsupport, accentfoldsupport) args
96void set_stem_index_args(ColInfoResponse_t *cinfo, cgiargsclass &args) {
97 int stemIndexes = cinfo->stemIndexes;
98
99 if (stemIndexes & SIcasefold) {
100 args["ks"] = 1;
101 }
102 if (stemIndexes & SIstem) {
103 args["ss"] = 1;
104 }
105 if (stemIndexes & SIaccentfold) {
106 args["afs"] = 1;
107 }
108
109}
110
111// request.filterResultOptions and request.fields (if required) should
112// be set from the calling code
113void set_queryfilter_options (FilterRequest_t &request,
114 const text_t &querystring,
115 cgiargsclass &args) {
116
117 request.filterName = "QueryFilter";
118
119 OptionValue_t option;
120
121 option.name = "Term";
122 option.value = querystring;
123 request.filterOptions.push_back (option);
124
125 option.name = "QueryType";
126 option.value = (args.getintarg("t")) ? "ranked" : "boolean";
127 request.filterOptions.push_back (option);
128
129 option.name = "MatchMode";
130 // mgpp in advanced mode, always use some query
131 if (args.getintarg("ct") == 1 && args.getintarg("b") == 1) {
132 option.value = "some";
133 } else {
134 option.value = (args.getintarg("t")) ? "some" : "all";
135 }
136 request.filterOptions.push_back (option);
137
138 option.name = "Casefold";
139 option.value = (args.getintarg("k")) ? "true" : "false";
140 request.filterOptions.push_back (option);
141
142 option.name = "Stem";
143 option.value = (args.getintarg("s")) ? "true" : "false";
144 request.filterOptions.push_back (option);
145
146 option.name = "AccentFold";
147 option.value = (args.getintarg("af")) ? "true" : "false";
148 request.filterOptions.push_back (option);
149
150 if (!args["h"].empty()) {
151 option.name = "Index";
152 option.value = args["h"];
153 request.filterOptions.push_back (option);
154 }
155
156 if (!args["j"].empty()) {
157 option.name = "Subcollection";
158 option.value = args["j"];
159 request.filterOptions.push_back (option);
160 }
161
162 if (!args["n"].empty()) {
163 option.name = "Language";
164 option.value = args["n"];
165 request.filterOptions.push_back (option);
166 }
167
168 if (!args["g"].empty()) { // granularity for mgpp
169 option.name = "Level";
170 option.value = args["g"];
171 request.filterOptions.push_back (option);
172 }
173
174 if (!args["fs"].empty()) { // filter string for lucene
175 option.name = "FilterString";
176 option.value = args["fs"];
177 request.filterOptions.push_back (option);
178 }
179
180 if (!args["sf"].empty()) { // sort field for lucene
181 option.name = "SortField";
182 option.value = args["sf"];
183 request.filterOptions.push_back (option);
184 }
185
186 if (!args["fuzziness"].empty() && args["fuzziness"] != "100") { // fuzziness value for lucene
187 option.name = "Fuzziness";
188 option.value = (text_t) "0." + args["fuzziness"];
189 request.filterOptions.push_back (option);
190 }
191
192 set_more_queryfilter_options (request, args);
193}
194
195void set_queryfilter_options (FilterRequest_t &request,
196 const text_t &querystring1,
197 const text_t &querystring2, cgiargsclass &args) {
198
199 set_queryfilter_options (request, querystring1, args);
200
201 // fill in the second query if needed
202 if (!args["cq2"].empty()) {
203 OptionValue_t option;
204
205 option.name = "CombineQuery";
206 option.value = args["cq2"];
207 request.filterOptions.push_back (option);
208
209 option.name = "Term";
210 option.value = querystring2;
211 request.filterOptions.push_back (option);
212
213 option.name = "QueryType";
214 option.value = (args.getintarg("t")) ? "ranked" : "boolean";
215 request.filterOptions.push_back (option);
216
217 option.name = "Casefold";
218 option.value = (args.getintarg("k")) ? "true" : "false";
219 request.filterOptions.push_back (option);
220
221 option.name = "Stem";
222 option.value = (args.getintarg("s")) ? "true" : "false";
223 request.filterOptions.push_back (option);
224
225 option.name = "AccentFold";
226 option.value = (args.getintarg("af")) ? "true" : "false";
227 request.filterOptions.push_back (option);
228
229 if (!args["h2"].empty()) {
230 option.name = "Index";
231 option.value = args["h2"];
232 request.filterOptions.push_back (option);
233 }
234
235 if (!args["j2"].empty()) {
236 option.name = "Subcollection";
237 option.value = args["j2"];
238 request.filterOptions.push_back (option);
239 }
240
241 if (!args["n2"].empty()) {
242 option.name = "Language";
243 option.value = args["n2"];
244 request.filterOptions.push_back (option);
245 }
246 }
247 set_more_queryfilter_options (request, args);
248}
249
250void set_more_queryfilter_options (FilterRequest_t &request,
251 cgiargsclass &args) {
252
253 OptionValue_t option;
254 int arg_m = args.getintarg("m");
255
256 option.name = "Maxdocs";
257 option.value = arg_m;
258 request.filterOptions.push_back (option);
259
260 // option.name = "StartResults";
261 // option.value = args["r"];
262 // request.filterOptions.push_back (option);
263
264 // option.name = "EndResults";
265 // int endresults = args.getintarg("o") + (args.getintarg("r") - 1);
266 // if ((endresults > arg_m) && (arg_m != -1)) endresults = arg_m;
267 // option.value = endresults;
268 // request.filterOptions.push_back (option);
269}
270
// Returns true when `character` is one of the query-syntax characters that
// this file keeps as part of a word for the given indexer:
//   indexer_type 1 (mgpp):   # / *
//   indexer_type 2 (lucene): ? * ~ ^
// Any other indexer type has no special characters.
bool is_special_character(int indexer_type, unsigned short character) {
  switch (indexer_type) {
  case 1: // mgpp
    switch (character) {
    case '#':
    case '/':
    case '*':
      return true;
    default:
      return false;
    }
  case 2: // lucene
    switch (character) {
    case '?':
    case '*':
    case '~':
    case '^':
      return true;
    default:
      return false;
    }
  default:
    return false;
  }
}
283
284// This function removes boolean operators from simple searches, and segments
285// chinese characters if segment=true
286void format_querystring (text_t &querystring, int querymode, bool segment) {
287 text_t formattedstring;
288
289 // advanced search, no segmenting, don't need to do anything
290 if (querymode == 1 && !segment) return;
291
292 text_t::const_iterator here = querystring.begin();
293 text_t::const_iterator end = querystring.end();
294
295 // space is used to insert spaces between Chinese
296 // characters. No space is needed before the first
297 // Chinese character.
298 bool space = false;
299
300 // want to remove ()|!& from querystring so boolean queries are just
301 // "all the words" queries (unless querymode is advanced)
302 while (here != end) {
303 if ((querymode == 0) && (*here == '(' || *here == ')' || *here == '|' ||
304 *here == '!' || *here == '&')) {
305 formattedstring.push_back(' ');
306 } else if (segment) {
307 if ((*here >= 0x2e80 && *here <= 0xd7a3) ||
308 ( *here >= 0xf900 && *here <= 0xfa6a)) {
309 /* text_t not big enough to handle these. */
310 /* (*here >= 0x20000 && *here <= 0x2a6d6) ||
311 (*here >= 0x2f800 && *here <= 0x2fa1d)) { */
312
313 // CJK character
314 if (!space) formattedstring.push_back (0x200b); // zero width space
315 formattedstring.push_back (*here);
316 formattedstring.push_back (0x200b);
317 space = true;
318 } else {
319
320 // non-Chinese character
321 formattedstring.push_back (*here);
322 space = false;
323
324 }
325
326 } else {
327 formattedstring.push_back (*here);
328 }
329 ++here;
330 }
331 querystring = formattedstring;
332}
333
334
335
336
337// search history tool
338// also used for form query macros
339text_t escape_quotes(const text_t &querystring) {
340
341 text_t::const_iterator here = querystring.begin();
342 text_t::const_iterator end = querystring.end();
343
344 text_t escquery = "";
345 while (here != end) {
346 if (*here != '\'' && *here != '\"' && *here != '\n' && *here != '\r') escquery.push_back(*here);
347 else if (*here == '\n' || *here == '\r') {
348 escquery.push_back(' ');
349 } else {
350 escquery +="\\\\";
351 escquery.push_back(*here);
352 }
353
354 ++here;
355 }
356 return escquery;
357
358}
359
360// Parses the terms into words, and adds #si if necessary
361text_t addstemcase(const text_t &terms, const text_t &stem, const text_t &fold,
362 const int indexer_type) {
363
364 // the default stem and case are set to 0 if this is being used, so we are only adding on qualifiers if stem or fold is 1.
365 if (stem == "0" && fold == "0") {
366 return terms;
367 }
368 // this is only for mgpp collections, shouldn't be called for anything else
369 if (indexer_type != 1) {
370 return terms;
371 }
372
373 text_t outtext;
374 text_t word;
375
376 text_t::const_iterator here = terms.begin();
377 text_t::const_iterator end = terms.end();
378
379 while (here !=end) {
380
381 if (is_unicode_letdig(*here) || is_special_character(indexer_type, *here)) {
382 // not word boundary
383 word.push_back(*here);
384 ++here;
385 }
386 else {
387 // found word boundary
388 if (!word.empty() ) {
389 if (starts_with(word, "NEAR") || starts_with(word, "WITHIN")) {
390 outtext += word;
391 word.clear();
392 }
393 else {
394 word += "#";
395 if (stem == "1") word += "s";
396 if (fold == "1") word += "i";
397 outtext += word;
398 word.clear();
399 }
400 }
401 // this only used in advanced form, so we leave in boolean operators
402 if (*here == '\"' || *here == '&' || *here == '|' || *here == '!' ||
403 *here == '(' || *here == ')' || is_unicode_space(*here)) {
404 outtext.push_back(*here);
405 }
406 ++here;
407 }
408 }
409
410 // get last word
411 if (!word.empty()) {
412 word += "#";
413 if (stem == "1") word += "s";
414 if (fold == "1") word += "i";
415 word += " ";
416 outtext += word;
417 }
418 return outtext;
419}
420
421
422// some query form parsing functions for use with mgpp & lucene
423
424void parse_reg_query_form(text_t &querystring, cgiargsclass &args, bool segment)
425{
426 querystring.clear();
427
428 int argct = args.getintarg("ct");
429 int argt = args.getintarg("t");// t=0 -and, t=1 - or
430 int argb = args.getintarg("b");
431
432 text_t combine;
433
434 // lucene uses global combine, so only need this for mgpp
435 if (argct==1) {
436 if (argt == 0) combine = "&";
437 else combine = "|";
438 }
439
440 text_t field = args["fqf"];
441 if (field.empty()) return; // no query
442 text_tarray fields;
443 splitchar(field.begin(), field.end(), ',', fields);
444
445 text_t value = args["fqv"];
446 if (value.empty()) return; // somethings wrong
447 text_tarray values;
448 splitchar(value.begin(), value.end(), ',', values);
449
450
451 for (int i=0; i< values.size(); ++i) {
452 if (!values[i].empty()) {
453 text_t this_value = values[i];
454 // remove operators for simple search, segments text if necessary
455 format_querystring(this_value, argb, segment);
456 // add tag info for this field (and other processing)
457 format_field_info(this_value, fields[i], argct, argt, argb);
458 // add into query string
459 if (argct == 2) {
460 // lucene
461 // we don't worry about AND/OR, cos this is done by defaultcombineoperator
462 querystring += this_value+" ";
463 } else {
464 // mgpp
465 if (!querystring.empty()) {
466 querystring += " "+ combine+ " ";
467 }
468 querystring += this_value;
469 }
470 }
471 }
472}
473
474
475void parse_adv_query_form(text_t &querystring, cgiargsclass &args, bool segment){
476 querystring.clear();
477
478 const int argct = args.getintarg("ct");
479 int argt = 0;// arg t is either not used (lucene) or used for natural/ranked (mgpp), so we set it to 0 = AND, by default
480 int argb = args.getintarg("b");
481 text_t combine;
482 if (argct==1) {
483 combine = "&";
484 }
485 else { // lucene
486 combine = "AND";
487 }
488
489 text_t field = args["fqf"];
490 if (field.empty()) return; // no query
491 text_tarray fields;
492 splitchar(field.begin(), field.end(), ',', fields);
493
494 text_t value = args["fqv"];
495 if (value.empty()) return; // somethings wrong
496 text_tarray values;
497 splitchar(value.begin(), value.end(), ',', values);
498
499 text_t comb = args["fqc"];
500 if (comb.empty()) return; //somethings wrong
501 text_tarray combs;
502 splitchar(comb.begin(), comb.end(), ',', combs);
503
504 text_tarray stems;
505 text_tarray folds;
506 if (argct == 1) {// mgpp - lucene doesn't do stem/case
507 text_t stem = args["fqs"];
508 if (stem.empty()) return; // somethings wrong
509 splitchar(stem.begin(), stem.end(), ',', stems);
510
511 text_t fold = args["fqk"];
512 if (fold.empty()) return; // somethings wrong
513 splitchar(fold.begin(), fold.end(), ',', folds);
514 }
515
516 for(int i=0; i< values.size(); ++i) {
517 if (!values[i].empty()) {
518 if (i!=0) {
519 if (argct==1) {
520 if (combs[i-1]=="and") combine = "&";
521 else if (combs[i-1]=="or")combine = "|";
522 else if (combs[i-1]=="not")combine = "!";
523 }
524 else { // lucene
525 if (combs[i-1]=="and") combine = "AND";
526 else if (combs[i-1]=="or")combine = "OR";
527 else if (combs[i-1]=="not")combine = "NOT";
528 }
529 }
530 text_t this_value = values[i];
531 // remove operators for simple search, segments text if necessary
532 format_querystring(this_value, argb, segment);
533 if (argct == 1) { // mgpp only
534 this_value = addstemcase(this_value, stems[i], folds[i], argct);
535 }
536 // add tag info for this field (and other processing)
537 format_field_info(this_value, fields[i], argct, argt, argb);
538 // add into query string
539 if (!querystring.empty()) {
540 querystring += " "+ combine+ " ";
541 }
542 querystring += this_value;
543
544 }
545 }
546}
547
548// Extended addqueryelem for Human Info project
549void addqueryelem_ex(text_t &querystring, const text_t &tag,
550 const text_t &terms, const text_t &stem,
551 const text_t &fold,
552 const text_t& combine, const text_t& word_combine) {
553
554 if (!querystring.empty()) { // have to put and/or
555 querystring += " " + combine + " ";
556 }
557 text_t outtext; outtext.reserve(512);
558 text_t word; word.reserve(100);
559 //unsigned short c;
560 text_t::const_iterator here = terms.begin();
561 text_t::const_iterator end = terms.end();
562 bool inquote = false, firstword = true;
563
564 text_t word2; word2.reserve(256);
565
566 while (here !=end) {
567 if (is_unicode_space(*here)) {
568 if (word2 == "AND") { word2.clear(); word2.push_back(7527); word2.appendcarr("AND", 3); word2.push_back(7527); }
569 else if (word2 == "OR") { word2.clear(); word2.push_back(7527); word2.appendcarr("OR", 2); word2.push_back(7527); }
570 else if (word2 == "NOT") { word2.clear(); word2.push_back(7527); word2.appendcarr("NOT", 3); word2.push_back(7527); }
571 else if (word2 == "NEAR") { word2.clear(); word2.push_back(7527); word2.appendcarr("NEAR", 4); word2.push_back(7527); }
572 else if (word2 == "WITHIN") { word2.clear(); word2.push_back(7527); word2.appendcarr("WITHIN", 6); word2.push_back(7527); }
573 if (inquote) {
574 word2.push_back(*here);
575 }
576 word.append(word2); word2.clear();
577
578 if (!inquote && !word.empty() ) {
579 // found word boundary
580
581 if (stem == "1" || fold =="1") {
582 word += "#";
583 if (stem == "1") word += "s";
584 //else word += "u";
585
586 if (fold == "1") word += "i";
587 //else word += "c";
588 }
589 if (firstword) {
590 firstword = false;
591 } else {
592 outtext += " " + word_combine + " ";
593 }
594 outtext += "[" + word + "]:"+tag;
595 word.clear();
596 }
597 ++here;
598 } else if (*here == '\"') {
599 word2.push_back(*here);
600 inquote = !inquote;
601 ++here;
602 } else {
603 // not word boundary
604 word2.push_back(*here);
605 ++here;
606 }
607 }
608
609 // get last word
610 if (!word2.empty()) {
611 if (word2 == "AND") { word2.clear(); word2.push_back(7527); word2.appendcarr("AND", 3); word2.push_back(7527); }
612 else if (word2 == "OR") { word2.clear(); word2.push_back(7527); word2.appendcarr("OR", 2); word2.push_back(7527); }
613 else if (word2 == "NOT") { word2.clear(); word2.push_back(7527); word2.appendcarr("NOT", 3); word2.push_back(7527); }
614 else if (word2 == "NEAR") { word2.clear(); word2.push_back(7527); word2.appendcarr("NEAR", 4); word2.push_back(7527); }
615 else if (word2 == "WITHIN") { word2.clear(); word2.push_back(7527); word2.appendcarr("WITHIN", 6); word2.push_back(7527); }
616 word.append(word2); word2.clear();
617
618 if (stem == "1"|| fold == "1") {
619 word += "#";
620 if (stem == "1") word += "s";
621 //else word += "u";
622
623 if (fold == "1") word += "i";
624 //else word += "c";
625 }
626 if (!outtext.empty()) outtext += " " + word_combine + " ";
627 outtext += "[" + word + "]:"+tag;
628 }
629 querystring += "(" + outtext + ")";
630}
631
632void add_field_info(text_t &querystring, const text_t &tag, int type) {
633
634 if (tag == "") return; // do nothing
635 if (tag == "ZZ" && type == 1) return; // mgpp doesn't use ZZ tag internally
636 if (type == 1) { //mgpp
637 querystring = "["+querystring+"]:"+tag;
638 } else if (type == 2) { // lucene
639 querystring = tag+":("+querystring+")";
640 }
641
642}
643
644
645void format_field_info_lucene(text_t &querystring, text_t &tag, int argt, int argb) {
646
647 int type = 2; //lucene
648
649 if (argb==0) { // simple
650 // there will be no & or | as they should have already been removed
651 // just tag the entire thing
652 if (tag != "") {
653 add_field_info(querystring, tag, type);
654 }
655 return;
656 }
657
658 // need to replace & with &&, | with ||
659 text_t::const_iterator here = querystring.begin();
660 text_t::const_iterator end = querystring.end();
661
662 text_t finalquery = "";
663 while (here != end) {
664 if (*here == '&') {
665 finalquery.push_back('&');
666 finalquery.push_back('&');
667 while (*(here+1) == '&') {
668 ++here;
669 }
670 }
671 else if (*here == '|') {
672 finalquery.push_back('|');
673 finalquery.push_back('|');
674 while (*(here+1) == '|') {
675 ++here;
676 }
677 }
678 else {
679 finalquery.push_back(*here);
680 }
681 ++here;
682 }
683 querystring = finalquery;
684 add_field_info(querystring, tag, type);
685}
686
687
688void format_field_info_mgpp(text_t &querystring, text_t tag, int argt, int argb) {
689
690 if (tag == "ZZ") tag = ""; // ZZ is a special tag meaning no tag (all fields)
691 if (tag == "" && argb == 1) {
692 return; // no field specifier, advanced mode, the query stays as written
693 }
694
695 int type = 1; // mgpp
696
697 bool simple_and = (argb==0 && argt==0);
698 text_t finalquery = "";
699 text_t fieldpart ="";
700 text_t queryelem = "";
701 bool in_phrase = false;
702 bool in_field = false;
703
704 text_t::const_iterator here = querystring.begin();
705 text_t::const_iterator end = querystring.end();
706 while (here != end) {
707 if (is_unicode_letdig(*here) || *here == '&' || is_special_character(type, *here)) {
708 queryelem.push_back(*here);
709 }
710 else if (*here == '|') {
711 in_field = false;
712 }
713 else if (*here == '!' || *here == '(' || *here == ')') {
714 if (!in_phrase) { // ignore these if in_phrase
715 // output field, then output operator
716 in_field = false;
717 if (!queryelem.empty()) {
718 if (!simple_and && !fieldpart.empty()) {
719 add_field_info(fieldpart, tag, type);
720 finalquery += fieldpart;
721 finalquery.push_back(' ');
722 fieldpart.clear();
723 }
724 fieldpart += queryelem;
725 }
726 if (!fieldpart.empty()) {
727 add_field_info(fieldpart, tag, type);
728 finalquery += fieldpart;
729 finalquery.push_back(' ');
730 }
731 fieldpart.clear();
732 queryelem.clear();
733 finalquery.push_back(*here);
734 finalquery.push_back(' ');
735 }
736 }
737 else if (*here == '"') {
738 queryelem.push_back(*here);
739 if (in_phrase == false) in_phrase = true;
740 else {
741 in_phrase = false;
742 }
743 }
744
745 // Found word boundary, in a phrase
746 else if (in_phrase) {
747 queryelem.push_back(*here);
748 }
749 // Found a word boundary
750 else {
751 if (!queryelem.empty()) {
752 if (queryelem == "&") {
753 in_field = true;
754 queryelem.clear();
755 }
756 else if (starts_with(queryelem, "NEAR") || starts_with(queryelem, "WITHIN")) {
757
758 if (argb==1) {
759 // simple search, these not allowed
760 in_field = true;
761 fieldpart += queryelem;
762 fieldpart.push_back(' ');
763 }
764 queryelem.clear();
765
766 }
767 else {
768 if (!simple_and && !in_field) {
769 if (!fieldpart.empty()) {
770 add_field_info(fieldpart, tag, type);
771 finalquery += fieldpart;
772 finalquery.push_back(' ');
773 fieldpart.clear();
774 }
775 }
776
777 fieldpart += queryelem;
778 fieldpart.push_back(' ');
779 queryelem.clear();
780 }
781 }
782 }
783 ++here;
784 }
785 // at the end
786 if (!queryelem.empty()) {
787 if (!simple_and && !in_field && !fieldpart.empty()) {
788 add_field_info(fieldpart, tag, type);
789 finalquery += fieldpart;
790 finalquery.push_back(' ');
791 fieldpart.clear();
792 }
793 fieldpart += queryelem;
794 }
795 if (!fieldpart.empty()) {
796 add_field_info(fieldpart, tag, type);
797 finalquery += fieldpart;
798 fieldpart.clear();
799
800 // doesn't the following just leave a dangling space at the end ?? (used to make mgpp crash)
801 // consider cutting this line
802 finalquery.push_back(' ');
803 }
804
805 querystring = finalquery;
806}
807
808
809void format_field_info(text_t &querystring, text_t tag, int argct, int argt, int argb) {
810 if (argct == 1) {
811 format_field_info_mgpp(querystring, tag, argt, argb);
812 } else if (argct == 2) {
813 format_field_info_lucene(querystring, tag, argt, argb);
814 }
815}
816
817void mgpp_adddateelem(text_t& querystring, const int date)
818{
819 querystring.appendcstr(" [");
820 if(date<0) {
821 querystring.appendcstr("bc");
822 querystring.appendint((date*-1));
823 }
824 else {
825 querystring.appendint(date);
826 }
827 querystring.appendcstr("]:CV");
828}
829
830void lucene_adddateelem(text_t& querystring, const int date)
831{
832 querystring.appendcstr(" CV:(");
833 if(date<0) {
834 querystring.appendcstr("bc");
835 querystring.appendint((date*-1));
836 }
837 else {
838 querystring.appendint(date);
839 }
840 querystring.appendcstr(")");
841}
842
843
844void add_dates(text_t &querystring, int startdate, int enddate,
845 int startbc, int endbc, int ct)
846{
847 if(startdate)
848 {
849 int querystringis = 0;
850 text_t::const_iterator here = querystring.begin();
851 text_t::const_iterator end = querystring.end();
852 while(here!=end)
853 {
854 if(!(isspace((*here)))){
855 here = end;
856 querystringis = 1;
857 }
858 else
859 ++here;
860 }
861 //converting BCE dates
862 if(startbc && startdate > 0)
863 {
864 startdate *= -1;
865 }
866 if(endbc && enddate > 0)
867 {
868 enddate *= -1;
869 }
870 if(enddate != 0 && enddate<startdate)
871 {
872 cout<<"enddate too small"<<endl;
873 return;
874 }
875 if(querystringis)
876 querystring.appendcstr(" AND");
877 if(!enddate)
878 {
879 if (ct==1) {
880 mgpp_adddateelem(querystring,startdate);
881 }
882 else { // lucene
883 lucene_adddateelem(querystring,startdate);
884 }
885 }
886 else{
887 int nextdate = startdate;
888 querystring.appendcstr(" (");
889 while(nextdate<=enddate)
890 {
891 if(nextdate!=0) {
892 if (ct==1) {
893 mgpp_adddateelem(querystring,nextdate);
894 }
895 else { // lucene
896 lucene_adddateelem(querystring,nextdate);
897 }
898 }
899 ++nextdate;
900 }
901 querystring.appendcstr(" )");
902 }
903 }
904
905}
Note: See TracBrowser for help on using the repository browser.