source: trunk/gsdl/src/recpt/querytools.cpp@ 12770

Last change on this file since 12770 was 12770, checked in by mdewsnip, 18 years ago

Changed the Lucene "-fuzzy" argument to "-fuzziness <value>", for more accurate control.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 23.3 KB
Line 
1/**********************************************************************
2 *
3 * querytools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "querytools.h"
27#include <ctype.h>
28#include "unitool.h" // for is_unicode_letdig
29
30void set_query_type_args(ColInfoResponse_t *cinfo, cgiargsclass &args) {
31
32 if (args["ct"].empty()) {
33 text_t build_type = cinfo->buildType;
34 if (build_type == "mgpp") {
35 args["ct"] = "1";
36 } else if (build_type == "lucene") {
37 args["ct"] = "2";
38 } else {
39 args["ct"] = "0";
40 }
41 }
42 text_t arg_ct = args["ct"];
43 if (arg_ct == "0") {
44 // mg
45 args["qt"] = "0";
46 args["qto"] = "0";
47 return;
48 }
49
50 if (!args["qt"].empty() && !args["qto"].empty()) {
51 return;
52 }
53
54 text_tmap::iterator check = cinfo->format.find("SearchTypes");
55 text_t search_types = "plain,form";
56 if(check != cinfo->format.end()){
57 search_types = (*check).second;
58 if (search_types.empty()) {
59 search_types = "plain,form";
60 }
61 }
62
63 if (args["qto"].empty()) {
64 unsigned int type = 0;
65 if (findword(search_types.begin(), search_types.end(), "form") != search_types.end()) {
66 type |= 2;
67 }
68 if (findword(search_types.begin(), search_types.end(), "plain") != search_types.end()) {
69 type |= 1;
70 }
71 args.setintarg("qto", type);
72 }
73
74 if (args["qt"].empty()) {
75 bool form_default = false;
76 int arg_qto = args.getintarg("qto");
77 if (arg_qto == 2 || (arg_qto == 3 && starts_with(search_types, "form"))) {
78 args["qt"] = "1";
79 } else {
80 args["qt"] = "0";
81 }
82 }
83}
84
85// request.filterResultOptions and request.fields (if required) should
86// be set from the calling code
87void set_queryfilter_options (FilterRequest_t &request, const text_t &querystring,
88 cgiargsclass &args) {
89
90 request.filterName = "QueryFilter";
91
92 OptionValue_t option;
93
94 option.name = "Term";
95 option.value = querystring;
96 request.filterOptions.push_back (option);
97
98 option.name = "QueryType";
99 option.value = (args.getintarg("t")) ? "ranked" : "boolean";
100 request.filterOptions.push_back (option);
101
102 option.name = "MatchMode";
103 // mgpp in advanced mode, always use some query
104 if (args.getintarg("ct") == 1 && args.getintarg("b") == 1) {
105 option.value = "some";
106 } else {
107 option.value = (args.getintarg("t")) ? "some" : "all";
108 }
109 request.filterOptions.push_back (option);
110
111 option.name = "Casefold";
112 option.value = (args.getintarg("k")) ? "true" : "false";
113 request.filterOptions.push_back (option);
114
115 option.name = "Stem";
116 option.value = (args.getintarg("s")) ? "true" : "false";
117 request.filterOptions.push_back (option);
118
119 if (!args["h"].empty()) {
120 option.name = "Index";
121 option.value = args["h"];
122 request.filterOptions.push_back (option);
123 }
124
125 if (!args["j"].empty()) {
126 option.name = "Subcollection";
127 option.value = args["j"];
128 request.filterOptions.push_back (option);
129 }
130
131 if (!args["n"].empty()) {
132 option.name = "Language";
133 option.value = args["n"];
134 request.filterOptions.push_back (option);
135 }
136
137 if (!args["g"].empty()) { // granularity for mgpp
138 option.name = "Level";
139 option.value = args["g"];
140 request.filterOptions.push_back (option);
141 }
142
143 if (!args["fs"].empty()) { // filter string for lucene
144 option.name = "FilterString";
145 option.value = args["fs"];
146 request.filterOptions.push_back (option);
147 }
148
149 if (!args["sf"].empty()) { // sort field for lucene
150 option.name = "SortField";
151 option.value = args["sf"];
152 request.filterOptions.push_back (option);
153 }
154
155 if (!args["fuzziness"].empty()) { // fuzziness value for lucene
156 option.name = "Fuzziness";
157 option.value = args["fuzziness"];
158 request.filterOptions.push_back (option);
159 }
160
161 set_more_queryfilter_options (request, args);
162}
163
164void set_queryfilter_options (FilterRequest_t &request, const text_t &querystring1,
165 const text_t &querystring2, cgiargsclass &args) {
166
167 set_queryfilter_options (request, querystring1, args);
168
169 // fill in the second query if needed
170 if (!args["cq2"].empty()) {
171 OptionValue_t option;
172
173 option.name = "CombineQuery";
174 option.value = args["cq2"];
175 request.filterOptions.push_back (option);
176
177 option.name = "Term";
178 option.value = querystring2;
179 request.filterOptions.push_back (option);
180
181 option.name = "QueryType";
182 option.value = (args.getintarg("t")) ? "ranked" : "boolean";
183 request.filterOptions.push_back (option);
184
185 option.name = "Casefold";
186 option.value = (args.getintarg("k")) ? "true" : "false";
187 request.filterOptions.push_back (option);
188
189 option.name = "Stem";
190 option.value = (args.getintarg("s")) ? "true" : "false";
191 request.filterOptions.push_back (option);
192
193 if (!args["h2"].empty()) {
194 option.name = "Index";
195 option.value = args["h2"];
196 request.filterOptions.push_back (option);
197 }
198
199 if (!args["j2"].empty()) {
200 option.name = "Subcollection";
201 option.value = args["j2"];
202 request.filterOptions.push_back (option);
203 }
204
205 if (!args["n2"].empty()) {
206 option.name = "Language";
207 option.value = args["n2"];
208 request.filterOptions.push_back (option);
209 }
210 }
211 set_more_queryfilter_options (request, args);
212}
213
214void set_more_queryfilter_options (FilterRequest_t &request, cgiargsclass &args) {
215
216 OptionValue_t option;
217 int arg_m = args.getintarg("m");
218
219 option.name = "Maxdocs";
220 option.value = arg_m;
221 request.filterOptions.push_back (option);
222
223 // option.name = "StartResults";
224 // option.value = args["r"];
225 // request.filterOptions.push_back (option);
226
227 // option.name = "EndResults";
228 // int endresults = args.getintarg("o") + (args.getintarg("r") - 1);
229 // if ((endresults > arg_m) && (arg_m != -1)) endresults = arg_m;
230 // option.value = endresults;
231 // request.filterOptions.push_back (option);
232}
233
// Returns true when `character` is treated as part of a query word
// rather than a word boundary for the given indexer:
//   indexer_type 1 (mgpp)   : '#', '/', '*'
//   indexer_type 2 (lucene) : '?', '*', '~', '^'
// Any other indexer_type has no special characters.
bool is_special_character(int indexer_type, unsigned short character) {
  switch (indexer_type) {
  case 1: // mgpp
    switch (character) {
    case '#':
    case '/':
    case '*':
      return true;
    default:
      return false;
    }

  case 2: // lucene
    switch (character) {
    case '?':
    case '*':
    case '~':
    case '^':
      return true;
    default:
      return false;
    }

  default:
    return false;
  }
}
246
247void format_querystring (text_t &querystring, int querymode, bool segment) {
248 text_t formattedstring;
249
250 if (querymode == 1 && !segment) return;
251
252 text_t::const_iterator here = querystring.begin();
253 text_t::const_iterator end = querystring.end();
254
255 // space is used to insert spaces between Chinese
256 // characters. No space is needed before the first
257 // Chinese character.
258 bool space = false;
259
260 // want to remove ()|!& from querystring so boolean queries are just
261 // "all the words" queries (unless querymode is advanced)
262 while (here != end) {
263 if ((querymode == 0) && (*here == '(' || *here == ')' || *here == '|' ||
264 *here == '!' || *here == '&')) {
265 formattedstring.push_back(' ');
266 } else if (segment) {
267 if ((*here >= 0x4e00 && *here <= 0x9fa5) ||
268 (*here >= 0xf900 && *here <= 0xfa2d)) {
269 // Chinese character
270 if (!space) formattedstring.push_back (0x200b); // zero width space
271 formattedstring.push_back (*here);
272 formattedstring.push_back (0x200b);
273 space = true;
274 } else {
275
276 // non-Chinese character
277 formattedstring.push_back (*here);
278 space = false;
279
280 }
281
282 } else {
283 formattedstring.push_back (*here);
284 }
285 ++here;
286 }
287 querystring = formattedstring;
288}
289
290
291
292void add_dates(text_t &querystring, int startdate, int enddate,
293 int startbc, int endbc, int ct)
294{
295 if(startdate)
296 {
297 int querystringis = 0;
298 text_t::const_iterator here = querystring.begin();
299 text_t::const_iterator end = querystring.end();
300 while(here!=end)
301 {
302 if(!(isspace((*here)))){
303 here = end;
304 querystringis = 1;
305 }
306 else
307 ++here;
308 }
309 //converting BCE dates
310 if(startbc && startdate > 0)
311 {
312 startdate *= -1;
313 }
314 if(endbc && enddate > 0)
315 {
316 enddate *= -1;
317 }
318 if(enddate != 0 && enddate<startdate)
319 {
320 cout<<"enddate too small"<<endl;
321 return;
322 }
323 if(querystringis)
324 querystring.appendcstr(" AND");
325 if(!enddate)
326 {
327 if (ct==1) {
328 mgpp_adddateelem(querystring,startdate);
329 }
330 else { // lucene
331 lucene_adddateelem(querystring,startdate);
332 }
333 }
334 else{
335 int nextdate = startdate;
336 querystring.appendcstr(" (");
337 while(nextdate<=enddate)
338 {
339 if(nextdate!=0) {
340 if (ct==1) {
341 mgpp_adddateelem(querystring,nextdate);
342 }
343 else { // lucene
344 lucene_adddateelem(querystring,nextdate);
345 }
346 }
347 ++nextdate;
348 }
349 querystring.appendcstr(" )");
350 }
351 }
352
353}
354
355// search history tool
356// also used for form query macros
357text_t escape_quotes(const text_t &querystring) {
358
359 text_t::const_iterator here = querystring.begin();
360 text_t::const_iterator end = querystring.end();
361
362 text_t escquery = "";
363 while (here != end) {
364 if (*here != '\'' && *here != '\"' && *here != '\n' && *here != '\r') escquery.push_back(*here);
365 else if (*here == '\n' || *here == '\r') {
366 escquery.push_back(' ');
367 } else {
368 escquery +="\\\\";
369 escquery.push_back(*here);
370 }
371
372 ++here;
373 }
374 return escquery;
375
376}
377
378// some query form parsing functions for use with mgpp & lucene
379
380void parse_reg_query_form(text_t &querystring, cgiargsclass &args)
381{
382 querystring.clear();
383
384 const int ct = args.getintarg("ct");
385 int argt = args.getintarg("t");// t=0 -and, t=1 - or
386
387 text_t combine;
388 if (ct==1) {
389 if (argt == 0) combine = "&";
390 else combine = "|";
391 }
392 else { // lucene
393 if (argt == 0) combine = "AND";
394 else combine = "OR";
395 }
396
397 text_t field = args["fqf"];
398 if (field.empty()) return; // no query
399 text_tarray fields;
400 splitchar(field.begin(), field.end(), ',', fields);
401
402 text_t value = args["fqv"];
403 if (value.empty()) return; // somethings wrong
404 text_tarray values;
405 splitchar(value.begin(), value.end(), ',', values);
406
407
408 for (int i=0; i< values.size(); ++i) {
409 if (!values[i].empty()) {
410 if (ct == 1) {
411 mgpp_addqueryelem(querystring, fields[i], values[i], combine);
412 }
413 else { // lucene
414 lucene_addqueryelem(querystring, fields[i], values[i], combine);
415 }
416 }
417 }
418
419}
420
421
422void parse_adv_query_form(text_t &querystring, cgiargsclass &args){
423
424 querystring.clear();
425
426 const int ct = args.getintarg("ct");
427 text_t combine;
428 if (ct==1) {
429 combine = "&";
430 }
431 else { // lucene
432 combine = "AND";
433 }
434
435 text_t field = args["fqf"];
436 if (field.empty()) return; // no query
437 text_tarray fields;
438 splitchar(field.begin(), field.end(), ',', fields);
439
440 text_t value = args["fqv"];
441 if (value.empty()) return; // somethings wrong
442 text_tarray values;
443 splitchar(value.begin(), value.end(), ',', values);
444
445 text_t stem = args["fqs"];
446 if (stem.empty()) return; // somethings wrong
447 text_tarray stems;
448 splitchar(stem.begin(), stem.end(), ',', stems);
449
450 text_t fold = args["fqk"];
451 if (fold.empty()) return; // somethings wrong
452 text_tarray folds;
453 splitchar(fold.begin(), fold.end(), ',', folds);
454
455 text_t comb = args["fqc"];
456 if (comb.empty()) return; //somethings wrong
457 text_tarray combs;
458 splitchar(comb.begin(), comb.end(), ',', combs);
459
460 for(int i=0; i< values.size(); ++i) {
461 if (!values[i].empty()) {
462 if (i!=0) {
463 if (ct==1) {
464 if (combs[i-1]=="and") combine = "&";
465 else if (combs[i-1]=="or")combine = "|";
466 else if (combs[i-1]=="not")combine = "!";
467 }
468 else { // lucene
469 if (combs[i-1]=="and") combine = "AND";
470 else if (combs[i-1]=="or")combine = "OR";
471 else if (combs[i-1]=="not")combine = "NOT";
472 }
473 }
474 text_t term = addstemcase(values[i], stems[i], folds[i], ct);
475 mgpp_addqueryelem(querystring, fields[i], term, combine);
476 }
477
478 }
479}
480
481text_t addstemcase(const text_t &terms, const text_t &stem, const text_t &fold,
482 const int indexer_type) {
483
484 text_t outtext;
485 text_t word;
486 //unsigned short c;
487 text_t::const_iterator here = terms.begin();
488 text_t::const_iterator end = terms.end();
489
490 while (here !=end) {
491
492 if (is_unicode_letdig(*here) || is_special_character(indexer_type, *here)) {
493 // not word boundary
494 word.push_back(*here);
495 ++here;
496 }
497 else {
498 // found word boundary
499 if (!word.empty() ) {
500 if (stem == "1" || fold =="1") {
501 word += "#";
502 if (stem == "1") word += "s";
503 //else word += "u";
504
505 if (fold == "1") word += "i";
506 //else word += "c";
507 }
508
509 word += " ";
510 outtext += word;
511 word.clear();
512 }
513 if (*here == '\"') {
514 outtext.push_back(*here);
515 }
516 ++here;
517 }
518 }
519
520 // get last word
521 if (!word.empty()) {
522 if (stem == "1"|| fold == "1") {
523 word += "#";
524 if (stem == "1") word += "s";
525 //else word += "u";
526
527 if (fold == "1") word += "i";
528 //else word += "c";
529 }
530 word += " ";
531 outtext += word;
532 }
533 return outtext;
534}
535
536
537void mgpp_adddateelem(text_t& querystring, const int date)
538{
539 querystring.appendcstr(" [");
540 if(date<0) {
541 querystring.appendcstr("bc");
542 querystring.appendint((date*-1));
543 }
544 else {
545 querystring.appendint(date);
546 }
547 querystring.appendcstr("]:CV");
548}
549
550void lucene_adddateelem(text_t& querystring, const int date)
551{
552 querystring.appendcstr(" CV:(");
553 if(date<0) {
554 querystring.appendcstr("bc");
555 querystring.appendint((date*-1));
556 }
557 else {
558 querystring.appendint(date);
559 }
560 querystring.appendcstr(")");
561}
562
563
564void mgpp_addqueryelem(text_t &querystring, text_t &tag,
565 text_t &query, text_t &combine) {
566 if (!querystring.empty()) { // have to put and/or
567 querystring += " " + combine + " ";
568
569 }
570 if (tag=="ZZ" || tag=="") { // just add onto querystring
571 querystring += query;
572 }
573 else {
574 querystring += "["+query+"]:"+tag;
575 }
576
577}
578
579void lucene_addqueryelem(text_t &querystring, text_t &tag,
580 text_t &query, text_t &combine) {
581 if (!querystring.empty()) { // have to put and/or
582 querystring += " " + combine + " ";
583
584 }
585 if (tag=="ZZ" || tag=="") { // just add onto querystring
586 querystring += query;
587 }
588 else {
589 querystring += tag+":("+query+")";
590 }
591}
592
593
594void addqueryelem_ex(text_t &querystring, const text_t &tag,
595 const text_t &terms, const text_t &stem, const text_t &fold,
596 const text_t& combine, const text_t& word_combine) {
597 if (!querystring.empty()) { // have to put and/or
598 querystring += " " + combine + " ";
599 }
600 text_t outtext; outtext.reserve(512);
601 text_t word; word.reserve(100);
602 //unsigned short c;
603 text_t::const_iterator here = terms.begin();
604 text_t::const_iterator end = terms.end();
605 bool inquote = false, firstword = true;
606
607 text_t word2; word2.reserve(256);
608
609 while (here !=end) {
610 if (is_unicode_space(*here)) {
611 if (word2 == "AND") { word2.clear(); word2.push_back(7527); word2.appendcarr("AND", 3); word2.push_back(7527); }
612 else if (word2 == "OR") { word2.clear(); word2.push_back(7527); word2.appendcarr("OR", 2); word2.push_back(7527); }
613 else if (word2 == "NOT") { word2.clear(); word2.push_back(7527); word2.appendcarr("NOT", 3); word2.push_back(7527); }
614 else if (word2 == "NEAR") { word2.clear(); word2.push_back(7527); word2.appendcarr("NEAR", 4); word2.push_back(7527); }
615 else if (word2 == "WITHIN") { word2.clear(); word2.push_back(7527); word2.appendcarr("WITHIN", 6); word2.push_back(7527); }
616 if (inquote) {
617 word2.push_back(*here);
618 }
619 word.append(word2); word2.clear();
620
621 if (!inquote && !word.empty() ) {
622 // found word boundary
623
624 if (stem == "1" || fold =="1") {
625 word += "#";
626 if (stem == "1") word += "s";
627 //else word += "u";
628
629 if (fold == "1") word += "i";
630 //else word += "c";
631 }
632 if (firstword) {
633 firstword = false;
634 } else {
635 outtext += " " + word_combine + " ";
636 }
637 outtext += "[" + word + "]:"+tag;
638 word.clear();
639 }
640 ++here;
641 } else if (*here == '\"') {
642 word2.push_back(*here);
643 inquote = !inquote;
644 ++here;
645 } else {
646 // not word boundary
647 word2.push_back(*here);
648 ++here;
649 }
650 }
651
652 // get last word
653 if (!word2.empty()) {
654 if (word2 == "AND") { word2.clear(); word2.push_back(7527); word2.appendcarr("AND", 3); word2.push_back(7527); }
655 else if (word2 == "OR") { word2.clear(); word2.push_back(7527); word2.appendcarr("OR", 2); word2.push_back(7527); }
656 else if (word2 == "NOT") { word2.clear(); word2.push_back(7527); word2.appendcarr("NOT", 3); word2.push_back(7527); }
657 else if (word2 == "NEAR") { word2.clear(); word2.push_back(7527); word2.appendcarr("NEAR", 4); word2.push_back(7527); }
658 else if (word2 == "WITHIN") { word2.clear(); word2.push_back(7527); word2.appendcarr("WITHIN", 6); word2.push_back(7527); }
659 word.append(word2); word2.clear();
660
661 if (stem == "1"|| fold == "1") {
662 word += "#";
663 if (stem == "1") word += "s";
664 //else word += "u";
665
666 if (fold == "1") word += "i";
667 //else word += "c";
668 }
669 if (!outtext.empty()) outtext += " " + word_combine + " ";
670 outtext += "[" + word + "]:"+tag;
671 }
672 querystring += "(" + outtext + ")";
673}
674
675
676void add_field_info(text_t &querystring, const text_t &tag, int type) {
677
678 if (tag == "") return; // do nothing
679 if (type == 1) { //mgpp
680 querystring = "["+querystring+"]:"+tag;
681 } else if (type == 2) { // lucene
682 querystring = tag+":("+querystring+")";
683 }
684
685}
686
687
688void format_field_info_lucene(text_t &querystring, cgiargsclass &args) {
689 text_t tag = args["fqf"];
690 if (tag == "ZZ") tag = ""; // ZZ is a special tag meaning no tag (all fields)
691 int type = 2; //lucene
692 int argt = args.getintarg("t");// t=0 -and, t=1 - or
693 int argb = args.getintarg("b"); // b=0 simple, b=1 advanced
694
695 // lucene simple OR - the string stays as is, but may need field tag
696 if (argb==0 && argt == 1) {
697 // just tag the entire thing
698 if (tag != "") {
699 add_field_info(querystring, tag, type);
700 }
701 return;
702 }
703 bool in_phrase = false;
704
705 text_t queryelem = "";
706 text_t finalquery = "";
707
708 // only add in + for simple AND search
709 text_t combine = ((argb==0)? "+" : "");
710
711 // for lucene, we need to change & to && and | to || if advanced search
712 // we need to tag the entire string, if we have a field
713 // if we are simple and search, then we put && in between words
714
715 text_t::const_iterator here = querystring.begin();
716 text_t::const_iterator end = querystring.end();
717 while (here != end) {
718 if (is_unicode_letdig(*here) || is_special_character(type, *here)) {
719 queryelem.push_back(*here);
720 }
721
722 // Detect phrase starts/finishes
723 else if (*here == '"') {
724 queryelem.push_back(*here);
725 if (in_phrase == false) in_phrase = true;
726 else {
727 finalquery += combine + queryelem;
728 queryelem.clear();
729 in_phrase = false;
730 }
731 }
732
733 // Found word boundary, in a phrase
734 else if (in_phrase) {
735 queryelem.push_back(*here);
736 }
737 // Word boundary, but not in a phrase
738 else {
739 if (*here == '&') {
740 queryelem.push_back('&');
741 queryelem.push_back('&');
742 } else if (*here == '|') {
743 queryelem.push_back('|');
744 queryelem.push_back('|');
745 } else {
746 if (!queryelem.empty()) {
747 finalquery += combine + queryelem;
748 queryelem.clear();
749 }
750 finalquery.push_back(*here);
751 }
752 }
753
754 ++here;
755 }
756
757 // Get last element
758 if (!queryelem.empty()) {
759 finalquery += combine + queryelem;
760 }
761
762 add_field_info(finalquery, tag, type);
763 querystring = finalquery;
764}
765
766void format_field_info_mgpp(text_t &querystring, cgiargsclass &args) {
767 text_t tag = args["fqf"];
768 if (tag == "ZZ") tag = ""; // ZZ is a special tag meaning no tag (all fields)
769
770 int argt = args.getintarg("t");// t=0 -and, t=1 - or
771 int argb = args.getintarg("b"); // b=0 simple, b=1 advanced
772
773 if (tag == "" && argb ==1) {
774 return; // no field specifier, advanced mode, the query stays as written
775 }
776
777 int type = 1; // mgpp
778
779 bool simple_and = (argb==0 && argt==0);
780 text_t finalquery = "";
781 text_t fieldpart ="";
782 text_t queryelem = "";
783 bool in_phrase = false;
784 bool in_field = false;
785
786 text_t::const_iterator here = querystring.begin();
787 text_t::const_iterator end = querystring.end();
788 while (here != end) {
789 if (is_unicode_letdig(*here) || *here == '&' || is_special_character(type, *here)) {
790 queryelem.push_back(*here);
791 }
792 else if (*here == '|') {
793 in_field = false;
794 }
795 else if (*here == '!' || *here == '(' || *here == ')') {
796 if (!in_phrase) { // ignore these if in_phrase
797 // output field, then output operator
798 in_field = false;
799 if (!queryelem.empty()) {
800 if (!simple_and && !fieldpart.empty()) {
801 add_field_info(fieldpart, tag, type);
802 finalquery += fieldpart;
803 finalquery.push_back(' ');
804 fieldpart.clear();
805 }
806 fieldpart += queryelem;
807 }
808 if (!fieldpart.empty()) {
809 add_field_info(fieldpart, tag, type);
810 finalquery += fieldpart;
811 finalquery.push_back(' ');
812 }
813 fieldpart.clear();
814 queryelem.clear();
815 finalquery.push_back(*here);
816 finalquery.push_back(' ');
817 }
818 }
819 else if (*here == '"') {
820 queryelem.push_back(*here);
821 if (in_phrase == false) in_phrase = true;
822 else {
823 in_phrase = false;
824 }
825 }
826
827 // Found word boundary, in a phrase
828 else if (in_phrase) {
829 queryelem.push_back(*here);
830 }
831 // Found a word boundary
832 else {
833 if (!queryelem.empty()) {
834 if (queryelem == "&") {
835 in_field = true;
836 queryelem.clear();
837 }
838 else if (starts_with(queryelem, "NEAR") || starts_with(queryelem, "WITHIN")) {
839
840 if (argb==1) {
841 // simple search, these not allowed
842 in_field = true;
843 fieldpart += queryelem;
844 fieldpart.push_back(' ');
845 }
846 queryelem.clear();
847
848 }
849 else {
850 if (!simple_and && !in_field) {
851 if (!fieldpart.empty()) {
852 add_field_info(fieldpart, tag, type);
853 finalquery += fieldpart;
854 finalquery.push_back(' ');
855 fieldpart.clear();
856 }
857 }
858
859 fieldpart += queryelem;
860 fieldpart.push_back(' ');
861 queryelem.clear();
862 }
863 }
864 }
865 ++here;
866 }
867 // at the end
868 if (!queryelem.empty()) {
869 if (!simple_and && !in_field && !fieldpart.empty()) {
870 add_field_info(fieldpart, tag, type);
871 finalquery += fieldpart;
872 finalquery.push_back(' ');
873 fieldpart.clear();
874 }
875 fieldpart += queryelem;
876 }
877 if (!fieldpart.empty()) {
878 add_field_info(fieldpart, tag, type);
879 finalquery += fieldpart;
880 fieldpart.clear();
881 finalquery.push_back(' ');
882 }
883
884 querystring = finalquery;
885 cerr << "final query = "<<finalquery<<endl;
886}
887
888void format_field_info(text_t &querystring, cgiargsclass &args) {
889 int argct = args.getintarg("ct");
890 if (argct == 1) {
891 format_field_info_mgpp(querystring, args);
892 } else if (argct == 2) {
893 format_field_info_lucene(querystring, args);
894 }
895}
896
Note: See TracBrowser for help on using the repository browser.