source: trunk/gsdl/src/recpt/querytools.cpp@ 12525

Last change on this file since 12525 was 12428, checked in by mdewsnip, 18 years ago

Lucene now supports "some" and "all" queries, so look at t variable for ct == 2 also.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 23.4 KB
Line 
1/**********************************************************************
2 *
3 * querytools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "querytools.h"
27#include <ctype.h>
28#include "unitool.h" // for is_unicode_letdig
29
30void set_query_type_args(ColInfoResponse_t *cinfo, cgiargsclass &args) {
31
32 if (args["ct"].empty()) {
33 text_t build_type = cinfo->buildType;
34 if (build_type == "mgpp") {
35 args["ct"] = "1";
36 } else if (build_type == "lucene") {
37 args["ct"] = "2";
38 } else {
39 args["ct"] = "0";
40 }
41 }
42 text_t arg_ct = args["ct"];
43 if (arg_ct == "0") {
44 // mg
45 args["qt"] = "0";
46 args["qto"] = "0";
47 return;
48 }
49
50 if (!args["qt"].empty() && !args["qto"].empty()) {
51 return;
52 }
53
54 text_tmap::iterator check = cinfo->format.find("SearchTypes");
55 text_t search_types = "plain,form";
56 if(check != cinfo->format.end()){
57 search_types = (*check).second;
58 if (search_types.empty()) {
59 search_types = "plain,form";
60 }
61 }
62
63 if (args["qto"].empty()) {
64 unsigned int type = 0;
65 if (findword(search_types.begin(), search_types.end(), "form") != search_types.end()) {
66 type |= 2;
67 }
68 if (findword(search_types.begin(), search_types.end(), "plain") != search_types.end()) {
69 type |= 1;
70 }
71 args.setintarg("qto", type);
72 }
73
74 if (args["qt"].empty()) {
75 bool form_default = false;
76 int arg_qto = args.getintarg("qto");
77 if (arg_qto == 2 || (arg_qto == 3 && starts_with(search_types, "form"))) {
78 args["qt"] = "1";
79 } else {
80 args["qt"] = "0";
81 }
82 }
83}
84
85// request.filterResultOptions and request.fields (if required) should
86// be set from the calling code
87void set_queryfilter_options (FilterRequest_t &request, const text_t &querystring,
88 cgiargsclass &args) {
89
90 request.filterName = "QueryFilter";
91
92 OptionValue_t option;
93
94 option.name = "Term";
95 option.value = querystring;
96 request.filterOptions.push_back (option);
97
98 option.name = "QueryType";
99 option.value = (args.getintarg("t")) ? "ranked" : "boolean";
100 request.filterOptions.push_back (option);
101
102 option.name = "MatchMode";
103 // mgpp in advanced mode, always use some query
104 if (args.getintarg("ct") == 1 && args.getintarg("b") == 1) {
105 option.value = "some";
106 } else {
107 option.value = (args.getintarg("t")) ? "some" : "all";
108 }
109 request.filterOptions.push_back (option);
110
111 option.name = "Casefold";
112 option.value = (args.getintarg("k")) ? "true" : "false";
113 request.filterOptions.push_back (option);
114
115 option.name = "Stem";
116 option.value = (args.getintarg("s")) ? "true" : "false";
117 request.filterOptions.push_back (option);
118
119 if (!args["h"].empty()) {
120 option.name = "Index";
121 option.value = args["h"];
122 request.filterOptions.push_back (option);
123 }
124
125 if (!args["j"].empty()) {
126 option.name = "Subcollection";
127 option.value = args["j"];
128 request.filterOptions.push_back (option);
129 }
130
131 if (!args["n"].empty()) {
132 option.name = "Language";
133 option.value = args["n"];
134 request.filterOptions.push_back (option);
135 }
136
137 if (!args["g"].empty()) { // granularity for mgpp
138 option.name = "Level";
139 option.value = args["g"];
140 request.filterOptions.push_back (option);
141 }
142
143 if (!args["fs"].empty()) { // filter string for lucene
144 option.name = "FilterString";
145 option.value = args["fs"];
146 request.filterOptions.push_back (option);
147 }
148
149 if (!args["sf"].empty()) { // sort field for lucene
150 option.name = "SortField";
151 option.value = args["sf"];
152 request.filterOptions.push_back (option);
153 }
154
155 // sort field for lucene
156 option.name = "FuzzySearch";
157 option.value = (args.getintarg("fuzzy")) ? "true" : "false";
158 request.filterOptions.push_back (option);
159
160 set_more_queryfilter_options (request, args);
161}
162
163void set_queryfilter_options (FilterRequest_t &request, const text_t &querystring1,
164 const text_t &querystring2, cgiargsclass &args) {
165
166 set_queryfilter_options (request, querystring1, args);
167
168 // fill in the second query if needed
169 if (!args["cq2"].empty()) {
170 OptionValue_t option;
171
172 option.name = "CombineQuery";
173 option.value = args["cq2"];
174 request.filterOptions.push_back (option);
175
176 option.name = "Term";
177 option.value = querystring2;
178 request.filterOptions.push_back (option);
179
180 option.name = "QueryType";
181 option.value = (args.getintarg("t")) ? "ranked" : "boolean";
182 request.filterOptions.push_back (option);
183
184 option.name = "Casefold";
185 option.value = (args.getintarg("k")) ? "true" : "false";
186 request.filterOptions.push_back (option);
187
188 option.name = "Stem";
189 option.value = (args.getintarg("s")) ? "true" : "false";
190 request.filterOptions.push_back (option);
191
192 if (!args["h2"].empty()) {
193 option.name = "Index";
194 option.value = args["h2"];
195 request.filterOptions.push_back (option);
196 }
197
198 if (!args["j2"].empty()) {
199 option.name = "Subcollection";
200 option.value = args["j2"];
201 request.filterOptions.push_back (option);
202 }
203
204 if (!args["n2"].empty()) {
205 option.name = "Language";
206 option.value = args["n2"];
207 request.filterOptions.push_back (option);
208 }
209 }
210 set_more_queryfilter_options (request, args);
211}
212
213void set_more_queryfilter_options (FilterRequest_t &request, cgiargsclass &args) {
214
215 OptionValue_t option;
216 int arg_m = args.getintarg("m");
217
218 option.name = "Maxdocs";
219 option.value = arg_m;
220 request.filterOptions.push_back (option);
221
222 // option.name = "StartResults";
223 // option.value = args["r"];
224 // request.filterOptions.push_back (option);
225
226 // option.name = "EndResults";
227 // int endresults = args.getintarg("o") + (args.getintarg("r") - 1);
228 // if ((endresults > arg_m) && (arg_m != -1)) endresults = arg_m;
229 // option.value = endresults;
230 // request.filterOptions.push_back (option);
231}
232
// Returns true when character is one of the query-syntax characters of
// the given indexer:
//   indexer_type 1 (mgpp):   # / *
//   indexer_type 2 (lucene): ? * ~ ^
// Any other indexer type has no special characters.
bool is_special_character(int indexer_type, unsigned short character) {
  switch (indexer_type) {
  case 1: // mgpp
    return (character == '#' || character == '/' || character == '*');
  case 2: // lucene
    return (character == '?' || character == '*' || character == '~' ||
            character == '^');
  default:
    return false;
  }
}
245
246void format_querystring (text_t &querystring, int querymode, bool segment) {
247 text_t formattedstring;
248
249 if (querymode == 1 && !segment) return;
250
251 text_t::const_iterator here = querystring.begin();
252 text_t::const_iterator end = querystring.end();
253
254 // space is used to insert spaces between Chinese
255 // characters. No space is needed before the first
256 // Chinese character.
257 bool space = false;
258
259 // want to remove ()|!& from querystring so boolean queries are just
260 // "all the words" queries (unless querymode is advanced)
261 while (here != end) {
262 if ((querymode == 0) && (*here == '(' || *here == ')' || *here == '|' ||
263 *here == '!' || *here == '&')) {
264 formattedstring.push_back(' ');
265 } else if (segment) {
266 if ((*here >= 0x4e00 && *here <= 0x9fa5) ||
267 (*here >= 0xf900 && *here <= 0xfa2d)) {
268 // Chinese character
269 if (!space) formattedstring.push_back (0x200b); // zero width space
270 formattedstring.push_back (*here);
271 formattedstring.push_back (0x200b);
272 space = true;
273 } else {
274
275 // non-Chinese character
276 formattedstring.push_back (*here);
277 space = false;
278
279 }
280
281 } else {
282 formattedstring.push_back (*here);
283 }
284 ++here;
285 }
286 querystring = formattedstring;
287}
288
289
290
291void add_dates(text_t &querystring, int startdate, int enddate,
292 int startbc, int endbc, int ct)
293{
294 if(startdate)
295 {
296 int querystringis = 0;
297 text_t::const_iterator here = querystring.begin();
298 text_t::const_iterator end = querystring.end();
299 while(here!=end)
300 {
301 if(!(isspace((*here)))){
302 here = end;
303 querystringis = 1;
304 }
305 else
306 ++here;
307 }
308 //converting BCE dates
309 if(startbc && startdate > 0)
310 {
311 startdate *= -1;
312 }
313 if(endbc && enddate > 0)
314 {
315 enddate *= -1;
316 }
317 if(enddate != 0 && enddate<startdate)
318 {
319 cout<<"enddate too small"<<endl;
320 return;
321 }
322 if(querystringis)
323 querystring.appendcstr(" AND");
324 if(!enddate)
325 {
326 if (ct==1) {
327 mgpp_adddateelem(querystring,startdate);
328 }
329 else { // lucene
330 lucene_adddateelem(querystring,startdate);
331 }
332 }
333 else{
334 int nextdate = startdate;
335 querystring.appendcstr(" (");
336 while(nextdate<=enddate)
337 {
338 if(nextdate!=0) {
339 if (ct==1) {
340 mgpp_adddateelem(querystring,nextdate);
341 }
342 else { // lucene
343 lucene_adddateelem(querystring,nextdate);
344 }
345 }
346 ++nextdate;
347 }
348 querystring.appendcstr(" )");
349 }
350 }
351
352}
353
354// search history tool
355// also used for form query macros
356text_t escape_quotes(const text_t &querystring) {
357
358 text_t::const_iterator here = querystring.begin();
359 text_t::const_iterator end = querystring.end();
360
361 text_t escquery = "";
362 while (here != end) {
363 if (*here != '\'' && *here != '\"' && *here != '\n' && *here != '\r') escquery.push_back(*here);
364 else if (*here == '\n' || *here == '\r') {
365 escquery.push_back(' ');
366 } else {
367 escquery +="\\\\";
368 escquery.push_back(*here);
369 }
370
371 ++here;
372 }
373 return escquery;
374
375}
376
377// some query form parsing functions for use with mgpp & lucene
378
379void parse_reg_query_form(text_t &querystring, cgiargsclass &args)
380{
381 querystring.clear();
382
383 const int ct = args.getintarg("ct");
384 int argt = args.getintarg("t");// t=0 -and, t=1 - or
385
386 text_t combine;
387 if (ct==1) {
388 if (argt == 0) combine = "&";
389 else combine = "|";
390 }
391 else { // lucene
392 if (argt == 0) combine = "AND";
393 else combine = "OR";
394 }
395
396 text_t field = args["fqf"];
397 if (field.empty()) return; // no query
398 text_tarray fields;
399 splitchar(field.begin(), field.end(), ',', fields);
400
401 text_t value = args["fqv"];
402 if (value.empty()) return; // somethings wrong
403 text_tarray values;
404 splitchar(value.begin(), value.end(), ',', values);
405
406
407 for (int i=0; i< values.size(); ++i) {
408 if (!values[i].empty()) {
409 if (ct == 1) {
410 mgpp_addqueryelem(querystring, fields[i], values[i], combine);
411 }
412 else { // lucene
413 lucene_addqueryelem(querystring, fields[i], values[i], combine);
414 }
415 }
416 }
417
418}
419
420
421void parse_adv_query_form(text_t &querystring, cgiargsclass &args){
422
423 querystring.clear();
424
425 const int ct = args.getintarg("ct");
426 text_t combine;
427 if (ct==1) {
428 combine = "&";
429 }
430 else { // lucene
431 combine = "AND";
432 }
433
434 text_t field = args["fqf"];
435 if (field.empty()) return; // no query
436 text_tarray fields;
437 splitchar(field.begin(), field.end(), ',', fields);
438
439 text_t value = args["fqv"];
440 if (value.empty()) return; // somethings wrong
441 text_tarray values;
442 splitchar(value.begin(), value.end(), ',', values);
443
444 text_t stem = args["fqs"];
445 if (stem.empty()) return; // somethings wrong
446 text_tarray stems;
447 splitchar(stem.begin(), stem.end(), ',', stems);
448
449 text_t fold = args["fqk"];
450 if (fold.empty()) return; // somethings wrong
451 text_tarray folds;
452 splitchar(fold.begin(), fold.end(), ',', folds);
453
454 text_t comb = args["fqc"];
455 if (comb.empty()) return; //somethings wrong
456 text_tarray combs;
457 splitchar(comb.begin(), comb.end(), ',', combs);
458
459 for(int i=0; i< values.size(); ++i) {
460 if (!values[i].empty()) {
461 if (i!=0) {
462 if (ct==1) {
463 if (combs[i-1]=="and") combine = "&";
464 else if (combs[i-1]=="or")combine = "|";
465 else if (combs[i-1]=="not")combine = "!";
466 }
467 else { // lucene
468 if (combs[i-1]=="and") combine = "AND";
469 else if (combs[i-1]=="or")combine = "OR";
470 else if (combs[i-1]=="not")combine = "NOT";
471 }
472 }
473 text_t term = addstemcase(values[i], stems[i], folds[i], ct);
474 mgpp_addqueryelem(querystring, fields[i], term, combine);
475 }
476
477 }
478}
479
480text_t addstemcase(const text_t &terms, const text_t &stem, const text_t &fold,
481 const int indexer_type) {
482
483 text_t outtext;
484 text_t word;
485 //unsigned short c;
486 text_t::const_iterator here = terms.begin();
487 text_t::const_iterator end = terms.end();
488
489 while (here !=end) {
490
491 if (is_unicode_letdig(*here) || is_special_character(indexer_type, *here)) {
492 // not word boundary
493 word.push_back(*here);
494 ++here;
495 }
496 else {
497 // found word boundary
498 if (!word.empty() ) {
499 if (stem == "1" || fold =="1") {
500 word += "#";
501 if (stem == "1") word += "s";
502 //else word += "u";
503
504 if (fold == "1") word += "i";
505 //else word += "c";
506 }
507
508 word += " ";
509 outtext += word;
510 word.clear();
511 }
512 if (*here == '\"') {
513 outtext.push_back(*here);
514 }
515 ++here;
516 }
517 }
518
519 // get last word
520 if (!word.empty()) {
521 if (stem == "1"|| fold == "1") {
522 word += "#";
523 if (stem == "1") word += "s";
524 //else word += "u";
525
526 if (fold == "1") word += "i";
527 //else word += "c";
528 }
529 word += " ";
530 outtext += word;
531 }
532 return outtext;
533}
534
535
536void mgpp_adddateelem(text_t& querystring, const int date)
537{
538 querystring.appendcstr(" [");
539 if(date<0) {
540 querystring.appendcstr("bc");
541 querystring.appendint((date*-1));
542 }
543 else {
544 querystring.appendint(date);
545 }
546 querystring.appendcstr("]:CV");
547}
548
549void lucene_adddateelem(text_t& querystring, const int date)
550{
551 querystring.appendcstr(" CV:(");
552 if(date<0) {
553 querystring.appendcstr("bc");
554 querystring.appendint((date*-1));
555 }
556 else {
557 querystring.appendint(date);
558 }
559 querystring.appendcstr(")");
560}
561
562
563void mgpp_addqueryelem(text_t &querystring, text_t &tag,
564 text_t &query, text_t &combine) {
565 if (!querystring.empty()) { // have to put and/or
566 querystring += " " + combine + " ";
567
568 }
569 if (tag=="ZZ" || tag=="") { // just add onto querystring
570 querystring += query;
571 }
572 else {
573 querystring += "["+query+"]:"+tag;
574 }
575
576}
577
578void lucene_addqueryelem(text_t &querystring, text_t &tag,
579 text_t &query, text_t &combine) {
580 if (!querystring.empty()) { // have to put and/or
581 querystring += " " + combine + " ";
582
583 }
584 if (tag=="ZZ" || tag=="") { // just add onto querystring
585 querystring += query;
586 }
587 else {
588 querystring += tag+":("+query+")";
589 }
590}
591
592
593void addqueryelem_ex(text_t &querystring, const text_t &tag,
594 const text_t &terms, const text_t &stem, const text_t &fold,
595 const text_t& combine, const text_t& word_combine) {
596 if (!querystring.empty()) { // have to put and/or
597 querystring += " " + combine + " ";
598 }
599 text_t outtext; outtext.reserve(512);
600 text_t word; word.reserve(100);
601 //unsigned short c;
602 text_t::const_iterator here = terms.begin();
603 text_t::const_iterator end = terms.end();
604 bool inquote = false, firstword = true;
605
606 text_t word2; word2.reserve(256);
607
608 while (here !=end) {
609 if (is_unicode_space(*here)) {
610 if (word2 == "AND") { word2.clear(); word2.push_back(7527); word2.appendcarr("AND", 3); word2.push_back(7527); }
611 else if (word2 == "OR") { word2.clear(); word2.push_back(7527); word2.appendcarr("OR", 2); word2.push_back(7527); }
612 else if (word2 == "NOT") { word2.clear(); word2.push_back(7527); word2.appendcarr("NOT", 3); word2.push_back(7527); }
613 else if (word2 == "NEAR") { word2.clear(); word2.push_back(7527); word2.appendcarr("NEAR", 4); word2.push_back(7527); }
614 else if (word2 == "WITHIN") { word2.clear(); word2.push_back(7527); word2.appendcarr("WITHIN", 6); word2.push_back(7527); }
615 if (inquote) {
616 word2.push_back(*here);
617 }
618 word.append(word2); word2.clear();
619
620 if (!inquote && !word.empty() ) {
621 // found word boundary
622
623 if (stem == "1" || fold =="1") {
624 word += "#";
625 if (stem == "1") word += "s";
626 //else word += "u";
627
628 if (fold == "1") word += "i";
629 //else word += "c";
630 }
631 if (firstword) {
632 firstword = false;
633 } else {
634 outtext += " " + word_combine + " ";
635 }
636 outtext += "[" + word + "]:"+tag;
637 word.clear();
638 }
639 ++here;
640 } else if (*here == '\"') {
641 word2.push_back(*here);
642 inquote = !inquote;
643 ++here;
644 } else {
645 // not word boundary
646 word2.push_back(*here);
647 ++here;
648 }
649 }
650
651 // get last word
652 if (!word2.empty()) {
653 if (word2 == "AND") { word2.clear(); word2.push_back(7527); word2.appendcarr("AND", 3); word2.push_back(7527); }
654 else if (word2 == "OR") { word2.clear(); word2.push_back(7527); word2.appendcarr("OR", 2); word2.push_back(7527); }
655 else if (word2 == "NOT") { word2.clear(); word2.push_back(7527); word2.appendcarr("NOT", 3); word2.push_back(7527); }
656 else if (word2 == "NEAR") { word2.clear(); word2.push_back(7527); word2.appendcarr("NEAR", 4); word2.push_back(7527); }
657 else if (word2 == "WITHIN") { word2.clear(); word2.push_back(7527); word2.appendcarr("WITHIN", 6); word2.push_back(7527); }
658 word.append(word2); word2.clear();
659
660 if (stem == "1"|| fold == "1") {
661 word += "#";
662 if (stem == "1") word += "s";
663 //else word += "u";
664
665 if (fold == "1") word += "i";
666 //else word += "c";
667 }
668 if (!outtext.empty()) outtext += " " + word_combine + " ";
669 outtext += "[" + word + "]:"+tag;
670 }
671 querystring += "(" + outtext + ")";
672}
673
674
675void add_field_info(text_t &querystring, const text_t &tag, int type) {
676
677 if (tag == "") return; // do nothing
678 if (type == 1) { //mgpp
679 querystring = "["+querystring+"]:"+tag;
680 } else if (type == 2) { // lucene
681 querystring = tag+":("+querystring+")";
682 }
683
684}
685
686
687void format_field_info_lucene(text_t &querystring, cgiargsclass &args) {
688 text_t tag = args["fqf"];
689 if (tag == "ZZ") tag = ""; // ZZ is a special tag meaning no tag (all fields)
690 int type = 2; //lucene
691 int argt = args.getintarg("t");// t=0 -and, t=1 - or
692 int argb = args.getintarg("b"); // b=0 simple, b=1 advanced
693
694 // lucene simple OR - the string stays as is, but may need field tag
695 if (argb==0 && argt == 1) {
696 // just tag the entire thing
697 if (tag != "") {
698 add_field_info(querystring, tag, type);
699 }
700 return;
701 }
702 bool in_phrase = false;
703
704 text_t queryelem = "";
705 text_t finalquery = "";
706
707 // only add in + for simple AND search
708 text_t combine = ((argb==0)? "+" : "");
709
710 // for lucene, we need to change & to && and | to || if advanced search
711 // we need to tag the entire string, if we have a field
712 // if we are simple and search, then we put && in between words
713
714 text_t::const_iterator here = querystring.begin();
715 text_t::const_iterator end = querystring.end();
716 while (here != end) {
717 if (is_unicode_letdig(*here) || is_special_character(type, *here)) {
718 queryelem.push_back(*here);
719 }
720
721 // Detect phrase starts/finishes
722 else if (*here == '"') {
723 queryelem.push_back(*here);
724 if (in_phrase == false) in_phrase = true;
725 else {
726 finalquery += combine + queryelem;
727 queryelem.clear();
728 in_phrase = false;
729 }
730 }
731
732 // Found word boundary, in a phrase
733 else if (in_phrase) {
734 queryelem.push_back(*here);
735 }
736 // Word boundary, but not in a phrase
737 else {
738 if (*here == '&') {
739 queryelem.push_back('&');
740 queryelem.push_back('&');
741 } else if (*here == '|') {
742 queryelem.push_back('|');
743 queryelem.push_back('|');
744 } else {
745 if (!queryelem.empty()) {
746 finalquery += combine + queryelem;
747 queryelem.clear();
748 }
749 finalquery.push_back(*here);
750 }
751 }
752
753 ++here;
754 }
755
756 // Get last element
757 if (!queryelem.empty()) {
758 finalquery += combine + queryelem;
759 }
760
761 add_field_info(finalquery, tag, type);
762 querystring = finalquery;
763 cerr << "final query = "<<finalquery<<endl;
764}
765
766void format_field_info_mgpp(text_t &querystring, cgiargsclass &args) {
767 text_t tag = args["fqf"];
768 if (tag == "ZZ") tag = ""; // ZZ is a special tag meaning no tag (all fields)
769
770 int argt = args.getintarg("t");// t=0 -and, t=1 - or
771 int argb = args.getintarg("b"); // b=0 simple, b=1 advanced
772
773 if (tag == "" && argb ==1) {
774 return; // no field specifier, advanced mode, the query stays as written
775 }
776
777 int type = 1; // mgpp
778
779 bool simple_and = (argb==0 && argt==0);
780 text_t finalquery = "";
781 text_t fieldpart ="";
782 text_t queryelem = "";
783 bool in_phrase = false;
784 bool in_field = false;
785
786 text_t::const_iterator here = querystring.begin();
787 text_t::const_iterator end = querystring.end();
788 while (here != end) {
789 if (is_unicode_letdig(*here) || *here == '&' || is_special_character(type, *here)) {
790 queryelem.push_back(*here);
791 }
792 else if (*here == '|') {
793 in_field = false;
794 }
795 else if (*here == '!' || *here == '(' || *here == ')') {
796 if (!in_phrase) { // ignore these if in_phrase
797 // output field, then output operator
798 in_field = false;
799 if (!queryelem.empty()) {
800 if (!simple_and && !fieldpart.empty()) {
801 add_field_info(fieldpart, tag, type);
802 finalquery += fieldpart;
803 finalquery.push_back(' ');
804 fieldpart.clear();
805 }
806 fieldpart += queryelem;
807 }
808 if (!fieldpart.empty()) {
809 add_field_info(fieldpart, tag, type);
810 finalquery += fieldpart;
811 finalquery.push_back(' ');
812 }
813 fieldpart.clear();
814 queryelem.clear();
815 finalquery.push_back(*here);
816 finalquery.push_back(' ');
817 }
818 }
819 else if (*here == '"') {
820 queryelem.push_back(*here);
821 if (in_phrase == false) in_phrase = true;
822 else {
823 in_phrase = false;
824 }
825 }
826
827 // Found word boundary, in a phrase
828 else if (in_phrase) {
829 queryelem.push_back(*here);
830 }
831 // Found a word boundary
832 else {
833 if (!queryelem.empty()) {
834 if (queryelem == "&") {
835 in_field = true;
836 queryelem.clear();
837 }
838 else if (starts_with(queryelem, "NEAR") || starts_with(queryelem, "WITHIN")) {
839
840 if (argb==1) {
841 // simple search, these not allowed
842 in_field = true;
843 fieldpart += queryelem;
844 fieldpart.push_back(' ');
845 }
846 queryelem.clear();
847
848 }
849 else {
850 if (!simple_and && !in_field) {
851 if (!fieldpart.empty()) {
852 add_field_info(fieldpart, tag, type);
853 finalquery += fieldpart;
854 finalquery.push_back(' ');
855 fieldpart.clear();
856 }
857 }
858
859 fieldpart += queryelem;
860 fieldpart.push_back(' ');
861 queryelem.clear();
862 }
863 }
864 }
865 ++here;
866 }
867 // at the end
868 if (!queryelem.empty()) {
869 if (!simple_and && !in_field && !fieldpart.empty()) {
870 add_field_info(fieldpart, tag, type);
871 finalquery += fieldpart;
872 finalquery.push_back(' ');
873 fieldpart.clear();
874 }
875 fieldpart += queryelem;
876 }
877 if (!fieldpart.empty()) {
878 add_field_info(fieldpart, tag, type);
879 finalquery += fieldpart;
880 fieldpart.clear();
881 finalquery.push_back(' ');
882 }
883
884 querystring = finalquery;
885 cerr << "final query = "<<finalquery<<endl;
886}
887
888void format_field_info(text_t &querystring, cgiargsclass &args) {
889 int argct = args.getintarg("ct");
890 if (argct == 1) {
891 format_field_info_mgpp(querystring, args);
892 } else if (argct == 2) {
893 format_field_info_lucene(querystring, args);
894 }
895}
896
Note: See TracBrowser for help on using the repository browser.