source: trunk/gsdl/src/recpt/querytools.cpp@ 3149

Last change on this file since 3149 was 3149, checked in by jrm21, 22 years ago

removed a debugging statement for mgpp phrase search.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.2 KB
RevLine 
[270]1/**********************************************************************
2 *
3 * querytools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
[533]6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
[270]9 *
[533]10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
[270]24 *********************************************************************/
25
26#include "querytools.h"
[1373]27#include <ctype.h>
[1914]28#include "unitool.h" // for is_unicode_letdig
[270]29
[759]30// request.filterResultOptions and request.fields (if required) should
31// be set from the calling code
32void set_queryfilter_options (FilterRequest_t &request, const text_t &querystring,
33 cgiargsclass &args) {
[270]34
35 request.filterName = "QueryFilter";
36
37 OptionValue_t option;
[470]38
[270]39 option.name = "Term";
[759]40 option.value = querystring;
[270]41 request.filterOptions.push_back (option);
42
43 option.name = "QueryType";
44 option.value = (args.getintarg("t")) ? "ranked" : "boolean";
45 request.filterOptions.push_back (option);
46
[1774]47 option.name = "MatchMode";
48 option.value = (args.getintarg("t")) ? "some" : "all";
49 request.filterOptions.push_back (option);
50
[270]51 option.name = "Casefold";
52 option.value = (args.getintarg("k")) ? "true" : "false";
53 request.filterOptions.push_back (option);
54
55 option.name = "Stem";
56 option.value = (args.getintarg("s")) ? "true" : "false";
57 request.filterOptions.push_back (option);
58
59 if (!args["h"].empty()) {
60 option.name = "Index";
61 option.value = args["h"];
62 request.filterOptions.push_back (option);
63 }
64
65 if (!args["j"].empty()) {
66 option.name = "Subcollection";
67 option.value = args["j"];
68 request.filterOptions.push_back (option);
69 }
70
71 if (!args["n"].empty()) {
72 option.name = "Language";
73 option.value = args["n"];
74 request.filterOptions.push_back (option);
75 }
[1329]76
77 if (!args["g"].empty()) { // granularity for mgpp
78 option.name = "Level";
79 option.value = args["g"];
80 request.filterOptions.push_back (option);
81 }
[270]82
[759]83 set_more_queryfilter_options (request, args);
84}
85
86void set_queryfilter_options (FilterRequest_t &request, const text_t &querystring1,
87 const text_t &querystring2, cgiargsclass &args) {
88
89 set_queryfilter_options (request, querystring1, args);
90
[349]91 // fill in the second query if needed
92 if (!args["cq2"].empty()) {
[759]93 OptionValue_t option;
94
[349]95 option.name = "CombineQuery";
96 option.value = args["cq2"];
97 request.filterOptions.push_back (option);
98
99 option.name = "Term";
[759]100 option.value = querystring2;
[349]101 request.filterOptions.push_back (option);
[759]102
[349]103 option.name = "QueryType";
104 option.value = (args.getintarg("t")) ? "ranked" : "boolean";
105 request.filterOptions.push_back (option);
106
107 option.name = "Casefold";
108 option.value = (args.getintarg("k")) ? "true" : "false";
109 request.filterOptions.push_back (option);
110
111 option.name = "Stem";
112 option.value = (args.getintarg("s")) ? "true" : "false";
113 request.filterOptions.push_back (option);
114
115 if (!args["h2"].empty()) {
116 option.name = "Index";
117 option.value = args["h2"];
118 request.filterOptions.push_back (option);
119 }
120
121 if (!args["j2"].empty()) {
122 option.name = "Subcollection";
123 option.value = args["j2"];
124 request.filterOptions.push_back (option);
125 }
126
127 if (!args["n2"].empty()) {
128 option.name = "Language";
129 option.value = args["n2"];
130 request.filterOptions.push_back (option);
131 }
132 }
[759]133 set_more_queryfilter_options (request, args);
134}
[608]135
[759]136void set_more_queryfilter_options (FilterRequest_t &request, cgiargsclass &args) {
137
138 OptionValue_t option;
[608]139 int arg_m = args.getintarg("m");
[759]140
[608]141 option.name = "Maxdocs";
142 option.value = arg_m;
143 request.filterOptions.push_back (option);
[1329]144
[759]145 // option.name = "StartResults";
146 // option.value = args["r"];
147 // request.filterOptions.push_back (option);
[270]148
[759]149 // option.name = "EndResults";
150 // int endresults = args.getintarg("o") + (args.getintarg("r") - 1);
151 // if ((endresults > arg_m) && (arg_m != -1)) endresults = arg_m;
152 // option.value = endresults;
153 // request.filterOptions.push_back (option);
[270]154}
155
[470]156void format_querystring (text_t &querystring, int querymode) {
[270]157 text_t formattedstring;
158
159 text_t::const_iterator here = querystring.begin();
160 text_t::const_iterator end = querystring.end();
161
162 // space is used to insert spaces between Chinese
163 // characters. No space is needed before the first
164 // Chinese character.
165 bool space = false;
166
167 // want to remove ()|!& from querystring so boolean queries are just
[470]168 // "all the words" queries (unless querymode is advanced)
[270]169 while (here != end) {
[470]170 if ((querymode == 0) && (*here == '(' || *here == ')' || *here == '|' ||
171 *here == '!' || *here == '&')) {
[270]172 formattedstring.push_back(' ');
173 } else {
[397]174 if ((*here >= 0x4e00 && *here <= 0x9fa5) ||
175 (*here >= 0xf900 && *here <= 0xfa2d)) {
176 // Chinese character
177 if (space) formattedstring.push_back (0x200b);
178 formattedstring.push_back (*here);
179 formattedstring.push_back (0x200b);
180 space = true;
[270]181 } else {
[397]182 // non-Chinese character
183 formattedstring.push_back (*here);
184 space = false;
[270]185 }
186 }
187 here ++;
188 }
[397]189 querystring = formattedstring;
[270]190}
191
[1373]192
193
194void add_dates(text_t &querystring, int startdate, int enddate,
195 int startbc, int endbc)
196{
197 if(startdate)
198 {
199 int querystringis = 0;
200 text_t::const_iterator here = querystring.begin();
201 text_t::const_iterator end = querystring.end();
202 while(here!=end)
203 {
204 if(!(isspace((*here)))){
205 here = end;
206 querystringis = 1;
207 }
208 else
209 here++;
210 }
211 //converting BCE dates
212 if(startbc && startdate > 0)
213 {
214 startdate *= -1;
215 }
216 if(endbc && enddate > 0)
217 {
218 enddate *= -1;
219 }
220 if(enddate != 0 && enddate<startdate)
221 {
222 cout<<"enddate too small"<<endl;
223 return;
224 }
225 if(querystringis)
226 querystring.appendcstr(" AND");
227 if(!enddate)
228 {
229 querystring.appendcstr(" [");
[1467]230 if(startdate<0)
231 {
232 querystring.appendcstr("bc");
233 querystring.appendint((startdate*-1));
234 }
235 else
236 querystring.appendint(startdate);
[1889]237 querystring.appendcstr("]:CO");
[1373]238 }
239 else{
240 int nextdate = startdate;
241 querystring.appendcstr(" (");
242 while(nextdate<=enddate)
243 {
244 if(nextdate!=0)
245 {
[1467]246 querystring.appendcstr(" [");
247 if(nextdate<0)
248 {
249 querystring.appendcstr("bc");
250 querystring.appendint((nextdate*-1));
251 }
252 else
253 querystring.appendint(nextdate);
[2745]254 querystring.appendcstr("]:CO");
[1373]255 }
256 nextdate++;
257 }
258 querystring.appendcstr(" )");
259 }
260 }
[1467]261
[1373]262}
[1467]263
[403]264void get_phrases (const text_t &querystring, text_tarray &phrases) {
265
266 phrases.erase (phrases.begin(), phrases.end());
267 if (!querystring.empty()) {
268
269 text_t::const_iterator end = querystring.end();
270 text_t::const_iterator here = findchar (querystring.begin(), end, '"');
271 if (here != end) {
272 text_t tmptext;
273 bool foundquote = false;
274 while (here != end) {
275 if (*here == '"') {
276 if (foundquote) {
277 if (!tmptext.empty()) {
278 phrases.push_back(tmptext);
279 tmptext.clear();
280 }
281 foundquote = false;
282 } else foundquote = true;
283 } else {
284 if (foundquote) tmptext.push_back (*here);
285 }
286 here ++;
287 }
288 }
289 }
290}
[1914]291
292// search history tool
293text_t escape_quotes(const text_t &querystring) {
294
295 text_t::const_iterator here = querystring.begin();
296 text_t::const_iterator end = querystring.end();
297
298 text_t escquery = "";
299 while (here != end) {
[1988]300 if (*here != '\'' && *here != '\"' && *here != '\n' && *here != '\r') escquery.push_back(*here);
301 else if (*here == '\n' || *here == '\r') {
302 escquery.push_back(' ');
303 } else {
[1914]304 escquery +="\\\\";
305 escquery.push_back(*here);
306 }
307
308 here++;
309 }
310 return escquery;
311
312}
313
314// some query form parsing functions for use with mgpp
315
316void parse_reg_query_form(text_t &querystring, cgiargsclass &args){
317
318 querystring.clear();
319 text_t combine;
320 int argt = args.getintarg("t");// t=0 -and, t=1 - or
321 if (argt == 0) combine = "&";
322 else combine = "|";
323
324 text_t field = args["fqf"];
325 if (field.empty()) return; // no query
326 text_tarray fields;
327 splitchar(field.begin(), field.end(), ',', fields);
328
329 text_t value = args["fqv"];
330 if (value.empty()) return; // somethings wrong
331 text_tarray values;
332 splitchar(value.begin(), value.end(), ',', values);
333
334 for (int i=0; i< values.size(); i++) {
335 if (!values[i].empty()) {
336 text_t text = formatelem(values[i]);
[2745]337 addqueryelem(querystring, fields[i], text, combine);
[1914]338 }
339 }
340
341}
342
343text_t formatelem(text_t &text) {
344
345 text_t::iterator here = text.begin();
346 text_t::iterator end = text.end();
347
348 bool space = false;
[2745]349 int words = 0;
[1914]350 text_t newtext = "";
351 while (here != end) {
352 if (is_unicode_letdig(*here)) {
353 newtext.push_back(*here);
[2745]354 if (space==false) words++;
[1914]355 space = true;
356 }
357 else {
358 if (space) {
359 newtext.push_back(' ');
360 space = false;
361 }
362 }
363 here++;
364 }
365
[2745]366 if (words > 1) { // have a phrase, put it in quotes
367 newtext = "\""+newtext+"\"";
368 }
[1914]369 return newtext;
370}
371
372void parse_adv_query_form(text_t &querystring, cgiargsclass &args){
373
374 querystring.clear();
375 text_t combine = "&";
376
377 text_t field = args["fqf"];
378 if (field.empty()) return; // no query
379 text_tarray fields;
380 splitchar(field.begin(), field.end(), ',', fields);
381
382 text_t value = args["fqv"];
383 if (value.empty()) return; // somethings wrong
384 text_tarray values;
385 splitchar(value.begin(), value.end(), ',', values);
386
387 text_t stem = args["fqs"];
388 if (stem.empty()) return; // somethings wrong
389 text_tarray stems;
390 splitchar(stem.begin(), stem.end(), ',', stems);
391
392 text_t fold = args["fqk"];
393 if (fold.empty()) return; // somethings wrong
394 text_tarray folds;
395 splitchar(fold.begin(), fold.end(), ',', folds);
396
397 text_t comb = args["fqc"];
398 if (comb.empty()) return; //somethings wrong
399 text_tarray combs;
400 splitchar(comb.begin(), comb.end(), ',', combs);
401
402 for(int i=0; i< values.size(); i++) {
403 if (!values[i].empty()) {
404 if (i!=0) {
405 if (combs[i-1]=="and") combine = "&";
406 else if (combs[i-1]=="or")combine = "|";
407 else if (combs[i-1]=="not")combine = "!";
408 }
[2745]409 text_t term = formatelem(values[i]);
[1914]410 term = addstemcase(term, stems[i], folds[i]);
411 addqueryelem(querystring, fields[i], term, combine);
412 }
413
414 }
415}
416
417text_t addstemcase(text_t &terms, text_t &stem, text_t &fold) {
418
419 text_t outtext;
420 text_t word;
421 //unsigned short c;
422 text_t::iterator here = terms.begin();
423 text_t::iterator end = terms.end();
424
425 while (here !=end) {
[2745]426
[1914]427 if (is_unicode_letdig(*here)) {
428 // not word boundary
429 word.push_back(*here);
430 here++;
431 }
432 else {
433 // found word boundary
434 if (!word.empty() ) {
435 if (stem == "1" || fold =="1") {
436 word += "#";
437 if (stem == "1") word += "s";
438 //else word += "u";
439
440 if (fold == "1") word += "i";
441 //else word += "c";
442 }
[2745]443
[1914]444 word += " ";
445 outtext += word;
446 word.clear();
447 }
[2745]448 if (*here == '\"') {
449 outtext.push_back(*here);
450 }
[1914]451 here++;
452 }
453 }
454
455 // get last word
456 if (!word.empty()) {
457 if (stem == "1"|| fold == "1") {
458 word += "#";
459 if (stem == "1") word += "s";
460 //else word += "u";
461
462 if (fold == "1") word += "i";
463 //else word += "c";
464 }
465 word += " ";
466 outtext += word;
467 }
468 return outtext;
469}
470
471
472
473void addqueryelem(text_t &querystring, text_t &tag,
474 text_t &query, text_t combine) {
475 if (!querystring.empty()) { // have to put and/or
476 querystring += " "+combine + " ";
477
478 }
479 if (tag=="ZZ") { // just add onto querystring
480 querystring += query;
481 }
482 else {
483 querystring += "["+query+"]:"+tag;
484 }
485
486}
487
488
489
Note: See TracBrowser for help on using the repository browser.