source: trunk/gsdl/src/recpt/querytools.cpp@ 5017

Last change on this file since 5017 was 4757, checked in by kjdon, 21 years ago

new method for formatting the field info in the query for mgpp plain searches (have the field list in place of the index list)

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 11.8 KB
Line 
1/**********************************************************************
2 *
3 * querytools.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "querytools.h"
27#include <ctype.h>
28#include "unitool.h" // for is_unicode_letdig
29
30// request.filterResultOptions and request.fields (if required) should
31// be set from the calling code
32void set_queryfilter_options (FilterRequest_t &request, const text_t &querystring,
33 cgiargsclass &args) {
34
35 request.filterName = "QueryFilter";
36
37 OptionValue_t option;
38
39 option.name = "Term";
40 option.value = querystring;
41 request.filterOptions.push_back (option);
42
43 option.name = "QueryType";
44 option.value = (args.getintarg("t")) ? "ranked" : "boolean";
45 request.filterOptions.push_back (option);
46
47 option.name = "MatchMode";
48 option.value = (args.getintarg("t")) ? "some" : "all";
49 request.filterOptions.push_back (option);
50
51 option.name = "Casefold";
52 option.value = (args.getintarg("k")) ? "true" : "false";
53 request.filterOptions.push_back (option);
54
55 option.name = "Stem";
56 option.value = (args.getintarg("s")) ? "true" : "false";
57 request.filterOptions.push_back (option);
58
59 if (!args["h"].empty()) {
60 option.name = "Index";
61 option.value = args["h"];
62 request.filterOptions.push_back (option);
63 }
64
65 if (!args["j"].empty()) {
66 option.name = "Subcollection";
67 option.value = args["j"];
68 request.filterOptions.push_back (option);
69 }
70
71 if (!args["n"].empty()) {
72 option.name = "Language";
73 option.value = args["n"];
74 request.filterOptions.push_back (option);
75 }
76
77 if (!args["g"].empty()) { // granularity for mgpp
78 option.name = "Level";
79 option.value = args["g"];
80 request.filterOptions.push_back (option);
81 }
82
83 set_more_queryfilter_options (request, args);
84}
85
86void set_queryfilter_options (FilterRequest_t &request, const text_t &querystring1,
87 const text_t &querystring2, cgiargsclass &args) {
88
89 set_queryfilter_options (request, querystring1, args);
90
91 // fill in the second query if needed
92 if (!args["cq2"].empty()) {
93 OptionValue_t option;
94
95 option.name = "CombineQuery";
96 option.value = args["cq2"];
97 request.filterOptions.push_back (option);
98
99 option.name = "Term";
100 option.value = querystring2;
101 request.filterOptions.push_back (option);
102
103 option.name = "QueryType";
104 option.value = (args.getintarg("t")) ? "ranked" : "boolean";
105 request.filterOptions.push_back (option);
106
107 option.name = "Casefold";
108 option.value = (args.getintarg("k")) ? "true" : "false";
109 request.filterOptions.push_back (option);
110
111 option.name = "Stem";
112 option.value = (args.getintarg("s")) ? "true" : "false";
113 request.filterOptions.push_back (option);
114
115 if (!args["h2"].empty()) {
116 option.name = "Index";
117 option.value = args["h2"];
118 request.filterOptions.push_back (option);
119 }
120
121 if (!args["j2"].empty()) {
122 option.name = "Subcollection";
123 option.value = args["j2"];
124 request.filterOptions.push_back (option);
125 }
126
127 if (!args["n2"].empty()) {
128 option.name = "Language";
129 option.value = args["n2"];
130 request.filterOptions.push_back (option);
131 }
132 }
133 set_more_queryfilter_options (request, args);
134}
135
136void set_more_queryfilter_options (FilterRequest_t &request, cgiargsclass &args) {
137
138 OptionValue_t option;
139 int arg_m = args.getintarg("m");
140
141 option.name = "Maxdocs";
142 option.value = arg_m;
143 request.filterOptions.push_back (option);
144
145 // option.name = "StartResults";
146 // option.value = args["r"];
147 // request.filterOptions.push_back (option);
148
149 // option.name = "EndResults";
150 // int endresults = args.getintarg("o") + (args.getintarg("r") - 1);
151 // if ((endresults > arg_m) && (arg_m != -1)) endresults = arg_m;
152 // option.value = endresults;
153 // request.filterOptions.push_back (option);
154}
155
156void format_querystring (text_t &querystring, int querymode) {
157 text_t formattedstring;
158
159 text_t::const_iterator here = querystring.begin();
160 text_t::const_iterator end = querystring.end();
161
162 // space is used to insert spaces between Chinese
163 // characters. No space is needed before the first
164 // Chinese character.
165 bool space = false;
166
167 // want to remove ()|!& from querystring so boolean queries are just
168 // "all the words" queries (unless querymode is advanced)
169 while (here != end) {
170 if ((querymode == 0) && (*here == '(' || *here == ')' || *here == '|' ||
171 *here == '!' || *here == '&')) {
172 formattedstring.push_back(' ');
173 } else {
174 if ((*here >= 0x4e00 && *here <= 0x9fa5) ||
175 (*here >= 0xf900 && *here <= 0xfa2d)) {
176 // Chinese character
177 if (space) formattedstring.push_back (0x200b);
178 formattedstring.push_back (*here);
179 formattedstring.push_back (0x200b);
180 space = true;
181 } else {
182 // non-Chinese character
183 formattedstring.push_back (*here);
184 space = false;
185 }
186 }
187 here ++;
188 }
189 querystring = formattedstring;
190}
191
192
193
194void add_dates(text_t &querystring, int startdate, int enddate,
195 int startbc, int endbc)
196{
197 if(startdate)
198 {
199 int querystringis = 0;
200 text_t::const_iterator here = querystring.begin();
201 text_t::const_iterator end = querystring.end();
202 while(here!=end)
203 {
204 if(!(isspace((*here)))){
205 here = end;
206 querystringis = 1;
207 }
208 else
209 here++;
210 }
211 //converting BCE dates
212 if(startbc && startdate > 0)
213 {
214 startdate *= -1;
215 }
216 if(endbc && enddate > 0)
217 {
218 enddate *= -1;
219 }
220 if(enddate != 0 && enddate<startdate)
221 {
222 cout<<"enddate too small"<<endl;
223 return;
224 }
225 if(querystringis)
226 querystring.appendcstr(" AND");
227 if(!enddate)
228 {
229 querystring.appendcstr(" [");
230 if(startdate<0)
231 {
232 querystring.appendcstr("bc");
233 querystring.appendint((startdate*-1));
234 }
235 else
236 querystring.appendint(startdate);
237 querystring.appendcstr("]:CO");
238 }
239 else{
240 int nextdate = startdate;
241 querystring.appendcstr(" (");
242 while(nextdate<=enddate)
243 {
244 if(nextdate!=0)
245 {
246 querystring.appendcstr(" [");
247 if(nextdate<0)
248 {
249 querystring.appendcstr("bc");
250 querystring.appendint((nextdate*-1));
251 }
252 else
253 querystring.appendint(nextdate);
254 querystring.appendcstr("]:CO");
255 }
256 nextdate++;
257 }
258 querystring.appendcstr(" )");
259 }
260 }
261
262}
263
264void get_phrases (const text_t &querystring, text_tarray &phrases) {
265
266 phrases.erase (phrases.begin(), phrases.end());
267 if (!querystring.empty()) {
268
269 text_t::const_iterator end = querystring.end();
270 text_t::const_iterator here = findchar (querystring.begin(), end, '"');
271 if (here != end) {
272 text_t tmptext;
273 bool foundquote = false;
274 while (here != end) {
275 if (*here == '"') {
276 if (foundquote) {
277 if (!tmptext.empty()) {
278 phrases.push_back(tmptext);
279 tmptext.clear();
280 }
281 foundquote = false;
282 } else foundquote = true;
283 } else {
284 if (foundquote) tmptext.push_back (*here);
285 }
286 here ++;
287 }
288 }
289 }
290}
291
292// search history tool
293// also used for form query macros
294text_t escape_quotes(const text_t &querystring) {
295
296 text_t::const_iterator here = querystring.begin();
297 text_t::const_iterator end = querystring.end();
298
299 text_t escquery = "";
300 while (here != end) {
301 if (*here != '\'' && *here != '\"' && *here != '\n' && *here != '\r') escquery.push_back(*here);
302 else if (*here == '\n' || *here == '\r') {
303 escquery.push_back(' ');
304 } else {
305 escquery +="\\\\";
306 escquery.push_back(*here);
307 }
308
309 here++;
310 }
311 return escquery;
312
313}
314
315// some query form parsing functions for use with mgpp
316
317void parse_reg_query_form(text_t &querystring, cgiargsclass &args){
318
319 querystring.clear();
320 text_t combine;
321 int argt = args.getintarg("t");// t=0 -and, t=1 - or
322 if (argt == 0) combine = "&";
323 else combine = "|";
324
325 text_t field = args["fqf"];
326 if (field.empty()) return; // no query
327 text_tarray fields;
328 splitchar(field.begin(), field.end(), ',', fields);
329
330 text_t value = args["fqv"];
331 if (value.empty()) return; // somethings wrong
332 text_tarray values;
333 splitchar(value.begin(), value.end(), ',', values);
334
335 for (int i=0; i< values.size(); i++) {
336 if (!values[i].empty()) {
337 addqueryelem(querystring, fields[i], values[i], combine);
338 }
339 }
340
341}
342
343
344void parse_adv_query_form(text_t &querystring, cgiargsclass &args){
345
346 querystring.clear();
347 text_t combine = "&";
348
349 text_t field = args["fqf"];
350 if (field.empty()) return; // no query
351 text_tarray fields;
352 splitchar(field.begin(), field.end(), ',', fields);
353
354 text_t value = args["fqv"];
355 if (value.empty()) return; // somethings wrong
356 text_tarray values;
357 splitchar(value.begin(), value.end(), ',', values);
358
359 text_t stem = args["fqs"];
360 if (stem.empty()) return; // somethings wrong
361 text_tarray stems;
362 splitchar(stem.begin(), stem.end(), ',', stems);
363
364 text_t fold = args["fqk"];
365 if (fold.empty()) return; // somethings wrong
366 text_tarray folds;
367 splitchar(fold.begin(), fold.end(), ',', folds);
368
369 text_t comb = args["fqc"];
370 if (comb.empty()) return; //somethings wrong
371 text_tarray combs;
372 splitchar(comb.begin(), comb.end(), ',', combs);
373
374 for(int i=0; i< values.size(); i++) {
375 if (!values[i].empty()) {
376 if (i!=0) {
377 if (combs[i-1]=="and") combine = "&";
378 else if (combs[i-1]=="or")combine = "|";
379 else if (combs[i-1]=="not")combine = "!";
380 }
381 text_t term = addstemcase(values[i], stems[i], folds[i]);
382 addqueryelem(querystring, fields[i], term, combine);
383 }
384
385 }
386}
387
388text_t addstemcase(text_t &terms, text_t &stem, text_t &fold) {
389
390 text_t outtext;
391 text_t word;
392 //unsigned short c;
393 text_t::iterator here = terms.begin();
394 text_t::iterator end = terms.end();
395
396 while (here !=end) {
397
398 if (is_unicode_letdig(*here)) {
399 // not word boundary
400 word.push_back(*here);
401 here++;
402 }
403 else {
404 // found word boundary
405 if (!word.empty() ) {
406 if (stem == "1" || fold =="1") {
407 word += "#";
408 if (stem == "1") word += "s";
409 //else word += "u";
410
411 if (fold == "1") word += "i";
412 //else word += "c";
413 }
414
415 word += " ";
416 outtext += word;
417 word.clear();
418 }
419 if (*here == '\"') {
420 outtext.push_back(*here);
421 }
422 here++;
423 }
424 }
425
426 // get last word
427 if (!word.empty()) {
428 if (stem == "1"|| fold == "1") {
429 word += "#";
430 if (stem == "1") word += "s";
431 //else word += "u";
432
433 if (fold == "1") word += "i";
434 //else word += "c";
435 }
436 word += " ";
437 outtext += word;
438 }
439 return outtext;
440}
441
442
443
444void addqueryelem(text_t &querystring, text_t &tag,
445 text_t &query, text_t combine) {
446 if (!querystring.empty()) { // have to put and/or
447 querystring += " "+combine + " ";
448
449 }
450 if (tag=="ZZ" || tag=="") { // just add onto querystring
451 querystring += query;
452 }
453 else {
454 querystring += "["+query+"]:"+tag;
455 }
456
457}
458
459
460void format_field_info(text_t & querystring, text_t &tag) {
461
462 if (tag == "ZZ" || tag == "") {
463 return; // do nothing
464 }
465
466 querystring = "["+querystring+"]:"+tag;
467}
Note: See TracBrowser for help on using the repository browser.