source: trunk/gsdl/src/recpt/queryaction.cpp@ 13017

Last change on this file since 13017 was 12866, checked in by kjdon, 18 years ago

added af (accent folding) arg, and ks, ss and afs which state whether casefolding, stemming and accentfolding are supported for the current collection

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 58.4 KB
Line 
1/**********************************************************************
2 *
3 * queryaction.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "queryaction.h"
27#include "querytools.h"
28#include "formattools.h"
29#include "cgiutils.h"
30#include "OIDtools.h"
31//#include "infodbclass.h"
32#include "fileutil.h"
33#include "text_t.h"
34#include "historydb.h"
35#include "htmlutils.h" // for html_safe in do_action
36#include "gsdltools.h"
37#include "phrases.h" // for get_phrases
38#include <stdlib.h> // for strtol
39#include <assert.h>
40
41void colinfo_t::clear () {
42 formatlistptr = NULL;
43 browserptr = NULL;
44}
45
46void QueryResult_t::clear() {
47 doc.clear();
48 collection.clear();
49}
50
51queryaction::queryaction () {
52
53 recpt = NULL;
54 num_phrases = 0;
55
56 // this action uses cgi variable "a"
57 cgiarginfo arg_ainfo;
58 arg_ainfo.shortname = "a";
59 arg_ainfo.longname = "action";
60 arg_ainfo.multiplechar = true;
61 arg_ainfo.defaultstatus = cgiarginfo::weak;
62 arg_ainfo.argdefault = "q";
63 arg_ainfo.savedarginfo = cgiarginfo::must;
64 argsinfo.addarginfo (NULL, arg_ainfo);
65
66 // "ct" - 0 = mg, 1 = mgpp, 2=lucene
67 arg_ainfo.shortname = "ct";
68 arg_ainfo.longname = "collection type";
69 arg_ainfo.multiplechar = true; // can be empty or single char
70 arg_ainfo.defaultstatus = cgiarginfo::weak;
71 arg_ainfo.argdefault = g_EmptyText;
72 arg_ainfo.savedarginfo = cgiarginfo::must;
73 argsinfo.addarginfo (NULL, arg_ainfo);
74
75 // "b" - 0 = simple, 1 = advanced
76 arg_ainfo.shortname = "b";
77 arg_ainfo.longname = "query mode";
78 arg_ainfo.multiplechar = false;
79 arg_ainfo.defaultstatus = cgiarginfo::weak;
80 arg_ainfo.argdefault = "0";
81 arg_ainfo.savedarginfo = cgiarginfo::must;
82 argsinfo.addarginfo (NULL, arg_ainfo);
83
84 // "h"
85 arg_ainfo.shortname = "h";
86 arg_ainfo.longname = "main index";
87 arg_ainfo.multiplechar = true;
88 arg_ainfo.defaultstatus = cgiarginfo::weak;
89 arg_ainfo.argdefault = g_EmptyText;
90 arg_ainfo.savedarginfo = cgiarginfo::must;
91 argsinfo.addarginfo (NULL, arg_ainfo);
92
93 // "h2"
94 arg_ainfo.shortname = "h2";
95 arg_ainfo.longname = "main index for second query";
96 arg_ainfo.multiplechar = true;
97 arg_ainfo.defaultstatus = cgiarginfo::weak;
98 arg_ainfo.argdefault = g_EmptyText;
99 arg_ainfo.savedarginfo = cgiarginfo::must;
100 argsinfo.addarginfo (NULL, arg_ainfo);
101
102 // "j"
103 arg_ainfo.shortname = "j";
104 arg_ainfo.longname = "sub collection index";
105 arg_ainfo.multiplechar = true;
106 arg_ainfo.defaultstatus = cgiarginfo::weak;
107 arg_ainfo.argdefault = g_EmptyText;
108 arg_ainfo.savedarginfo = cgiarginfo::must;
109 argsinfo.addarginfo (NULL, arg_ainfo);
110
111 // "j2"
112 arg_ainfo.shortname = "j2";
113 arg_ainfo.longname = "sub collection index for second query";
114 arg_ainfo.multiplechar = true;
115 arg_ainfo.defaultstatus = cgiarginfo::weak;
116 arg_ainfo.argdefault = g_EmptyText;
117 arg_ainfo.savedarginfo = cgiarginfo::must;
118 argsinfo.addarginfo (NULL, arg_ainfo);
119
120 // "n"
121 arg_ainfo.shortname = "n";
122 arg_ainfo.longname = "language index";
123 arg_ainfo.multiplechar = true;
124 arg_ainfo.defaultstatus = cgiarginfo::weak;
125 arg_ainfo.argdefault = g_EmptyText;
126 arg_ainfo.savedarginfo = cgiarginfo::must;
127 argsinfo.addarginfo (NULL, arg_ainfo);
128
129 // "n2"
130 arg_ainfo.shortname = "n2";
131 arg_ainfo.longname = "language index for second query";
132 arg_ainfo.multiplechar = true;
133 arg_ainfo.defaultstatus = cgiarginfo::weak;
134 arg_ainfo.argdefault = g_EmptyText;
135 arg_ainfo.savedarginfo = cgiarginfo::must;
136 argsinfo.addarginfo (NULL, arg_ainfo);
137
138
139 // "q"
140 arg_ainfo.shortname = "q";
141 arg_ainfo.longname = "query string";
142 arg_ainfo.multiplechar = true;
143 arg_ainfo.defaultstatus = cgiarginfo::weak;
144 arg_ainfo.argdefault = g_EmptyText;
145 arg_ainfo.savedarginfo = cgiarginfo::must;
146 argsinfo.addarginfo (NULL, arg_ainfo);
147
148 // "q2"
149 arg_ainfo.shortname = "q2";
150 arg_ainfo.longname = "query string for second query";
151 arg_ainfo.multiplechar = true;
152 arg_ainfo.defaultstatus = cgiarginfo::weak;
153 arg_ainfo.argdefault = g_EmptyText;
154 arg_ainfo.savedarginfo = cgiarginfo::must;
155 argsinfo.addarginfo (NULL, arg_ainfo);
156
157 // "cq2" ""=don't combine, "and", "or", "not"
158 arg_ainfo.shortname = "cq2";
159 arg_ainfo.longname = "combine queries";
160 arg_ainfo.multiplechar = true;
161 arg_ainfo.defaultstatus = cgiarginfo::weak;
162 arg_ainfo.argdefault = g_EmptyText;
163 arg_ainfo.savedarginfo = cgiarginfo::must;
164 argsinfo.addarginfo (NULL, arg_ainfo);
165
166 // "t" - 1 = ranked 0 = boolean
167 arg_ainfo.shortname = "t";
168 arg_ainfo.longname = "search type";
169 arg_ainfo.multiplechar = false;
170 arg_ainfo.defaultstatus = cgiarginfo::weak;
171 arg_ainfo.argdefault = "1";
172 arg_ainfo.savedarginfo = cgiarginfo::must;
173 argsinfo.addarginfo (NULL, arg_ainfo);
174
175 // "k"
176 arg_ainfo.shortname = "k";
177 arg_ainfo.longname = "casefolding";
178 arg_ainfo.multiplechar = false;
179 arg_ainfo.defaultstatus = cgiarginfo::weak;
180 arg_ainfo.argdefault = "1";
181 arg_ainfo.savedarginfo = cgiarginfo::must;
182 argsinfo.addarginfo (NULL, arg_ainfo);
183
184 // "ks"
185 arg_ainfo.shortname = "ks";
186 arg_ainfo.longname = "casefolding support";
187 arg_ainfo.multiplechar = false;
188 arg_ainfo.defaultstatus = cgiarginfo::weak;
189 arg_ainfo.argdefault = "0";
190 arg_ainfo.savedarginfo = cgiarginfo::must;
191 argsinfo.addarginfo (NULL, arg_ainfo);
192
193 // "s"
194 arg_ainfo.shortname = "s";
195 arg_ainfo.longname = "stemming";
196 arg_ainfo.multiplechar = false;
197 arg_ainfo.defaultstatus = cgiarginfo::weak;
198 arg_ainfo.argdefault = "0";
199 arg_ainfo.savedarginfo = cgiarginfo::must;
200 argsinfo.addarginfo (NULL, arg_ainfo);
201
202 // "ss"
203 arg_ainfo.shortname = "ss";
204 arg_ainfo.longname = "stemming support";
205 arg_ainfo.multiplechar = false;
206 arg_ainfo.defaultstatus = cgiarginfo::weak;
207 arg_ainfo.argdefault = "0";
208 arg_ainfo.savedarginfo = cgiarginfo::must;
209 argsinfo.addarginfo (NULL, arg_ainfo);
210
211 // "af"
212 arg_ainfo.shortname = "af";
213 arg_ainfo.longname = "accentfolding";
214 arg_ainfo.multiplechar = false;
215 arg_ainfo.defaultstatus = cgiarginfo::weak;
216 arg_ainfo.argdefault = "0";
217 arg_ainfo.savedarginfo = cgiarginfo::must;
218 argsinfo.addarginfo (NULL, arg_ainfo);
219
220 // "afs"
221 arg_ainfo.shortname = "afs";
222 arg_ainfo.longname = "accentfolding support";
223 arg_ainfo.multiplechar = false;
224 arg_ainfo.defaultstatus = cgiarginfo::weak;
225 arg_ainfo.argdefault = "0";
226 arg_ainfo.savedarginfo = cgiarginfo::must;
227 argsinfo.addarginfo (NULL, arg_ainfo);
228
229 // "m"
230 arg_ainfo.shortname = "m";
231 arg_ainfo.longname = "maximum number of documents";
232 arg_ainfo.multiplechar = true;
233 arg_ainfo.defaultstatus = cgiarginfo::weak;
234 arg_ainfo.argdefault = "50";
235 arg_ainfo.savedarginfo = cgiarginfo::must;
236 argsinfo.addarginfo (NULL, arg_ainfo);
237
238 // "o"
239 arg_ainfo.shortname = "o";
240 arg_ainfo.longname = "hits per page";
241 arg_ainfo.multiplechar = true;
242 arg_ainfo.defaultstatus = cgiarginfo::weak;
243 arg_ainfo.argdefault = "20";
244 arg_ainfo.savedarginfo = cgiarginfo::must;
245 argsinfo.addarginfo (NULL, arg_ainfo);
246
247 // "r"
248 arg_ainfo.shortname = "r";
249 arg_ainfo.longname = "start results from";
250 arg_ainfo.multiplechar = true;
251 arg_ainfo.defaultstatus = cgiarginfo::weak;
252 arg_ainfo.argdefault = "1";
253 arg_ainfo.savedarginfo = cgiarginfo::must;
254 argsinfo.addarginfo (NULL, arg_ainfo);
255
256 // "ccs"
257 arg_ainfo.shortname = "ccs";
258 arg_ainfo.longname = "cross collection searching";
259 arg_ainfo.multiplechar = false;
260 arg_ainfo.defaultstatus = cgiarginfo::weak;
261 arg_ainfo.argdefault = "0";
262 arg_ainfo.savedarginfo = cgiarginfo::must;
263 argsinfo.addarginfo (NULL, arg_ainfo);
264
265 // "ccp"
266 arg_ainfo.shortname = "ccp";
267 arg_ainfo.longname = "cross collection page";
268 arg_ainfo.multiplechar = false;
269 arg_ainfo.defaultstatus = cgiarginfo::weak;
270 arg_ainfo.argdefault = "0";
271 arg_ainfo.savedarginfo = cgiarginfo::must;
272 argsinfo.addarginfo (NULL, arg_ainfo);
273
274 // "cc"
275 arg_ainfo.shortname = "cc";
276 arg_ainfo.longname = "collections to search";
277 arg_ainfo.multiplechar = true;
278 arg_ainfo.multiplevalue = true;
279 arg_ainfo.defaultstatus = cgiarginfo::weak;
280 arg_ainfo.argdefault = g_EmptyText;
281 arg_ainfo.savedarginfo = cgiarginfo::must;
282 argsinfo.addarginfo (NULL, arg_ainfo);
283
284 // "hd" history display - search history only displayed when
285 // this var set to something other than 0
286 // this number of records is displayed
287 arg_ainfo.shortname = "hd";
288 arg_ainfo.longname = "history display";
289 arg_ainfo.multiplechar = true;
290 arg_ainfo.multiplevalue = false;
291 arg_ainfo.defaultstatus = cgiarginfo::weak;
292 arg_ainfo.argdefault = "0";
293 arg_ainfo.savedarginfo = cgiarginfo::must;
294 argsinfo.addarginfo (NULL, arg_ainfo);
295
296 // "hs" save - set to 1 in query form, so only save when submit
297 // query
298 // 0 = no save 1 = save
299 arg_ainfo.shortname = "hs";
300 arg_ainfo.longname = "history save";
301 arg_ainfo.multiplechar = false;
302 arg_ainfo.defaultstatus = cgiarginfo::weak;
303 arg_ainfo.argdefault = "0";
304 arg_ainfo.savedarginfo = cgiarginfo::mustnot;
305 argsinfo.addarginfo (NULL, arg_ainfo);
306
307 // "g" - new arg for granularity, for mgpp collections
308 arg_ainfo.shortname = "g";
309 arg_ainfo.longname = "granularity";
310 arg_ainfo.multiplechar = true;
311 arg_ainfo.defaultstatus = cgiarginfo::weak;
312 arg_ainfo.argdefault = g_EmptyText;
313 arg_ainfo.savedarginfo = cgiarginfo::must;
314 argsinfo.addarginfo (NULL, arg_ainfo);
315
316 // "ds" - start date
317 arg_ainfo.shortname = "ds";
318 arg_ainfo.longname = "start date";
319 arg_ainfo.multiplechar = true;
320 arg_ainfo.defaultstatus = cgiarginfo::weak;
321 arg_ainfo.argdefault = g_EmptyText;
322 arg_ainfo.savedarginfo = cgiarginfo::must;
323 argsinfo.addarginfo (NULL, arg_ainfo);
324
325 // "de" - end date
326 arg_ainfo.shortname = "de";
327 arg_ainfo.longname = "end date";
328 arg_ainfo.multiplechar = true;
329 arg_ainfo.defaultstatus = cgiarginfo::weak;
330 arg_ainfo.argdefault = g_EmptyText;
331 arg_ainfo.savedarginfo = cgiarginfo::must;
332 argsinfo.addarginfo (NULL, arg_ainfo);
333
334 // "dsbc" - whether or not start date is prechristian
335 arg_ainfo.shortname = "dsbc";
336 arg_ainfo.longname = "start date bc";
337 arg_ainfo.multiplechar = false;
338 arg_ainfo.defaultstatus = cgiarginfo::weak;
339 arg_ainfo.argdefault = "0";
340 arg_ainfo.savedarginfo = cgiarginfo::must;
341 argsinfo.addarginfo (NULL, arg_ainfo);
342
343 // "debc" - whether or not end date is prechristian
344 arg_ainfo.shortname = "debc";
345 arg_ainfo.longname = "end date bc";
346 arg_ainfo.multiplechar = false;
347 arg_ainfo.defaultstatus = cgiarginfo::weak;
348 arg_ainfo.argdefault = "0";
349 arg_ainfo.savedarginfo = cgiarginfo::must;
350 argsinfo.addarginfo (NULL, arg_ainfo);
351
352 // "qt" - 0 = text, 1 = form
353 arg_ainfo.shortname = "qt";
354 arg_ainfo.longname = "query type";
355 arg_ainfo.multiplechar = true; // can be empty or single char
356 arg_ainfo.defaultstatus = cgiarginfo::weak;
357 arg_ainfo.argdefault = g_EmptyText;
358 arg_ainfo.savedarginfo = cgiarginfo::must;
359 argsinfo.addarginfo (NULL, arg_ainfo);
360
361 // "qto" - 1 = text only, 2 = form only, 3 = text and form
362 arg_ainfo.shortname = "qto";
363 arg_ainfo.longname = "query type options";
364 arg_ainfo.multiplechar = true; // can be empty or single char
365 arg_ainfo.defaultstatus = cgiarginfo::weak;
366 arg_ainfo.argdefault = g_EmptyText;
367 arg_ainfo.savedarginfo = cgiarginfo::must;
368 argsinfo.addarginfo (NULL, arg_ainfo);
369
370 // "qb" - 0 = regular, 1 = large
371 arg_ainfo.shortname = "qb";
372 arg_ainfo.longname = "query box type";
373 arg_ainfo.multiplechar = false;
374 arg_ainfo.defaultstatus = cgiarginfo::weak;
375 arg_ainfo.argdefault = "0";
376 arg_ainfo.savedarginfo = cgiarginfo::must;
377 argsinfo.addarginfo (NULL, arg_ainfo);
378
379 // "fqn" - number of fields in the query form
380 arg_ainfo.shortname = "fqn";
381 arg_ainfo.longname = "form query num fields";
382 arg_ainfo.multiplechar = true;
383 arg_ainfo.defaultstatus = cgiarginfo::weak;
384 arg_ainfo.argdefault = "4";
385 arg_ainfo.savedarginfo = cgiarginfo::must;
386 argsinfo.addarginfo (NULL, arg_ainfo);
387
388 // "fqf" - the list of field names in the form query
389 // - a comma separated list
390 arg_ainfo.shortname = "fqf";
391 arg_ainfo.longname = "form query fields";
392 arg_ainfo.multiplechar = true;
393 arg_ainfo.defaultstatus = cgiarginfo::weak;
394 arg_ainfo.argdefault = g_EmptyText;
395 arg_ainfo.savedarginfo = cgiarginfo::must;
396 argsinfo.addarginfo (NULL, arg_ainfo);
397
398 // "fqv" - the list of values in the form query
399 // - a comma separated list
400 arg_ainfo.shortname = "fqv";
401 arg_ainfo.longname = "form query values";
402 arg_ainfo.multiplechar = true;
403 arg_ainfo.defaultstatus = cgiarginfo::weak;
404 arg_ainfo.argdefault = g_EmptyText;
405 arg_ainfo.savedarginfo = cgiarginfo::must;
406 argsinfo.addarginfo (NULL, arg_ainfo);
407
408
409 // "fqs" - the list of stemming options in the form query
410 // - a comma separated list
411 arg_ainfo.shortname = "fqs";
412 arg_ainfo.longname = "form query stems";
413 arg_ainfo.multiplechar = true;
414 arg_ainfo.defaultstatus = cgiarginfo::weak;
415 arg_ainfo.argdefault = g_EmptyText;
416 arg_ainfo.savedarginfo = cgiarginfo::must;
417 argsinfo.addarginfo (NULL, arg_ainfo);
418
419
420 // "fqk" - the list of casefolding options in the form query
421 // - a comma separated list
422 arg_ainfo.shortname = "fqk";
423 arg_ainfo.longname = "form query casefolds";
424 arg_ainfo.multiplechar = true;
425 arg_ainfo.defaultstatus = cgiarginfo::weak;
426 arg_ainfo.argdefault = g_EmptyText;
427 arg_ainfo.savedarginfo = cgiarginfo::must;
428 argsinfo.addarginfo (NULL, arg_ainfo);
429
430 // "fqc" - the list of boolean operators in the form query
431 // - a comma separated list
432 arg_ainfo.shortname = "fqc";
433 arg_ainfo.longname = "form query combines";
434 arg_ainfo.multiplechar = true;
435 arg_ainfo.defaultstatus = cgiarginfo::weak;
436 arg_ainfo.argdefault = g_EmptyText;
437 arg_ainfo.savedarginfo = cgiarginfo::must;
438 argsinfo.addarginfo (NULL, arg_ainfo);
439
440 // "fqa" - form query advanced - for "run query"
441 arg_ainfo.shortname = "fqa";
442 arg_ainfo.longname = "form query advanced query";
443 arg_ainfo.multiplechar = false;
444 arg_ainfo.defaultstatus = cgiarginfo::weak;
445 arg_ainfo.argdefault = "0";
446 arg_ainfo.savedarginfo = cgiarginfo::must;
447 argsinfo.addarginfo (NULL, arg_ainfo);
448
449 // "ifl" - I'm feeling lucky! (Go directly to the first matching document)
450 arg_ainfo.shortname = "ifl";
451 arg_ainfo.longname = "i'm feeling lucky";
452 arg_ainfo.multiplechar = false;
453 arg_ainfo.defaultstatus = cgiarginfo::weak;
454 arg_ainfo.argdefault = g_EmptyText;
455 arg_ainfo.savedarginfo = cgiarginfo::mustnot;
456 argsinfo.addarginfo (NULL, arg_ainfo);
457
458 // "sf" - Sort field. Set to field to be used for sorting search reult
459 // set (only implemented for lucene collections at present).
460 arg_ainfo.shortname = "sf";
461 arg_ainfo.longname = "sort field";
462 arg_ainfo.multiplechar = true;
463 arg_ainfo.defaultstatus = cgiarginfo::weak;
464 arg_ainfo.argdefault = g_EmptyText;
465 arg_ainfo.savedarginfo = cgiarginfo::must;
466 argsinfo.addarginfo (NULL, arg_ainfo);
467
468 // "fuzziness" controls how closely the search terms must match
469 // 100 = exact match, 0 = very inexact match (only implemented for Lucene)
470 arg_ainfo.shortname = "fuzziness";
471 arg_ainfo.longname = "Lucene fuzziness value";
472 arg_ainfo.multiplechar = true;
473 arg_ainfo.defaultstatus = cgiarginfo::weak;
474 arg_ainfo.argdefault = g_EmptyText;
475 arg_ainfo.savedarginfo = cgiarginfo::must;
476 argsinfo.addarginfo (NULL, arg_ainfo);
477}
478
479void queryaction::configure (const text_t &key, const text_tarray &cfgline) {
480 action::configure (key, cfgline);
481}
482
483bool queryaction::init (ostream &logout) {
484 return action::init (logout);
485}
486
487bool queryaction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args,
488 recptprotolistclass * /*protos*/, ostream &logout) {
489
490 // check t argument
491 int arg_t = args.getintarg("t");
492 if (arg_t != 0 && arg_t != 1) {
493 logout << "Warning: \"t\" argument out of range (" << arg_t << ")\n";
494 cgiarginfo *tinfo = argsinfo.getarginfo ("t");
495 if (tinfo != NULL) args["t"] = tinfo->argdefault;
496 }
497
498 // check k argument
499 int arg_k = args.getintarg("k");
500 if (arg_k != 0 && arg_k != 1) {
501 logout << "Warning: \"k\" argument out of range (" << arg_k << ")\n";
502 cgiarginfo *kinfo = argsinfo.getarginfo ("k");
503 if (kinfo != NULL) args["k"] = kinfo->argdefault;
504 }
505
506 // check s argument
507 int arg_s = args.getintarg("s");
508 if (arg_s != 0 && arg_s != 1) {
509 logout << "Warning: \"s\" argument out of range (" << arg_s << ")\n";
510 cgiarginfo *sinfo = argsinfo.getarginfo ("s");
511 if (sinfo != NULL) args["s"] = sinfo->argdefault;
512 }
513
514 // check m argument
515 int arg_m = args.getintarg("m");
516 if (arg_m < -1) {
517 logout << "Warning: \"m\" argument less than -1 (" << arg_m << ")\n";
518 cgiarginfo *minfo = argsinfo.getarginfo ("m");
519 if (minfo != NULL) args["m"] = minfo->argdefault;
520 }
521
522 // check o argument
523 int arg_o = args.getintarg("o");
524 if (arg_o < -1) {
525 logout << "Warning: \"o\" argument less than -1 (" << arg_o << ")\n";
526 cgiarginfo *oinfo = argsinfo.getarginfo ("o");
527 if (oinfo != NULL) args["o"] = oinfo->argdefault;
528 }
529
530 // check r argument
531 int arg_r = args.getintarg("r");
532 if (arg_r < 1) {
533 logout << "Warning: \"r\" argument less than 1 (" << arg_r << ")\n";
534 cgiarginfo *rinfo = argsinfo.getarginfo ("r");
535 if (rinfo != NULL) args["r"] = rinfo->argdefault;
536 }
537 //check hd argument
538 int arg_hd = args.getintarg("hd");
539 if (arg_hd <0 ) {
540 logout << "Warning: \"hd\" argument less than 0 (" << arg_hd << ")\n";
541 cgiarginfo *hdinfo = argsinfo.getarginfo ("hd");
542 if (hdinfo != NULL) args["hd"] = hdinfo->argdefault;
543 }
544
545 //check hs argument
546 int arg_hs = args.getintarg("hs");
547 if (arg_hs !=0 && arg_hs !=1) {
548 logout << "Warning: \"hs\" argument out of range (" << arg_hs << ")\n";
549 cgiarginfo *hsinfo = argsinfo.getarginfo ("hs");
550 if (hsinfo != NULL) args["hs"] = hsinfo->argdefault;
551 }
552
553 // check ct argument
554 int arg_ct = args.getintarg("ct");
555 if (arg_ct < 0 || arg_ct > 2) {
556 logout << "Warning: \"ct\" argument out of range (" << arg_ct << ")\n";
557 cgiarginfo *ctinfo = argsinfo.getarginfo ("ct");
558 if (ctinfo != NULL) args["ct"] = ctinfo->argdefault;
559 }
560
561 // check qt argument
562 int arg_qt = args.getintarg("qt");
563 if (arg_qt !=0 && arg_qt !=1) {
564 logout << "Warning: \"qt\" argument out of range (" << arg_qt << ")\n";
565 cgiarginfo *qtinfo = argsinfo.getarginfo ("qt");
566 if (qtinfo != NULL) args["qt"] = qtinfo->argdefault;
567 }
568
569 // check qb argument
570 int arg_qb = args.getintarg("qb");
571 if (arg_qb !=0 && arg_qb !=1) {
572 logout << "Warning: \"qb\" argument out of range (" << arg_qb << ")\n";
573 cgiarginfo *qbinfo = argsinfo.getarginfo ("qb");
574 if (qbinfo != NULL) args["qb"] = qbinfo->argdefault;
575 }
576
577 // check fqa argument
578 int arg_fqa = args.getintarg("fqa");
579 if (arg_fqa !=0 && arg_fqa !=1) {
580 logout << "Warning: \"fqa\" argument out of range (" << arg_fqa << ")\n";
581 cgiarginfo *fqainfo = argsinfo.getarginfo ("fqa");
582 if (fqainfo != NULL) args["fqa"] = fqainfo->argdefault;
583 }
584
585 // check fqn argument
586 int arg_fqn = args.getintarg("fqn");
587 if (arg_fqn < -1) {
588 logout << "Warning: \"fqn\" argument less than -1 (" << arg_fqn << ")\n";
589 cgiarginfo *fqninfo = argsinfo.getarginfo ("fqn");
590 if (fqninfo != NULL) args["fqn"] = fqninfo->argdefault;
591 }
592
593 return true;
594}
595
596void queryaction::get_cgihead_info (cgiargsclass &args, recptprotolistclass * /*protos*/,
597 response_t &response, text_t &response_data,
598 ostream &/*logout*/) {
599 // If this is an "I'm feeling lucky" request, we don't know the target location until later
600 if (!args["ifl"].empty()) {
601 response = undecided_location;
602 return;
603 }
604
605 response = content;
606 response_data = "text/html";
607}
608
609void queryaction::define_internal_macros (displayclass &disp, cgiargsclass &args,
610 recptprotolistclass * protos,
611 ostream &logout) {
612
613 // define_internal_macros sets the following macros:
614
615 // The following macros are set later (in define_query_macros) as they can't be set until
616 // the query has been done.
617 // _quotedquery_ the part of the query string that was quoted for post-processing
618 // _freqmsg_ the term frequency string
619
620 // _resultline_ the "x documents matched the query" string
621
622 // _prevfirst_ these are used when setting up the links to previous/next
623 // _prevlast_ pages of results (_thisfirst_ and _thislast_ are used to set
624 // _nextfirst_ the 'results x-x for query: xxxx' string in the title bar)
625 // _nextlast_
626 // _thisfirst_
627 // _thislast_
628
629
630 define_form_macros(disp, args, protos, logout);
631
632 define_query_interface(disp, args, protos, logout);
633
634
635}
636
637void queryaction::define_query_interface(displayclass &disp,
638 cgiargsclass &args,
639 recptprotolistclass * protos,
640 ostream &logout){
641 text_t collection = args["c"];
642
643 //check that the protocol is alive
644 recptproto* colproto = protos->getrecptproto (collection, logout);
645 if(colproto == NULL) {
646 logout << "ERROR: Null collection protocol trying to query"
647 << collection.getcstr() << "\n";
648 return;
649 }
650
651 //check the collection is responding/in place
652 ColInfoResponse_t *colinfo = recpt->get_collectinfo_ptr(colproto, collection,
653 logout);
654 if(colinfo == NULL){
655 logout << "ERROR: Null returned for get_collectinfo_ptr on "
656 << collection.getcstr() << "in queryaction::define_query_interface\n";
657 return;
658 }
659
660 text_tmap::iterator check = colinfo->format.find("QueryInterface");
661 if(check != colinfo->format.end()){
662 if((*check).second=="DateSearch"){
663 text_t current = "_datesearch_";
664 disp.setmacro("optdatesearch","query",current);
665 }
666 }
667}
668
669
670// sets the selection box macros _hselection_, _jselection_, _nselection_ _gselection_, fqfselection_
671void queryaction::set_option_macro (const text_t &macroname,
672 text_t current_value,
673 bool display_single,
674 bool add_js_update,
675 const FilterOption_t &option,
676 displayclass &disp) {
677
678 if (option.validValues.empty()) return;
679 if (option.validValues.size() == 1) {
680 if (display_single) {
681 disp.setmacro (macroname + "selection", displayclass::defaultpackage, "_" + option.defaultValue + "_");
682 }
683 return;
684 }
685 if (option.validValues.size() < 2) return;
686
687 text_t macrovalue = "<select name=\"" + macroname + "\"";
688 if (add_js_update) {
689 macrovalue += " onChange=\"update"+macroname+"();\"";
690 }
691 macrovalue += ">\n";
692
693 if (current_value.empty()) current_value = option.defaultValue;
694
695 text_tarray::const_iterator thisvalue = option.validValues.begin();
696 text_tarray::const_iterator endvalue = option.validValues.end();
697
698 while (thisvalue != endvalue) {
699 macrovalue += "<option value=\"" + *thisvalue + "\"";
700 if (*thisvalue == current_value)
701 macrovalue += " selected";
702 macrovalue += ">_" + *thisvalue + "_\n";
703 ++thisvalue;
704 }
705 macrovalue += "</select>\n";
706 disp.setmacro (macroname + "selection", displayclass::defaultpackage, macrovalue);
707}
708
709
710void queryaction::define_external_macros (displayclass &disp, cgiargsclass &args,
711 recptprotolistclass *protos, ostream &logout) {
712
713 // define_external_macros sets the following macros:
714
715 // some or all of these may not be required to be set
716 // _hselection_, _h2selection_ the selection box for the main part of the index
717 // _jselection_, _j2selection_ the selection box for the subcollection part of the index
718 // _nselection_, _n2selection_ the selection box for the language part of the index
719 // _cq2selection the selection box for combining two queries
720
721 // _gselection_, the selection box forlevels (mgpp)
722 // _fqfselection_, the selection box for index/fields (mgpp)
723 // can't do anything if collectproto is null (i.e. no collection was specified)
724 recptproto *collectproto = protos->getrecptproto (args["c"], logout);
725 if (collectproto == NULL) return;
726
727 ColInfoResponse_t *colinfo = recpt->get_collectinfo_ptr(collectproto,
728 args["c"],
729 logout);
730 set_query_type_args(colinfo, args);
731 set_stem_index_args(colinfo, args);
732
733 comerror_t err;
734 InfoFilterOptionsResponse_t response;
735 InfoFilterOptionsRequest_t request;
736 request.filterName = "QueryFilter";
737
738 collectproto->get_filteroptions (args["c"], request, response, err, logout);
739 if (err == noError) {
740
741 FilterOption_tmap::const_iterator it;
742 FilterOption_tmap::const_iterator end = response.filterOptions.end();
743
744 // _hselection_ and _h2selection_ (Index)
745 it = response.filterOptions.find ("Index");
746 if (it != end) set_option_macro ("h", args["h"], true, false, (*it).second, disp);
747 if (it != end) set_option_macro ("h2", args["h2"], true,false, (*it).second, disp);
748
749 // _jselection_ and _j2selection_ (Subcollection)
750 it = response.filterOptions.find ("Subcollection");
751 if (it != end) set_option_macro ("j", args["j"], true,false, (*it).second, disp);
752 if (it != end) set_option_macro ("j2", args["j2"], true,false, (*it).second, disp);
753
754 // _nselection_ and _n2selection_ (Language)
755 it = response.filterOptions.find ("Language");
756 if (it != end) set_option_macro ("n", args["n"], true,false, (*it).second, disp);
757 if (it != end) set_option_macro ("n2", args["n2"], true,false, (*it).second, disp);
758
759 // _cq2selection_ (CombineQuery)
760 it = response.filterOptions.find ("CombineQuery");
761 if (it != end) set_option_macro ("cq2", args["cq2"], true,false, (*it).second, disp);
762
763 if ((args["ct"] == "1") || (args["ct"] == "2")) { // mgpp/lucene collections
764 // _gselection_ (Level)
765 it = response.filterOptions.find("Level");
766 if (it!=end) {
767 set_option_macro("g", args["g"], false, false, (*it).second, disp);
768 if (args["qt"]=="1") { // form search
769 set_gformselection_macro(args["g"], (*it).second, disp);
770 }
771 }
772 // _fqfselection_ field list
773 it = response.filterOptions.find("IndexField");
774 if (it!=end) {
775 bool form_search = false;
776 if (args["qto"]=="2" || args["qt"]=="1") {
777 form_search = true;
778 }
779 set_option_macro ("fqf", args["fqf"], true, form_search, (*it).second, disp);
780 if (args["ct"] == "2") {// lucene
781 // set the sort field macro
782 set_sfselection_macro(args["sf"], (*it).second, disp);
783 }
784 }
785 }
786 }
787} // define external macros
788
789void queryaction::set_sfselection_macro(text_t current_value,
790 const FilterOption_t &option,
791 displayclass &disp) {
792
793 // we need two or more options to continue
794 if (option.validValues.size() <= 2) {
795 return;
796 }
797
798 text_t macrovalue = "<select name=\"sf\">\n";
799
800 if (current_value.empty()) current_value = "";
801
802 // we give a rank option first
803 macrovalue += "<option value=\"\"";
804 if (current_value == "") {
805 macrovalue += " selected";
806 }
807 macrovalue += ">_query:textsortbyrank_\n";
808
809 text_tarray::const_iterator thisvalue = option.validValues.begin();
810 text_tarray::const_iterator endvalue = option.validValues.end();
811 int valid_count = 0;
812 while (thisvalue != endvalue) {
813 if (*thisvalue != "ZZ" && *thisvalue != "TX") {
814 ++valid_count;
815 macrovalue += "<option value=\"by" + *thisvalue + "\"";
816 if (current_value == "by"+*thisvalue)
817 macrovalue += " selected";
818 macrovalue += ">_" + *thisvalue + "_\n";
819 }
820 ++thisvalue;
821 }
822 macrovalue += "</select>";
823 if (valid_count > 0) {
824 disp.setmacro ("sfselection", displayclass::defaultpackage, macrovalue);
825 }
826
827}
828
829// sets the selection box macro _gformselection_.
830// the default for _gformselection_ is _gselection_
831void queryaction::set_gformselection_macro (text_t current_value,
832 const FilterOption_t &option,
833 displayclass &disp) {
834
835 if (option.validValues.size() <= 1) {
836 return;
837 }
838 // we need to check to see if there is paragraph present
839 text_tarray::const_iterator thisvalue = option.validValues.begin();
840 text_tarray::const_iterator endvalue = option.validValues.end();
841
842 bool has_paras = false;
843 while (thisvalue != endvalue) {
844 if (*thisvalue == "Para") {
845 has_paras = true;
846 break;
847 }
848 ++thisvalue;
849 }
850 if (!has_paras) return; // there is no difference between the form selection and the normal one
851
852 if (option.validValues.size() == 2) {
853 // we will only have one value, but we will still put it in as a text string
854 int opt = 0;
855 if (option.validValues[0] == "Para") {
856 opt = 1;
857 }
858 disp.setmacro ("gformselection", displayclass::defaultpackage, "_"+option.validValues[opt]+"_");
859 return;
860 }
861
862 // there will be a select box
863 text_t macrovalue = "<select name=\"g\">\n";
864
865 if (current_value.empty()) current_value = option.defaultValue;
866
867 thisvalue = option.validValues.begin();
868
869 while (thisvalue != endvalue) {
870 if (*thisvalue != "Para") {
871 macrovalue += "<option value=\"" + *thisvalue + "\"";
872 if (*thisvalue == current_value)
873 macrovalue += " selected";
874 macrovalue += ">_" + *thisvalue + "_\n";
875 }
876 ++thisvalue;
877 }
878 macrovalue += "</select>\n";
879 disp.setmacro ("gformselection", displayclass::defaultpackage, macrovalue);
880}
881void queryaction::define_form_macros (displayclass &disp, cgiargsclass &args,
882 recptprotolistclass *protos, ostream &logout) {
883
884 // defines the following macros
885 // _regformlist_
886 // _advformlist_
887
888 if (args["ct"]=="0" || args["qto"]=="1" || (args["qto"]=="3" && args["qt"] == "0") ) // mg, or mgpp/lucene with plain only, or mgpp with both, but set to plain
889 return; // dont need these macros
890
891 text_t form = "";
892 int argfqn = args.getintarg("fqn");
893
894 if (args["b"] == "1") { // advanced form
895 form += "_firstadvformelement_\n";
896 for (int i=1; i<argfqn; ++i) {
897 form += "_advformelement_\n";
898 }
899 disp.setmacro("advformlist", "query", form);
900 }
901 else { // simple form
902 for (int i=0; i<argfqn; ++i) {
903 form += "_regformelement_\n";
904 }
905 disp.setmacro("regformlist", "query", form);
906 }
907
908}
909
910void queryaction::define_history_macros (displayclass &disp, cgiargsclass &args,
911 recptprotolistclass *protos, ostream &logout) {
912
913 // defines the following macros
914 // _searchhistorylist_
915
916 text_t historylist;
917 int arghd = args.getintarg("hd");
918 if (arghd == 0) {
919 historylist="";
920 }
921 else {
922 historylist = "<!-- Search History List -->\n";
923
924 text_t userid = args["z"];
925 text_tarray entries;
926 if (get_history_info (userid, entries, gdbmhome, logout)) {
927 int count = 1;
928 text_tarray::iterator here = entries.begin();
929 text_tarray::iterator end = entries.end();
930 int numrecords=(int)entries.size();
931 if (numrecords>arghd) { // only display some of them
932 numrecords = arghd;
933 }
934 historylist += "<form action=\"_gwcgi_\" name=\"HistoryForm\"><table width=\"537\">\n";
935
936 for (int i=0; i<numrecords;++i) {
937 text_t query;
938 text_t numdocs;
939 text_t cgiargs;
940 text_t userinfo;
941 text_t escquery;
942 split_saved_query(entries[i],numdocs,cgiargs);
943 parse_saved_args(cgiargs, "q", query); // get query string out
944 decode_cgi_arg(query); // un cgisafe it
945 escquery = escape_quotes(query); // escape the quotes and newlines
946 text_t histvalue = "histvalue";
947 histvalue += i;
948 disp.setmacro(histvalue, "query", escquery);
949 format_user_info(cgiargs, userinfo, args, protos, logout);
950
951 historylist += "<tr><td align=\"right\">_imagehistbutton_(";
952 historylist += i;
953 historylist += ")</td>\n";
954 historylist += "<td><table border=\"1\" cellspacing=\"0\" ";
955 historylist += "cellpadding=\"0\"><tr><td width=\"365\" align=\"left\">"
956 + query
957 + "</td></tr></table></td><td width=\"110\" align=\"center\"><small>"
958 + numdocs;
959 if (numdocs == 1) historylist += " _texthresult_";
960 else historylist += " _texthresults_";
961 if (!userinfo.empty()) {
962 historylist += "<br>( "+userinfo+" )";
963 }
964 historylist += "</small></td>\n";
965 }
966 historylist+="</table></form>\n\n";
967
968 } // if get history info
969 else {
970 historylist += "_textnohistory_";
971 }
972 historylist += "<! ---- end of history list ----->\n";
973 } // else display list
974 disp.setmacro("searchhistorylist", "query", historylist);
975
976} // define history macros
977
978void queryaction::output_ccp (cgiargsclass &args, recptprotolistclass *protos,
979 displayclass &disp, outconvertclass &outconvert,
980 ostream &textout, ostream &logout) {
981
982 ColInfoResponse_t *cinfo = NULL;
983 comerror_t err;
984 InfoFilterOptionsResponse_t fresponse;
985 InfoFilterOptionsRequest_t frequest;
986 frequest.filterName = "QueryFilter";
987
988 text_t &index = args["h"];
989 text_t &subcollection = args["j"];
990 text_t &language = args["n"];
991
992 text_tset collections;
993 text_t arg_cc = args["cc"];
994 decode_cgi_arg (arg_cc);
995 splitchar (arg_cc.begin(), arg_cc.end(), ',', collections);
996
997 textout << outconvert << disp << "_query:header_\n"
998 << "<center>_navigationbar_</center><br>\n"
999 << "<form name=\"QueryForm\" method=\"get\" action=\"_gwcgi_\">\n"
1000 << "<input type=\"hidden\" name=\"a\" value=\"q\">\n"
1001 << "<input type=\"hidden\" name=\"site\" value=\"_cgiargsite_\"\n"
1002 << "<input type=\"hidden\" name=\"e\" value=\"_compressedoptions_\">\n"
1003 << "<input type=\"hidden\" name=\"ccp\" value=\"1\">\n"
1004 << "<center><table width=\"_pagewidth_\"><tr valign=\"top\">\n"
1005 << "<td>Select collections to search for \"" << args["q"]
1006 << "\" <i>(index=" << index << " subcollection=" << subcollection
1007 << " language=" << language << ")</i></td>\n"
1008 << "<td><input type=\"submit\" value=\"_query:textbeginsearch_\"></td>\n"
1009 << "</tr></table></center>\n"
1010 << "<center><table width=\"_pagewidth_\">\n"
1011 << "<tr><td>\n";
1012
1013 recptprotolistclass::iterator rprotolist_here = protos->begin();
1014 recptprotolistclass::iterator rprotolist_end = protos->end();
1015 while (rprotolist_here != rprotolist_end) {
1016 if ((*rprotolist_here).p != NULL) {
1017
1018 text_tarray collist;
1019 (*rprotolist_here).p->get_collection_list (collist, err, logout);
1020 if (err == noError) {
1021 text_tarray::iterator collist_here = collist.begin();
1022 text_tarray::iterator collist_end = collist.end();
1023 while (collist_here != collist_end) {
1024
1025 cinfo = recpt->get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, logout);
1026 // if (err == noError && cinfo.isPublic && (cinfo.buildDate > 0)) {
1027 if (cinfo != NULL && (cinfo->buildDate > 0)) {
1028
1029 (*rprotolist_here).p->get_filteroptions (*collist_here, frequest, fresponse, err, logout);
1030 if (err == noError) {
1031
1032 FilterOption_tmap::const_iterator it;
1033 FilterOption_tmap::const_iterator end = fresponse.filterOptions.end();
1034 if (!index.empty()) {
1035 it = fresponse.filterOptions.find ("Index");
1036 if (it == end) {++collist_here; continue;}
1037 text_tarray::const_iterator there = (*it).second.validValues.begin();
1038 text_tarray::const_iterator tend = (*it).second.validValues.end();
1039 while (there != tend) {
1040 if (*there == index) break;
1041 ++there;
1042 }
1043 if (there == tend) {++collist_here; continue;}
1044 }
1045 if (!subcollection.empty()) {
1046 it = fresponse.filterOptions.find ("Subcollection");
1047 if (it == end) {++collist_here; continue;}
1048 text_tarray::const_iterator there = (*it).second.validValues.begin();
1049 text_tarray::const_iterator tend = (*it).second.validValues.end();
1050 while (there != tend) {
1051 if (*there == subcollection) break;
1052 ++there;
1053 }
1054 if (there == tend) {++collist_here; continue;}
1055 }
1056 if (!language.empty()) {
1057 it = fresponse.filterOptions.find ("Language");
1058 if (it == end) {++collist_here; continue;}
1059 text_tarray::const_iterator there = (*it).second.validValues.begin();
1060 text_tarray::const_iterator tend = (*it).second.validValues.end();
1061 while (there != tend) {
1062 if (*there == language) break;
1063 ++there;
1064 }
1065 if (there == tend) {++collist_here; continue;}
1066 }
1067
1068 // we've got a matching collection
1069 textout << outconvert << "<input type=\"checkbox\"";
1070
1071 text_tset::const_iterator t = collections.find (*collist_here);
1072 if (t != collections.end()) textout << outconvert << " checked";
1073
1074 text_t collectionname = cinfo->get_collectionmeta("collectionname", args["l"]);
1075 if (collectionname.empty()) {
1076 collectionname = *collist_here;
1077 }
1078 textout << outconvert << disp
1079 << " name=\"cc\" value=\"" << *collist_here << "\">"
1080 << collectionname << "<br>\n";
1081
1082
1083 }
1084 }
1085 ++collist_here;
1086 }
1087 }
1088 }
1089 ++rprotolist_here;
1090 }
1091 textout << outconvert << disp
1092 << "</td></tr></table></center>\n"
1093 << "</form>\n"
1094 << "_query:footer_\n";
1095
1096}
1097
1098bool queryaction::do_action (cgiargsclass &args, recptprotolistclass *protos,
1099 browsermapclass *browsers, displayclass &disp,
1100 outconvertclass &outconvert, ostream &textout,
1101 ostream &logout) {
1102
1103 if (recpt == NULL) {
1104 logout << "ERROR (queryaction::do_action): This action does not contain information\n"
1105 << " about any receptionists. The method set_receptionist was probably\n"
1106 << " not called from the module which instantiated this action.\n";
1107 return true;
1108 }
1109
1110
1111
1112 if (args["ccs"] == "1") {
1113 if (!args["cc"].empty()) {
1114 // query the selected collections
1115 text_t::const_iterator b = args["cc"].begin();
1116 text_t::const_iterator e = args["cc"].end();
1117 if (findchar (b, e, ',') != e) {
1118 if (!search_multiple_collections (args, protos, browsers, disp, outconvert,
1119 textout, logout)) return false;
1120 return true;
1121 } else {
1122 if (!search_single_collection (args, args["cc"], protos, browsers, disp,
1123 outconvert, textout, logout)) return false;
1124 return true;
1125 }
1126 }
1127 }
1128
1129 // simply query the current collection
1130 if (!search_single_collection (args, args["c"], protos, browsers, disp,
1131 outconvert, textout, logout)) return false;
1132 return true;
1133}
1134
1135bool queryaction::search_multiple_collections (cgiargsclass &args, recptprotolistclass *protos,
1136 browsermapclass *browsers, displayclass &disp,
1137 outconvertclass &outconvert, ostream &textout,
1138 ostream &logout) {
1139
1140 text_tarray collections;
1141
1142 text_t arg_cc = args["cc"];
1143 decode_cgi_arg (arg_cc);
1144 splitchar (arg_cc.begin(), arg_cc.end(), ',', collections);
1145
1146 if (collections.empty()) {
1147 logout << "queryaction::search_multiple_collections: No collections "
1148 << "set for doing multiple query - will search current collection\n";
1149 textout << outconvert << disp << "_query:textwarningnocollections_\n";
1150 return search_single_collection (args, args["c"], protos, browsers, disp,
1151 outconvert, textout, logout);
1152 }
1153
1154 // queryaction uses "VList" browser to display results,
1155 // a queries clasification is "Search"
1156 text_t browsertype = "VList";
1157 text_t classification = "Search";
1158
1159 QueryResult_tset results;
1160 map<text_t, colinfo_t, lttext_t> colinfomap;
1161
1162 ColInfoResponse_t *cinfo = NULL;
1163 recptproto *collectproto = NULL;
1164 comerror_t err;
1165 FilterRequest_t request;
1166 FilterResponse_t response;
1167 request.filterResultOptions = FROID | FRmetadata | FRtermFreq | FRranking;
1168 text_t freqmsg = "_textfreqmsg1_";
1169 int numdocs = 0;
1170 isapprox isApprox = Exact;
1171
1172 // what to do about segmentation for multiple colls??
1173 bool segment = false;
1174 text_t formattedstring = "";
1175 get_formatted_query_string(formattedstring, segment, args, disp, logout);
1176
1177 if (formattedstring.empty()) {
1178 // dont bother doing a query if no query string
1179 define_history_macros (disp, args, protos, logout);
1180 textout << outconvert << disp << "_query:header_\n"
1181 << "_query:content_";
1182 textout << outconvert << disp << "_query:footer_";
1183
1184 return true;
1185 }
1186 bool syntax_error = false;
1187
1188 set_queryfilter_options (request, formattedstring, args);
1189
1190 // need to retrieve maxdocs matches for each collection
1191 // (will eventually want to tidy this up, do so caching etc.)
1192 OptionValue_t option;
1193 option.name = "StartResults";
1194 option.value = "1";
1195 request.filterOptions.push_back (option);
1196
1197 option.name = "EndResults";
1198 option.value = args["m"];
1199 request.filterOptions.push_back (option);
1200
1201 text_tarray::iterator col_here = collections.begin();
1202 text_tarray::iterator col_end = collections.end();
1203
1204 map<text_t, int, lttext_t> termfreqs;
1205
1206 // just check the main col for formatting info - use individual format statements, or the main one?
1207
1208 browserclass *bptr = browsers->getbrowser (browsertype);
1209
1210 text_t main_col = args["c"];
1211 cinfo = recpt->get_collectinfo_ptr (collectproto, main_col, logout);
1212 if (cinfo == NULL) {
1213 logout << "ERROR (query_action::search_multiple_collections): get_collectinfo_ptr returned NULL for '"<<main_col<<"'\n";
1214 return false;
1215 }
1216
1217 bool use_main_col_format = false;
1218 if (cinfo->ccsOptions & CCSUniformSearchResultsFormatting) {
1219 use_main_col_format = true;
1220 }
1221
1222 request.fields.erase (request.fields.begin(), request.fields.end());
1223 request.getParents = false;
1224 bptr->load_metadata_defaults (request.fields);
1225
1226 text_t formatstring;
1227 format_t *formatlistptr = new format_t();
1228 if (use_main_col_format) {
1229 // just get one format for main coll and use it for each subcol
1230 if (!get_formatstring (classification, browsertype,
1231 cinfo->format, formatstring)) {
1232 formatstring = bptr->get_default_formatstring();
1233 }
1234
1235 parse_formatstring (formatstring, formatlistptr, request.fields, request.getParents);
1236 }
1237
1238 while (col_here != col_end) {
1239
1240 collectproto = protos->getrecptproto (*col_here, logout);
1241 if (collectproto == NULL) {
1242 logout << outconvert << "queryaction::search_multiple_collections: " << *col_here
1243 << " collection has a NULL collectproto, ignoring\n";
1244 ++col_here;
1245 continue;
1246 }
1247 cinfo = recpt->get_collectinfo_ptr (collectproto, *col_here, logout);
1248 if (cinfo == NULL) {
1249 logout << "ERROR (query_action::search_multiple_collections): get_collectinfo_ptr returned NULL\n";
1250 ++col_here;
1251 continue;
1252 }
1253
1254 if (!use_main_col_format) {
1255 request.fields.erase (request.fields.begin(), request.fields.end());
1256 request.getParents = false;
1257 bptr->load_metadata_defaults (request.fields);
1258
1259 //browserclass *bptr = browsers->getbrowser (browsertype);
1260
1261 // get the formatstring if there is one
1262 if (!get_formatstring (classification, browsertype,
1263 cinfo->format, formatstring)) {
1264 formatstring = bptr->get_default_formatstring();
1265 }
1266
1267 formatlistptr = new format_t();
1268 parse_formatstring (formatstring, formatlistptr, request.fields, request.getParents);
1269 }
1270
1271 colinfo_t thiscolinfo;
1272 thiscolinfo.formatlistptr = formatlistptr;
1273 thiscolinfo.browserptr = bptr;
1274 colinfomap[*col_here] = thiscolinfo;
1275
1276 // do the query
1277 collectproto->filter (*col_here, request, response, err, logout);
1278 if (err != noError && err != syntaxError) {
1279 outconvertclass text_t2ascii;
1280 logout << text_t2ascii
1281 << "queryaction::search_multiple_collections: call to QueryFilter failed "
1282 << "for " << *col_here << " collection (" << get_comerror_string (err) << ")\n";
1283 return false;
1284 }
1285
1286 if (err == syntaxError) {
1287 syntax_error = true;
1288 freqmsg = "_textinvalidquery_";
1289 // assume the syntax will be invalid for all colls
1290 break;
1291 }
1292 if (response.error_message == "TOO_MANY_CLAUSES") {
1293 freqmsg = "_textlucenetoomanyclauses_";
1294 break;
1295 }
1296 if (isApprox == Exact)
1297 isApprox = response.isApprox;
1298 else if (isApprox == MoreThan)
1299 if (response.isApprox == Approximate)
1300 isApprox = response.isApprox;
1301
1302 TermInfo_tarray::const_iterator this_term = response.termInfo.begin();
1303 TermInfo_tarray::const_iterator end_term = response.termInfo.end();
1304 while (this_term != end_term) {
1305 termfreqs[(*this_term).term] += (*this_term).freq;
1306 if ((col_here+1) == col_end) {
1307 freqmsg += (*this_term).term + ": " + termfreqs[(*this_term).term];
1308 if ((this_term+1) != end_term) freqmsg += ", ";
1309 }
1310 ++this_term;
1311 }
1312
1313 if (response.numDocs > 0) {
1314 numdocs += response.numDocs;
1315
1316 QueryResult_t thisresult;
1317 thisresult.collection = *col_here;
1318 ResultDocInfo_tarray::iterator doc_here = response.docInfo.begin();
1319 ResultDocInfo_tarray::iterator doc_end = response.docInfo.end();
1320 while (doc_here != doc_end) {
1321 thisresult.doc = *doc_here;
1322 results.insert (thisresult);
1323 ++doc_here;
1324 }
1325 }
1326 ++col_here;
1327 } // for each coll
1328
1329 disp.setmacro ("freqmsg", "query", freqmsg);
1330
1331 define_query_macros( args, disp, numdocs, isApprox);
1332 // save the query if appropriate
1333 save_search_history(args, numdocs, isApprox);
1334 define_history_macros (disp, args, protos, logout);
1335
1336 textout << outconvert << disp << "_query:header_\n"
1337 << "_query:content_";
1338
1339 if (!syntax_error) {
1340
1341 // now go through each result and output it
1342 QueryResult_tset::iterator res_here = results.begin();
1343 QueryResult_tset::iterator res_end = results.end();
1344 text_tset metadata; // empty !!
1345 bool getParents = false; // don't care !!
1346 bool use_table;
1347 ResultDocInfo_t thisdoc;
1348 format_t *formatlistptr = NULL;
1349 browserclass *browserptr = NULL;
1350
1351 int count = 1;
1352 int firstdoc = args.getintarg("r");
1353 int hitsperpage = args.getintarg("o");
1354 int thislast = firstdoc + (hitsperpage - 1);
1355
1356 // output results
1357 while (res_here != res_end) {
1358 if (count < firstdoc) {++count; ++res_here; continue;}
1359 if (count > thislast) break;
1360 formatlistptr = colinfomap[(*res_here).collection].formatlistptr;
1361 browserptr = colinfomap[(*res_here).collection].browserptr;
1362 thisdoc = (*res_here).doc;
1363 use_table = is_table_content (formatlistptr);
1364
1365 collectproto = protos->getrecptproto ((*res_here).collection, logout);
1366 if (collectproto == NULL) {
1367 logout << outconvert << "queryaction::search_multiple_collections: " << (*res_here).collection
1368 << " collection has a NULL collectproto, ignoring results\n";
1369 ++res_here;
1370 continue;
1371 }
1372
1373 browserptr->output_section_group (thisdoc, args, (*res_here).collection, 0,
1374 formatlistptr, use_table, metadata, getParents,
1375 collectproto, disp, outconvert, textout, logout);
1376 // textout << outconvert << "(ranking: " << (*res_here).doc.ranking << ")\n";
1377 ++res_here;
1378 ++count;
1379 }
1380 }
1381 textout << outconvert << disp << "_query:footer_";
1382
1383 // clean up the format_t pointers
1384 map<text_t, colinfo_t, lttext_t>::iterator here = colinfomap.begin();
1385 map<text_t, colinfo_t, lttext_t>::iterator end = colinfomap.end();
1386 while (here != end) {
1387 delete ((*here).second.formatlistptr);
1388 ++here;
1389 }
1390 return true;
1391}
1392
1393bool queryaction::search_single_collection (cgiargsclass &args, const text_t &collection,
1394 recptprotolistclass *protos, browsermapclass *browsers,
1395 displayclass &disp, outconvertclass &outconvert,
1396 ostream &textout, ostream &logout) {
1397
1398 recptproto *collectproto = protos->getrecptproto (collection, logout);
1399 if (collectproto == NULL) {
1400 logout << outconvert << "queryaction::search_single_collection: " << collection
1401 << " collection has a NULL collectproto\n";
1402
1403 // Display the "this collection is not installed on this system" page
1404 disp.setmacro("cvariable", displayclass::defaultpackage, collection);
1405 disp.setmacro("content", "query", "<p>_textbadcollection_<p>");
1406
1407 textout << outconvert << disp << "_query:header_\n"
1408 << "_query:content_\n" << "_query:footer_\n";
1409 return true;
1410 }
1411
1412 // queryaction uses "VList" browser to display results,
1413 // a queries clasification is "Search"
1414 text_t browsertype = "VList";
1415 text_t classification = "Search";
1416
1417 comerror_t err;
1418 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, collection, logout);
1419
1420 if (cinfo == NULL) {
1421 logout << "ERROR (query_action::search_single_collection): get_collectinfo_ptr returned NULL\n";
1422 return false;
1423 }
1424
1425 bool segment = cinfo->isSegmented;
1426 browserclass *bptr = browsers->getbrowser (browsertype);
1427
1428 // get the formatstring if there is one
1429 text_t formatstring;
1430 if (!get_formatstring (classification, browsertype,
1431 cinfo->format, formatstring)) {
1432 formatstring = bptr->get_default_formatstring();
1433 }
1434 FilterRequest_t request;
1435 FilterResponse_t response;
1436 bptr->set_filter_options (request, args);
1437 bptr->load_metadata_defaults (request.fields);
1438
1439 format_t *formatlistptr = new format_t();
1440 parse_formatstring (formatstring, formatlistptr, request.fields, request.getParents);
1441
1442 // do the query
1443 request.filterResultOptions = FROID | FRmetadata | FRtermFreq;
1444 text_t formattedstring = "";
1445 get_formatted_query_string(formattedstring, segment, args, disp, logout);
1446
1447 if (!formattedstring.empty()) { // do the query
1448 // note! formattedstring is in unicode! mg and mgpp must convert!
1449 set_queryfilter_options (request, formattedstring, args);
1450 collectproto->filter (collection, request, response, err, logout);
1451 if (err != noError) {
1452 outconvertclass text_t2ascii;
1453 logout << text_t2ascii
1454 << "queryaction::search_single_collections: call to QueryFilter failed "
1455 << "for " << collection << " collection (" << get_comerror_string (err) << ")\n";
1456
1457 }
1458
1459 if (err == noError && !args["ifl"].empty()) {
1460 // Go directly to the first matching document
1461 ResultDocInfo_tarray::iterator thissection = response.docInfo.begin();
1462 if (thissection != response.docInfo.end()) {
1463 // This URL must have "&" and not "&amp;"!
1464 text_t doc_url = "_gwcgi_?e=_compressedoptions_&a=d&c=" + collection + "&d=" + (*thissection).OID;
1465
1466 // location response (url may contain macros!!)
1467 textout << outconvert << disp << "Location: " << doc_url << "\n\n";
1468 textout << flush;
1469 return true;
1470 }
1471 // Ooops... there weren't any matching documents
1472 else {
1473 // We'll just carry on as if ifl wasn't set. The only catch is that
1474 // get_cgihead_info won't have done the right thing (because ifl was
1475 // set), so we need to make sure the output is html
1476 textout << "Content-type: text/html\n\n";
1477 }
1478 }
1479
1480 if (err != noError) {
1481 disp.setmacro("resultline", "query", "_textnodocs_");
1482 if (err == syntaxError) {
1483 disp.setmacro ("freqmsg", "query", "_textinvalidquery_");
1484 } else {
1485 disp.setmacro ("freqmsg", "query", "");
1486 }
1487 } else {
1488
1489 define_query_macros (args, disp, response.numDocs, response.isApprox);
1490 define_single_query_macros(args, disp, response);
1491 // save the query if appropriate
1492 save_search_history(args, response.numDocs, response.isApprox);
1493 }
1494
1495 // If Lucene threw a TooManyClauses exception, tell the user about it
1496 if (args["ct"] == 2 && response.error_message == "TOO_MANY_CLAUSES") {
1497 disp.setmacro ("freqmsg", "query", "_textlucenetoomanyclauses_");
1498 }
1499 }
1500 define_history_macros (disp, args, protos, logout);
1501
1502 textout << outconvert << disp << "_query:header_\n"
1503 << "_query:content_";
1504
1505 if (err == noError) {
1506 // output the results
1507 bool use_table = is_table_content (formatlistptr);
1508 bptr->output_section_group (response, args, collection, 0, formatlistptr,
1509 use_table, request.fields, request.getParents,
1510 collectproto, disp, outconvert, textout, logout);
1511 }
1512
1513 textout << outconvert << disp << "_query:footer_";
1514
1515 delete (formatlistptr);
1516
1517 return true;
1518}
1519
1520// does the formatting of the query string - either uses q for a text search
1521// or the form values for an form search
1522// also adds dates if appropriate in text search
1523void queryaction::get_formatted_query_string (text_t &formattedstring,
1524 bool segment,
1525 cgiargsclass &args,
1526 displayclass &disp,
1527 ostream &logout) {
1528 if (args["qt"]=="0" && args["qto"] != "2") { // normal text search
1529 formattedstring = args["q"];
1530 // remove & | ! for simple search,do segmentation if necessary
1531 format_querystring (formattedstring, args.getintarg("b"), segment);
1532 if (args["ct"]!=0) { // mgpp and lucene - need to add in tag info if appropriate
1533 format_field_info(formattedstring, args["fqf"], args.getintarg("ct"),
1534 args.getintarg("t"), args.getintarg("b"));
1535 }
1536
1537 add_dates(formattedstring, args.getintarg("ds"), args.getintarg("de"),
1538 args.getintarg("dsbc"), args.getintarg("debc"),
1539 args.getintarg("ct"));
1540 args["q"] = formattedstring;
1541
1542 }
1543 else if (args["qt"]=="1" || args["qto"]=="2"){ // form search
1544
1545 if (args["b"]=="1" && args["fqa"]=="1") { // explicit query
1546 formattedstring = args["q"];
1547 }
1548 else { // form search
1549 if (args["b"]=="0") { // regular form
1550 parse_reg_query_form(formattedstring, args, segment);
1551 }
1552 else { // advanced form
1553 parse_adv_query_form(formattedstring, args, segment);
1554 }
1555 args["q"] = formattedstring;
1556
1557 // reset the cgiargfqv macro - need to escape any quotes in it
1558 disp.setmacro("cgiargfqv", "query", escape_quotes(args["fqv"]));
1559
1560 // also reset the _cgiargq_ macro as it has changed now
1561 disp.setmacro("cgiargq", displayclass::defaultpackage, html_safe(args["q"]));
1562
1563 // reset the compressed options to include the q arg
1564 text_t compressedoptions = recpt->get_compressed_arg(args, logout);
1565 if (!compressedoptions.empty()) {
1566 disp.setmacro ("compressedoptions", displayclass::defaultpackage, dm_safe(compressedoptions));
1567 // need a decoded version of compressedoptions for use within forms
1568 // as browsers encode values from forms before sending to server
1569 // (e.g. %25 becomes %2525)
1570 decode_cgi_arg (compressedoptions);
1571 disp.setmacro ("decodedcompressedoptions", displayclass::defaultpackage, dm_safe(compressedoptions));
1572 }
1573 } // form search
1574 } // args["qt"]=1
1575 else {
1576 logout << "ERROR (query_action::get_formatted_query_string): querytype not defined\n";
1577 }
1578}
1579
1580
1581// define_query_macros sets the macros that couldn't be set until the
1582// query had been done. Those macros are
1583// _resultline_, _nextfirst_, _nextlast_, _prevfirst_, _prevlast_,
1584// _thisfirst_, and _thislast_ and _quotedquery_
1585// this has been simplified so it can be used with both search_single_coll
1586// and search_multiple_coll
1587void queryaction::define_query_macros (cgiargsclass &args, displayclass &disp,
1588 int numdocs, isapprox isApprox) {
1589
1590 // set up _resultline_ macro
1591 text_t resline;
1592 int maxdocs = args.getintarg("m");
1593 if (num_phrases > 0) isApprox = Exact;
1594 if (maxdocs == -1) maxdocs = numdocs;
1595 else if (numdocs > maxdocs) {
1596 numdocs = maxdocs;
1597 isApprox = MoreThan;
1598 }
1599
1600 if (isApprox == Approximate) resline = "_textapprox_";
1601 else if (isApprox == MoreThan) resline = "_textmorethan_";
1602
1603 if (numdocs == 0) resline = "_textnodocs_";
1604 else if (numdocs == 1) resline += "_text1doc_";
1605 else resline += text_t(numdocs) + " _textlotsdocs_";
1606
1607 disp.setmacro("resultline", "query", resline);
1608
1609 int firstdoc = args.getintarg("r");
1610 int hitsperpage = args.getintarg("o");
1611 if (hitsperpage == -1) hitsperpage = numdocs;
1612
1613 // set up _thisfirst_ and _thislast_ macros
1614 disp.setmacro ("thisfirst", "query", firstdoc);
1615 int thislast = firstdoc + (hitsperpage - 1);
1616 if (thislast > numdocs) thislast = numdocs;
1617 disp.setmacro ("thislast", "query", thislast);
1618
1619 // set up _prevfirst_ and _prevlast_ macros
1620 if (firstdoc > 1) {
1621 disp.setmacro ("prevlast", "query", firstdoc - 1);
1622 int prevfirst = firstdoc - hitsperpage;
1623 if (prevfirst < 1) prevfirst = 1;
1624 disp.setmacro ("prevfirst", "query", prevfirst);
1625 }
1626
1627 // set up _nextfirst_ and _nextlast_ macros
1628 if (thislast < numdocs) {
1629 disp.setmacro ("nextfirst", "query", thislast + 1);
1630 int nextlast = thislast + hitsperpage;
1631 if (nextlast > numdocs) nextlast = numdocs;
1632 disp.setmacro ("nextlast", "query", nextlast);
1633 }
1634
1635 // do quoted query here cos we may have added quotes during query pre-processing
1636 if (args["ct"]==0) { // mg queries only, not mgpp
1637 // get the quoted bits of the query string and set _quotedquery_
1638 text_tarray phrases;
1639 get_phrases (args["q"], phrases);
1640 num_phrases = phrases.size();
1641 text_tarray::const_iterator phere = phrases.begin();
1642 text_tarray::const_iterator pend = phrases.end();
1643 bool first = true;
1644 text_t quotedquery;
1645 while (phere != pend) {
1646 if (!first)
1647 if ((phere +1) == pend) quotedquery += " and ";
1648 else quotedquery += ", ";
1649
1650 quotedquery += "\"" + *phere + "\"";
1651 first = false;
1652 ++phere;
1653 }
1654 if (args.getintarg("s") && !quotedquery.empty()) quotedquery += "_textstemon_";
1655 disp.setmacro ("quotedquery", "query", quotedquery);
1656 }
1657
1658}
1659
1660// define_single_query_macros sets the extra macros for search_single_coll
1661// that couldn't be set until the query had been done. Those macros are
1662// _freqmsg_ and _stopwordsmsg_
1663void queryaction::define_single_query_macros (cgiargsclass &args,
1664 displayclass &disp,
1665 const FilterResponse_t &response) {
1666 // set up _freqmsg_ and _stopwordsmsg_ macros
1667
1668 text_t freqmsg = "";
1669 freqmsg = "_textfreqmsg1_";
1670 TermInfo_tarray::const_iterator this_term = response.termInfo.begin();
1671 TermInfo_tarray::const_iterator end_term = response.termInfo.end();
1672 while (this_term != end_term) {
1673 freqmsg += (*this_term).term + ": " + (*this_term).freq;
1674 if ((this_term + 1) != end_term)
1675 freqmsg += ", ";
1676 ++this_term;
1677 }
1678 disp.setmacro ("freqmsg", "query", freqmsg);
1679
1680 text_tset::const_iterator this_stopword = response.stopwords.begin();
1681 text_tset::const_iterator end_stopword = response.stopwords.end();
1682 if (this_stopword != end_stopword) {
1683 text_t stopwordsmsg = "_textstopwordsmsg_ ";
1684 while (this_stopword != end_stopword) {
1685 if (stopwordsmsg != "_textstopwordsmsg_ ") {
1686 stopwordsmsg += ", ";
1687 }
1688 stopwordsmsg += (*this_stopword);
1689 ++this_stopword;
1690 }
1691 disp.setmacro("stopwordsmsg", "query", stopwordsmsg);
1692 }
1693}
1694
1695// should this change for cross coll search??
1696bool queryaction::save_search_history (cgiargsclass &args, int numdocs,
1697 isapprox isApprox) {
1698 if (args["q"]=="") return true; // null query, dont save
1699 if (args["hs"]=="0") return true; // only save when submit query pressed
1700
1701 // get userid
1702 text_t userid = args["z"];
1703
1704 // the number of docs goes on the front of the query string
1705 text_t query = text_t(numdocs);
1706 if (isApprox==MoreThan) { // there were more docs found
1707 query.push_back('+');
1708 }
1709 query += "c="+args["c"];
1710 query += ";h="+args["h"];
1711 query += ";t="+args["t"];
1712 query += ";b="+args["b"];
1713 query += ";j="+args["j"];
1714 query += ";n="+args["n"];
1715 query += ";s="+args["s"];
1716 query += ";k="+args["k"];
1717 query += ";g="+args["g"];
1718
1719 text_t qstring = args["q"];
1720 //text_t formattedquery =cgi_safe(qstring);
1721 //query += "&amp;q="+formattedquery;
1722 query += ";q="+qstring;
1723 bool display=false;
1724 int hd = args.getintarg("hd");
1725 if (hd > 0) display=true;
1726 if (set_history_info(userid, query, gdbmhome, display)) return true;
1727 else return false;
1728
1729
1730}
1731
Note: See TracBrowser for help on using the repository browser.