source: trunk/gsdl/src/recpt/queryaction.cpp@ 12786

Last change on this file since 12786 was 12786, checked in by kjdon, 18 years ago

added sort field macro for use in query form with lucene, query parsing methods arguments changed

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 57.2 KB
Line 
1/**********************************************************************
2 *
3 * queryaction.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "queryaction.h"
27#include "querytools.h"
28#include "formattools.h"
29#include "cgiutils.h"
30#include "OIDtools.h"
31//#include "infodbclass.h"
32#include "fileutil.h"
33#include "text_t.h"
34#include "historydb.h"
35#include "htmlutils.h" // for html_safe in do_action
36#include "gsdltools.h"
37#include "phrases.h" // for get_phrases
38#include <stdlib.h> // for strtol
39#include <assert.h>
40
41void colinfo_t::clear () {
42 formatlistptr = NULL;
43 browserptr = NULL;
44}
45
46void QueryResult_t::clear() {
47 doc.clear();
48 collection.clear();
49}
50
51queryaction::queryaction () {
52
53 recpt = NULL;
54 num_phrases = 0;
55
56 // this action uses cgi variable "a"
57 cgiarginfo arg_ainfo;
58 arg_ainfo.shortname = "a";
59 arg_ainfo.longname = "action";
60 arg_ainfo.multiplechar = true;
61 arg_ainfo.defaultstatus = cgiarginfo::weak;
62 arg_ainfo.argdefault = "q";
63 arg_ainfo.savedarginfo = cgiarginfo::must;
64 argsinfo.addarginfo (NULL, arg_ainfo);
65
66 // "ct" - 0 = mg, 1 = mgpp, 2=lucene
67 arg_ainfo.shortname = "ct";
68 arg_ainfo.longname = "collection type";
69 arg_ainfo.multiplechar = true; // can be empty or single char
70 arg_ainfo.defaultstatus = cgiarginfo::weak;
71 arg_ainfo.argdefault = g_EmptyText;
72 arg_ainfo.savedarginfo = cgiarginfo::must;
73 argsinfo.addarginfo (NULL, arg_ainfo);
74
75 // "b" - 0 = simple, 1 = advanced
76 arg_ainfo.shortname = "b";
77 arg_ainfo.longname = "query mode";
78 arg_ainfo.multiplechar = false;
79 arg_ainfo.defaultstatus = cgiarginfo::weak;
80 arg_ainfo.argdefault = "0";
81 arg_ainfo.savedarginfo = cgiarginfo::must;
82 argsinfo.addarginfo (NULL, arg_ainfo);
83
84 // "h"
85 arg_ainfo.shortname = "h";
86 arg_ainfo.longname = "main index";
87 arg_ainfo.multiplechar = true;
88 arg_ainfo.defaultstatus = cgiarginfo::weak;
89 arg_ainfo.argdefault = g_EmptyText;
90 arg_ainfo.savedarginfo = cgiarginfo::must;
91 argsinfo.addarginfo (NULL, arg_ainfo);
92
93 // "h2"
94 arg_ainfo.shortname = "h2";
95 arg_ainfo.longname = "main index for second query";
96 arg_ainfo.multiplechar = true;
97 arg_ainfo.defaultstatus = cgiarginfo::weak;
98 arg_ainfo.argdefault = g_EmptyText;
99 arg_ainfo.savedarginfo = cgiarginfo::must;
100 argsinfo.addarginfo (NULL, arg_ainfo);
101
102 // "j"
103 arg_ainfo.shortname = "j";
104 arg_ainfo.longname = "sub collection index";
105 arg_ainfo.multiplechar = true;
106 arg_ainfo.defaultstatus = cgiarginfo::weak;
107 arg_ainfo.argdefault = g_EmptyText;
108 arg_ainfo.savedarginfo = cgiarginfo::must;
109 argsinfo.addarginfo (NULL, arg_ainfo);
110
111 // "j2"
112 arg_ainfo.shortname = "j2";
113 arg_ainfo.longname = "sub collection index for second query";
114 arg_ainfo.multiplechar = true;
115 arg_ainfo.defaultstatus = cgiarginfo::weak;
116 arg_ainfo.argdefault = g_EmptyText;
117 arg_ainfo.savedarginfo = cgiarginfo::must;
118 argsinfo.addarginfo (NULL, arg_ainfo);
119
120 // "n"
121 arg_ainfo.shortname = "n";
122 arg_ainfo.longname = "language index";
123 arg_ainfo.multiplechar = true;
124 arg_ainfo.defaultstatus = cgiarginfo::weak;
125 arg_ainfo.argdefault = g_EmptyText;
126 arg_ainfo.savedarginfo = cgiarginfo::must;
127 argsinfo.addarginfo (NULL, arg_ainfo);
128
129 // "n2"
130 arg_ainfo.shortname = "n2";
131 arg_ainfo.longname = "language index for second query";
132 arg_ainfo.multiplechar = true;
133 arg_ainfo.defaultstatus = cgiarginfo::weak;
134 arg_ainfo.argdefault = g_EmptyText;
135 arg_ainfo.savedarginfo = cgiarginfo::must;
136 argsinfo.addarginfo (NULL, arg_ainfo);
137
138
139 // "q"
140 arg_ainfo.shortname = "q";
141 arg_ainfo.longname = "query string";
142 arg_ainfo.multiplechar = true;
143 arg_ainfo.defaultstatus = cgiarginfo::weak;
144 arg_ainfo.argdefault = g_EmptyText;
145 arg_ainfo.savedarginfo = cgiarginfo::must;
146 argsinfo.addarginfo (NULL, arg_ainfo);
147
148 // "q2"
149 arg_ainfo.shortname = "q2";
150 arg_ainfo.longname = "query string for second query";
151 arg_ainfo.multiplechar = true;
152 arg_ainfo.defaultstatus = cgiarginfo::weak;
153 arg_ainfo.argdefault = g_EmptyText;
154 arg_ainfo.savedarginfo = cgiarginfo::must;
155 argsinfo.addarginfo (NULL, arg_ainfo);
156
157 // "cq2" ""=don't combine, "and", "or", "not"
158 arg_ainfo.shortname = "cq2";
159 arg_ainfo.longname = "combine queries";
160 arg_ainfo.multiplechar = true;
161 arg_ainfo.defaultstatus = cgiarginfo::weak;
162 arg_ainfo.argdefault = g_EmptyText;
163 arg_ainfo.savedarginfo = cgiarginfo::must;
164 argsinfo.addarginfo (NULL, arg_ainfo);
165
166 // "t" - 1 = ranked 0 = boolean
167 arg_ainfo.shortname = "t";
168 arg_ainfo.longname = "search type";
169 arg_ainfo.multiplechar = false;
170 arg_ainfo.defaultstatus = cgiarginfo::weak;
171 arg_ainfo.argdefault = "1";
172 arg_ainfo.savedarginfo = cgiarginfo::must;
173 argsinfo.addarginfo (NULL, arg_ainfo);
174
175 // "k"
176 arg_ainfo.shortname = "k";
177 arg_ainfo.longname = "casefolding";
178 arg_ainfo.multiplechar = false;
179 arg_ainfo.defaultstatus = cgiarginfo::weak;
180 arg_ainfo.argdefault = "1";
181 arg_ainfo.savedarginfo = cgiarginfo::must;
182 argsinfo.addarginfo (NULL, arg_ainfo);
183
184 // "s"
185 arg_ainfo.shortname = "s";
186 arg_ainfo.longname = "stemming";
187 arg_ainfo.multiplechar = false;
188 arg_ainfo.defaultstatus = cgiarginfo::weak;
189 arg_ainfo.argdefault ="0";
190 arg_ainfo.savedarginfo = cgiarginfo::must;
191 argsinfo.addarginfo (NULL, arg_ainfo);
192
193 // "m"
194 arg_ainfo.shortname = "m";
195 arg_ainfo.longname = "maximum number of documents";
196 arg_ainfo.multiplechar = true;
197 arg_ainfo.defaultstatus = cgiarginfo::weak;
198 arg_ainfo.argdefault = "50";
199 arg_ainfo.savedarginfo = cgiarginfo::must;
200 argsinfo.addarginfo (NULL, arg_ainfo);
201
202 // "o"
203 arg_ainfo.shortname = "o";
204 arg_ainfo.longname = "hits per page";
205 arg_ainfo.multiplechar = true;
206 arg_ainfo.defaultstatus = cgiarginfo::weak;
207 arg_ainfo.argdefault = "20";
208 arg_ainfo.savedarginfo = cgiarginfo::must;
209 argsinfo.addarginfo (NULL, arg_ainfo);
210
211 // "r"
212 arg_ainfo.shortname = "r";
213 arg_ainfo.longname = "start results from";
214 arg_ainfo.multiplechar = true;
215 arg_ainfo.defaultstatus = cgiarginfo::weak;
216 arg_ainfo.argdefault = "1";
217 arg_ainfo.savedarginfo = cgiarginfo::must;
218 argsinfo.addarginfo (NULL, arg_ainfo);
219
220 // "ccs"
221 arg_ainfo.shortname = "ccs";
222 arg_ainfo.longname = "cross collection searching";
223 arg_ainfo.multiplechar = false;
224 arg_ainfo.defaultstatus = cgiarginfo::weak;
225 arg_ainfo.argdefault = "0";
226 arg_ainfo.savedarginfo = cgiarginfo::must;
227 argsinfo.addarginfo (NULL, arg_ainfo);
228
229 // "ccp"
230 arg_ainfo.shortname = "ccp";
231 arg_ainfo.longname = "cross collection page";
232 arg_ainfo.multiplechar = false;
233 arg_ainfo.defaultstatus = cgiarginfo::weak;
234 arg_ainfo.argdefault = "0";
235 arg_ainfo.savedarginfo = cgiarginfo::must;
236 argsinfo.addarginfo (NULL, arg_ainfo);
237
238 // "cc"
239 arg_ainfo.shortname = "cc";
240 arg_ainfo.longname = "collections to search";
241 arg_ainfo.multiplechar = true;
242 arg_ainfo.multiplevalue = true;
243 arg_ainfo.defaultstatus = cgiarginfo::weak;
244 arg_ainfo.argdefault = g_EmptyText;
245 arg_ainfo.savedarginfo = cgiarginfo::must;
246 argsinfo.addarginfo (NULL, arg_ainfo);
247
248 // "hd" history display - search history only displayed when
249 // this var set to something other than 0
250 // this number of records is displayed
251 arg_ainfo.shortname = "hd";
252 arg_ainfo.longname = "history display";
253 arg_ainfo.multiplechar = true;
254 arg_ainfo.multiplevalue = false;
255 arg_ainfo.defaultstatus = cgiarginfo::weak;
256 arg_ainfo.argdefault = "0";
257 arg_ainfo.savedarginfo = cgiarginfo::must;
258 argsinfo.addarginfo (NULL, arg_ainfo);
259
260 // "hs" save - set to 1 in query form, so only save when submit
261 // query
262 // 0 = no save 1 = save
263 arg_ainfo.shortname = "hs";
264 arg_ainfo.longname = "history save";
265 arg_ainfo.multiplechar = false;
266 arg_ainfo.defaultstatus = cgiarginfo::weak;
267 arg_ainfo.argdefault = "0";
268 arg_ainfo.savedarginfo = cgiarginfo::mustnot;
269 argsinfo.addarginfo (NULL, arg_ainfo);
270
271 // "g" - new arg for granularity, for mgpp collections
272 arg_ainfo.shortname = "g";
273 arg_ainfo.longname = "granularity";
274 arg_ainfo.multiplechar = true;
275 arg_ainfo.defaultstatus = cgiarginfo::weak;
276 arg_ainfo.argdefault = g_EmptyText;
277 arg_ainfo.savedarginfo = cgiarginfo::must;
278 argsinfo.addarginfo (NULL, arg_ainfo);
279
280 // "ds" - start date
281 arg_ainfo.shortname = "ds";
282 arg_ainfo.longname = "start date";
283 arg_ainfo.multiplechar = true;
284 arg_ainfo.defaultstatus = cgiarginfo::weak;
285 arg_ainfo.argdefault = g_EmptyText;
286 arg_ainfo.savedarginfo = cgiarginfo::must;
287 argsinfo.addarginfo (NULL, arg_ainfo);
288
289 // "de" - end date
290 arg_ainfo.shortname = "de";
291 arg_ainfo.longname = "end date";
292 arg_ainfo.multiplechar = true;
293 arg_ainfo.defaultstatus = cgiarginfo::weak;
294 arg_ainfo.argdefault = g_EmptyText;
295 arg_ainfo.savedarginfo = cgiarginfo::must;
296 argsinfo.addarginfo (NULL, arg_ainfo);
297
298 // "dsbc" - whether or not start date is prechristian
299 arg_ainfo.shortname = "dsbc";
300 arg_ainfo.longname = "start date bc";
301 arg_ainfo.multiplechar = false;
302 arg_ainfo.defaultstatus = cgiarginfo::weak;
303 arg_ainfo.argdefault = "0";
304 arg_ainfo.savedarginfo = cgiarginfo::must;
305 argsinfo.addarginfo (NULL, arg_ainfo);
306
307 // "debc" - whether or not end date is prechristian
308 arg_ainfo.shortname = "debc";
309 arg_ainfo.longname = "end date bc";
310 arg_ainfo.multiplechar = false;
311 arg_ainfo.defaultstatus = cgiarginfo::weak;
312 arg_ainfo.argdefault = "0";
313 arg_ainfo.savedarginfo = cgiarginfo::must;
314 argsinfo.addarginfo (NULL, arg_ainfo);
315
316 // "qt" - 0 = text, 1 = form
317 arg_ainfo.shortname = "qt";
318 arg_ainfo.longname = "query type";
319 arg_ainfo.multiplechar = true; // can be empty or single char
320 arg_ainfo.defaultstatus = cgiarginfo::weak;
321 arg_ainfo.argdefault = g_EmptyText;
322 arg_ainfo.savedarginfo = cgiarginfo::must;
323 argsinfo.addarginfo (NULL, arg_ainfo);
324
325 // "qto" - 1 = text only, 2 = form only, 3 = text and form
326 arg_ainfo.shortname = "qto";
327 arg_ainfo.longname = "query type options";
328 arg_ainfo.multiplechar = true; // can be empty or single char
329 arg_ainfo.defaultstatus = cgiarginfo::weak;
330 arg_ainfo.argdefault = g_EmptyText;
331 arg_ainfo.savedarginfo = cgiarginfo::must;
332 argsinfo.addarginfo (NULL, arg_ainfo);
333
334 // "qb" - 0 = regular, 1 = large
335 arg_ainfo.shortname = "qb";
336 arg_ainfo.longname = "query box type";
337 arg_ainfo.multiplechar = false;
338 arg_ainfo.defaultstatus = cgiarginfo::weak;
339 arg_ainfo.argdefault = "0";
340 arg_ainfo.savedarginfo = cgiarginfo::must;
341 argsinfo.addarginfo (NULL, arg_ainfo);
342
343 // "fqn" - number of fields in the query form
344 arg_ainfo.shortname = "fqn";
345 arg_ainfo.longname = "form query num fields";
346 arg_ainfo.multiplechar = true;
347 arg_ainfo.defaultstatus = cgiarginfo::weak;
348 arg_ainfo.argdefault = "4";
349 arg_ainfo.savedarginfo = cgiarginfo::must;
350 argsinfo.addarginfo (NULL, arg_ainfo);
351
352 // "fqf" - the list of field names in the form query
353 // - a comma separated list
354 arg_ainfo.shortname = "fqf";
355 arg_ainfo.longname = "form query fields";
356 arg_ainfo.multiplechar = true;
357 arg_ainfo.defaultstatus = cgiarginfo::weak;
358 arg_ainfo.argdefault = g_EmptyText;
359 arg_ainfo.savedarginfo = cgiarginfo::must;
360 argsinfo.addarginfo (NULL, arg_ainfo);
361
362 // "fqv" - the list of values in the form query
363 // - a comma separated list
364 arg_ainfo.shortname = "fqv";
365 arg_ainfo.longname = "form query values";
366 arg_ainfo.multiplechar = true;
367 arg_ainfo.defaultstatus = cgiarginfo::weak;
368 arg_ainfo.argdefault = g_EmptyText;
369 arg_ainfo.savedarginfo = cgiarginfo::must;
370 argsinfo.addarginfo (NULL, arg_ainfo);
371
372
373 // "fqs" - the list of stemming options in the form query
374 // - a comma separated list
375 arg_ainfo.shortname = "fqs";
376 arg_ainfo.longname = "form query stems";
377 arg_ainfo.multiplechar = true;
378 arg_ainfo.defaultstatus = cgiarginfo::weak;
379 arg_ainfo.argdefault = g_EmptyText;
380 arg_ainfo.savedarginfo = cgiarginfo::must;
381 argsinfo.addarginfo (NULL, arg_ainfo);
382
383
384 // "fqk" - the list of casefolding options in the form query
385 // - a comma separated list
386 arg_ainfo.shortname = "fqk";
387 arg_ainfo.longname = "form query casefolds";
388 arg_ainfo.multiplechar = true;
389 arg_ainfo.defaultstatus = cgiarginfo::weak;
390 arg_ainfo.argdefault = g_EmptyText;
391 arg_ainfo.savedarginfo = cgiarginfo::must;
392 argsinfo.addarginfo (NULL, arg_ainfo);
393
394 // "fqc" - the list of boolean operators in the form query
395 // - a comma separated list
396 arg_ainfo.shortname = "fqc";
397 arg_ainfo.longname = "form query combines";
398 arg_ainfo.multiplechar = true;
399 arg_ainfo.defaultstatus = cgiarginfo::weak;
400 arg_ainfo.argdefault = g_EmptyText;
401 arg_ainfo.savedarginfo = cgiarginfo::must;
402 argsinfo.addarginfo (NULL, arg_ainfo);
403
404 // "fqa" - form query advanced - for "run query"
405 arg_ainfo.shortname = "fqa";
406 arg_ainfo.longname = "form query advanced query";
407 arg_ainfo.multiplechar = false;
408 arg_ainfo.defaultstatus = cgiarginfo::weak;
409 arg_ainfo.argdefault = "0";
410 arg_ainfo.savedarginfo = cgiarginfo::must;
411 argsinfo.addarginfo (NULL, arg_ainfo);
412
413 // "ifl" - I'm feeling lucky! (Go directly to the first matching document)
414 arg_ainfo.shortname = "ifl";
415 arg_ainfo.longname = "i'm feeling lucky";
416 arg_ainfo.multiplechar = false;
417 arg_ainfo.defaultstatus = cgiarginfo::weak;
418 arg_ainfo.argdefault = g_EmptyText;
419 arg_ainfo.savedarginfo = cgiarginfo::mustnot;
420 argsinfo.addarginfo (NULL, arg_ainfo);
421
422 // "sf" - Sort field. Set to field to be used for sorting search reult
423 // set (only implemented for lucene collections at present).
424 arg_ainfo.shortname = "sf";
425 arg_ainfo.longname = "sort field";
426 arg_ainfo.multiplechar = true;
427 arg_ainfo.defaultstatus = cgiarginfo::weak;
428 arg_ainfo.argdefault = g_EmptyText;
429 arg_ainfo.savedarginfo = cgiarginfo::must;
430 argsinfo.addarginfo (NULL, arg_ainfo);
431
432 // "fuzziness" controls how closely the search terms must match
433 // 100 = exact match, 0 = very inexact match (only implemented for Lucene)
434 arg_ainfo.shortname = "fuzziness";
435 arg_ainfo.longname = "Lucene fuzziness value";
436 arg_ainfo.multiplechar = true;
437 arg_ainfo.defaultstatus = cgiarginfo::weak;
438 arg_ainfo.argdefault = g_EmptyText;
439 arg_ainfo.savedarginfo = cgiarginfo::must;
440 argsinfo.addarginfo (NULL, arg_ainfo);
441}
442
443void queryaction::configure (const text_t &key, const text_tarray &cfgline) {
444 action::configure (key, cfgline);
445}
446
447bool queryaction::init (ostream &logout) {
448 return action::init (logout);
449}
450
451bool queryaction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args,
452 recptprotolistclass * /*protos*/, ostream &logout) {
453
454 // check t argument
455 int arg_t = args.getintarg("t");
456 if (arg_t != 0 && arg_t != 1) {
457 logout << "Warning: \"t\" argument out of range (" << arg_t << ")\n";
458 cgiarginfo *tinfo = argsinfo.getarginfo ("t");
459 if (tinfo != NULL) args["t"] = tinfo->argdefault;
460 }
461
462 // check k argument
463 int arg_k = args.getintarg("k");
464 if (arg_k != 0 && arg_k != 1) {
465 logout << "Warning: \"k\" argument out of range (" << arg_k << ")\n";
466 cgiarginfo *kinfo = argsinfo.getarginfo ("k");
467 if (kinfo != NULL) args["k"] = kinfo->argdefault;
468 }
469
470 // check s argument
471 int arg_s = args.getintarg("s");
472 if (arg_s != 0 && arg_s != 1) {
473 logout << "Warning: \"s\" argument out of range (" << arg_s << ")\n";
474 cgiarginfo *sinfo = argsinfo.getarginfo ("s");
475 if (sinfo != NULL) args["s"] = sinfo->argdefault;
476 }
477
478 // check m argument
479 int arg_m = args.getintarg("m");
480 if (arg_m < -1) {
481 logout << "Warning: \"m\" argument less than -1 (" << arg_m << ")\n";
482 cgiarginfo *minfo = argsinfo.getarginfo ("m");
483 if (minfo != NULL) args["m"] = minfo->argdefault;
484 }
485
486 // check o argument
487 int arg_o = args.getintarg("o");
488 if (arg_o < -1) {
489 logout << "Warning: \"o\" argument less than -1 (" << arg_o << ")\n";
490 cgiarginfo *oinfo = argsinfo.getarginfo ("o");
491 if (oinfo != NULL) args["o"] = oinfo->argdefault;
492 }
493
494 // check r argument
495 int arg_r = args.getintarg("r");
496 if (arg_r < 1) {
497 logout << "Warning: \"r\" argument less than 1 (" << arg_r << ")\n";
498 cgiarginfo *rinfo = argsinfo.getarginfo ("r");
499 if (rinfo != NULL) args["r"] = rinfo->argdefault;
500 }
501 //check hd argument
502 int arg_hd = args.getintarg("hd");
503 if (arg_hd <0 ) {
504 logout << "Warning: \"hd\" argument less than 0 (" << arg_hd << ")\n";
505 cgiarginfo *hdinfo = argsinfo.getarginfo ("hd");
506 if (hdinfo != NULL) args["hd"] = hdinfo->argdefault;
507 }
508
509 //check hs argument
510 int arg_hs = args.getintarg("hs");
511 if (arg_hs !=0 && arg_hs !=1) {
512 logout << "Warning: \"hs\" argument out of range (" << arg_hs << ")\n";
513 cgiarginfo *hsinfo = argsinfo.getarginfo ("hs");
514 if (hsinfo != NULL) args["hs"] = hsinfo->argdefault;
515 }
516
517 // check ct argument
518 int arg_ct = args.getintarg("ct");
519 if (arg_ct < 0 || arg_ct > 2) {
520 logout << "Warning: \"ct\" argument out of range (" << arg_ct << ")\n";
521 cgiarginfo *ctinfo = argsinfo.getarginfo ("ct");
522 if (ctinfo != NULL) args["ct"] = ctinfo->argdefault;
523 }
524
525 // check qt argument
526 int arg_qt = args.getintarg("qt");
527 if (arg_qt !=0 && arg_qt !=1) {
528 logout << "Warning: \"qt\" argument out of range (" << arg_qt << ")\n";
529 cgiarginfo *qtinfo = argsinfo.getarginfo ("qt");
530 if (qtinfo != NULL) args["qt"] = qtinfo->argdefault;
531 }
532
533 // check qb argument
534 int arg_qb = args.getintarg("qb");
535 if (arg_qb !=0 && arg_qb !=1) {
536 logout << "Warning: \"qb\" argument out of range (" << arg_qb << ")\n";
537 cgiarginfo *qbinfo = argsinfo.getarginfo ("qb");
538 if (qbinfo != NULL) args["qb"] = qbinfo->argdefault;
539 }
540
541 // check fqa argument
542 int arg_fqa = args.getintarg("fqa");
543 if (arg_fqa !=0 && arg_fqa !=1) {
544 logout << "Warning: \"fqa\" argument out of range (" << arg_fqa << ")\n";
545 cgiarginfo *fqainfo = argsinfo.getarginfo ("fqa");
546 if (fqainfo != NULL) args["fqa"] = fqainfo->argdefault;
547 }
548
549 // check fqn argument
550 int arg_fqn = args.getintarg("fqn");
551 if (arg_fqn < -1) {
552 logout << "Warning: \"fqn\" argument less than -1 (" << arg_fqn << ")\n";
553 cgiarginfo *fqninfo = argsinfo.getarginfo ("fqn");
554 if (fqninfo != NULL) args["fqn"] = fqninfo->argdefault;
555 }
556
557 return true;
558}
559
560void queryaction::get_cgihead_info (cgiargsclass &args, recptprotolistclass * /*protos*/,
561 response_t &response, text_t &response_data,
562 ostream &/*logout*/) {
563 // If this is an "I'm feeling lucky" request, we don't know the target location until later
564 if (!args["ifl"].empty()) {
565 response = undecided_location;
566 return;
567 }
568
569 response = content;
570 response_data = "text/html";
571}
572
573void queryaction::define_internal_macros (displayclass &disp, cgiargsclass &args,
574 recptprotolistclass * protos,
575 ostream &logout) {
576
577 // define_internal_macros sets the following macros:
578
579 // The following macros are set later (in define_query_macros) as they can't be set until
580 // the query has been done.
581 // _quotedquery_ the part of the query string that was quoted for post-processing
582 // _freqmsg_ the term frequency string
583
584 // _resultline_ the "x documents matched the query" string
585
586 // _prevfirst_ these are used when setting up the links to previous/next
587 // _prevlast_ pages of results (_thisfirst_ and _thislast_ are used to set
588 // _nextfirst_ the 'results x-x for query: xxxx' string in the title bar)
589 // _nextlast_
590 // _thisfirst_
591 // _thislast_
592
593
594 define_form_macros(disp, args, protos, logout);
595
596 define_query_interface(disp, args, protos, logout);
597
598
599}
600
601void queryaction::define_query_interface(displayclass &disp,
602 cgiargsclass &args,
603 recptprotolistclass * protos,
604 ostream &logout){
605 text_t collection = args["c"];
606
607 //check that the protocol is alive
608 recptproto* colproto = protos->getrecptproto (collection, logout);
609 if(colproto == NULL) {
610 logout << "ERROR: Null collection protocol trying to query"
611 << collection.getcstr() << "\n";
612 return;
613 }
614
615 //check the collection is responding/in place
616 ColInfoResponse_t *colinfo = recpt->get_collectinfo_ptr(colproto, collection,
617 logout);
618 if(colinfo == NULL){
619 logout << "ERROR: Null returned for get_collectinfo_ptr on "
620 << collection.getcstr() << "in queryaction::define_query_interface\n";
621 return;
622 }
623
624 text_tmap::iterator check = colinfo->format.find("QueryInterface");
625 if(check != colinfo->format.end()){
626 if((*check).second=="DateSearch"){
627 text_t current = "_datesearch_";
628 disp.setmacro("optdatesearch","query",current);
629 }
630 }
631}
632
633
634// sets the selection box macros _hselection_, _jselection_, _nselection_ _gselection_, fqfselection_
635void queryaction::set_option_macro (const text_t &macroname,
636 text_t current_value,
637 bool display_single,
638 bool add_js_update,
639 const FilterOption_t &option,
640 displayclass &disp) {
641
642 if (option.validValues.empty()) return;
643 if (option.validValues.size() == 1) {
644 if (display_single) {
645 disp.setmacro (macroname + "selection", displayclass::defaultpackage, "_" + option.defaultValue + "_");
646 }
647 return;
648 }
649 if (option.validValues.size() < 2) return;
650
651 text_t macrovalue = "<select name=\"" + macroname + "\"";
652 if (add_js_update) {
653 macrovalue += " onChange=\"update"+macroname+"();\"";
654 }
655 macrovalue += ">\n";
656
657 if (current_value.empty()) current_value = option.defaultValue;
658
659 text_tarray::const_iterator thisvalue = option.validValues.begin();
660 text_tarray::const_iterator endvalue = option.validValues.end();
661
662 while (thisvalue != endvalue) {
663 macrovalue += "<option value=\"" + *thisvalue + "\"";
664 if (*thisvalue == current_value)
665 macrovalue += " selected";
666 macrovalue += ">_" + *thisvalue + "_\n";
667 ++thisvalue;
668 }
669 macrovalue += "</select>\n";
670 disp.setmacro (macroname + "selection", displayclass::defaultpackage, macrovalue);
671}
672
673
674void queryaction::define_external_macros (displayclass &disp, cgiargsclass &args,
675 recptprotolistclass *protos, ostream &logout) {
676
677 // define_external_macros sets the following macros:
678
679 // some or all of these may not be required to be set
680 // _hselection_, _h2selection_ the selection box for the main part of the index
681 // _jselection_, _j2selection_ the selection box for the subcollection part of the index
682 // _nselection_, _n2selection_ the selection box for the language part of the index
683 // _cq2selection the selection box for combining two queries
684
685 // _gselection_, the selection box forlevels (mgpp)
686 // _fqfselection_, the selection box for index/fields (mgpp)
687 // can't do anything if collectproto is null (i.e. no collection was specified)
688 recptproto *collectproto = protos->getrecptproto (args["c"], logout);
689 if (collectproto == NULL) return;
690
691 ColInfoResponse_t *colinfo = recpt->get_collectinfo_ptr(collectproto,
692 args["c"],
693 logout);
694 set_query_type_args(colinfo, args);
695
696 comerror_t err;
697 InfoFilterOptionsResponse_t response;
698 InfoFilterOptionsRequest_t request;
699 request.filterName = "QueryFilter";
700
701 collectproto->get_filteroptions (args["c"], request, response, err, logout);
702 if (err == noError) {
703
704 FilterOption_tmap::const_iterator it;
705 FilterOption_tmap::const_iterator end = response.filterOptions.end();
706
707 // _hselection_ and _h2selection_ (Index)
708 it = response.filterOptions.find ("Index");
709 if (it != end) set_option_macro ("h", args["h"], true, false, (*it).second, disp);
710 if (it != end) set_option_macro ("h2", args["h2"], true,false, (*it).second, disp);
711
712 // _jselection_ and _j2selection_ (Subcollection)
713 it = response.filterOptions.find ("Subcollection");
714 if (it != end) set_option_macro ("j", args["j"], true,false, (*it).second, disp);
715 if (it != end) set_option_macro ("j2", args["j2"], true,false, (*it).second, disp);
716
717 // _nselection_ and _n2selection_ (Language)
718 it = response.filterOptions.find ("Language");
719 if (it != end) set_option_macro ("n", args["n"], true,false, (*it).second, disp);
720 if (it != end) set_option_macro ("n2", args["n2"], true,false, (*it).second, disp);
721
722 // _cq2selection_ (CombineQuery)
723 it = response.filterOptions.find ("CombineQuery");
724 if (it != end) set_option_macro ("cq2", args["cq2"], true,false, (*it).second, disp);
725
726 if ((args["ct"] == "1") || (args["ct"] == "2")) { // mgpp/lucene collections
727 // _gselection_ (Level)
728 it = response.filterOptions.find("Level");
729 if (it!=end) {
730 set_option_macro("g", args["g"], false, false, (*it).second, disp);
731 if (args["qt"]=="1") { // form search
732 set_gformselection_macro(args["g"], (*it).second, disp);
733 }
734 }
735 // _fqfselection_ field list
736 it = response.filterOptions.find("IndexField");
737 if (it!=end) {
738 bool form_search = false;
739 if (args["qto"]=="2" || args["qt"]=="1") {
740 form_search = true;
741 }
742 set_option_macro ("fqf", args["fqf"], true, form_search, (*it).second, disp);
743 if (args["ct"] == "2") {// lucene
744 // set the sort field macro
745 set_sfselection_macro(args["sf"], (*it).second, disp);
746 }
747 }
748 }
749 }
750} // define external macros
751
752void queryaction::set_sfselection_macro(text_t current_value,
753 const FilterOption_t &option,
754 displayclass &disp) {
755
756 // we need two or more options to continue
757 if (option.validValues.size() <= 2) {
758 return;
759 }
760
761 text_t macrovalue = "<select name=\"sf\">\n";
762
763 if (current_value.empty()) current_value = "";
764
765 // we give a rank option first
766 macrovalue += "<option value=\"\"";
767 if (current_value == "") {
768 macrovalue += " selected";
769 }
770 macrovalue += ">_query:textsortbyrank_\n";
771
772 text_tarray::const_iterator thisvalue = option.validValues.begin();
773 text_tarray::const_iterator endvalue = option.validValues.end();
774 int valid_count = 0;
775 while (thisvalue != endvalue) {
776 if (*thisvalue != "ZZ" && *thisvalue != "TX") {
777 ++valid_count;
778 macrovalue += "<option value=\"by" + *thisvalue + "\"";
779 if (current_value == "by"+*thisvalue)
780 macrovalue += " selected";
781 macrovalue += ">_" + *thisvalue + "_\n";
782 }
783 ++thisvalue;
784 }
785 macrovalue += "</select>";
786 if (valid_count > 0) {
787 disp.setmacro ("sfselection", displayclass::defaultpackage, macrovalue);
788 }
789
790}
791
792// sets the selection box macro _gformselection_.
793// the default for _gformselection_ is _gselection_
794void queryaction::set_gformselection_macro (text_t current_value,
795 const FilterOption_t &option,
796 displayclass &disp) {
797
798 if (option.validValues.size() <= 1) {
799 return;
800 }
801 // we need to check to see if there is paragraph present
802 text_tarray::const_iterator thisvalue = option.validValues.begin();
803 text_tarray::const_iterator endvalue = option.validValues.end();
804
805 bool has_paras = false;
806 while (thisvalue != endvalue) {
807 if (*thisvalue == "Para") {
808 has_paras = true;
809 break;
810 }
811 ++thisvalue;
812 }
813 if (!has_paras) return; // there is no difference between the form selection and the normal one
814
815 if (option.validValues.size() == 2) {
816 // we will only have one value, but we will still put it in as a text string
817 int opt = 0;
818 if (option.validValues[0] == "Para") {
819 opt = 1;
820 }
821 disp.setmacro ("gformselection", displayclass::defaultpackage, "_"+option.validValues[opt]+"_");
822 return;
823 }
824
825 // there will be a select box
826 text_t macrovalue = "<select name=\"g\">\n";
827
828 if (current_value.empty()) current_value = option.defaultValue;
829
830 thisvalue = option.validValues.begin();
831
832 while (thisvalue != endvalue) {
833 if (*thisvalue != "Para") {
834 macrovalue += "<option value=\"" + *thisvalue + "\"";
835 if (*thisvalue == current_value)
836 macrovalue += " selected";
837 macrovalue += ">_" + *thisvalue + "_\n";
838 }
839 ++thisvalue;
840 }
841 macrovalue += "</select>\n";
842 disp.setmacro ("gformselection", displayclass::defaultpackage, macrovalue);
843}
844void queryaction::define_form_macros (displayclass &disp, cgiargsclass &args,
845 recptprotolistclass *protos, ostream &logout) {
846
847 // defines the following macros
848 // _regformlist_
849 // _advformlist_
850
851 if (args["ct"]=="0" || args["qto"]=="1" || (args["qto"]=="3" && args["qt"] == "0") ) // mg, or mgpp/lucene with plain only, or mgpp with both, but set to plain
852 return; // dont need these macros
853
854 text_t form = "";
855 int argfqn = args.getintarg("fqn");
856
857 if (args["b"] == "1") { // advanced form
858 form += "_firstadvformelement_\n";
859 for (int i=1; i<argfqn; ++i) {
860 form += "_advformelement_\n";
861 }
862 disp.setmacro("advformlist", "query", form);
863 }
864 else { // simple form
865 for (int i=0; i<argfqn; ++i) {
866 form += "_regformelement_\n";
867 }
868 disp.setmacro("regformlist", "query", form);
869 }
870
871}
872
873void queryaction::define_history_macros (displayclass &disp, cgiargsclass &args,
874 recptprotolistclass *protos, ostream &logout) {
875
876 // defines the following macros
877 // _searchhistorylist_
878
879 text_t historylist;
880 int arghd = args.getintarg("hd");
881 if (arghd == 0) {
882 historylist="";
883 }
884 else {
885 historylist = "<!-- Search History List -->\n";
886
887 text_t userid = args["z"];
888 text_tarray entries;
889 if (get_history_info (userid, entries, gdbmhome, logout)) {
890 int count = 1;
891 text_tarray::iterator here = entries.begin();
892 text_tarray::iterator end = entries.end();
893 int numrecords=(int)entries.size();
894 if (numrecords>arghd) { // only display some of them
895 numrecords = arghd;
896 }
897 historylist += "<form action=\"_gwcgi_\" name=\"HistoryForm\"><table width=\"537\">\n";
898
899 for (int i=0; i<numrecords;++i) {
900 text_t query;
901 text_t numdocs;
902 text_t cgiargs;
903 text_t userinfo;
904 text_t escquery;
905 split_saved_query(entries[i],numdocs,cgiargs);
906 parse_saved_args(cgiargs, "q", query); // get query string out
907 decode_cgi_arg(query); // un cgisafe it
908 escquery = escape_quotes(query); // escape the quotes and newlines
909 text_t histvalue = "histvalue";
910 histvalue += i;
911 disp.setmacro(histvalue, "query", escquery);
912 format_user_info(cgiargs, userinfo, args, protos, logout);
913
914 historylist += "<tr><td align=\"right\">_imagehistbutton_(";
915 historylist += i;
916 historylist += ")</td>\n";
917 historylist += "<td><table border=\"1\" cellspacing=\"0\" ";
918 historylist += "cellpadding=\"0\"><tr><td width=\"365\" align=\"left\">"
919 + query
920 + "</td></tr></table></td><td width=\"110\" align=\"center\"><small>"
921 + numdocs;
922 if (numdocs == 1) historylist += " _texthresult_";
923 else historylist += " _texthresults_";
924 if (!userinfo.empty()) {
925 historylist += "<br>( "+userinfo+" )";
926 }
927 historylist += "</small></td>\n";
928 }
929 historylist+="</table></form>\n\n";
930
931 } // if get history info
932 else {
933 historylist += "_textnohistory_";
934 }
935 historylist += "<! ---- end of history list ----->\n";
936 } // else display list
937 disp.setmacro("searchhistorylist", "query", historylist);
938
939} // define history macros
940
941void queryaction::output_ccp (cgiargsclass &args, recptprotolistclass *protos,
942 displayclass &disp, outconvertclass &outconvert,
943 ostream &textout, ostream &logout) {
944
945 ColInfoResponse_t *cinfo = NULL;
946 comerror_t err;
947 InfoFilterOptionsResponse_t fresponse;
948 InfoFilterOptionsRequest_t frequest;
949 frequest.filterName = "QueryFilter";
950
951 text_t &index = args["h"];
952 text_t &subcollection = args["j"];
953 text_t &language = args["n"];
954
955 text_tset collections;
956 text_t arg_cc = args["cc"];
957 decode_cgi_arg (arg_cc);
958 splitchar (arg_cc.begin(), arg_cc.end(), ',', collections);
959
960 textout << outconvert << disp << "_query:header_\n"
961 << "<center>_navigationbar_</center><br>\n"
962 << "<form name=\"QueryForm\" method=\"get\" action=\"_gwcgi_\">\n"
963 << "<input type=\"hidden\" name=\"a\" value=\"q\">\n"
964 << "<input type=\"hidden\" name=\"site\" value=\"_cgiargsite_\"\n"
965 << "<input type=\"hidden\" name=\"e\" value=\"_compressedoptions_\">\n"
966 << "<input type=\"hidden\" name=\"ccp\" value=\"1\">\n"
967 << "<center><table width=\"_pagewidth_\"><tr valign=\"top\">\n"
968 << "<td>Select collections to search for \"" << args["q"]
969 << "\" <i>(index=" << index << " subcollection=" << subcollection
970 << " language=" << language << ")</i></td>\n"
971 << "<td><input type=\"submit\" value=\"_query:textbeginsearch_\"></td>\n"
972 << "</tr></table></center>\n"
973 << "<center><table width=\"_pagewidth_\">\n"
974 << "<tr><td>\n";
975
976 recptprotolistclass::iterator rprotolist_here = protos->begin();
977 recptprotolistclass::iterator rprotolist_end = protos->end();
978 while (rprotolist_here != rprotolist_end) {
979 if ((*rprotolist_here).p != NULL) {
980
981 text_tarray collist;
982 (*rprotolist_here).p->get_collection_list (collist, err, logout);
983 if (err == noError) {
984 text_tarray::iterator collist_here = collist.begin();
985 text_tarray::iterator collist_end = collist.end();
986 while (collist_here != collist_end) {
987
988 cinfo = recpt->get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, logout);
989 // if (err == noError && cinfo.isPublic && (cinfo.buildDate > 0)) {
990 if (cinfo != NULL && (cinfo->buildDate > 0)) {
991
992 (*rprotolist_here).p->get_filteroptions (*collist_here, frequest, fresponse, err, logout);
993 if (err == noError) {
994
995 FilterOption_tmap::const_iterator it;
996 FilterOption_tmap::const_iterator end = fresponse.filterOptions.end();
997 if (!index.empty()) {
998 it = fresponse.filterOptions.find ("Index");
999 if (it == end) {++collist_here; continue;}
1000 text_tarray::const_iterator there = (*it).second.validValues.begin();
1001 text_tarray::const_iterator tend = (*it).second.validValues.end();
1002 while (there != tend) {
1003 if (*there == index) break;
1004 ++there;
1005 }
1006 if (there == tend) {++collist_here; continue;}
1007 }
1008 if (!subcollection.empty()) {
1009 it = fresponse.filterOptions.find ("Subcollection");
1010 if (it == end) {++collist_here; continue;}
1011 text_tarray::const_iterator there = (*it).second.validValues.begin();
1012 text_tarray::const_iterator tend = (*it).second.validValues.end();
1013 while (there != tend) {
1014 if (*there == subcollection) break;
1015 ++there;
1016 }
1017 if (there == tend) {++collist_here; continue;}
1018 }
1019 if (!language.empty()) {
1020 it = fresponse.filterOptions.find ("Language");
1021 if (it == end) {++collist_here; continue;}
1022 text_tarray::const_iterator there = (*it).second.validValues.begin();
1023 text_tarray::const_iterator tend = (*it).second.validValues.end();
1024 while (there != tend) {
1025 if (*there == language) break;
1026 ++there;
1027 }
1028 if (there == tend) {++collist_here; continue;}
1029 }
1030
1031 // we've got a matching collection
1032 textout << outconvert << "<input type=\"checkbox\"";
1033
1034 text_tset::const_iterator t = collections.find (*collist_here);
1035 if (t != collections.end()) textout << outconvert << " checked";
1036
1037 text_t collectionname = cinfo->get_collectionmeta("collectionname", args["l"]);
1038 if (collectionname.empty()) {
1039 collectionname = *collist_here;
1040 }
1041 textout << outconvert << disp
1042 << " name=\"cc\" value=\"" << *collist_here << "\">"
1043 << collectionname << "<br>\n";
1044
1045
1046 }
1047 }
1048 ++collist_here;
1049 }
1050 }
1051 }
1052 ++rprotolist_here;
1053 }
1054 textout << outconvert << disp
1055 << "</td></tr></table></center>\n"
1056 << "</form>\n"
1057 << "_query:footer_\n";
1058
1059}
1060
1061bool queryaction::do_action (cgiargsclass &args, recptprotolistclass *protos,
1062 browsermapclass *browsers, displayclass &disp,
1063 outconvertclass &outconvert, ostream &textout,
1064 ostream &logout) {
1065
1066 if (recpt == NULL) {
1067 logout << "ERROR (queryaction::do_action): This action does not contain information\n"
1068 << " about any receptionists. The method set_receptionist was probably\n"
1069 << " not called from the module which instantiated this action.\n";
1070 return true;
1071 }
1072
1073
1074
1075 if (args["ccs"] == "1") {
1076 if (!args["cc"].empty()) {
1077 // query the selected collections
1078 text_t::const_iterator b = args["cc"].begin();
1079 text_t::const_iterator e = args["cc"].end();
1080 if (findchar (b, e, ',') != e) {
1081 if (!search_multiple_collections (args, protos, browsers, disp, outconvert,
1082 textout, logout)) return false;
1083 return true;
1084 } else {
1085 if (!search_single_collection (args, args["cc"], protos, browsers, disp,
1086 outconvert, textout, logout)) return false;
1087 return true;
1088 }
1089 }
1090 }
1091
1092 // simply query the current collection
1093 if (!search_single_collection (args, args["c"], protos, browsers, disp,
1094 outconvert, textout, logout)) return false;
1095 return true;
1096}
1097
1098bool queryaction::search_multiple_collections (cgiargsclass &args, recptprotolistclass *protos,
1099 browsermapclass *browsers, displayclass &disp,
1100 outconvertclass &outconvert, ostream &textout,
1101 ostream &logout) {
1102
1103 text_tarray collections;
1104
1105 text_t arg_cc = args["cc"];
1106 decode_cgi_arg (arg_cc);
1107 splitchar (arg_cc.begin(), arg_cc.end(), ',', collections);
1108
1109 if (collections.empty()) {
1110 logout << "queryaction::search_multiple_collections: No collections "
1111 << "set for doing multiple query - will search current collection\n";
1112 textout << outconvert << disp << "_query:textwarningnocollections_\n";
1113 return search_single_collection (args, args["c"], protos, browsers, disp,
1114 outconvert, textout, logout);
1115 }
1116
1117 // queryaction uses "VList" browser to display results,
1118 // a queries clasification is "Search"
1119 text_t browsertype = "VList";
1120 text_t classification = "Search";
1121
1122 QueryResult_tset results;
1123 map<text_t, colinfo_t, lttext_t> colinfomap;
1124
1125 ColInfoResponse_t *cinfo = NULL;
1126 recptproto *collectproto = NULL;
1127 comerror_t err;
1128 FilterRequest_t request;
1129 FilterResponse_t response;
1130 request.filterResultOptions = FROID | FRmetadata | FRtermFreq | FRranking;
1131 text_t freqmsg = "_textfreqmsg1_";
1132 int numdocs = 0;
1133 isapprox isApprox = Exact;
1134
1135 // what to do about segmentation for multiple colls??
1136 bool segment = false;
1137 text_t formattedstring = "";
1138 get_formatted_query_string(formattedstring, segment, args, disp, logout);
1139
1140 if (formattedstring.empty()) {
1141 // dont bother doing a query if no query string
1142 define_history_macros (disp, args, protos, logout);
1143 textout << outconvert << disp << "_query:header_\n"
1144 << "_query:content_";
1145 textout << outconvert << disp << "_query:footer_";
1146
1147 return true;
1148 }
1149 bool syntax_error = false;
1150
1151 set_queryfilter_options (request, formattedstring, args);
1152
1153 // need to retrieve maxdocs matches for each collection
1154 // (will eventually want to tidy this up, do so caching etc.)
1155 OptionValue_t option;
1156 option.name = "StartResults";
1157 option.value = "1";
1158 request.filterOptions.push_back (option);
1159
1160 option.name = "EndResults";
1161 option.value = args["m"];
1162 request.filterOptions.push_back (option);
1163
1164 text_tarray::iterator col_here = collections.begin();
1165 text_tarray::iterator col_end = collections.end();
1166
1167 map<text_t, int, lttext_t> termfreqs;
1168
1169 // just check the main col for formatting info - use individual format statements, or the main one?
1170
1171 browserclass *bptr = browsers->getbrowser (browsertype);
1172
1173 text_t main_col = args["c"];
1174 cinfo = recpt->get_collectinfo_ptr (collectproto, main_col, logout);
1175 if (cinfo == NULL) {
1176 logout << "ERROR (query_action::search_multiple_collections): get_collectinfo_ptr returned NULL for '"<<main_col<<"'\n";
1177 return false;
1178 }
1179
1180 bool use_main_col_format = false;
1181 if (cinfo->ccsOptions & CCSUniformSearchResultsFormatting) {
1182 use_main_col_format = true;
1183 }
1184
1185 request.fields.erase (request.fields.begin(), request.fields.end());
1186 request.getParents = false;
1187 bptr->load_metadata_defaults (request.fields);
1188
1189 text_t formatstring;
1190 format_t *formatlistptr = new format_t();
1191 if (use_main_col_format) {
1192 // just get one format for main coll and use it for each subcol
1193 if (!get_formatstring (classification, browsertype,
1194 cinfo->format, formatstring)) {
1195 formatstring = bptr->get_default_formatstring();
1196 }
1197
1198 parse_formatstring (formatstring, formatlistptr, request.fields, request.getParents);
1199 }
1200
1201 while (col_here != col_end) {
1202
1203 collectproto = protos->getrecptproto (*col_here, logout);
1204 if (collectproto == NULL) {
1205 logout << outconvert << "queryaction::search_multiple_collections: " << *col_here
1206 << " collection has a NULL collectproto, ignoring\n";
1207 ++col_here;
1208 continue;
1209 }
1210 cinfo = recpt->get_collectinfo_ptr (collectproto, *col_here, logout);
1211 if (cinfo == NULL) {
1212 logout << "ERROR (query_action::search_multiple_collections): get_collectinfo_ptr returned NULL\n";
1213 ++col_here;
1214 continue;
1215 }
1216
1217 if (!use_main_col_format) {
1218 request.fields.erase (request.fields.begin(), request.fields.end());
1219 request.getParents = false;
1220 bptr->load_metadata_defaults (request.fields);
1221
1222 //browserclass *bptr = browsers->getbrowser (browsertype);
1223
1224 // get the formatstring if there is one
1225 if (!get_formatstring (classification, browsertype,
1226 cinfo->format, formatstring)) {
1227 formatstring = bptr->get_default_formatstring();
1228 }
1229
1230 formatlistptr = new format_t();
1231 parse_formatstring (formatstring, formatlistptr, request.fields, request.getParents);
1232 }
1233
1234 colinfo_t thiscolinfo;
1235 thiscolinfo.formatlistptr = formatlistptr;
1236 thiscolinfo.browserptr = bptr;
1237 colinfomap[*col_here] = thiscolinfo;
1238
1239 // do the query
1240 collectproto->filter (*col_here, request, response, err, logout);
1241 if (err != noError && err != syntaxError) {
1242 outconvertclass text_t2ascii;
1243 logout << text_t2ascii
1244 << "queryaction::search_multiple_collections: call to QueryFilter failed "
1245 << "for " << *col_here << " collection (" << get_comerror_string (err) << ")\n";
1246 return false;
1247 }
1248
1249 if (err == syntaxError) {
1250 syntax_error = true;
1251 freqmsg = "_textinvalidquery_";
1252 // assume the syntax will be invalid for all colls
1253 break;
1254 }
1255 if (response.error_message == "TOO_MANY_CLAUSES") {
1256 freqmsg = "_textlucenetoomanyclauses_";
1257 break;
1258 }
1259 if (isApprox == Exact)
1260 isApprox = response.isApprox;
1261 else if (isApprox == MoreThan)
1262 if (response.isApprox == Approximate)
1263 isApprox = response.isApprox;
1264
1265 TermInfo_tarray::const_iterator this_term = response.termInfo.begin();
1266 TermInfo_tarray::const_iterator end_term = response.termInfo.end();
1267 while (this_term != end_term) {
1268 termfreqs[(*this_term).term] += (*this_term).freq;
1269 if ((col_here+1) == col_end) {
1270 freqmsg += (*this_term).term + ": " + termfreqs[(*this_term).term];
1271 if ((this_term+1) != end_term) freqmsg += ", ";
1272 }
1273 ++this_term;
1274 }
1275
1276 if (response.numDocs > 0) {
1277 numdocs += response.numDocs;
1278
1279 QueryResult_t thisresult;
1280 thisresult.collection = *col_here;
1281 ResultDocInfo_tarray::iterator doc_here = response.docInfo.begin();
1282 ResultDocInfo_tarray::iterator doc_end = response.docInfo.end();
1283 while (doc_here != doc_end) {
1284 thisresult.doc = *doc_here;
1285 results.insert (thisresult);
1286 ++doc_here;
1287 }
1288 }
1289 ++col_here;
1290 } // for each coll
1291
1292 disp.setmacro ("freqmsg", "query", freqmsg);
1293
1294 define_query_macros( args, disp, numdocs, isApprox);
1295 // save the query if appropriate
1296 save_search_history(args, numdocs, isApprox);
1297 define_history_macros (disp, args, protos, logout);
1298
1299 textout << outconvert << disp << "_query:header_\n"
1300 << "_query:content_";
1301
1302 if (!syntax_error) {
1303
1304 // now go through each result and output it
1305 QueryResult_tset::iterator res_here = results.begin();
1306 QueryResult_tset::iterator res_end = results.end();
1307 text_tset metadata; // empty !!
1308 bool getParents = false; // don't care !!
1309 bool use_table;
1310 ResultDocInfo_t thisdoc;
1311 format_t *formatlistptr = NULL;
1312 browserclass *browserptr = NULL;
1313
1314 int count = 1;
1315 int firstdoc = args.getintarg("r");
1316 int hitsperpage = args.getintarg("o");
1317 int thislast = firstdoc + (hitsperpage - 1);
1318
1319 // output results
1320 while (res_here != res_end) {
1321 if (count < firstdoc) {++count; ++res_here; continue;}
1322 if (count > thislast) break;
1323 formatlistptr = colinfomap[(*res_here).collection].formatlistptr;
1324 browserptr = colinfomap[(*res_here).collection].browserptr;
1325 thisdoc = (*res_here).doc;
1326 use_table = is_table_content (formatlistptr);
1327
1328 collectproto = protos->getrecptproto ((*res_here).collection, logout);
1329 if (collectproto == NULL) {
1330 logout << outconvert << "queryaction::search_multiple_collections: " << (*res_here).collection
1331 << " collection has a NULL collectproto, ignoring results\n";
1332 ++res_here;
1333 continue;
1334 }
1335
1336 browserptr->output_section_group (thisdoc, args, (*res_here).collection, 0,
1337 formatlistptr, use_table, metadata, getParents,
1338 collectproto, disp, outconvert, textout, logout);
1339 // textout << outconvert << "(ranking: " << (*res_here).doc.ranking << ")\n";
1340 ++res_here;
1341 ++count;
1342 }
1343 }
1344 textout << outconvert << disp << "_query:footer_";
1345
1346 // clean up the format_t pointers
1347 map<text_t, colinfo_t, lttext_t>::iterator here = colinfomap.begin();
1348 map<text_t, colinfo_t, lttext_t>::iterator end = colinfomap.end();
1349 while (here != end) {
1350 delete ((*here).second.formatlistptr);
1351 ++here;
1352 }
1353 return true;
1354}
1355
1356bool queryaction::search_single_collection (cgiargsclass &args, const text_t &collection,
1357 recptprotolistclass *protos, browsermapclass *browsers,
1358 displayclass &disp, outconvertclass &outconvert,
1359 ostream &textout, ostream &logout) {
1360
1361 recptproto *collectproto = protos->getrecptproto (collection, logout);
1362 if (collectproto == NULL) {
1363 logout << outconvert << "queryaction::search_single_collection: " << collection
1364 << " collection has a NULL collectproto\n";
1365
1366 // Display the "this collection is not installed on this system" page
1367 disp.setmacro("cvariable", displayclass::defaultpackage, collection);
1368 disp.setmacro("content", "query", "<p>_textbadcollection_<p>");
1369
1370 textout << outconvert << disp << "_query:header_\n"
1371 << "_query:content_\n" << "_query:footer_\n";
1372 return true;
1373 }
1374
1375 // queryaction uses "VList" browser to display results,
1376 // a queries clasification is "Search"
1377 text_t browsertype = "VList";
1378 text_t classification = "Search";
1379
1380 comerror_t err;
1381 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, collection, logout);
1382
1383 if (cinfo == NULL) {
1384 logout << "ERROR (query_action::search_single_collection): get_collectinfo_ptr returned NULL\n";
1385 return false;
1386 }
1387
1388 bool segment = cinfo->isSegmented;
1389 browserclass *bptr = browsers->getbrowser (browsertype);
1390
1391 // get the formatstring if there is one
1392 text_t formatstring;
1393 if (!get_formatstring (classification, browsertype,
1394 cinfo->format, formatstring)) {
1395 formatstring = bptr->get_default_formatstring();
1396 }
1397 FilterRequest_t request;
1398 FilterResponse_t response;
1399 bptr->set_filter_options (request, args);
1400 bptr->load_metadata_defaults (request.fields);
1401
1402 format_t *formatlistptr = new format_t();
1403 parse_formatstring (formatstring, formatlistptr, request.fields, request.getParents);
1404
1405 // do the query
1406 request.filterResultOptions = FROID | FRmetadata | FRtermFreq;
1407 text_t formattedstring = "";
1408 get_formatted_query_string(formattedstring, segment, args, disp, logout);
1409
1410 if (!formattedstring.empty()) { // do the query
1411 // note! formattedstring is in unicode! mg and mgpp must convert!
1412 set_queryfilter_options (request, formattedstring, args);
1413 collectproto->filter (collection, request, response, err, logout);
1414 if (err != noError) {
1415 outconvertclass text_t2ascii;
1416 logout << text_t2ascii
1417 << "queryaction::search_single_collections: call to QueryFilter failed "
1418 << "for " << collection << " collection (" << get_comerror_string (err) << ")\n";
1419
1420 }
1421
1422 if (err == noError && !args["ifl"].empty()) {
1423 // Go directly to the first matching document
1424 ResultDocInfo_tarray::iterator thissection = response.docInfo.begin();
1425 if (thissection != response.docInfo.end()) {
1426 // This URL must have "&" and not "&amp;"!
1427 text_t doc_url = "_gwcgi_?e=_compressedoptions_&a=d&c=" + collection + "&d=" + (*thissection).OID;
1428
1429 // location response (url may contain macros!!)
1430 textout << outconvert << disp << "Location: " << doc_url << "\n\n";
1431 textout << flush;
1432 return true;
1433 }
1434 // Ooops... there weren't any matching documents
1435 else {
1436 // We'll just carry on as if ifl wasn't set. The only catch is that
1437 // get_cgihead_info won't have done the right thing (because ifl was
1438 // set), so we need to make sure the output is html
1439 textout << "Content-type: text/html\n\n";
1440 }
1441 }
1442
1443 if (err != noError) {
1444 disp.setmacro("resultline", "query", "_textnodocs_");
1445 if (err == syntaxError) {
1446 disp.setmacro ("freqmsg", "query", "_textinvalidquery_");
1447 } else {
1448 disp.setmacro ("freqmsg", "query", "");
1449 }
1450 } else {
1451
1452 define_query_macros (args, disp, response.numDocs, response.isApprox);
1453 define_single_query_macros(args, disp, response);
1454 // save the query if appropriate
1455 save_search_history(args, response.numDocs, response.isApprox);
1456 }
1457
1458 // If Lucene threw a TooManyClauses exception, tell the user about it
1459 if (args["ct"] == 2 && response.error_message == "TOO_MANY_CLAUSES") {
1460 disp.setmacro ("freqmsg", "query", "_textlucenetoomanyclauses_");
1461 }
1462 }
1463 define_history_macros (disp, args, protos, logout);
1464
1465 textout << outconvert << disp << "_query:header_\n"
1466 << "_query:content_";
1467
1468 if (err == noError) {
1469 // output the results
1470 bool use_table = is_table_content (formatlistptr);
1471 bptr->output_section_group (response, args, collection, 0, formatlistptr,
1472 use_table, request.fields, request.getParents,
1473 collectproto, disp, outconvert, textout, logout);
1474 }
1475
1476 textout << outconvert << disp << "_query:footer_";
1477
1478 delete (formatlistptr);
1479
1480 return true;
1481}
1482
1483// does the formatting of the query string - either uses q for a text search
1484// or the form values for an form search
1485// also adds dates if appropriate in text search
1486void queryaction::get_formatted_query_string (text_t &formattedstring,
1487 bool segment,
1488 cgiargsclass &args,
1489 displayclass &disp,
1490 ostream &logout) {
1491 if (args["qt"]=="0" && args["qto"] != "2") { // normal text search
1492 formattedstring = args["q"];
1493 // remove & | ! for simple search,do segmentation if necessary
1494 format_querystring (formattedstring, args.getintarg("b"), segment);
1495 if (args["ct"]!=0) { // mgpp and lucene - need to add in tag info if appropriate
1496 format_field_info(formattedstring, args["fqf"], args.getintarg("ct"),
1497 args.getintarg("t"), args.getintarg("b"));
1498 }
1499
1500 add_dates(formattedstring, args.getintarg("ds"), args.getintarg("de"),
1501 args.getintarg("dsbc"), args.getintarg("debc"),
1502 args.getintarg("ct"));
1503 args["q"] = formattedstring;
1504
1505 }
1506 else if (args["qt"]=="1" || args["qto"]=="2"){ // form search
1507
1508 if (args["b"]=="1" && args["fqa"]=="1") { // explicit query
1509 formattedstring = args["q"];
1510 }
1511 else { // form search
1512 if (args["b"]=="0") { // regular form
1513 parse_reg_query_form(formattedstring, args, segment);
1514 }
1515 else { // advanced form
1516 parse_adv_query_form(formattedstring, args, segment);
1517 }
1518 args["q"] = formattedstring;
1519
1520 // reset the cgiargfqv macro - need to escape any quotes in it
1521 disp.setmacro("cgiargfqv", "query", escape_quotes(args["fqv"]));
1522
1523 // also reset the _cgiargq_ macro as it has changed now
1524 disp.setmacro("cgiargq", displayclass::defaultpackage, html_safe(args["q"]));
1525
1526 // reset the compressed options to include the q arg
1527 text_t compressedoptions = recpt->get_compressed_arg(args, logout);
1528 if (!compressedoptions.empty()) {
1529 disp.setmacro ("compressedoptions", displayclass::defaultpackage, dm_safe(compressedoptions));
1530 // need a decoded version of compressedoptions for use within forms
1531 // as browsers encode values from forms before sending to server
1532 // (e.g. %25 becomes %2525)
1533 decode_cgi_arg (compressedoptions);
1534 disp.setmacro ("decodedcompressedoptions", displayclass::defaultpackage, dm_safe(compressedoptions));
1535 }
1536 } // form search
1537 } // args["qt"]=1
1538 else {
1539 logout << "ERROR (query_action::get_formatted_query_string): querytype not defined\n";
1540 }
1541}
1542
1543
1544// define_query_macros sets the macros that couldn't be set until the
1545// query had been done. Those macros are
1546// _resultline_, _nextfirst_, _nextlast_, _prevfirst_, _prevlast_,
1547// _thisfirst_, and _thislast_ and _quotedquery_
1548// this has been simplified so it can be used with both search_single_coll
1549// and search_multiple_coll
1550void queryaction::define_query_macros (cgiargsclass &args, displayclass &disp,
1551 int numdocs, isapprox isApprox) {
1552
1553 // set up _resultline_ macro
1554 text_t resline;
1555 int maxdocs = args.getintarg("m");
1556 if (num_phrases > 0) isApprox = Exact;
1557 if (maxdocs == -1) maxdocs = numdocs;
1558 else if (numdocs > maxdocs) {
1559 numdocs = maxdocs;
1560 isApprox = MoreThan;
1561 }
1562
1563 if (isApprox == Approximate) resline = "_textapprox_";
1564 else if (isApprox == MoreThan) resline = "_textmorethan_";
1565
1566 if (numdocs == 0) resline = "_textnodocs_";
1567 else if (numdocs == 1) resline += "_text1doc_";
1568 else resline += text_t(numdocs) + " _textlotsdocs_";
1569
1570 disp.setmacro("resultline", "query", resline);
1571
1572 int firstdoc = args.getintarg("r");
1573 int hitsperpage = args.getintarg("o");
1574 if (hitsperpage == -1) hitsperpage = numdocs;
1575
1576 // set up _thisfirst_ and _thislast_ macros
1577 disp.setmacro ("thisfirst", "query", firstdoc);
1578 int thislast = firstdoc + (hitsperpage - 1);
1579 if (thislast > numdocs) thislast = numdocs;
1580 disp.setmacro ("thislast", "query", thislast);
1581
1582 // set up _prevfirst_ and _prevlast_ macros
1583 if (firstdoc > 1) {
1584 disp.setmacro ("prevlast", "query", firstdoc - 1);
1585 int prevfirst = firstdoc - hitsperpage;
1586 if (prevfirst < 1) prevfirst = 1;
1587 disp.setmacro ("prevfirst", "query", prevfirst);
1588 }
1589
1590 // set up _nextfirst_ and _nextlast_ macros
1591 if (thislast < numdocs) {
1592 disp.setmacro ("nextfirst", "query", thislast + 1);
1593 int nextlast = thislast + hitsperpage;
1594 if (nextlast > numdocs) nextlast = numdocs;
1595 disp.setmacro ("nextlast", "query", nextlast);
1596 }
1597
1598 // do quoted query here cos we may have added quotes during query pre-processing
1599 if (args["ct"]==0) { // mg queries only, not mgpp
1600 // get the quoted bits of the query string and set _quotedquery_
1601 text_tarray phrases;
1602 get_phrases (args["q"], phrases);
1603 num_phrases = phrases.size();
1604 text_tarray::const_iterator phere = phrases.begin();
1605 text_tarray::const_iterator pend = phrases.end();
1606 bool first = true;
1607 text_t quotedquery;
1608 while (phere != pend) {
1609 if (!first)
1610 if ((phere +1) == pend) quotedquery += " and ";
1611 else quotedquery += ", ";
1612
1613 quotedquery += "\"" + *phere + "\"";
1614 first = false;
1615 ++phere;
1616 }
1617 if (args.getintarg("s") && !quotedquery.empty()) quotedquery += "_textstemon_";
1618 disp.setmacro ("quotedquery", "query", quotedquery);
1619 }
1620
1621}
1622
1623// define_single_query_macros sets the extra macros for search_single_coll
1624// that couldn't be set until the query had been done. Those macros are
1625// _freqmsg_ and _stopwordsmsg_
1626void queryaction::define_single_query_macros (cgiargsclass &args,
1627 displayclass &disp,
1628 const FilterResponse_t &response) {
1629 // set up _freqmsg_ and _stopwordsmsg_ macros
1630
1631 text_t freqmsg = "";
1632 freqmsg = "_textfreqmsg1_";
1633 TermInfo_tarray::const_iterator this_term = response.termInfo.begin();
1634 TermInfo_tarray::const_iterator end_term = response.termInfo.end();
1635 while (this_term != end_term) {
1636 freqmsg += (*this_term).term + ": " + (*this_term).freq;
1637 if ((this_term + 1) != end_term)
1638 freqmsg += ", ";
1639 ++this_term;
1640 }
1641 disp.setmacro ("freqmsg", "query", freqmsg);
1642
1643 text_tset::const_iterator this_stopword = response.stopwords.begin();
1644 text_tset::const_iterator end_stopword = response.stopwords.end();
1645 if (this_stopword != end_stopword) {
1646 text_t stopwordsmsg = "_textstopwordsmsg_ ";
1647 while (this_stopword != end_stopword) {
1648 if (stopwordsmsg != "_textstopwordsmsg_ ") {
1649 stopwordsmsg += ", ";
1650 }
1651 stopwordsmsg += (*this_stopword);
1652 ++this_stopword;
1653 }
1654 disp.setmacro("stopwordsmsg", "query", stopwordsmsg);
1655 }
1656}
1657
1658// should this change for cross coll search??
1659bool queryaction::save_search_history (cgiargsclass &args, int numdocs,
1660 isapprox isApprox) {
1661 if (args["q"]=="") return true; // null query, dont save
1662 if (args["hs"]=="0") return true; // only save when submit query pressed
1663
1664 // get userid
1665 text_t userid = args["z"];
1666
1667 // the number of docs goes on the front of the query string
1668 text_t query = text_t(numdocs);
1669 if (isApprox==MoreThan) { // there were more docs found
1670 query.push_back('+');
1671 }
1672 query += "c="+args["c"];
1673 query += ";h="+args["h"];
1674 query += ";t="+args["t"];
1675 query += ";b="+args["b"];
1676 query += ";j="+args["j"];
1677 query += ";n="+args["n"];
1678 query += ";s="+args["s"];
1679 query += ";k="+args["k"];
1680 query += ";g="+args["g"];
1681
1682 text_t qstring = args["q"];
1683 //text_t formattedquery =cgi_safe(qstring);
1684 //query += "&amp;q="+formattedquery;
1685 query += ";q="+qstring;
1686 bool display=false;
1687 int hd = args.getintarg("hd");
1688 if (hd > 0) display=true;
1689 if (set_history_info(userid, query, gdbmhome, display)) return true;
1690 else return false;
1691
1692
1693}
1694
Note: See TracBrowser for help on using the repository browser.