source: trunk/gsdl/src/recpt/queryaction.cpp@ 470

Last change on this file since 470 was 470, checked in by sjboddie, 25 years ago

added advanced search option - other minor changes

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 19.9 KB
Line 
1/**********************************************************************
2 *
3 * queryaction.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * PUT COPYRIGHT NOTICE HERE
7 *
8 * $Id: queryaction.cpp 470 1999-08-25 04:47:55Z sjboddie $
9 *
10 *********************************************************************/
11
12/*
13 $Log$
14 Revision 1.20 1999/08/25 04:47:55 sjboddie
15 added advanced search option - other minor changes
16
17 Revision 1.19 1999/08/13 04:17:24 sjboddie
18 small change to do with new collection-level metadata
19
20 Revision 1.18 1999/08/10 22:46:33 sjboddie
21 changed format option result to QueryResults and added QueryLinks option
22
23 Revision 1.17 1999/07/30 02:24:42 sjboddie
24 added collectinfo argument to some functions
25
26 Revision 1.16 1999/07/19 00:16:58 sjboddie
27 no longer display documents that don't match all phrases in query string
28
29 Revision 1.15 1999/07/16 08:33:36 rjmcnab
30 Changed the logic for getting the results string slightly
31
32 Revision 1.14 1999/07/16 03:41:29 sjboddie
33 changed isApprox
34
35 Revision 1.13 1999/07/16 00:19:01 sjboddie
36 some changes to the way quoted queries are handled
37
38 Revision 1.12 1999/07/09 02:17:55 rjmcnab
39 Setting macros needed for a second query.
40
41 Revision 1.11 1999/07/07 06:13:10 rjmcnab
42 Added ability to combine two independent queries.
43
44 Revision 1.10 1999/07/07 05:49:35 sjboddie
45 had another crack at the format string code - created a new formattools
46 module. It can now handle {If} and {Or} statements although there's a
47 bug preventing nested if's and or's.
48
49 Revision 1.9 1999/07/01 22:48:46 sjboddie
50 had a go at getting a query result format string working
51
52 Revision 1.8 1999/06/27 22:02:11 sjboddie
53 author is added to queryresults if there is one
54
55 Revision 1.7 1999/06/26 01:10:18 rjmcnab
56 Made h, i, and n arguments saved in the compressed arguments.
57
58 Revision 1.6 1999/06/24 05:12:25 sjboddie
59 lots of small changes
60
61 Revision 1.5 1999/06/16 04:03:48 sjboddie
62 Now sets "cl" arg to "search" when going to a document from a search
63 results page. This allows the close book icon (in hierarchy toc) to
64 take you back to the results page if that's where you came from.
65 If you got to the document page somehow other than from a
66 classification or a search (i.e. if "cl" isn't set) then the close
67 book icon is disabled
68
69 Revision 1.4 1999/06/16 02:08:38 sjboddie
70 got queryaction working
71
72 Revision 1.3 1999/03/25 03:06:45 sjboddie
73
74 altered receptionist slightly so it now passes *collectproto to
75 define_internal_macros and define_external_macros - need it
76 for browseaction
77
78 Revision 1.2 1999/03/03 20:26:50 rjmcnab
79
80 Modified stuff.
81
82 Revision 1.1 1999/02/28 22:45:21 rjmcnab
83
84 Initial revision.
85
86 */
87
88
89#include "queryaction.h"
90#include "querytools.h"
91#include "formattools.h"
92
93queryaction::queryaction () {
94
95 num_phrases = 0;
96
97 // this action uses cgi variable "a"
98 cgiarginfo arg_ainfo;
99 arg_ainfo.shortname = "a";
100 arg_ainfo.longname = "action";
101 arg_ainfo.multiplechar = true;
102 arg_ainfo.defaultstatus = cgiarginfo::weak;
103 arg_ainfo.argdefault = "q";
104 arg_ainfo.savedarginfo = cgiarginfo::must;
105 argsinfo.addarginfo (NULL, arg_ainfo);
106
107 // "b" - 0 = simple, 1 = advanced
108 arg_ainfo.shortname = "b";
109 arg_ainfo.longname = "query mode";
110 arg_ainfo.multiplechar = false;
111 arg_ainfo.defaultstatus = cgiarginfo::weak;
112 arg_ainfo.argdefault = "0";
113 arg_ainfo.savedarginfo = cgiarginfo::must;
114 argsinfo.addarginfo (NULL, arg_ainfo);
115
116 // "h"
117 arg_ainfo.shortname = "h";
118 arg_ainfo.longname = "main index";
119 arg_ainfo.multiplechar = true;
120 arg_ainfo.defaultstatus = cgiarginfo::weak;
121 arg_ainfo.argdefault = "";
122 arg_ainfo.savedarginfo = cgiarginfo::must;
123 argsinfo.addarginfo (NULL, arg_ainfo);
124
125 // "h2"
126 arg_ainfo.shortname = "h2";
127 arg_ainfo.longname = "main index for second query";
128 arg_ainfo.multiplechar = true;
129 arg_ainfo.defaultstatus = cgiarginfo::weak;
130 arg_ainfo.argdefault = "";
131 arg_ainfo.savedarginfo = cgiarginfo::must;
132 argsinfo.addarginfo (NULL, arg_ainfo);
133
134 // "j"
135 arg_ainfo.shortname = "j";
136 arg_ainfo.longname = "sub collection index";
137 arg_ainfo.multiplechar = true;
138 arg_ainfo.defaultstatus = cgiarginfo::weak;
139 arg_ainfo.argdefault = "";
140 arg_ainfo.savedarginfo = cgiarginfo::must;
141 argsinfo.addarginfo (NULL, arg_ainfo);
142
143 // "j2"
144 arg_ainfo.shortname = "j2";
145 arg_ainfo.longname = "sub collection index for second query";
146 arg_ainfo.multiplechar = true;
147 arg_ainfo.defaultstatus = cgiarginfo::weak;
148 arg_ainfo.argdefault = "";
149 arg_ainfo.savedarginfo = cgiarginfo::must;
150 argsinfo.addarginfo (NULL, arg_ainfo);
151
152 // "n"
153 arg_ainfo.shortname = "n";
154 arg_ainfo.longname = "language index";
155 arg_ainfo.multiplechar = true;
156 arg_ainfo.defaultstatus = cgiarginfo::weak;
157 arg_ainfo.argdefault = "";
158 arg_ainfo.savedarginfo = cgiarginfo::must;
159 argsinfo.addarginfo (NULL, arg_ainfo);
160
161 // "n2"
162 arg_ainfo.shortname = "n2";
163 arg_ainfo.longname = "language index for second query";
164 arg_ainfo.multiplechar = true;
165 arg_ainfo.defaultstatus = cgiarginfo::weak;
166 arg_ainfo.argdefault = "";
167 arg_ainfo.savedarginfo = cgiarginfo::must;
168 argsinfo.addarginfo (NULL, arg_ainfo);
169
170 // "q"
171 arg_ainfo.shortname = "q";
172 arg_ainfo.longname = "query string";
173 arg_ainfo.multiplechar = true;
174 arg_ainfo.defaultstatus = cgiarginfo::weak;
175 arg_ainfo.argdefault = "";
176 arg_ainfo.savedarginfo = cgiarginfo::must;
177 argsinfo.addarginfo (NULL, arg_ainfo);
178
179 // "q2"
180 arg_ainfo.shortname = "q2";
181 arg_ainfo.longname = "query string for second query";
182 arg_ainfo.multiplechar = true;
183 arg_ainfo.defaultstatus = cgiarginfo::weak;
184 arg_ainfo.argdefault = "";
185 arg_ainfo.savedarginfo = cgiarginfo::must;
186 argsinfo.addarginfo (NULL, arg_ainfo);
187
188 // "cq2" ""=don't combine, "and", "or", "not"
189 arg_ainfo.shortname = "cq2";
190 arg_ainfo.longname = "combine queries";
191 arg_ainfo.multiplechar = true;
192 arg_ainfo.defaultstatus = cgiarginfo::weak;
193 arg_ainfo.argdefault = "";
194 arg_ainfo.savedarginfo = cgiarginfo::must;
195 argsinfo.addarginfo (NULL, arg_ainfo);
196
197 // "t" - 1 = ranked 0 = boolean
198 arg_ainfo.shortname = "t";
199 arg_ainfo.longname = "search type";
200 arg_ainfo.multiplechar = false;
201 arg_ainfo.defaultstatus = cgiarginfo::weak;
202 arg_ainfo.argdefault = "1";
203 arg_ainfo.savedarginfo = cgiarginfo::must;
204 argsinfo.addarginfo (NULL, arg_ainfo);
205
206 // "k"
207 arg_ainfo.shortname = "k";
208 arg_ainfo.longname = "casefolding";
209 arg_ainfo.multiplechar = false;
210 arg_ainfo.defaultstatus = cgiarginfo::weak;
211 arg_ainfo.argdefault = "1";
212 arg_ainfo.savedarginfo = cgiarginfo::must;
213 argsinfo.addarginfo (NULL, arg_ainfo);
214
215 // "s"
216 arg_ainfo.shortname = "s";
217 arg_ainfo.longname = "stemming";
218 arg_ainfo.multiplechar = false;
219 arg_ainfo.defaultstatus = cgiarginfo::weak;
220 arg_ainfo.argdefault ="0";
221 arg_ainfo.savedarginfo = cgiarginfo::must;
222 argsinfo.addarginfo (NULL, arg_ainfo);
223
224 // "m"
225 arg_ainfo.shortname = "m";
226 arg_ainfo.longname = "maximum number of documents";
227 arg_ainfo.multiplechar = true;
228 arg_ainfo.defaultstatus = cgiarginfo::weak;
229 arg_ainfo.argdefault = "50";
230 arg_ainfo.savedarginfo = cgiarginfo::must;
231 argsinfo.addarginfo (NULL, arg_ainfo);
232
233 // "o"
234 arg_ainfo.shortname = "o";
235 arg_ainfo.longname = "hits per page";
236 arg_ainfo.multiplechar = true;
237 arg_ainfo.defaultstatus = cgiarginfo::weak;
238 arg_ainfo.argdefault = "20";
239 arg_ainfo.savedarginfo = cgiarginfo::must;
240 argsinfo.addarginfo (NULL, arg_ainfo);
241
242 // "r"
243 arg_ainfo.shortname = "r";
244 arg_ainfo.longname = "start results from";
245 arg_ainfo.multiplechar = true;
246 arg_ainfo.defaultstatus = cgiarginfo::weak;
247 arg_ainfo.argdefault = "1";
248 arg_ainfo.savedarginfo = cgiarginfo::must;
249 argsinfo.addarginfo (NULL, arg_ainfo);
250}
251
252void queryaction::configure (const text_t &key, const text_tarray &cfgline) {
253 action::configure (key, cfgline);
254}
255
256bool queryaction::init (ostream &logout) {
257 return action::init (logout);
258}
259
260bool queryaction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args,
261 ostream &logout) {
262
263 // check t argument
264 int arg_t = args.getintarg("t");
265 if (arg_t != 0 && arg_t != 1) {
266 logout << "Warning: \"t\" argument out of range (" << arg_t << ")\n";
267 cgiarginfo *tinfo = argsinfo.getarginfo ("t");
268 if (tinfo != NULL) args["t"] = tinfo->argdefault;
269 }
270
271 // check k argument
272 int arg_k = args.getintarg("k");
273 if (arg_k != 0 && arg_k != 1) {
274 logout << "Warning: \"k\" argument out of range (" << arg_k << ")\n";
275 cgiarginfo *kinfo = argsinfo.getarginfo ("k");
276 if (kinfo != NULL) args["k"] = kinfo->argdefault;
277 }
278
279 // check s argument
280 int arg_s = args.getintarg("s");
281 if (arg_s != 0 && arg_s != 1) {
282 logout << "Warning: \"s\" argument out of range (" << arg_s << ")\n";
283 cgiarginfo *sinfo = argsinfo.getarginfo ("s");
284 if (sinfo != NULL) args["s"] = sinfo->argdefault;
285 }
286
287 // check m argument
288 int arg_m = args.getintarg("m");
289 if (arg_m < 0) {
290 logout << "Warning: \"m\" argument less than 0 (" << arg_m << ")\n";
291 cgiarginfo *minfo = argsinfo.getarginfo ("m");
292 if (minfo != NULL) args["m"] = minfo->argdefault;
293 }
294
295 // check o argument
296 int arg_o = args.getintarg("o");
297 if (arg_o < 0) {
298 logout << "Warning: \"o\" argument less than 0 (" << arg_o << ")\n";
299 cgiarginfo *oinfo = argsinfo.getarginfo ("o");
300 if (oinfo != NULL) args["o"] = oinfo->argdefault;
301 }
302
303 // check r argument
304 int arg_r = args.getintarg("r");
305 if (arg_r < 1) {
306 logout << "Warning: \"r\" argument less than 1 (" << arg_r << ")\n";
307 cgiarginfo *rinfo = argsinfo.getarginfo ("r");
308 if (rinfo != NULL) args["r"] = rinfo->argdefault;
309 }
310
311 return true;
312}
313
314void queryaction::get_cgihead_info (cgiargsclass &/*args*/, response_t &response,
315 text_t &response_data, ostream &/*logout*/) {
316 response = content;
317 response_data = "text/html";
318}
319
320void queryaction::define_internal_macros (const ColInfoResponse_t &/*collectinfo*/, displayclass &disp,
321 cgiargsclass &args, recptproto */*collectproto*/,
322 ostream &/*logout*/) {
323
324 // define_internal_macros sets the following macros:
325
326 // _quotedquery_ the part of the query string that was quoted for post-processing
327
328
329
330 // The following macros are set later (in define_query_macros) as they can't be set until
331 // the query has been done.
332
333 // _freqmsg_ the term frequency string
334
335 // _resultline_ the "x documents matched the query" string
336
337 // _prevfirst_ these are used when setting up the links to previous/next
338 // _prevlast_ pages of results (_thisfirst_ and _thislast_ are used to set
339 // _nextfirst_ the 'results x-x for query: xxxx' string in the title bar)
340 // _nextlast_
341 // _thisfirst_
342 // _thislast_
343
344
345 // get the quoted bits of the query string and set _quotedquery_
346 text_tarray phrases;
347 get_phrases (args["q"], phrases);
348 text_tarray::const_iterator phere = phrases.begin();
349 text_tarray::const_iterator pend = phrases.end();
350 bool first = true;
351 text_t quotedquery;
352 while (phere != pend) {
353 if (!first)
354 if ((phere +1) == pend) quotedquery += " and ";
355 else quotedquery += ", ";
356
357 quotedquery += "\"" + *phere + "\"";
358 first = false;
359 phere ++;
360 }
361 if (args.getintarg("s")) quotedquery += "_textstemon_";
362 disp.setmacro ("quotedquery", "query", quotedquery);
363
364 // we'll also set num_phrases here so we don't have to parse the
365 // querystring again later (we need to know this before outputting
366 // results so we don't include results for documents not containing
367 // all requested phrases).
368 num_phrases = phrases.size();
369
370}
371
372// sets the selection box macros _hselection_, _jselection_, and _nselection_.
373void queryaction::set_option_macro (const text_t &macroname, text_t current_value,
374 const FilterOption_t &option, displayclass &disp) {
375
376 if (option.validValues.size() < 2) return;
377
378 text_t macrovalue = "<select name=\"" + macroname + "\">\n";
379
380 if (current_value.empty()) current_value = option.defaultValue;
381
382 text_tarray::const_iterator thisvalue = option.validValues.begin();
383 text_tarray::const_iterator endvalue = option.validValues.end();
384
385 while (thisvalue != endvalue) {
386 macrovalue += "<option value=\"" + *thisvalue + "\"";
387 if (*thisvalue == current_value)
388 macrovalue += " selected";
389 macrovalue += ">_" + *thisvalue + "_\n";
390 thisvalue ++;
391 }
392 macrovalue += "</select>\n";
393 disp.setmacro (macroname + "selection", "Global", macrovalue);
394}
395
396void queryaction::define_external_macros (const ColInfoResponse_t &/*collectinfo*/, displayclass &disp,
397 cgiargsclass &args, recptproto *collectproto,
398 ostream &logout) {
399
400 // define_external_macros sets the following macros:
401
402 // some or all of these may not be required to be set
403 // _hselection_, _h2selection_ the selection box for the main part of the index
404 // _jselection_, _j2selection_ the selection box for the subcollection part of the index
405 // _nselection_, _n2selection_ the selection box for the language part of the index
406 // _cq2selection the selection box for combining two queries
407
408
409 // can't do anything if collectproto is null (i.e. no collection was specified)
410 if (collectproto == NULL) return;
411
412 comerror_t err;
413 InfoFilterOptionsResponse_t response;
414 InfoFilterOptionsRequest_t request;
415 request.filterName = "QueryFilter";
416
417 collectproto->get_filteroptions (args["c"], request, response, err, logout);
418 if (err == noError) {
419
420 FilterOption_tmap::const_iterator it;
421 FilterOption_tmap::const_iterator end = response.filterOptions.end();
422
423 // _hselection_ and _h2selection_ (Index)
424 it = response.filterOptions.find ("Index");
425 if (it != end) set_option_macro ("h", args["h"], (*it).second, disp);
426 if (it != end) set_option_macro ("h2", args["h2"], (*it).second, disp);
427
428 // _jselection_ and _j2selection_ (Subcollection)
429 it = response.filterOptions.find ("Subcollection");
430 if (it != end) set_option_macro ("j", args["j"], (*it).second, disp);
431 if (it != end) set_option_macro ("j2", args["j2"], (*it).second, disp);
432
433 // _nselection_ and _n2selection_ (Language)
434 it = response.filterOptions.find ("Language");
435 if (it != end) set_option_macro ("n", args["n"], (*it).second, disp);
436 if (it != end) set_option_macro ("n2", args["n2"], (*it).second, disp);
437
438 // _cq2selection_ (CombineQuery)
439 it = response.filterOptions.find ("CombineQuery");
440 if (it != end) set_option_macro ("cq2", args["cq2"], (*it).second, disp);
441 }
442}
443
444bool queryaction::do_action (cgiargsclass &args, const ColInfoResponse_t &collectinfo,
445 recptproto *collectproto, displayclass &disp,
446 outconvertclass &outconvert, ostream &textout,
447 ostream &logout) {
448
449 if (formatstring.empty()) {
450 text_tmap::const_iterator result = collectinfo.format.find("QueryResults");
451 if (result != collectinfo.format.end())
452 formatstring = (*result).second;
453 }
454
455 // see if there's a QueryLinks format option
456 text_t querylinkmeta;
457 bool havequerylink = false;
458 text_tmap::const_iterator it = collectinfo.format.find("QueryLinks");
459 if (it != collectinfo.format.end()) {
460 querylinkmeta = (*it).second;
461 havequerylink = true;
462 }
463
464 // if we still don't have a format string use the default
465 if (formatstring.empty())
466 formatstring = "<td valign=top nowrap>[link]_icontext_[/link]</td><td>[Title]</td>";
467
468 if (collectproto == NULL) {
469 logout << "queryaction::do_action called with NULL collectproto\n";
470 textout << outconvert << disp << "_query:header_\n"
471 << "Error: Attempt to do query without setting collection\n"
472 << "_query:footer_\n";
473 } else {
474
475 FilterRequest_t request;
476 FilterResponse_t response;
477 format_t *formatlistptr = new format_t();
478
479 parse_formatstring (formatstring, formatlistptr, request.fields, request.getParents);
480
481 if (havequerylink)
482 request.fields.push_back (querylinkmeta);
483
484 int metasize = request.fields.size();
485
486 // do the query
487 request.filterResultOptions = FROID | FRmetadata | FRtermFreq;
488 if (!do_query (request, args, collectproto, response, logout))
489 return false;
490
491 // set macros
492 define_query_macros (args, disp, response);
493
494 // output the header
495 textout << outconvert << disp << "_query:header_\n"
496 << "_query:content_";
497
498 // output the results
499 textout << "<table cellspacing=4>\n";
500 ResultDocInfo_tarray::const_iterator this_doc = response.docInfo.begin();
501 ResultDocInfo_tarray::const_iterator end_doc = response.docInfo.end();
502
503 while (this_doc != end_doc) {
504 // don't include docs that didn't match phrases (if there were any)
505 // those that did match will have been sorted to the top
506 if ((*this_doc).num_phrase_match < num_phrases) break;
507 textout << "<tr>\n";
508 if (havequerylink) {
509 const text_t &qlmeta = (*this_doc).metadata[metasize-1].values.back();
510 if (qlmeta.empty())
511 textout << outconvert << disp
512 << get_formatted_string (*this_doc, formatlistptr, "", "_iconblanktext_") << "\n";
513 else
514 textout << outconvert << disp
515 << get_formatted_string (*this_doc, formatlistptr) << "\n";
516 } else {
517 textout << outconvert << disp
518 << get_formatted_string (*this_doc, formatlistptr) << "\n";
519 }
520 textout << "</tr>\n";
521
522 this_doc ++;
523 }
524 textout << "</table>\n";
525
526 delete (formatlistptr);
527
528 // output the footer
529 textout << outconvert << disp << "_query:footer_";
530 }
531
532 return true;
533}
534
535// define_query_macros sets the macros that couldn't be set until the
536// query had been done. Those macros are _freqmsg_, _quotedquery_,
537// _resultline_, _nextfirst_, _nextlast_, _prevfirst_, _prevlast_,
538// _thisfirst_, and _thislast_
539void queryaction::define_query_macros (cgiargsclass &args, displayclass &disp,
540 const FilterResponse_t &response) {
541 // set up _freqmsg_ and _quotedquery_ macros
542 text_t freqmsg = "_textfreqmsg1_";
543 TermInfo_tarray::const_iterator this_term = response.termInfo.begin();
544 TermInfo_tarray::const_iterator end_term = response.termInfo.end();
545 while (this_term != end_term) {
546 freqmsg += (*this_term).term + ": " + (*this_term).freq;
547 if ((this_term + 1) != end_term)
548 freqmsg += ", ";
549 this_term ++;
550 }
551
552 disp.setmacro ("freqmsg", "query", freqmsg);
553
554
555 // set up _resultline_ macro
556 text_t resline;
557 int maxdocs = args.getintarg("m");
558 int numdocs = response.numDocs;
559 isapprox isApprox = response.isApprox;
560
561 // if there were phrases (post-processing) we're not going to include
562 // those documents that didn't match
563 if (num_phrases > 0) {
564 numdocs = 0;
565 isApprox = Exact;
566 ResultDocInfo_tarray::const_iterator this_doc = response.docInfo.begin();
567 ResultDocInfo_tarray::const_iterator end_doc = response.docInfo.end();
568 while (this_doc != end_doc) {
569 if ((*this_doc).num_phrase_match == num_phrases) numdocs ++;
570 else break; // we can bail here as matching docs are sorted to top
571 this_doc++;
572 }
573 }
574
575 // if (isApprox == MoreThan && numdocs > maxdocs) numdocs = maxdocs;
576 if (numdocs > maxdocs) {numdocs = maxdocs; isApprox = MoreThan;}
577
578 if (isApprox == Approximate) resline = "_textapprox_";
579 else if (isApprox == MoreThan) resline = "_textmorethan_";
580
581 if (numdocs == 0) resline = "_textnodocs_";
582 else if (numdocs == 1) resline += "_text1doc_";
583 else resline += text_t(numdocs) + " _textlotsdocs_";
584
585 disp.setmacro("resultline", "query", resline);
586
587
588 int firstdoc = args.getintarg("r");
589 int hitsperpage = args.getintarg("o");
590
591 // set up _thisfirst_ and _thislast_ macros
592 disp.setmacro ("thisfirst", "query", firstdoc);
593 int thislast = firstdoc + (hitsperpage - 1);
594 if (thislast > numdocs) thislast = numdocs;
595 disp.setmacro ("thislast", "query", thislast);
596
597 // set up _prevfirst_ and _prevlast_ macros
598 if (firstdoc > 1) {
599 disp.setmacro ("prevlast", "query", firstdoc - 1);
600 int prevfirst = firstdoc - hitsperpage;
601 if (prevfirst < 1) prevfirst = 1;
602 disp.setmacro ("prevfirst", "query", prevfirst);
603 }
604
605 // set up _nextfirst_ and _nextlast_ macros
606 if (thislast < numdocs) {
607 disp.setmacro ("nextfirst", "query", thislast + 1);
608 int nextlast = thislast + hitsperpage;
609 if (nextlast > numdocs) nextlast = numdocs;
610 disp.setmacro ("nextlast", "query", nextlast);
611 }
612}
613
614
615
Note: See TracBrowser for help on using the repository browser.