source: trunk/gsdl/src/recpt/queryaction.cpp@ 403

Last change on this file since 403 was 403, checked in by sjboddie, 25 years ago

no longer display documents that don't match all phrases in query string

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 18.8 KB
Line 
1/**********************************************************************
2 *
3 * queryaction.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * PUT COPYRIGHT NOTICE HERE
7 *
8 * $Id: queryaction.cpp 403 1999-07-19 00:16:59Z sjboddie $
9 *
10 *********************************************************************/
11
12/*
13 $Log$
14 Revision 1.16 1999/07/19 00:16:58 sjboddie
15 no longer display documents that don't match all phrases in query string
16
17 Revision 1.15 1999/07/16 08:33:36 rjmcnab
18 Changed the logic for getting the results string slightly
19
20 Revision 1.14 1999/07/16 03:41:29 sjboddie
21 changed isApprox
22
23 Revision 1.13 1999/07/16 00:19:01 sjboddie
24 some changes to the way quoted queries are handled
25
26 Revision 1.12 1999/07/09 02:17:55 rjmcnab
27 Setting macros needed for a second query.
28
29 Revision 1.11 1999/07/07 06:13:10 rjmcnab
30 Added ability to combine two independent queries.
31
32 Revision 1.10 1999/07/07 05:49:35 sjboddie
33 had another crack at the format string code - created a new formattools
34 module. It can now handle {If} and {Or} statements although there's a
35 bug preventing nested if's and or's.
36
37 Revision 1.9 1999/07/01 22:48:46 sjboddie
38 had a go at getting a query result format string working
39
40 Revision 1.8 1999/06/27 22:02:11 sjboddie
41 author is added to queryresults if there is one
42
43 Revision 1.7 1999/06/26 01:10:18 rjmcnab
44 Made h, i, and n arguments saved in the compressed arguments.
45
46 Revision 1.6 1999/06/24 05:12:25 sjboddie
47 lots of small changes
48
49 Revision 1.5 1999/06/16 04:03:48 sjboddie
50 Now sets "cl" arg to "search" when going to a document from a search
51 results page. This allows the close book icon (in hierarchy toc) to
52 take you back to the results page if that's where you came from.
53 If you got to the document page somehow other than from a
54 classification or a search (i.e. if "cl" isn't set) then the close
55 book icon is disabled
56
57 Revision 1.4 1999/06/16 02:08:38 sjboddie
58 got queryaction working
59
60 Revision 1.3 1999/03/25 03:06:45 sjboddie
61
62 altered receptionist slightly so it now passes *collectproto to
63 define_internal_macros and define_external_macros - need it
64 for browseaction
65
66 Revision 1.2 1999/03/03 20:26:50 rjmcnab
67
68 Modified stuff.
69
70 Revision 1.1 1999/02/28 22:45:21 rjmcnab
71
72 Initial revision.
73
74 */
75
76
77#include "queryaction.h"
78#include "querytools.h"
79#include "formattools.h"
80
81queryaction::queryaction () {
82
83 num_phrases = 0;
84
85 // this action uses cgi variable "a"
86 cgiarginfo arg_ainfo;
87 arg_ainfo.shortname = "a";
88 arg_ainfo.longname = "action";
89 arg_ainfo.multiplechar = true;
90 arg_ainfo.defaultstatus = cgiarginfo::weak;
91 arg_ainfo.argdefault = "q";
92 arg_ainfo.savedarginfo = cgiarginfo::must;
93 argsinfo.addarginfo (NULL, arg_ainfo);
94
95 // "h"
96 arg_ainfo.shortname = "h";
97 arg_ainfo.longname = "main index";
98 arg_ainfo.multiplechar = true;
99 arg_ainfo.defaultstatus = cgiarginfo::weak;
100 arg_ainfo.argdefault = "";
101 arg_ainfo.savedarginfo = cgiarginfo::must;
102 argsinfo.addarginfo (NULL, arg_ainfo);
103
104 // "h2"
105 arg_ainfo.shortname = "h2";
106 arg_ainfo.longname = "main index for second query";
107 arg_ainfo.multiplechar = true;
108 arg_ainfo.defaultstatus = cgiarginfo::weak;
109 arg_ainfo.argdefault = "";
110 arg_ainfo.savedarginfo = cgiarginfo::must;
111 argsinfo.addarginfo (NULL, arg_ainfo);
112
113 // "j"
114 arg_ainfo.shortname = "j";
115 arg_ainfo.longname = "sub collection index";
116 arg_ainfo.multiplechar = true;
117 arg_ainfo.defaultstatus = cgiarginfo::weak;
118 arg_ainfo.argdefault = "";
119 arg_ainfo.savedarginfo = cgiarginfo::must;
120 argsinfo.addarginfo (NULL, arg_ainfo);
121
122 // "j2"
123 arg_ainfo.shortname = "j2";
124 arg_ainfo.longname = "sub collection index for second query";
125 arg_ainfo.multiplechar = true;
126 arg_ainfo.defaultstatus = cgiarginfo::weak;
127 arg_ainfo.argdefault = "";
128 arg_ainfo.savedarginfo = cgiarginfo::must;
129 argsinfo.addarginfo (NULL, arg_ainfo);
130
131 // "n"
132 arg_ainfo.shortname = "n";
133 arg_ainfo.longname = "language index";
134 arg_ainfo.multiplechar = true;
135 arg_ainfo.defaultstatus = cgiarginfo::weak;
136 arg_ainfo.argdefault = "";
137 arg_ainfo.savedarginfo = cgiarginfo::must;
138 argsinfo.addarginfo (NULL, arg_ainfo);
139
140 // "n2"
141 arg_ainfo.shortname = "n2";
142 arg_ainfo.longname = "language index for second query";
143 arg_ainfo.multiplechar = true;
144 arg_ainfo.defaultstatus = cgiarginfo::weak;
145 arg_ainfo.argdefault = "";
146 arg_ainfo.savedarginfo = cgiarginfo::must;
147 argsinfo.addarginfo (NULL, arg_ainfo);
148
149 // "q"
150 arg_ainfo.shortname = "q";
151 arg_ainfo.longname = "query string";
152 arg_ainfo.multiplechar = true;
153 arg_ainfo.defaultstatus = cgiarginfo::weak;
154 arg_ainfo.argdefault = "";
155 arg_ainfo.savedarginfo = cgiarginfo::must;
156 argsinfo.addarginfo (NULL, arg_ainfo);
157
158 // "q2"
159 arg_ainfo.shortname = "q2";
160 arg_ainfo.longname = "query string for second query";
161 arg_ainfo.multiplechar = true;
162 arg_ainfo.defaultstatus = cgiarginfo::weak;
163 arg_ainfo.argdefault = "";
164 arg_ainfo.savedarginfo = cgiarginfo::must;
165 argsinfo.addarginfo (NULL, arg_ainfo);
166
167 // "cq2" ""=don't combine, "and", "or", "not"
168 arg_ainfo.shortname = "cq2";
169 arg_ainfo.longname = "combine queries";
170 arg_ainfo.multiplechar = true;
171 arg_ainfo.defaultstatus = cgiarginfo::weak;
172 arg_ainfo.argdefault = "";
173 arg_ainfo.savedarginfo = cgiarginfo::must;
174 argsinfo.addarginfo (NULL, arg_ainfo);
175
176 // "t" - 1 = ranked 0 = boolean
177 arg_ainfo.shortname = "t";
178 arg_ainfo.longname = "search type";
179 arg_ainfo.multiplechar = false;
180 arg_ainfo.defaultstatus = cgiarginfo::weak;
181 arg_ainfo.argdefault = "1";
182 arg_ainfo.savedarginfo = cgiarginfo::must;
183 argsinfo.addarginfo (NULL, arg_ainfo);
184
185 // "k"
186 arg_ainfo.shortname = "k";
187 arg_ainfo.longname = "casefolding";
188 arg_ainfo.multiplechar = false;
189 arg_ainfo.defaultstatus = cgiarginfo::weak;
190 arg_ainfo.argdefault = "1";
191 arg_ainfo.savedarginfo = cgiarginfo::must;
192 argsinfo.addarginfo (NULL, arg_ainfo);
193
194 // "s"
195 arg_ainfo.shortname = "s";
196 arg_ainfo.longname = "stemming";
197 arg_ainfo.multiplechar = false;
198 arg_ainfo.defaultstatus = cgiarginfo::weak;
199 arg_ainfo.argdefault ="0";
200 arg_ainfo.savedarginfo = cgiarginfo::must;
201 argsinfo.addarginfo (NULL, arg_ainfo);
202
203 // "m"
204 arg_ainfo.shortname = "m";
205 arg_ainfo.longname = "maximum number of documents";
206 arg_ainfo.multiplechar = true;
207 arg_ainfo.defaultstatus = cgiarginfo::weak;
208 arg_ainfo.argdefault = "50";
209 arg_ainfo.savedarginfo = cgiarginfo::must;
210 argsinfo.addarginfo (NULL, arg_ainfo);
211
212 // "o"
213 arg_ainfo.shortname = "o";
214 arg_ainfo.longname = "hits per page";
215 arg_ainfo.multiplechar = true;
216 arg_ainfo.defaultstatus = cgiarginfo::weak;
217 arg_ainfo.argdefault = "20";
218 arg_ainfo.savedarginfo = cgiarginfo::must;
219 argsinfo.addarginfo (NULL, arg_ainfo);
220
221 // "r"
222 arg_ainfo.shortname = "r";
223 arg_ainfo.longname = "start results from";
224 arg_ainfo.multiplechar = true;
225 arg_ainfo.defaultstatus = cgiarginfo::weak;
226 arg_ainfo.argdefault = "1";
227 arg_ainfo.savedarginfo = cgiarginfo::must;
228 argsinfo.addarginfo (NULL, arg_ainfo);
229}
230
231void queryaction::configure (const text_t &key, const text_tarray &cfgline) {
232 action::configure (key, cfgline);
233}
234
235bool queryaction::init (ostream &logout) {
236 return action::init (logout);
237}
238
239bool queryaction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args,
240 ostream &logout) {
241
242 // check t argument
243 int arg_t = args.getintarg("t");
244 if (arg_t != 0 && arg_t != 1) {
245 logout << "Warning: \"t\" argument out of range (" << arg_t << ")\n";
246 cgiarginfo *tinfo = argsinfo.getarginfo ("t");
247 if (tinfo != NULL) args["t"] = tinfo->argdefault;
248 }
249
250 // check k argument
251 int arg_k = args.getintarg("k");
252 if (arg_k != 0 && arg_k != 1) {
253 logout << "Warning: \"k\" argument out of range (" << arg_k << ")\n";
254 cgiarginfo *kinfo = argsinfo.getarginfo ("k");
255 if (kinfo != NULL) args["k"] = kinfo->argdefault;
256 }
257
258 // check s argument
259 int arg_s = args.getintarg("s");
260 if (arg_s != 0 && arg_s != 1) {
261 logout << "Warning: \"s\" argument out of range (" << arg_s << ")\n";
262 cgiarginfo *sinfo = argsinfo.getarginfo ("s");
263 if (sinfo != NULL) args["s"] = sinfo->argdefault;
264 }
265
266 // check m argument
267 int arg_m = args.getintarg("m");
268 if (arg_m < 0) {
269 logout << "Warning: \"m\" argument less than 0 (" << arg_m << ")\n";
270 cgiarginfo *minfo = argsinfo.getarginfo ("m");
271 if (minfo != NULL) args["m"] = minfo->argdefault;
272 }
273
274 // check o argument
275 int arg_o = args.getintarg("o");
276 if (arg_o < 0) {
277 logout << "Warning: \"o\" argument less than 0 (" << arg_o << ")\n";
278 cgiarginfo *oinfo = argsinfo.getarginfo ("o");
279 if (oinfo != NULL) args["o"] = oinfo->argdefault;
280 }
281
282 // check r argument
283 int arg_r = args.getintarg("r");
284 if (arg_r < 1) {
285 logout << "Warning: \"r\" argument less than 1 (" << arg_r << ")\n";
286 cgiarginfo *rinfo = argsinfo.getarginfo ("r");
287 if (rinfo != NULL) args["r"] = rinfo->argdefault;
288 }
289
290 return true;
291}
292
293void queryaction::get_cgihead_info (cgiargsclass &/*args*/, response_t &response,
294 text_t &response_data, ostream &/*logout*/) {
295 response = content;
296 response_data = "text/html";
297}
298
299void queryaction::define_internal_macros (displayclass &disp, cgiargsclass &args,
300 recptproto */*collectproto*/, ostream &/*logout*/) {
301
302 // define_internal_macros sets the following macros:
303
304 // _quotedquery_ the part of the query string that was quoted for post-processing
305
306
307
308 // The following macros are set later (in define_query_macros) as they can't be set until
309 // the query has been done.
310
311 // _freqmsg_ the term frequency string
312
313 // _resultline_ the "x documents matched the query" string
314
315 // _prevfirst_ these are used when setting up the links to previous/next
316 // _prevlast_ pages of results (_thisfirst_ and _thislast_ are used to set
317 // _nextfirst_ the 'results x-x for query: xxxx' string in the title bar)
318 // _nextlast_
319 // _thisfirst_
320 // _thislast_
321
322
323 // get the quoted bits of the query string and set _quotedquery_
324 text_tarray phrases;
325 get_phrases (args["q"], phrases);
326 text_tarray::const_iterator phere = phrases.begin();
327 text_tarray::const_iterator pend = phrases.end();
328 bool first = true;
329 text_t quotedquery;
330 while (phere != pend) {
331 if (!first)
332 if ((phere +1) == pend) quotedquery += " and ";
333 else quotedquery += ", ";
334
335 quotedquery += "\"" + *phere + "\"";
336 first = false;
337 phere ++;
338 }
339 if (args.getintarg("s")) quotedquery += "_textstemon_";
340 disp.setmacro ("quotedquery", "query", quotedquery);
341
342 // we'll also set num_phrases here so we don't have to parse the
343 // querystring again later (we need to know this before outputting
344 // results so we don't include results for documents not containing
345 // all requested phrases).
346 num_phrases = phrases.size();
347
348}
349
350// sets the selection box macros _hselection_, _jselection_, and _nselection_.
351// each option will need an _optionxoption_ macro (i.e. an _hselection_ macro
352// with options stx and ptx will need _optionhstx_ and _optionhptx_ macros)
353void queryaction::set_option_macro (const text_t &macroname, text_t current_value,
354 const FilterOption_t &option, displayclass &disp) {
355
356 if (option.validValues.size() < 2) return;
357
358 text_t macrovalue = "<select name=\"" + macroname + "\">\n";
359
360 if (current_value.empty()) current_value = option.defaultValue;
361
362 text_tarray::const_iterator thisvalue = option.validValues.begin();
363 text_tarray::const_iterator endvalue = option.validValues.end();
364
365 while (thisvalue != endvalue) {
366 macrovalue += "<option value=\"" + *thisvalue + "\"";
367 if (*thisvalue == current_value)
368 macrovalue += " selected";
369 macrovalue += ">_option" + macroname + *thisvalue + "_\n";
370 thisvalue ++;
371 }
372 macrovalue += "</select>\n";
373 disp.setmacro (macroname + "selection", "Global", macrovalue);
374}
375
376void queryaction::define_external_macros (displayclass &disp, cgiargsclass &args,
377 recptproto *collectproto, ostream &logout) {
378
379 // define_external_macros sets the following macros:
380
381 // some or all of these may not be required to be set
382 // _hselection_, _h2selection_ the selection box for the main part of the index
383 // _jselection_, _j2selection_ the selection box for the subcollection part of the index
384 // _nselection_, _n2selection_ the selection box for the language part of the index
385 // _cq2selection the selection box for combining two queries
386
387
388 // can't do anything if collectproto is null (i.e. no collection was specified)
389 if (collectproto == NULL) return;
390
391 comerror_t err;
392 InfoFilterOptionsResponse_t response;
393 InfoFilterOptionsRequest_t request;
394 request.filterName = "QueryFilter";
395
396 collectproto->get_filteroptions (args["c"], request, response, err, logout);
397 if (err == noError) {
398
399 FilterOption_tmap::const_iterator it;
400 FilterOption_tmap::const_iterator end = response.filterOptions.end();
401
402 // _hselection_ and _h2selection_ (Index)
403 it = response.filterOptions.find ("Index");
404 if (it != end) set_option_macro ("h", args["h"], (*it).second, disp);
405 if (it != end) set_option_macro ("h2", args["h2"], (*it).second, disp);
406
407 // _jselection_ and _j2selection_ (Subcollection)
408 it = response.filterOptions.find ("Subcollection");
409 if (it != end) set_option_macro ("j", args["j"], (*it).second, disp);
410 if (it != end) set_option_macro ("j2", args["j2"], (*it).second, disp);
411
412 // _nselection_ and _n2selection_ (Language)
413 it = response.filterOptions.find ("Language");
414 if (it != end) set_option_macro ("n", args["n"], (*it).second, disp);
415 if (it != end) set_option_macro ("n2", args["n2"], (*it).second, disp);
416
417 // _cq2selection_ (CombineQuery)
418 it = response.filterOptions.find ("CombineQuery");
419 if (it != end) set_option_macro ("cq2", args["cq2"], (*it).second, disp);
420 }
421}
422
423bool queryaction::do_action (cgiargsclass &args, recptproto *collectproto,
424 displayclass &disp, outconvertclass &outconvert,
425 ostream &textout, ostream &logout) {
426
427 // if we have no format string see if the collection server has one
428 if (formatstring.empty()) {
429 ColInfoResponse_t collectinfo;
430 comerror_t err;
431 collectproto->get_collectinfo (args["c"], collectinfo, err, logout);
432 if (err == noError) {
433 text_tmap::const_iterator result = collectinfo.format.find("result");
434 if (result != collectinfo.format.end())
435 formatstring = (*result).second;
436 }
437 }
438 // if we still don't have a format string use the default
439 if (formatstring.empty())
440 formatstring = "<td valign=top nowrap>[link]_icontext_[/link]</td><td>[Title]</td>";
441
442 if (collectproto == NULL) {
443 logout << "queryaction::do_action called with NULL collectproto\n";
444 textout << outconvert << disp << "_query:header_\n"
445 << "Error: Attempt to do query without setting collection\n"
446 << "_query:footer_\n";
447 } else {
448
449 FilterRequest_t request;
450 FilterResponse_t response;
451 format_t *formatlistptr = new format_t();
452
453 parse_formatstring (formatstring, formatlistptr, request.fields, request.getParents);
454
455 // do the query
456 request.filterResultOptions = FROID | FRmetadata | FRtermFreq;
457 if (!do_query (request, args, collectproto, response, logout))
458 return false;
459
460 // set macros
461 define_query_macros (args, disp, response);
462
463 // output the header
464 textout << outconvert << disp << "_query:header_\n"
465 << "_query:content_";
466
467 // output the results
468 textout << "<table cellspacing=4>\n";
469 ResultDocInfo_tarray::const_iterator this_doc = response.docInfo.begin();
470 ResultDocInfo_tarray::const_iterator end_doc = response.docInfo.end();
471
472 while (this_doc != end_doc) {
473 // don't include docs that didn't match phrases (if there were any)
474 // those that did match will have been sorted to the top
475 if ((*this_doc).num_phrase_match < num_phrases) break;
476 textout << outconvert << disp << "<tr>\n"
477
478 // << "<td valign=top nowrap>r: " << (*this_doc).ranking
479 // << " t: " << (*this_doc).num_terms_matched << " p: "
480 // << (*this_doc).num_phrase_match << "</td>\n"
481
482 << get_formatted_string (*this_doc, formatlistptr) << "\n"
483 << "</tr>\n";
484
485 this_doc ++;
486 }
487 textout << "</table>\n";
488
489 delete (formatlistptr);
490
491 // output the footer
492 textout << outconvert << disp << "_query:footer_";
493 }
494
495 return true;
496}
497
498// define_query_macros sets the macros that couldn't be set until the
499// query had been done. Those macros are _freqmsg_, _quotedquery_,
500// _resultline_, _nextfirst_, _nextlast_, _prevfirst_, _prevlast_,
501// _thisfirst_, and _thislast_
502void queryaction::define_query_macros (cgiargsclass &args, displayclass &disp,
503 const FilterResponse_t &response) {
504 // set up _freqmsg_ and _quotedquery_ macros
505 text_t freqmsg = "_textfreqmsg1_";
506 TermInfo_tarray::const_iterator this_term = response.termInfo.begin();
507 TermInfo_tarray::const_iterator end_term = response.termInfo.end();
508 while (this_term != end_term) {
509 freqmsg += (*this_term).term + ": " + (*this_term).freq;
510 if ((this_term + 1) != end_term)
511 freqmsg += ", ";
512 this_term ++;
513 }
514
515 disp.setmacro ("freqmsg", "query", freqmsg);
516
517
518 // set up _resultline_ macro
519 text_t resline;
520 int maxdocs = args.getintarg("m");
521 int numdocs = response.numDocs;
522 isapprox isApprox = response.isApprox;
523
524 // if there were phrases (post-processing) we're not going to include
525 // those documents that didn't match
526 if (num_phrases > 0) {
527 numdocs = 0;
528 isApprox = Exact;
529 ResultDocInfo_tarray::const_iterator this_doc = response.docInfo.begin();
530 ResultDocInfo_tarray::const_iterator end_doc = response.docInfo.end();
531 while (this_doc != end_doc) {
532 if ((*this_doc).num_phrase_match == num_phrases) numdocs ++;
533 else break; // we can bail here as matching docs are sorted to top
534 this_doc++;
535 }
536 }
537
538 if (isApprox == MoreThan && numdocs > maxdocs) numdocs = maxdocs;
539
540 if (isApprox == Approximate) resline = "_textapprox_";
541 else if (isApprox == MoreThan) resline = "_textmorethan_";
542
543 if (numdocs == 0) resline = "_textnodocs_";
544 else if (numdocs == 1) resline += "_text1doc_";
545 else resline += text_t(numdocs) + " _textlotsdocs_";
546
547 disp.setmacro("resultline", "query", resline);
548
549
550 int firstdoc = args.getintarg("r");
551 int hitsperpage = args.getintarg("o");
552
553 // set up _thisfirst_ and _thislast_ macros
554 disp.setmacro ("thisfirst", "query", firstdoc);
555 int thislast = firstdoc + (hitsperpage - 1);
556 if (thislast > numdocs) thislast = numdocs;
557 disp.setmacro ("thislast", "query", thislast);
558
559 // set up _prevfirst_ and _prevlast_ macros
560 if (firstdoc > 1) {
561 disp.setmacro ("prevlast", "query", firstdoc - 1);
562 int prevfirst = firstdoc - hitsperpage;
563 if (prevfirst < 1) prevfirst = 1;
564 disp.setmacro ("prevfirst", "query", prevfirst);
565 }
566
567 // set up _nextfirst_ and _nextlast_ macros
568 if (thislast < numdocs) {
569 disp.setmacro ("nextfirst", "query", thislast + 1);
570 int nextlast = thislast + hitsperpage;
571 if (nextlast > numdocs) nextlast = numdocs;
572 disp.setmacro ("nextlast", "query", nextlast);
573 }
574}
575
576
577
Note: See TracBrowser for help on using the repository browser.