source: main/tags/2.10/gsdl/src/colservr/queryfilter.cpp@ 32704

Last change on this file since 32704 was 621, checked in by sjboddie, 25 years ago

Endresults queryfilter option may now take '-1' for 'all'

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 24.7 KB
Line 
1/**********************************************************************
2 *
3 * queryfilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * $Id: queryfilter.cpp 621 1999-09-22 03:43:18Z sjboddie $
25 *
26 *********************************************************************/
27
28/*
29 $Log$
30 Revision 1.18 1999/09/22 03:43:18 sjboddie
31 Endresults queryfilter option may now take '-1' for 'all'
32
33 Revision 1.17 1999/09/21 12:01:07 sjboddie
34 added Maxdocs queryfilter option (which may be -1 for 'all')
35
36 Revision 1.16 1999/09/07 04:57:24 sjboddie
37 added gpl notice
38
39 Revision 1.15 1999/08/31 22:47:09 rjmcnab
40 Added matchmode option for some and all.
41
42 Revision 1.14 1999/07/16 03:42:21 sjboddie
43 changed isApprox
44
45 Revision 1.13 1999/07/16 00:17:06 sjboddie
46 got using phrasesearch for post-processing
47
48 Revision 1.12 1999/07/09 02:19:43 rjmcnab
49 Fixed a couple of compiler conflicts
50
51 Revision 1.11 1999/07/08 20:49:44 rjmcnab
52 Added result_num to the ResultDocInto_t structure.
53
54 Revision 1.10 1999/07/07 06:19:46 rjmcnab
55 Added ability to combine two or more independant queries.
56
57 Revision 1.9 1999/07/01 09:29:20 rjmcnab
58 Changes for better reporting of number documents which match a query. Changes
59 should still work as before with older versions of mg.
60
61 Revision 1.8 1999/07/01 03:59:54 rjmcnab
62 reduced MAXDOCS to 200 (more reasonable ???). I also added a virtual
63 method for post-processing the query.
64
65 Revision 1.7 1999/06/30 04:04:13 rjmcnab
66 made stemming functions available from mgsearch and made the stems
67 for the query terms available in queryinfo
68
69 Revision 1.6 1999/06/29 22:06:23 rjmcnab
70 Added a couple of fields to queryinfo to handle a special version
71 of mg.
72
73 Revision 1.5 1999/06/27 22:08:48 sjboddie
74 now check for defaultindex, defaultsubcollection, and defaultlanguage
75 entries in config files
76
77 Revision 1.4 1999/06/16 02:03:25 sjboddie
78 fixed bug in isApprox and set MAXDOCS to always be 500
79
80 Revision 1.3 1999/04/19 23:56:09 rjmcnab
81 Finished the gdbm metadata stuff
82
83 Revision 1.2 1999/04/12 03:45:03 rjmcnab
84 Finished the query filter.
85
86 Revision 1.1 1999/04/06 22:22:09 rjmcnab
87 Initial revision.
88
89 */
90
91
92#include "queryfilter.h"
93#include "fileutil.h"
94#include "queryinfo.h"
95#include "phrasesearch.h"
96#include <assert.h>
97
98
99// some useful functions
100
101// translate will return true if successful
102static bool translate (gdbmclass *gdbmptr, int docnum, text_t &trans_OID) {
103 infodbclass info;
104
105 trans_OID.clear();
106
107 // get the info
108 if (gdbmptr == NULL) return false;
109 if (!gdbmptr->getinfo(docnum, info)) return false;
110
111 // translate
112 if (info["section"].empty()) return false;
113
114 trans_OID = info["section"];
115 return true;
116}
117
118
119// whether document results are needed
120static bool need_matching_docs (int filterResultOptions) {
121 return ((filterResultOptions & FROID) || (filterResultOptions & FRranking) ||
122 (filterResultOptions & FRmetadata));
123}
124
125// whether term information is needed
126static bool need_term_info (int filterResultOptions) {
127 return ((filterResultOptions & FRtermFreq) || (filterResultOptions & FRmatchTerms));
128}
129
130///////////////////////////////
131// methods for resultsorderer_t
132///////////////////////////////
133
134resultsorderer_t::resultsorderer_t() {
135 clear ();
136}
137
138void resultsorderer_t::clear() {
139 compare_phrase_match = false;
140 compare_terms_match = false;
141 compare_doc_weight = true;
142
143 docset = NULL;
144}
145
146bool resultsorderer_t::operator()(const int &t1, const int &t2) const {
147 if (docset == NULL) return t1>t2;
148
149 docresultmap::iterator t1_here = docset->find(t1);
150 docresultmap::iterator t2_here = docset->find(t2);
151 docresultmap::iterator end = docset->end();
152
153 // sort all the document numbers not in the document set to
154 // the end of the list
155 if (t1_here == end) {
156 if (t2_here == end) return t1>t2;
157 else return true;
158 } else if (t2_here == end) return false;
159
160 if (compare_phrase_match) {
161 if ((*t1_here).second.num_phrase_match > (*t2_here).second.num_phrase_match) return true;
162 if ((*t1_here).second.num_phrase_match < (*t2_here).second.num_phrase_match) return false;
163 }
164
165 if (compare_terms_match) {
166 if ((*t1_here).second.num_query_terms_matched > (*t2_here).second.num_query_terms_matched) return true;
167 if ((*t1_here).second.num_query_terms_matched < (*t2_here).second.num_query_terms_matched) return false;
168 }
169
170 if (compare_doc_weight) {
171 if ((*t1_here).second.docweight > (*t2_here).second.docweight) return true;
172 if ((*t1_here).second.docweight < (*t2_here).second.docweight) return false;
173 }
174
175 return t1>t2;
176}
177
178
179
180
181/////////////////////////////////
182// functions for queryfilterclass
183/////////////////////////////////
184
185// loads up phrases data structure with any phrases (that's the quoted bits)
186// occuring in the querystring
187void queryfilterclass::get_phrase_terms (const text_t &querystring,
188 const termfreqclassarray &orgterms,
189 vector<termfreqclassarray> &phrases) {
190
191 text_t::const_iterator here = querystring.begin();
192 text_t::const_iterator end = querystring.end();
193
194 termfreqclassarray tmpterms;
195
196 int termcount = 0;
197 bool foundquote = false;
198 bool foundbreak = false;
199 bool start = true;
200 while (here != end) {
201 if (*here == '\"') {
202 if (foundquote) {
203 if (!foundbreak && !start) {
204 tmpterms.push_back (orgterms[termcount]);
205 termcount ++;
206 }
207 if (tmpterms.size() > 1) {
208 phrases.push_back (tmpterms);
209 tmpterms.erase (tmpterms.begin(), tmpterms.end());
210 }
211 foundquote = false;
212 foundbreak = true;
213 } else foundquote = true;
214 } else if (!is_unicode_letdig(*here)) {
215 // found a break between terms
216 if (!foundbreak && !start) {
217 if (foundquote)
218 tmpterms.push_back (orgterms[termcount]);
219 termcount ++;
220 }
221 foundbreak = true;
222 } else {
223 start = false;
224 foundbreak = false;
225 }
226 here++;
227 }
228}
229
230// do aditional query processing
231void queryfilterclass::post_process (const queryparamclass &queryparams,
232 queryresultsclass &queryresults) {
233
234 // post-process the results if needed
235 if (queryresults.orgterms.size() > 1 && !queryresults.docs.docset.empty()) {
236
237 // get the terms between quotes (if any)
238 vector<termfreqclassarray> phrases;
239 get_phrase_terms (queryparams.querystring, queryresults.orgterms, phrases);
240
241 if (phrases.size() > 0) {
242
243 // get the long version of the index
244 text_t longindex;
245 indexmap.to2from (queryparams.index, longindex);
246
247 vector<termfreqclassarray>::const_iterator this_phrase = phrases.begin();
248 vector<termfreqclassarray>::const_iterator end_phrase = phrases.end();
249
250 while (this_phrase != end_phrase) {
251
252 // process each of the matched documents
253 docresultmap::iterator docs_here = queryresults.docs.docset.begin();
254 docresultmap::iterator docs_end = queryresults.docs.docset.end();
255 while (docs_here != docs_end) {
256 if (OID_phrase_search (*mgsearchptr, *gdbmptr, queryparams.index,
257 queryparams.subcollection, queryparams.language,
258 longindex, queryparams.collection, *this_phrase,
259 (*docs_here).second.docnum)) {
260 (*docs_here).second.num_phrase_match++;
261 }
262
263 docs_here++;
264 }
265 this_phrase++;
266 }
267 }
268 }
269}
270
271// get the query parameters
272void queryfilterclass::parse_query_params (const FilterRequest_t &request,
273 vector<queryparamclass> &query_params,
274 int &startresults,
275 int &endresults,
276 ostream &logout) {
277 outconvertclass text_t2ascii;
278
279 // set defaults for the return parameters
280 query_params.erase(query_params.begin(), query_params.end());
281 startresults = filterOptions["StartResults"].defaultValue.getint();
282 endresults = filterOptions["EndResults"].defaultValue.getint();
283
284 // set defaults for query parameters
285 queryparamclass query;
286 query.combinequery = "or"; // first one must be "or"
287 query.collection = collection;
288 query.index = filterOptions["Index"].defaultValue;
289 query.subcollection = filterOptions["Subcollection"].defaultValue;
290 query.language = filterOptions["Language"].defaultValue;
291 query.querystring.clear();
292 query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
293 query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
294 query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
295 query.stemming = (filterOptions["Stem"].defaultValue == "true");
296 query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
297
298 OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
299 OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
300 while (options_here != options_end) {
301 if ((*options_here).name == "CombineQuery") {
302 // add this query
303
304 // "all", needed when combining queries where the document results are needed
305 if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
306 query_params.push_back (query);
307
308 // start on next query
309 query.clear();
310 query.combinequery = (*options_here).value;
311
312 // set defaults for query parameters
313 query.collection = collection;
314 query.index = filterOptions["Index"].defaultValue;
315 query.subcollection = filterOptions["Subcollection"].defaultValue;
316 query.language = filterOptions["Language"].defaultValue;
317 query.querystring.clear();
318 query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
319 query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
320 query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
321 query.stemming = (filterOptions["Stem"].defaultValue == "true");
322
323 // "all", needed when combining queries where the document results are needed
324 if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
325 else query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
326
327 } else if ((*options_here).name == "StartResults") {
328 startresults = (*options_here).value.getint();
329 } else if ((*options_here).name == "EndResults") {
330 endresults = (*options_here).value.getint();
331 } else if ((*options_here).name == "QueryType") {
332 query.search_type = ((*options_here).value == "ranked");
333 } else if ((*options_here).name == "MatchMode") {
334 query.match_mode = ((*options_here).value == "all");
335 if (query.match_mode == 1) query.maxdocs = -1;
336 } else if ((*options_here).name == "Term") {
337 query.querystring = (*options_here).value;
338 } else if ((*options_here).name == "Casefold") {
339 query.casefolding = ((*options_here).value == "true");
340 } else if ((*options_here).name == "Stem") {
341 query.stemming = ((*options_here).value == "true");
342 } else if ((*options_here).name == "Index") {
343 query.index = (*options_here).value;
344 } else if ((*options_here).name == "Subcollection") {
345 query.subcollection = (*options_here).value;
346 } else if ((*options_here).name == "Language") {
347 query.language = (*options_here).value;
348 } else if ((*options_here).name == "Maxdocs") {
349 query.maxdocs = (*options_here).value.getint();
350 } else {
351 logout << text_t2ascii
352 << "warning: unknown queryfilter option \""
353 << (*options_here).name
354 << "\" ignored.\n\n";
355 }
356
357 options_here++;
358 }
359
360 // add the last query
361 query_params.push_back (query);
362}
363
364
365
366// do query that might involve multiple sub queries
367// mgsearchptr and gdbmptr are assumed to be valid
368void queryfilterclass::do_multi_query (const FilterRequest_t &request,
369 const vector<queryparamclass> &query_params,
370 queryresultsclass &multiresults,
371 comerror_t &err, ostream &logout) {
372 outconvertclass text_t2ascii;
373
374 err = noError;
375 mgsearchptr->setcollectdir (collectdir);
376 multiresults.clear();
377
378 vector<queryparamclass>::const_iterator query_here = query_params.begin();
379 vector<queryparamclass>::const_iterator query_end = query_params.end();
380 while (query_here != query_end) {
381 queryresultsclass thisqueryresults;
382
383 if (!mgsearchptr->search(*query_here, thisqueryresults)) {
384 // most likely a system problem
385 logout << text_t2ascii
386 << "system problem: could not do search with mg for index \""
387 << (*query_here).index << (*query_here).subcollection
388 << (*query_here).language << "\".\n\n";
389 err = systemProblem;
390 return;
391 }
392
393 // combine the results
394 if (need_matching_docs (request.filterResultOptions)) {
395 // post-process the results if needed
396 if (!thisqueryresults.postprocessed && thisqueryresults.orgterms.size() > 1 &&
397 !thisqueryresults.docs.docset.empty()) {
398 post_process (*query_here, thisqueryresults);
399 thisqueryresults.postprocessed = true;
400 multiresults.postprocessed = true;
401 }
402
403 if (query_params.size() == 1) {
404 multiresults.docs = thisqueryresults.docs; // just one set of results
405 multiresults.docs_matched = thisqueryresults.docs_matched;
406 multiresults.is_approx = thisqueryresults.is_approx;
407
408 } else {
409 if ((*query_here).combinequery == "and") {
410 multiresults.docs.combine_and (thisqueryresults.docs);
411 } else if ((*query_here).combinequery == "or") {
412 multiresults.docs.combine_or (thisqueryresults.docs);
413 } else if ((*query_here).combinequery == "not") {
414 multiresults.docs.combine_not (thisqueryresults.docs);
415 }
416 multiresults.docs_matched = multiresults.docs.docset.size();
417 multiresults.is_approx = Exact;
418 }
419 }
420
421 // combine the term information
422 if (need_term_info (request.filterResultOptions)) {
423 // append the terms
424 multiresults.orgterms.insert(multiresults.orgterms.end(),
425 thisqueryresults.orgterms.begin(),
426 thisqueryresults.orgterms.end());
427
428 // add the term variants
429 text_tset::iterator termvar_here = thisqueryresults.termvariants.begin();
430 text_tset::iterator termvar_end = thisqueryresults.termvariants.end();
431 while (termvar_here != termvar_end) {
432 multiresults.termvariants.insert(*termvar_here);
433 termvar_here++;
434 }
435 }
436
437 query_here++;
438 }
439
440 // sort and unique the query terms
441 multiresults.sortuniqqueryterms ();
442}
443
444
445void queryfilterclass::sort_doc_results (const FilterRequest_t &/*request*/,
446 docresultsclass &docs) {
447 resultsorderer_t resultsorderer;
448 resultsorderer.compare_phrase_match = true;
449 resultsorderer.docset = &(docs.docset);
450
451 // first get a list of document numbers
452 docs.docnum_order();
453
454 sort (docs.docorder.begin(), docs.docorder.end(), resultsorderer);
455}
456
457
458
459queryfilterclass::queryfilterclass () {
460 gdbmptr = NULL;
461 mgsearchptr = NULL;
462
463 FilterOption_t filtopt;
464 filtopt.name = "CombineQuery";
465 filtopt.type = FilterOption_t::enumeratedt;
466 filtopt.repeatable = FilterOption_t::onePerQuery;
467 filtopt.defaultValue = "and";
468 filtopt.validValues.push_back("and");
469 filtopt.validValues.push_back("or");
470 filtopt.validValues.push_back("not");
471 filterOptions["CombineQuery"] = filtopt;
472
473 // -- onePerQuery StartResults integer
474 filtopt.clear();
475 filtopt.name = "StartResults";
476 filtopt.type = FilterOption_t::integert;
477 filtopt.repeatable = FilterOption_t::onePerQuery;
478 filtopt.defaultValue = "1";
479 filtopt.validValues.push_back("1");
480 filtopt.validValues.push_back("1000");
481 filterOptions["StartResults"] = filtopt;
482
483 // -- onePerQuery EndResults integer
484 filtopt.clear();
485 filtopt.name = "EndResults";
486 filtopt.type = FilterOption_t::integert;
487 filtopt.repeatable = FilterOption_t::onePerQuery;
488 filtopt.defaultValue = "10";
489 filtopt.validValues.push_back("-1");
490 filtopt.validValues.push_back("1000");
491 filterOptions["EndResults"] = filtopt;
492
493 // -- onePerQuery QueryType enumerated (boolean, ranked)
494 filtopt.clear();
495 filtopt.name = "QueryType";
496 filtopt.type = FilterOption_t::enumeratedt;
497 filtopt.repeatable = FilterOption_t::onePerQuery;
498 filtopt.defaultValue = "ranked";
499 filtopt.validValues.push_back("boolean");
500 filtopt.validValues.push_back("ranked");
501 filterOptions["QueryType"] = filtopt;
502
503 // -- onePerQuery MatchMode enumerated (some, all)
504 filtopt.clear();
505 filtopt.name = "MatchMode";
506 filtopt.type = FilterOption_t::enumeratedt;
507 filtopt.repeatable = FilterOption_t::onePerQuery;
508 filtopt.defaultValue = "some";
509 filtopt.validValues.push_back("some");
510 filtopt.validValues.push_back("all");
511 filterOptions["MatchMode"] = filtopt;
512
513 // -- onePerTerm Term string ???
514 filtopt.clear();
515 filtopt.name = "Term";
516 filtopt.type = FilterOption_t::stringt;
517 filtopt.repeatable = FilterOption_t::onePerTerm;
518 filtopt.defaultValue = "";
519 filterOptions["Term"] = filtopt;
520
521 // -- onePerTerm Casefold boolean
522 filtopt.clear();
523 filtopt.name = "Casefold";
524 filtopt.type = FilterOption_t::booleant;
525 filtopt.repeatable = FilterOption_t::onePerTerm;
526 filtopt.defaultValue = "true";
527 filtopt.validValues.push_back("false");
528 filtopt.validValues.push_back("true");
529 filterOptions["Casefold"] = filtopt;
530
531 // -- onePerTerm Stem boolean
532 filtopt.clear();
533 filtopt.name = "Stem";
534 filtopt.type = FilterOption_t::booleant;
535 filtopt.repeatable = FilterOption_t::onePerTerm;
536 filtopt.defaultValue = "false";
537 filtopt.validValues.push_back("false");
538 filtopt.validValues.push_back("true");
539 filterOptions["Stem"] = filtopt;
540
541 // -- onePerTerm Index enumerated
542 filtopt.clear();
543 filtopt.name = "Index";
544 filtopt.type = FilterOption_t::enumeratedt;
545 filtopt.repeatable = FilterOption_t::onePerTerm;
546 filtopt.defaultValue = "";
547 filterOptions["Index"] = filtopt;
548
549 // -- onePerTerm Subcollection enumerated
550 filtopt.clear();
551 filtopt.name = "Subcollection";
552 filtopt.type = FilterOption_t::enumeratedt;
553 filtopt.repeatable = FilterOption_t::onePerTerm;
554 filtopt.defaultValue = "";
555 filterOptions["Subcollection"] = filtopt;
556
557 // -- onePerTerm Language enumerated
558 filtopt.clear();
559 filtopt.name = "Language";
560 filtopt.type = FilterOption_t::enumeratedt;
561 filtopt.repeatable = FilterOption_t::onePerTerm;
562 filtopt.defaultValue = "";
563 filterOptions["Language"] = filtopt;
564
565 // -- onePerQuery Maxdocs integer
566 filtopt.clear();
567 filtopt.name = "Maxdocs";
568 filtopt.type = FilterOption_t::integert;
569 filtopt.repeatable = FilterOption_t::onePerQuery;
570 filtopt.defaultValue = "200";
571 filtopt.validValues.push_back("-1");
572 filtopt.validValues.push_back("1000");
573 filterOptions["Maxdocs"] = filtopt;
574}
575
576queryfilterclass::~queryfilterclass () {
577}
578
579void queryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
580 filterclass::configure (key, cfgline);
581
582 if (key == "indexmap") {
583 indexmap.importmap (cfgline);
584
585 // update the list of indexes in the filter information
586 text_tarray options;
587 indexmap.gettoarray (options);
588 filterOptions["Index"].validValues = options;
589
590 } else if (key == "defaultindex") {
591 indexmap.from2to (cfgline[0], filterOptions["Index"].defaultValue);
592
593 } else if (key == "subcollectionmap") {
594 subcollectionmap.importmap (cfgline);
595
596 // update the list of subcollections in the filter information
597 text_tarray options;
598 subcollectionmap.gettoarray (options);
599 filterOptions["Subcollection"].validValues = options;
600
601 } else if (key == "defaultsubcollection") {
602 subcollectionmap.from2to (cfgline[0], filterOptions["Subcollection"].defaultValue);
603
604 } else if (key == "languagemap") {
605 languagemap.importmap (cfgline);
606
607 // update the list of languages in the filter information
608 text_tarray options;
609 languagemap.gettoarray (options);
610 filterOptions["Language"].validValues = options;
611
612 } else if (key == "defaultlanguage")
613 languagemap.from2to (cfgline[0], filterOptions["Language"].defaultValue);
614}
615
616bool queryfilterclass::init (ostream &logout) {
617 outconvertclass text_t2ascii;
618
619 if (!filterclass::init(logout)) return false;
620
621 // get the filename for the database and make sure it exists
622 gdbm_filename = filename_cat(collectdir,"index","text",collection);
623
624#ifdef _LITTLE_ENDIAN
625 gdbm_filename += ".ldb";
626#else
627 gdbm_filename += ".bdb";
628#endif
629 if (!file_exists(gdbm_filename)) {
630 logout << text_t2ascii
631 << "error: gdbm database \""
632 << gdbm_filename << "\" does not exist\n\n";
633 return false;
634 }
635
636 return true;
637}
638
639void queryfilterclass::filter (const FilterRequest_t &request,
640 FilterResponse_t &response,
641 comerror_t &err, ostream &logout) {
642 outconvertclass text_t2ascii;
643
644 response.clear ();
645 err = noError;
646 if (gdbmptr == NULL) {
647 // most likely a configuration problem
648 logout << text_t2ascii
649 << "configuration error: queryfilter contains a null gdbmclass\n\n";
650 err = configurationError;
651 return;
652 }
653 if (mgsearchptr == NULL) {
654 // most likely a configuration problem
655 logout << text_t2ascii
656 << "configuration error: queryfilter contains a null mgsearchclass\n\n";
657 err = configurationError;
658 return;
659 }
660
661 // open the database
662 gdbmptr->setlogout(&logout);
663 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
664 // most likely a system problem (we have already checked that the
665 // gdbm database exists)
666 logout << text_t2ascii
667 << "system problem: open on gdbm database \""
668 << gdbm_filename << "\" failed\n\n";
669 err = systemProblem;
670 return;
671 }
672
673 // get the query parameters
674 int startresults = filterOptions["StartResults"].defaultValue.getint();
675 int endresults = filterOptions["EndResults"].defaultValue.getint();
676 vector<queryparamclass> queryfilterparams;
677 parse_query_params (request, queryfilterparams, startresults, endresults, logout);
678
679 // do query
680 queryresultsclass queryresults;
681 do_multi_query (request, queryfilterparams, queryresults, err, logout);
682 if (err != noError) return;
683
684 // assemble document results
685 if (need_matching_docs (request.filterResultOptions)) {
686 // sort the query results
687 sort_doc_results (request, queryresults.docs);
688
689 int resultnum = 1;
690 ResultDocInfo_t resultdoc;
691 text_t trans_OID;
692 vector<int>::iterator docorder_here = queryresults.docs.docorder.begin();
693 vector<int>::iterator docorder_end = queryresults.docs.docorder.end();
694
695 if (endresults == -1) endresults = MAXNUMDOCS;
696 while (docorder_here != docorder_end) {
697 if (resultnum > endresults) break;
698
699 // translate the document number
700 if (!translate(gdbmptr, *docorder_here, trans_OID)) {
701 logout << text_t2ascii
702 << "warning: could not translate mg document number \""
703 << *docorder_here << "\"to OID.\n\n";
704
705 } else {
706 docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
707
708 // see if there is a result for this number,
709 // if it is in the request set (or the request set is empty)
710 if (docset_here != queryresults.docs.docset.end() &&
711 (request.docSet.empty() || in_set(request.docSet, trans_OID))) {
712 if (resultnum >= startresults) {
713 // add this document
714 resultdoc.OID = trans_OID;
715 resultdoc.result_num = resultnum;
716 resultdoc.ranking = (int)((*docset_here).second.docweight * 10000.0 + 0.5);
717
718 // these next two are not available on all versions of mg
719 resultdoc.num_terms_matched = (*docset_here).second.num_query_terms_matched;
720 resultdoc.num_phrase_match = (*docset_here).second.num_phrase_match;
721
722 response.docInfo.push_back (resultdoc);
723 }
724
725 resultnum++;
726 }
727 }
728
729 docorder_here++;
730 }
731 }
732
733 // assemble the term results
734 if (need_term_info(request.filterResultOptions)) {
735 // note: the terms have already been sorted and uniqued
736
737 TermInfo_t terminfo;
738 bool terms_first = true;
739 termfreqclassarray::iterator terms_here = queryresults.terms.begin();
740 termfreqclassarray::iterator terms_end = queryresults.terms.end();
741
742 while (terms_here != terms_end) {
743 terminfo.clear();
744 terminfo.term = (*terms_here).termstr;
745 terminfo.freq = (*terms_here).termfreq;
746 if (terms_first) {
747 text_tset::iterator termvariants_here = queryresults.termvariants.begin();
748 text_tset::iterator termvariants_end = queryresults.termvariants.end();
749 while (termvariants_here != termvariants_end) {
750 terminfo.matchTerms.push_back (*termvariants_here);
751 termvariants_here++;
752 }
753 }
754 terms_first = false;
755
756 response.termInfo.push_back (terminfo);
757
758 terms_here++;
759 }
760 }
761
762 response.numDocs = queryresults.docs_matched;
763 response.isApprox = queryresults.is_approx;
764}
Note: See TracBrowser for help on using the repository browser.