Context Navigation

source: main/tags/2.10/gsdl/src/colservr/queryfilter.cpp@ 32704

Last change on this file since 32704 was 621, checked in by sjboddie, 25 years ago
Endresults queryfilter option may now take '-1' for 'all'
Property svn:executable set to `*`; property svn:keywords set to `Author Date Id Revision`
File size: 24.7 KB

Line
1	/**********************************************************************
2	*
3	* queryfilter.cpp --
4	* Copyright (C) 1999 The New Zealand Digital Library Project
5	*
6	* A component of the Greenstone digital library software
7	* from the New Zealand Digital Library Project at the
8	* University of Waikato, New Zealand.
9	*
10	* This program is free software; you can redistribute it and/or modify
11	* it under the terms of the GNU General Public License as published by
12	* the Free Software Foundation; either version 2 of the License, or
13	* (at your option) any later version.
14	*
15	* This program is distributed in the hope that it will be useful,
16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	* GNU General Public License for more details.
19	*
20	* You should have received a copy of the GNU General Public License
21	* along with this program; if not, write to the Free Software
22	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	*
24	* $Id: queryfilter.cpp 621 1999-09-22 03:43:18Z sjboddie $
25	*
26	*********************************************************************/
27
28	/*
29	$Log$
30	Revision 1.18 1999/09/22 03:43:18 sjboddie
31	Endresults queryfilter option may now take '-1' for 'all'
32
33	Revision 1.17 1999/09/21 12:01:07 sjboddie
34	added Maxdocs queryfilter option (which may be -1 for 'all')
35
36	Revision 1.16 1999/09/07 04:57:24 sjboddie
37	added gpl notice
38
39	Revision 1.15 1999/08/31 22:47:09 rjmcnab
40	Added matchmode option for some and all.
41
42	Revision 1.14 1999/07/16 03:42:21 sjboddie
43	changed isApprox
44
45	Revision 1.13 1999/07/16 00:17:06 sjboddie
46	got using phrasesearch for post-processing
47
48	Revision 1.12 1999/07/09 02:19:43 rjmcnab
49	Fixed a couple of compiler conflicts
50
51	Revision 1.11 1999/07/08 20:49:44 rjmcnab
52	Added result_num to the ResultDocInto_t structure.
53
54	Revision 1.10 1999/07/07 06:19:46 rjmcnab
55	Added ability to combine two or more independant queries.
56
57	Revision 1.9 1999/07/01 09:29:20 rjmcnab
58	Changes for better reporting of number documents which match a query. Changes
59	should still work as before with older versions of mg.
60
61	Revision 1.8 1999/07/01 03:59:54 rjmcnab
62	reduced MAXDOCS to 200 (more reasonable ???). I also added a virtual
63	method for post-processing the query.
64
65	Revision 1.7 1999/06/30 04:04:13 rjmcnab
66	made stemming functions available from mgsearch and made the stems
67	for the query terms available in queryinfo
68
69	Revision 1.6 1999/06/29 22:06:23 rjmcnab
70	Added a couple of fields to queryinfo to handle a special version
71	of mg.
72
73	Revision 1.5 1999/06/27 22:08:48 sjboddie
74	now check for defaultindex, defaultsubcollection, and defaultlanguage
75	entries in config files
76
77	Revision 1.4 1999/06/16 02:03:25 sjboddie
78	fixed bug in isApprox and set MAXDOCS to always be 500
79
80	Revision 1.3 1999/04/19 23:56:09 rjmcnab
81	Finished the gdbm metadata stuff
82
83	Revision 1.2 1999/04/12 03:45:03 rjmcnab
84	Finished the query filter.
85
86	Revision 1.1 1999/04/06 22:22:09 rjmcnab
87	Initial revision.
88
89	*/
90
91
92	#include "queryfilter.h"
93	#include "fileutil.h"
94	#include "queryinfo.h"
95	#include "phrasesearch.h"
96	#include <assert.h>
97
98
99	// some useful functions
100
101	// translate will return true if successful
102	static bool translate (gdbmclass *gdbmptr, int docnum, text_t &trans_OID) {
103	infodbclass info;
104
105	trans_OID.clear();
106
107	// get the info
108	if (gdbmptr == NULL) return false;
109	if (!gdbmptr->getinfo(docnum, info)) return false;
110
111	// translate
112	if (info["section"].empty()) return false;
113
114	trans_OID = info["section"];
115	return true;
116	}
117
118
119	// whether document results are needed
120	static bool need_matching_docs (int filterResultOptions) {
121	return ((filterResultOptions & FROID) \|\| (filterResultOptions & FRranking) \|\|
122	(filterResultOptions & FRmetadata));
123	}
124
125	// whether term information is needed
126	static bool need_term_info (int filterResultOptions) {
127	return ((filterResultOptions & FRtermFreq) \|\| (filterResultOptions & FRmatchTerms));
128	}
129
130	///////////////////////////////
131	// methods for resultsorderer_t
132	///////////////////////////////
133
134	resultsorderer_t::resultsorderer_t() {
135	clear ();
136	}
137
138	void resultsorderer_t::clear() {
139	compare_phrase_match = false;
140	compare_terms_match = false;
141	compare_doc_weight = true;
142
143	docset = NULL;
144	}
145
146	bool resultsorderer_t::operator()(const int &t1, const int &t2) const {
147	if (docset == NULL) return t1>t2;
148
149	docresultmap::iterator t1_here = docset->find(t1);
150	docresultmap::iterator t2_here = docset->find(t2);
151	docresultmap::iterator end = docset->end();
152
153	// sort all the document numbers not in the document set to
154	// the end of the list
155	if (t1_here == end) {
156	if (t2_here == end) return t1>t2;
157	else return true;
158	} else if (t2_here == end) return false;
159
160	if (compare_phrase_match) {
161	if ((t1_here).second.num_phrase_match > (t2_here).second.num_phrase_match) return true;
162	if ((t1_here).second.num_phrase_match < (t2_here).second.num_phrase_match) return false;
163	}
164
165	if (compare_terms_match) {
166	if ((t1_here).second.num_query_terms_matched > (t2_here).second.num_query_terms_matched) return true;
167	if ((t1_here).second.num_query_terms_matched < (t2_here).second.num_query_terms_matched) return false;
168	}
169
170	if (compare_doc_weight) {
171	if ((t1_here).second.docweight > (t2_here).second.docweight) return true;
172	if ((t1_here).second.docweight < (t2_here).second.docweight) return false;
173	}
174
175	return t1>t2;
176	}
177
178
179
180
181	/////////////////////////////////
182	// functions for queryfilterclass
183	/////////////////////////////////
184
185	// loads up phrases data structure with any phrases (that's the quoted bits)
186	// occuring in the querystring
187	void queryfilterclass::get_phrase_terms (const text_t &querystring,
188	const termfreqclassarray &orgterms,
189	vector<termfreqclassarray> &phrases) {
190
191	text_t::const_iterator here = querystring.begin();
192	text_t::const_iterator end = querystring.end();
193
194	termfreqclassarray tmpterms;
195
196	int termcount = 0;
197	bool foundquote = false;
198	bool foundbreak = false;
199	bool start = true;
200	while (here != end) {
201	if (*here == '\"') {
202	if (foundquote) {
203	if (!foundbreak && !start) {
204	tmpterms.push_back (orgterms[termcount]);
205	termcount ++;
206	}
207	if (tmpterms.size() > 1) {
208	phrases.push_back (tmpterms);
209	tmpterms.erase (tmpterms.begin(), tmpterms.end());
210	}
211	foundquote = false;
212	foundbreak = true;
213	} else foundquote = true;
214	} else if (!is_unicode_letdig(*here)) {
215	// found a break between terms
216	if (!foundbreak && !start) {
217	if (foundquote)
218	tmpterms.push_back (orgterms[termcount]);
219	termcount ++;
220	}
221	foundbreak = true;
222	} else {
223	start = false;
224	foundbreak = false;
225	}
226	here++;
227	}
228	}
229
230	// do aditional query processing
231	void queryfilterclass::post_process (const queryparamclass &queryparams,
232	queryresultsclass &queryresults) {
233
234	// post-process the results if needed
235	if (queryresults.orgterms.size() > 1 && !queryresults.docs.docset.empty()) {
236
237	// get the terms between quotes (if any)
238	vector<termfreqclassarray> phrases;
239	get_phrase_terms (queryparams.querystring, queryresults.orgterms, phrases);
240
241	if (phrases.size() > 0) {
242
243	// get the long version of the index
244	text_t longindex;
245	indexmap.to2from (queryparams.index, longindex);
246
247	vector<termfreqclassarray>::const_iterator this_phrase = phrases.begin();
248	vector<termfreqclassarray>::const_iterator end_phrase = phrases.end();
249
250	while (this_phrase != end_phrase) {
251
252	// process each of the matched documents
253	docresultmap::iterator docs_here = queryresults.docs.docset.begin();
254	docresultmap::iterator docs_end = queryresults.docs.docset.end();
255	while (docs_here != docs_end) {
256	if (OID_phrase_search (mgsearchptr, gdbmptr, queryparams.index,
257	queryparams.subcollection, queryparams.language,
258	longindex, queryparams.collection, *this_phrase,
259	(*docs_here).second.docnum)) {
260	(*docs_here).second.num_phrase_match++;
261	}
262
263	docs_here++;
264	}
265	this_phrase++;
266	}
267	}
268	}
269	}
270
271	// get the query parameters
272	void queryfilterclass::parse_query_params (const FilterRequest_t &request,
273	vector<queryparamclass> &query_params,
274	int &startresults,
275	int &endresults,
276	ostream &logout) {
277	outconvertclass text_t2ascii;
278
279	// set defaults for the return parameters
280	query_params.erase(query_params.begin(), query_params.end());
281	startresults = filterOptions["StartResults"].defaultValue.getint();
282	endresults = filterOptions["EndResults"].defaultValue.getint();
283
284	// set defaults for query parameters
285	queryparamclass query;
286	query.combinequery = "or"; // first one must be "or"
287	query.collection = collection;
288	query.index = filterOptions["Index"].defaultValue;
289	query.subcollection = filterOptions["Subcollection"].defaultValue;
290	query.language = filterOptions["Language"].defaultValue;
291	query.querystring.clear();
292	query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
293	query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
294	query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
295	query.stemming = (filterOptions["Stem"].defaultValue == "true");
296	query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
297
298	OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
299	OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
300	while (options_here != options_end) {
301	if ((*options_here).name == "CombineQuery") {
302	// add this query
303
304	// "all", needed when combining queries where the document results are needed
305	if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
306	query_params.push_back (query);
307
308	// start on next query
309	query.clear();
310	query.combinequery = (*options_here).value;
311
312	// set defaults for query parameters
313	query.collection = collection;
314	query.index = filterOptions["Index"].defaultValue;
315	query.subcollection = filterOptions["Subcollection"].defaultValue;
316	query.language = filterOptions["Language"].defaultValue;
317	query.querystring.clear();
318	query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
319	query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
320	query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
321	query.stemming = (filterOptions["Stem"].defaultValue == "true");
322
323	// "all", needed when combining queries where the document results are needed
324	if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
325	else query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
326
327	} else if ((*options_here).name == "StartResults") {
328	startresults = (*options_here).value.getint();
329	} else if ((*options_here).name == "EndResults") {
330	endresults = (*options_here).value.getint();
331	} else if ((*options_here).name == "QueryType") {
332	query.search_type = ((*options_here).value == "ranked");
333	} else if ((*options_here).name == "MatchMode") {
334	query.match_mode = ((*options_here).value == "all");
335	if (query.match_mode == 1) query.maxdocs = -1;
336	} else if ((*options_here).name == "Term") {
337	query.querystring = (*options_here).value;
338	} else if ((*options_here).name == "Casefold") {
339	query.casefolding = ((*options_here).value == "true");
340	} else if ((*options_here).name == "Stem") {
341	query.stemming = ((*options_here).value == "true");
342	} else if ((*options_here).name == "Index") {
343	query.index = (*options_here).value;
344	} else if ((*options_here).name == "Subcollection") {
345	query.subcollection = (*options_here).value;
346	} else if ((*options_here).name == "Language") {
347	query.language = (*options_here).value;
348	} else if ((*options_here).name == "Maxdocs") {
349	query.maxdocs = (*options_here).value.getint();
350	} else {
351	logout << text_t2ascii
352	<< "warning: unknown queryfilter option \""
353	<< (*options_here).name
354	<< "\" ignored.\n\n";
355	}
356
357	options_here++;
358	}
359
360	// add the last query
361	query_params.push_back (query);
362	}
363
364
365
366	// do query that might involve multiple sub queries
367	// mgsearchptr and gdbmptr are assumed to be valid
368	void queryfilterclass::do_multi_query (const FilterRequest_t &request,
369	const vector<queryparamclass> &query_params,
370	queryresultsclass &multiresults,
371	comerror_t &err, ostream &logout) {
372	outconvertclass text_t2ascii;
373
374	err = noError;
375	mgsearchptr->setcollectdir (collectdir);
376	multiresults.clear();
377
378	vector<queryparamclass>::const_iterator query_here = query_params.begin();
379	vector<queryparamclass>::const_iterator query_end = query_params.end();
380	while (query_here != query_end) {
381	queryresultsclass thisqueryresults;
382
383	if (!mgsearchptr->search(*query_here, thisqueryresults)) {
384	// most likely a system problem
385	logout << text_t2ascii
386	<< "system problem: could not do search with mg for index \""
387	<< (query_here).index << (query_here).subcollection
388	<< (*query_here).language << "\".\n\n";
389	err = systemProblem;
390	return;
391	}
392
393	// combine the results
394	if (need_matching_docs (request.filterResultOptions)) {
395	// post-process the results if needed
396	if (!thisqueryresults.postprocessed && thisqueryresults.orgterms.size() > 1 &&
397	!thisqueryresults.docs.docset.empty()) {
398	post_process (*query_here, thisqueryresults);
399	thisqueryresults.postprocessed = true;
400	multiresults.postprocessed = true;
401	}
402
403	if (query_params.size() == 1) {
404	multiresults.docs = thisqueryresults.docs; // just one set of results
405	multiresults.docs_matched = thisqueryresults.docs_matched;
406	multiresults.is_approx = thisqueryresults.is_approx;
407
408	} else {
409	if ((*query_here).combinequery == "and") {
410	multiresults.docs.combine_and (thisqueryresults.docs);
411	} else if ((*query_here).combinequery == "or") {
412	multiresults.docs.combine_or (thisqueryresults.docs);
413	} else if ((*query_here).combinequery == "not") {
414	multiresults.docs.combine_not (thisqueryresults.docs);
415	}
416	multiresults.docs_matched = multiresults.docs.docset.size();
417	multiresults.is_approx = Exact;
418	}
419	}
420
421	// combine the term information
422	if (need_term_info (request.filterResultOptions)) {
423	// append the terms
424	multiresults.orgterms.insert(multiresults.orgterms.end(),
425	thisqueryresults.orgterms.begin(),
426	thisqueryresults.orgterms.end());
427
428	// add the term variants
429	text_tset::iterator termvar_here = thisqueryresults.termvariants.begin();
430	text_tset::iterator termvar_end = thisqueryresults.termvariants.end();
431	while (termvar_here != termvar_end) {
432	multiresults.termvariants.insert(*termvar_here);
433	termvar_here++;
434	}
435	}
436
437	query_here++;
438	}
439
440	// sort and unique the query terms
441	multiresults.sortuniqqueryterms ();
442	}
443
444
445	void queryfilterclass::sort_doc_results (const FilterRequest_t &/request/,
446	docresultsclass &docs) {
447	resultsorderer_t resultsorderer;
448	resultsorderer.compare_phrase_match = true;
449	resultsorderer.docset = &(docs.docset);
450
451	// first get a list of document numbers
452	docs.docnum_order();
453
454	sort (docs.docorder.begin(), docs.docorder.end(), resultsorderer);
455	}
456
457
458
459	queryfilterclass::queryfilterclass () {
460	gdbmptr = NULL;
461	mgsearchptr = NULL;
462
463	FilterOption_t filtopt;
464	filtopt.name = "CombineQuery";
465	filtopt.type = FilterOption_t::enumeratedt;
466	filtopt.repeatable = FilterOption_t::onePerQuery;
467	filtopt.defaultValue = "and";
468	filtopt.validValues.push_back("and");
469	filtopt.validValues.push_back("or");
470	filtopt.validValues.push_back("not");
471	filterOptions["CombineQuery"] = filtopt;
472
473	// -- onePerQuery StartResults integer
474	filtopt.clear();
475	filtopt.name = "StartResults";
476	filtopt.type = FilterOption_t::integert;
477	filtopt.repeatable = FilterOption_t::onePerQuery;
478	filtopt.defaultValue = "1";
479	filtopt.validValues.push_back("1");
480	filtopt.validValues.push_back("1000");
481	filterOptions["StartResults"] = filtopt;
482
483	// -- onePerQuery EndResults integer
484	filtopt.clear();
485	filtopt.name = "EndResults";
486	filtopt.type = FilterOption_t::integert;
487	filtopt.repeatable = FilterOption_t::onePerQuery;
488	filtopt.defaultValue = "10";
489	filtopt.validValues.push_back("-1");
490	filtopt.validValues.push_back("1000");
491	filterOptions["EndResults"] = filtopt;
492
493	// -- onePerQuery QueryType enumerated (boolean, ranked)
494	filtopt.clear();
495	filtopt.name = "QueryType";
496	filtopt.type = FilterOption_t::enumeratedt;
497	filtopt.repeatable = FilterOption_t::onePerQuery;
498	filtopt.defaultValue = "ranked";
499	filtopt.validValues.push_back("boolean");
500	filtopt.validValues.push_back("ranked");
501	filterOptions["QueryType"] = filtopt;
502
503	// -- onePerQuery MatchMode enumerated (some, all)
504	filtopt.clear();
505	filtopt.name = "MatchMode";
506	filtopt.type = FilterOption_t::enumeratedt;
507	filtopt.repeatable = FilterOption_t::onePerQuery;
508	filtopt.defaultValue = "some";
509	filtopt.validValues.push_back("some");
510	filtopt.validValues.push_back("all");
511	filterOptions["MatchMode"] = filtopt;
512
513	// -- onePerTerm Term string ???
514	filtopt.clear();
515	filtopt.name = "Term";
516	filtopt.type = FilterOption_t::stringt;
517	filtopt.repeatable = FilterOption_t::onePerTerm;
518	filtopt.defaultValue = "";
519	filterOptions["Term"] = filtopt;
520
521	// -- onePerTerm Casefold boolean
522	filtopt.clear();
523	filtopt.name = "Casefold";
524	filtopt.type = FilterOption_t::booleant;
525	filtopt.repeatable = FilterOption_t::onePerTerm;
526	filtopt.defaultValue = "true";
527	filtopt.validValues.push_back("false");
528	filtopt.validValues.push_back("true");
529	filterOptions["Casefold"] = filtopt;
530
531	// -- onePerTerm Stem boolean
532	filtopt.clear();
533	filtopt.name = "Stem";
534	filtopt.type = FilterOption_t::booleant;
535	filtopt.repeatable = FilterOption_t::onePerTerm;
536	filtopt.defaultValue = "false";
537	filtopt.validValues.push_back("false");
538	filtopt.validValues.push_back("true");
539	filterOptions["Stem"] = filtopt;
540
541	// -- onePerTerm Index enumerated
542	filtopt.clear();
543	filtopt.name = "Index";
544	filtopt.type = FilterOption_t::enumeratedt;
545	filtopt.repeatable = FilterOption_t::onePerTerm;
546	filtopt.defaultValue = "";
547	filterOptions["Index"] = filtopt;
548
549	// -- onePerTerm Subcollection enumerated
550	filtopt.clear();
551	filtopt.name = "Subcollection";
552	filtopt.type = FilterOption_t::enumeratedt;
553	filtopt.repeatable = FilterOption_t::onePerTerm;
554	filtopt.defaultValue = "";
555	filterOptions["Subcollection"] = filtopt;
556
557	// -- onePerTerm Language enumerated
558	filtopt.clear();
559	filtopt.name = "Language";
560	filtopt.type = FilterOption_t::enumeratedt;
561	filtopt.repeatable = FilterOption_t::onePerTerm;
562	filtopt.defaultValue = "";
563	filterOptions["Language"] = filtopt;
564
565	// -- onePerQuery Maxdocs integer
566	filtopt.clear();
567	filtopt.name = "Maxdocs";
568	filtopt.type = FilterOption_t::integert;
569	filtopt.repeatable = FilterOption_t::onePerQuery;
570	filtopt.defaultValue = "200";
571	filtopt.validValues.push_back("-1");
572	filtopt.validValues.push_back("1000");
573	filterOptions["Maxdocs"] = filtopt;
574	}
575
576	queryfilterclass::~queryfilterclass () {
577	}
578
579	void queryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
580	filterclass::configure (key, cfgline);
581
582	if (key == "indexmap") {
583	indexmap.importmap (cfgline);
584
585	// update the list of indexes in the filter information
586	text_tarray options;
587	indexmap.gettoarray (options);
588	filterOptions["Index"].validValues = options;
589
590	} else if (key == "defaultindex") {
591	indexmap.from2to (cfgline[0], filterOptions["Index"].defaultValue);
592
593	} else if (key == "subcollectionmap") {
594	subcollectionmap.importmap (cfgline);
595
596	// update the list of subcollections in the filter information
597	text_tarray options;
598	subcollectionmap.gettoarray (options);
599	filterOptions["Subcollection"].validValues = options;
600
601	} else if (key == "defaultsubcollection") {
602	subcollectionmap.from2to (cfgline[0], filterOptions["Subcollection"].defaultValue);
603
604	} else if (key == "languagemap") {
605	languagemap.importmap (cfgline);
606
607	// update the list of languages in the filter information
608	text_tarray options;
609	languagemap.gettoarray (options);
610	filterOptions["Language"].validValues = options;
611
612	} else if (key == "defaultlanguage")
613	languagemap.from2to (cfgline[0], filterOptions["Language"].defaultValue);
614	}
615
616	bool queryfilterclass::init (ostream &logout) {
617	outconvertclass text_t2ascii;
618
619	if (!filterclass::init(logout)) return false;
620
621	// get the filename for the database and make sure it exists
622	gdbm_filename = filename_cat(collectdir,"index","text",collection);
623
624	#ifdef _LITTLE_ENDIAN
625	gdbm_filename += ".ldb";
626	#else
627	gdbm_filename += ".bdb";
628	#endif
629	if (!file_exists(gdbm_filename)) {
630	logout << text_t2ascii
631	<< "error: gdbm database \""
632	<< gdbm_filename << "\" does not exist\n\n";
633	return false;
634	}
635
636	return true;
637	}
638
639	void queryfilterclass::filter (const FilterRequest_t &request,
640	FilterResponse_t &response,
641	comerror_t &err, ostream &logout) {
642	outconvertclass text_t2ascii;
643
644	response.clear ();
645	err = noError;
646	if (gdbmptr == NULL) {
647	// most likely a configuration problem
648	logout << text_t2ascii
649	<< "configuration error: queryfilter contains a null gdbmclass\n\n";
650	err = configurationError;
651	return;
652	}
653	if (mgsearchptr == NULL) {
654	// most likely a configuration problem
655	logout << text_t2ascii
656	<< "configuration error: queryfilter contains a null mgsearchclass\n\n";
657	err = configurationError;
658	return;
659	}
660
661	// open the database
662	gdbmptr->setlogout(&logout);
663	if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
664	// most likely a system problem (we have already checked that the
665	// gdbm database exists)
666	logout << text_t2ascii
667	<< "system problem: open on gdbm database \""
668	<< gdbm_filename << "\" failed\n\n";
669	err = systemProblem;
670	return;
671	}
672
673	// get the query parameters
674	int startresults = filterOptions["StartResults"].defaultValue.getint();
675	int endresults = filterOptions["EndResults"].defaultValue.getint();
676	vector<queryparamclass> queryfilterparams;
677	parse_query_params (request, queryfilterparams, startresults, endresults, logout);
678
679	// do query
680	queryresultsclass queryresults;
681	do_multi_query (request, queryfilterparams, queryresults, err, logout);
682	if (err != noError) return;
683
684	// assemble document results
685	if (need_matching_docs (request.filterResultOptions)) {
686	// sort the query results
687	sort_doc_results (request, queryresults.docs);
688
689	int resultnum = 1;
690	ResultDocInfo_t resultdoc;
691	text_t trans_OID;
692	vector<int>::iterator docorder_here = queryresults.docs.docorder.begin();
693	vector<int>::iterator docorder_end = queryresults.docs.docorder.end();
694
695	if (endresults == -1) endresults = MAXNUMDOCS;
696	while (docorder_here != docorder_end) {
697	if (resultnum > endresults) break;
698
699	// translate the document number
700	if (!translate(gdbmptr, *docorder_here, trans_OID)) {
701	logout << text_t2ascii
702	<< "warning: could not translate mg document number \""
703	<< *docorder_here << "\"to OID.\n\n";
704
705	} else {
706	docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
707
708	// see if there is a result for this number,
709	// if it is in the request set (or the request set is empty)
710	if (docset_here != queryresults.docs.docset.end() &&
711	(request.docSet.empty() \|\| in_set(request.docSet, trans_OID))) {
712	if (resultnum >= startresults) {
713	// add this document
714	resultdoc.OID = trans_OID;
715	resultdoc.result_num = resultnum;
716	resultdoc.ranking = (int)((docset_here).second.docweight 10000.0 + 0.5);
717
718	// these next two are not available on all versions of mg
719	resultdoc.num_terms_matched = (*docset_here).second.num_query_terms_matched;
720	resultdoc.num_phrase_match = (*docset_here).second.num_phrase_match;
721
722	response.docInfo.push_back (resultdoc);
723	}
724
725	resultnum++;
726	}
727	}
728
729	docorder_here++;
730	}
731	}
732
733	// assemble the term results
734	if (need_term_info(request.filterResultOptions)) {
735	// note: the terms have already been sorted and uniqued
736
737	TermInfo_t terminfo;
738	bool terms_first = true;
739	termfreqclassarray::iterator terms_here = queryresults.terms.begin();
740	termfreqclassarray::iterator terms_end = queryresults.terms.end();
741
742	while (terms_here != terms_end) {
743	terminfo.clear();
744	terminfo.term = (*terms_here).termstr;
745	terminfo.freq = (*terms_here).termfreq;
746	if (terms_first) {
747	text_tset::iterator termvariants_here = queryresults.termvariants.begin();
748	text_tset::iterator termvariants_end = queryresults.termvariants.end();
749	while (termvariants_here != termvariants_end) {
750	terminfo.matchTerms.push_back (*termvariants_here);
751	termvariants_here++;
752	}
753	}
754	terms_first = false;
755
756	response.termInfo.push_back (terminfo);
757
758	terms_here++;
759	}
760	}
761
762	response.numDocs = queryresults.docs_matched;
763	response.isApprox = queryresults.is_approx;
764	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: