Context Navigation

source: trunk/gsdl/src/colservr/queryfilter.cpp@ 501

Last change on this file since 501 was 501, checked in by rjmcnab, 25 years ago
Added matchmode option for some and all.
Property svn:executable set to `*`; property svn:keywords set to `Author Date Id Revision`
File size: 23.4 KB

Line
1	/**********************************************************************
2	*
3	* queryfilter.cpp --
4	* Copyright (C) 1999 The New Zealand Digital Library Project
5	*
6	* PUT COPYRIGHT NOTICE HERE
7	*
8	* $Id: queryfilter.cpp 501 1999-08-31 22:47:09Z rjmcnab $
9	*
10	*********************************************************************/
11
12	/*
13	$Log$
14	Revision 1.15 1999/08/31 22:47:09 rjmcnab
15	Added matchmode option for some and all.
16
17	Revision 1.14 1999/07/16 03:42:21 sjboddie
18	changed isApprox
19
20	Revision 1.13 1999/07/16 00:17:06 sjboddie
21	got using phrasesearch for post-processing
22
23	Revision 1.12 1999/07/09 02:19:43 rjmcnab
24	Fixed a couple of compiler conflicts
25
26	Revision 1.11 1999/07/08 20:49:44 rjmcnab
27	Added result_num to the ResultDocInto_t structure.
28
29	Revision 1.10 1999/07/07 06:19:46 rjmcnab
30	Added ability to combine two or more independant queries.
31
32	Revision 1.9 1999/07/01 09:29:20 rjmcnab
33	Changes for better reporting of number documents which match a query. Changes
34	should still work as before with older versions of mg.
35
36	Revision 1.8 1999/07/01 03:59:54 rjmcnab
37	reduced MAXDOCS to 200 (more reasonable ???). I also added a virtual
38	method for post-processing the query.
39
40	Revision 1.7 1999/06/30 04:04:13 rjmcnab
41	made stemming functions available from mgsearch and made the stems
42	for the query terms available in queryinfo
43
44	Revision 1.6 1999/06/29 22:06:23 rjmcnab
45	Added a couple of fields to queryinfo to handle a special version
46	of mg.
47
48	Revision 1.5 1999/06/27 22:08:48 sjboddie
49	now check for defaultindex, defaultsubcollection, and defaultlanguage
50	entries in config files
51
52	Revision 1.4 1999/06/16 02:03:25 sjboddie
53	fixed bug in isApprox and set MAXDOCS to always be 500
54
55	Revision 1.3 1999/04/19 23:56:09 rjmcnab
56	Finished the gdbm metadata stuff
57
58	Revision 1.2 1999/04/12 03:45:03 rjmcnab
59	Finished the query filter.
60
61	Revision 1.1 1999/04/06 22:22:09 rjmcnab
62	Initial revision.
63
64	*/
65
66
67	#include "queryfilter.h"
68	#include "fileutil.h"
69	#include "queryinfo.h"
70	#include "phrasesearch.h"
71	#include <assert.h>
72
// Default cap on the number of documents requested from a single query.
// It must be at least as large as the highest possible value of EndResults.
#define MAXDOCS 50000
75
76	// some useful functions
77
78	// translate will return true if successful
79	static bool translate (gdbmclass *gdbmptr, int docnum, text_t &trans_OID) {
80	infodbclass info;
81
82	trans_OID.clear();
83
84	// get the info
85	if (gdbmptr == NULL) return false;
86	if (!gdbmptr->getinfo(docnum, info)) return false;
87
88	// translate
89	if (info["section"].empty()) return false;
90
91	trans_OID = info["section"];
92	return true;
93	}
94
95
96	// whether document results are needed
97	static bool need_matching_docs (int filterResultOptions) {
98	return ((filterResultOptions & FROID) \|\| (filterResultOptions & FRranking) \|\|
99	(filterResultOptions & FRmetadata));
100	}
101
102	// whether term information is needed
103	static bool need_term_info (int filterResultOptions) {
104	return ((filterResultOptions & FRtermFreq) \|\| (filterResultOptions & FRmatchTerms));
105	}
106
107	///////////////////////////////
108	// methods for resultsorderer_t
109	///////////////////////////////
110
111	resultsorderer_t::resultsorderer_t() {
112	clear ();
113	}
114
115	void resultsorderer_t::clear() {
116	compare_phrase_match = false;
117	compare_terms_match = false;
118	compare_doc_weight = true;
119
120	docset = NULL;
121	}
122
123	bool resultsorderer_t::operator()(const int &t1, const int &t2) const {
124	if (docset == NULL) return t1>t2;
125
126	docresultmap::iterator t1_here = docset->find(t1);
127	docresultmap::iterator t2_here = docset->find(t2);
128	docresultmap::iterator end = docset->end();
129
130	// sort all the document numbers not in the document set to
131	// the end of the list
132	if (t1_here == end) {
133	if (t2_here == end) return t1>t2;
134	else return true;
135	} else if (t2_here == end) return false;
136
137	if (compare_phrase_match) {
138	if ((t1_here).second.num_phrase_match > (t2_here).second.num_phrase_match) return true;
139	if ((t1_here).second.num_phrase_match < (t2_here).second.num_phrase_match) return false;
140	}
141
142	if (compare_terms_match) {
143	if ((t1_here).second.num_query_terms_matched > (t2_here).second.num_query_terms_matched) return true;
144	if ((t1_here).second.num_query_terms_matched < (t2_here).second.num_query_terms_matched) return false;
145	}
146
147	if (compare_doc_weight) {
148	if ((t1_here).second.docweight > (t2_here).second.docweight) return true;
149	if ((t1_here).second.docweight < (t2_here).second.docweight) return false;
150	}
151
152	return t1>t2;
153	}
154
155
156
157
158	/////////////////////////////////
159	// functions for queryfilterclass
160	/////////////////////////////////
161
162	// loads up phrases data structure with any phrases (that's the quoted bits)
163	// occuring in the querystring
164	void queryfilterclass::get_phrase_terms (const text_t &querystring,
165	const termfreqclassarray &orgterms,
166	vector<termfreqclassarray> &phrases) {
167
168	text_t::const_iterator here = querystring.begin();
169	text_t::const_iterator end = querystring.end();
170
171	termfreqclassarray tmpterms;
172
173	int termcount = 0;
174	bool foundquote = false;
175	bool foundbreak = false;
176	bool start = true;
177	while (here != end) {
178	if (*here == '\"') {
179	if (foundquote) {
180	if (!foundbreak && !start) {
181	tmpterms.push_back (orgterms[termcount]);
182	termcount ++;
183	}
184	if (tmpterms.size() > 1) {
185	phrases.push_back (tmpterms);
186	tmpterms.erase (tmpterms.begin(), tmpterms.end());
187	}
188	foundquote = false;
189	foundbreak = true;
190	} else foundquote = true;
191	} else if (!is_unicode_letdig(*here)) {
192	// found a break between terms
193	if (!foundbreak && !start) {
194	if (foundquote)
195	tmpterms.push_back (orgterms[termcount]);
196	termcount ++;
197	}
198	foundbreak = true;
199	} else {
200	start = false;
201	foundbreak = false;
202	}
203	here++;
204	}
205	}
206
207	// do aditional query processing
208	void queryfilterclass::post_process (const queryparamclass &queryparams,
209	queryresultsclass &queryresults) {
210
211	// post-process the results if needed
212	if (queryresults.orgterms.size() > 1 && !queryresults.docs.docset.empty()) {
213
214	// get the terms between quotes (if any)
215	vector<termfreqclassarray> phrases;
216	get_phrase_terms (queryparams.querystring, queryresults.orgterms, phrases);
217
218	if (phrases.size() > 0) {
219
220	// get the long version of the index
221	text_t longindex;
222	indexmap.to2from (queryparams.index, longindex);
223
224	vector<termfreqclassarray>::const_iterator this_phrase = phrases.begin();
225	vector<termfreqclassarray>::const_iterator end_phrase = phrases.end();
226
227	while (this_phrase != end_phrase) {
228
229	// process each of the matched documents
230	docresultmap::iterator docs_here = queryresults.docs.docset.begin();
231	docresultmap::iterator docs_end = queryresults.docs.docset.end();
232	while (docs_here != docs_end) {
233	if (OID_phrase_search (mgsearchptr, gdbmptr, queryparams.index,
234	queryparams.subcollection, queryparams.language,
235	longindex, queryparams.collection, *this_phrase,
236	(*docs_here).second.docnum)) {
237	(*docs_here).second.num_phrase_match++;
238	}
239
240	docs_here++;
241	}
242	this_phrase++;
243	}
244	}
245	}
246	}
247
248	// get the query parameters
249	void queryfilterclass::parse_query_params (const FilterRequest_t &request,
250	vector<queryparamclass> &query_params,
251	int &startresults,
252	int &endresults,
253	ostream &logout) {
254	outconvertclass text_t2ascii;
255
256	// set defaults for the return parameters
257	query_params.erase(query_params.begin(), query_params.end());
258	startresults = filterOptions["StartResults"].defaultValue.getint();
259	endresults = filterOptions["EndResults"].defaultValue.getint();
260
261	// set defaults for query parameters
262	queryparamclass query;
263	query.combinequery = "or"; // first one must be "or"
264	query.collection = collection;
265	query.index = filterOptions["Index"].defaultValue;
266	query.subcollection = filterOptions["Subcollection"].defaultValue;
267	query.language = filterOptions["Language"].defaultValue;
268	query.querystring.clear();
269	query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
270	query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
271	query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
272	query.stemming = (filterOptions["Stem"].defaultValue == "true");
273	query.maxdocs = MAXDOCS; // default for single query
274
275	OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
276	OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
277	while (options_here != options_end) {
278	if ((*options_here).name == "CombineQuery") {
279	// add this query
280
281	// "all", needed when combining queries where the document results are needed
282	if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
283	query_params.push_back (query);
284
285	// start on next query
286	query.clear();
287	query.combinequery = (*options_here).value;
288
289	// set defaults for query parameters
290	query.collection = collection;
291	query.index = filterOptions["Index"].defaultValue;
292	query.subcollection = filterOptions["Subcollection"].defaultValue;
293	query.language = filterOptions["Language"].defaultValue;
294	query.querystring.clear();
295	query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
296	query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
297	query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
298	query.stemming = (filterOptions["Stem"].defaultValue == "true");
299
300	// "all", needed when combining queries where the document results are needed
301	if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
302	else query.maxdocs = MAXDOCS; // "all"
303
304	} else if ((*options_here).name == "StartResults") {
305	startresults = (*options_here).value.getint();
306	} else if ((*options_here).name == "EndResults") {
307	endresults = (*options_here).value.getint();
308	} else if ((*options_here).name == "QueryType") {
309	query.search_type = ((*options_here).value == "ranked");
310	} else if ((*options_here).name == "MatchMode") {
311	query.match_mode = ((*options_here).value == "all");
312	if (query.match_mode == 1) query.maxdocs = -1;
313	} else if ((*options_here).name == "Term") {
314	query.querystring = (*options_here).value;
315	} else if ((*options_here).name == "Casefold") {
316	query.casefolding = ((*options_here).value == "true");
317	} else if ((*options_here).name == "Stem") {
318	query.stemming = ((*options_here).value == "true");
319	} else if ((*options_here).name == "Index") {
320	query.index = (*options_here).value;
321	} else if ((*options_here).name == "Subcollection") {
322	query.subcollection = (*options_here).value;
323	} else if ((*options_here).name == "Language") {
324	query.language = (*options_here).value;
325	} else {
326	logout << text_t2ascii
327	<< "warning: unknown queryfilter option \""
328	<< (*options_here).name
329	<< "\" ignored.\n\n";
330	}
331
332	options_here++;
333	}
334
335	// add the last query
336	query_params.push_back (query);
337	}
338
339
340
341	// do query that might involve multiple sub queries
342	// mgsearchptr and gdbmptr are assumed to be valid
343	void queryfilterclass::do_multi_query (const FilterRequest_t &request,
344	const vector<queryparamclass> &query_params,
345	queryresultsclass &multiresults,
346	comerror_t &err, ostream &logout) {
347	outconvertclass text_t2ascii;
348
349	err = noError;
350	mgsearchptr->setcollectdir (collectdir);
351	multiresults.clear();
352
353	vector<queryparamclass>::const_iterator query_here = query_params.begin();
354	vector<queryparamclass>::const_iterator query_end = query_params.end();
355	while (query_here != query_end) {
356	queryresultsclass thisqueryresults;
357
358	if (!mgsearchptr->search(*query_here, thisqueryresults)) {
359	// most likely a system problem
360	logout << text_t2ascii
361	<< "system problem: could not do search with mg for index \""
362	<< (query_here).index << (query_here).subcollection
363	<< (*query_here).language << "\".\n\n";
364	err = systemProblem;
365	return;
366	}
367
368	// combine the results
369	if (need_matching_docs (request.filterResultOptions)) {
370	// post-process the results if needed
371	if (!thisqueryresults.postprocessed && thisqueryresults.orgterms.size() > 1 &&
372	!thisqueryresults.docs.docset.empty()) {
373	post_process (*query_here, thisqueryresults);
374	thisqueryresults.postprocessed = true;
375	multiresults.postprocessed = true;
376	}
377
378	if (query_params.size() == 1) {
379	multiresults.docs = thisqueryresults.docs; // just one set of results
380	multiresults.docs_matched = thisqueryresults.docs_matched;
381	multiresults.is_approx = thisqueryresults.is_approx;
382
383	} else {
384	if ((*query_here).combinequery == "and") {
385	multiresults.docs.combine_and (thisqueryresults.docs);
386	} else if ((*query_here).combinequery == "or") {
387	multiresults.docs.combine_or (thisqueryresults.docs);
388	} else if ((*query_here).combinequery == "not") {
389	multiresults.docs.combine_not (thisqueryresults.docs);
390	}
391	multiresults.docs_matched = multiresults.docs.docset.size();
392	multiresults.is_approx = Exact;
393	}
394	}
395
396	// combine the term information
397	if (need_term_info (request.filterResultOptions)) {
398	// append the terms
399	multiresults.orgterms.insert(multiresults.orgterms.end(),
400	thisqueryresults.orgterms.begin(),
401	thisqueryresults.orgterms.end());
402
403	// add the term variants
404	text_tset::iterator termvar_here = thisqueryresults.termvariants.begin();
405	text_tset::iterator termvar_end = thisqueryresults.termvariants.end();
406	while (termvar_here != termvar_end) {
407	multiresults.termvariants.insert(*termvar_here);
408	termvar_here++;
409	}
410	}
411
412	query_here++;
413	}
414
415	// sort and unique the query terms
416	multiresults.sortuniqqueryterms ();
417	}
418
419
420	void queryfilterclass::sort_doc_results (const FilterRequest_t &/request/,
421	docresultsclass &docs) {
422	resultsorderer_t resultsorderer;
423	resultsorderer.compare_phrase_match = true;
424	resultsorderer.docset = &(docs.docset);
425
426	// first get a list of document numbers
427	docs.docnum_order();
428
429	sort (docs.docorder.begin(), docs.docorder.end(), resultsorderer);
430	}
431
432
433
434	queryfilterclass::queryfilterclass () {
435	gdbmptr = NULL;
436	mgsearchptr = NULL;
437
438	FilterOption_t filtopt;
439	filtopt.name = "CombineQuery";
440	filtopt.type = FilterOption_t::enumeratedt;
441	filtopt.repeatable = FilterOption_t::onePerQuery;
442	filtopt.defaultValue = "and";
443	filtopt.validValues.push_back("and");
444	filtopt.validValues.push_back("or");
445	filtopt.validValues.push_back("not");
446	filterOptions["CombineQuery"] = filtopt;
447
448	// -- onePerQuery StartResults integer
449	filtopt.clear();
450	filtopt.name = "StartResults";
451	filtopt.type = FilterOption_t::integert;
452	filtopt.repeatable = FilterOption_t::onePerQuery;
453	filtopt.defaultValue = "1";
454	filtopt.validValues.push_back("1");
455	filtopt.validValues.push_back("1000");
456	filterOptions["StartResults"] = filtopt;
457
458	// -- onePerQuery EndResults integer
459	filtopt.clear();
460	filtopt.name = "EndResults";
461	filtopt.type = FilterOption_t::integert;
462	filtopt.repeatable = FilterOption_t::onePerQuery;
463	filtopt.defaultValue = "10";
464	filtopt.validValues.push_back("1");
465	filtopt.validValues.push_back("1000");
466	filterOptions["EndResults"] = filtopt;
467
468	// -- onePerQuery QueryType enumerated (boolean, ranked)
469	filtopt.clear();
470	filtopt.name = "QueryType";
471	filtopt.type = FilterOption_t::enumeratedt;
472	filtopt.repeatable = FilterOption_t::onePerQuery;
473	filtopt.defaultValue = "ranked";
474	filtopt.validValues.push_back("boolean");
475	filtopt.validValues.push_back("ranked");
476	filterOptions["QueryType"] = filtopt;
477
478	// -- onePerQuery MatchMode enumerated (some, all)
479	filtopt.clear();
480	filtopt.name = "MatchMode";
481	filtopt.type = FilterOption_t::enumeratedt;
482	filtopt.repeatable = FilterOption_t::onePerQuery;
483	filtopt.defaultValue = "some";
484	filtopt.validValues.push_back("some");
485	filtopt.validValues.push_back("all");
486	filterOptions["QueryType"] = filtopt;
487
488	// -- onePerTerm Term string ???
489	filtopt.clear();
490	filtopt.name = "Term";
491	filtopt.type = FilterOption_t::stringt;
492	filtopt.repeatable = FilterOption_t::onePerTerm;
493	filtopt.defaultValue = "";
494	filterOptions["Term"] = filtopt;
495
496	// -- onePerTerm Casefold boolean
497	filtopt.clear();
498	filtopt.name = "Casefold";
499	filtopt.type = FilterOption_t::booleant;
500	filtopt.repeatable = FilterOption_t::onePerTerm;
501	filtopt.defaultValue = "true";
502	filtopt.validValues.push_back("false");
503	filtopt.validValues.push_back("true");
504	filterOptions["Casefold"] = filtopt;
505
506	// -- onePerTerm Stem boolean
507	filtopt.clear();
508	filtopt.name = "Stem";
509	filtopt.type = FilterOption_t::booleant;
510	filtopt.repeatable = FilterOption_t::onePerTerm;
511	filtopt.defaultValue = "false";
512	filtopt.validValues.push_back("false");
513	filtopt.validValues.push_back("true");
514	filterOptions["Stem"] = filtopt;
515
516	// -- onePerTerm Index enumerated
517	filtopt.clear();
518	filtopt.name = "Index";
519	filtopt.type = FilterOption_t::enumeratedt;
520	filtopt.repeatable = FilterOption_t::onePerTerm;
521	filtopt.defaultValue = "";
522	filterOptions["Index"] = filtopt;
523
524	// -- onePerTerm Subcollection enumerated
525	filtopt.clear();
526	filtopt.name = "Subcollection";
527	filtopt.type = FilterOption_t::enumeratedt;
528	filtopt.repeatable = FilterOption_t::onePerTerm;
529	filtopt.defaultValue = "";
530	filterOptions["Subcollection"] = filtopt;
531
532	// -- onePerTerm Language enumerated
533	filtopt.clear();
534	filtopt.name = "Language";
535	filtopt.type = FilterOption_t::enumeratedt;
536	filtopt.repeatable = FilterOption_t::onePerTerm;
537	filtopt.defaultValue = "";
538	filterOptions["Language"] = filtopt;
539	}
540
541	queryfilterclass::~queryfilterclass () {
542	}
543
544	void queryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
545	filterclass::configure (key, cfgline);
546
547	if (key == "indexmap") {
548	indexmap.importmap (cfgline);
549
550	// update the list of indexes in the filter information
551	text_tarray options;
552	indexmap.gettoarray (options);
553	filterOptions["Index"].validValues = options;
554
555	} else if (key == "defaultindex") {
556	indexmap.from2to (cfgline[0], filterOptions["Index"].defaultValue);
557
558	} else if (key == "subcollectionmap") {
559	subcollectionmap.importmap (cfgline);
560
561	// update the list of subcollections in the filter information
562	text_tarray options;
563	subcollectionmap.gettoarray (options);
564	filterOptions["Subcollection"].validValues = options;
565
566	} else if (key == "defaultsubcollection") {
567	subcollectionmap.from2to (cfgline[0], filterOptions["Subcollection"].defaultValue);
568
569	} else if (key == "languagemap") {
570	languagemap.importmap (cfgline);
571
572	// update the list of languages in the filter information
573	text_tarray options;
574	languagemap.gettoarray (options);
575	filterOptions["Language"].validValues = options;
576
577	} else if (key == "defaultlanguage")
578	languagemap.from2to (cfgline[0], filterOptions["Language"].defaultValue);
579	}
580
581	bool queryfilterclass::init (ostream &logout) {
582	outconvertclass text_t2ascii;
583
584	if (!filterclass::init(logout)) return false;
585
586	// get the filename for the database and make sure it exists
587
588
589	// yet another hack for niupepa
590	if (collection == "niupepa")
591	gdbm_filename = filename_cat(collectdir,"index.new","text",collection);
592	else
593	gdbm_filename = filename_cat(collectdir,"index","text",collection);
594
595	#ifdef _LITTLE_ENDIAN
596	gdbm_filename += ".ldb";
597	#else
598	gdbm_filename += ".bdb";
599	#endif
600	if (!file_exists(gdbm_filename)) {
601	logout << text_t2ascii
602	<< "error: gdbm database \""
603	<< gdbm_filename << "\" does not exist\n\n";
604	return false;
605	}
606
607	return true;
608	}
609
610	void queryfilterclass::filter (const FilterRequest_t &request,
611	FilterResponse_t &response,
612	comerror_t &err, ostream &logout) {
613	outconvertclass text_t2ascii;
614
615	response.clear ();
616	err = noError;
617	if (gdbmptr == NULL) {
618	// most likely a configuration problem
619	logout << text_t2ascii
620	<< "configuration error: queryfilter contains a null gdbmclass\n\n";
621	err = configurationError;
622	return;
623	}
624	if (mgsearchptr == NULL) {
625	// most likely a configuration problem
626	logout << text_t2ascii
627	<< "configuration error: queryfilter contains a null mgsearchclass\n\n";
628	err = configurationError;
629	return;
630	}
631
632	// open the database
633	gdbmptr->setlogout(&logout);
634	if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
635	// most likely a system problem (we have already checked that the
636	// gdbm database exists)
637	logout << text_t2ascii
638	<< "system problem: open on gdbm database \""
639	<< gdbm_filename << "\" failed\n\n";
640	err = systemProblem;
641	return;
642	}
643
644	// get the query parameters
645	int startresults = filterOptions["StartResults"].defaultValue.getint();
646	int endresults = filterOptions["EndResults"].defaultValue.getint();
647	vector<queryparamclass> queryfilterparams;
648	parse_query_params (request, queryfilterparams, startresults, endresults, logout);
649
650	// do query
651	queryresultsclass queryresults;
652	do_multi_query (request, queryfilterparams, queryresults, err, logout);
653	if (err != noError) return;
654
655	// assemble document results
656	if (need_matching_docs (request.filterResultOptions)) {
657	// sort the query results
658	sort_doc_results (request, queryresults.docs);
659
660	int resultnum = 1;
661	ResultDocInfo_t resultdoc;
662	text_t trans_OID;
663	vector<int>::iterator docorder_here = queryresults.docs.docorder.begin();
664	vector<int>::iterator docorder_end = queryresults.docs.docorder.end();
665
666	while (docorder_here != docorder_end) {
667	if (resultnum > endresults) break;
668
669	// translate the document number
670	if (!translate(gdbmptr, *docorder_here, trans_OID)) {
671	logout << text_t2ascii
672	<< "warning: could not translate mg document number \""
673	<< *docorder_here << "\"to OID.\n\n";
674
675	} else {
676	docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
677
678	// see if there is a result for this number,
679	// if it is in the request set (or the request set is empty)
680	if (docset_here != queryresults.docs.docset.end() &&
681	(request.docSet.empty() \|\| in_set(request.docSet, trans_OID))) {
682	if (resultnum >= startresults) {
683	// add this document
684	resultdoc.OID = trans_OID;
685	resultdoc.result_num = resultnum;
686	resultdoc.ranking = (int)((docset_here).second.docweight 10000.0 + 0.5);
687
688	// these next two are not available on all versions of mg
689	resultdoc.num_terms_matched = (*docset_here).second.num_query_terms_matched;
690	resultdoc.num_phrase_match = (*docset_here).second.num_phrase_match;
691
692	response.docInfo.push_back (resultdoc);
693	}
694
695	resultnum++;
696	}
697	}
698
699	docorder_here++;
700	}
701	}
702
703	// assemble the term results
704	if (need_term_info(request.filterResultOptions)) {
705	// note: the terms have already been sorted and uniqued
706
707	TermInfo_t terminfo;
708	bool terms_first = true;
709	termfreqclassarray::iterator terms_here = queryresults.terms.begin();
710	termfreqclassarray::iterator terms_end = queryresults.terms.end();
711
712	while (terms_here != terms_end) {
713	terminfo.clear();
714	terminfo.term = (*terms_here).termstr;
715	terminfo.freq = (*terms_here).termfreq;
716	if (terms_first) {
717	text_tset::iterator termvariants_here = queryresults.termvariants.begin();
718	text_tset::iterator termvariants_end = queryresults.termvariants.end();
719	while (termvariants_here != termvariants_end) {
720	terminfo.matchTerms.push_back (*termvariants_here);
721	termvariants_here++;
722	}
723	}
724	terms_first = false;
725
726	response.termInfo.push_back (terminfo);
727
728	terms_here++;
729	}
730	}
731
732	response.numDocs = queryresults.docs_matched;
733	response.isApprox = queryresults.is_approx;
734	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: