root/gsdl/trunk/src/colservr/mgppqueryfilter.cpp @ 15757

Revision 15681, 12.1 KB (checked in by mdewsnip, 12 years ago)

Removed some unnecessary inclusions of "assert.h".

  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * mgppqueryfilter.cpp --
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "mgppqueryfilter.h"
27#include "fileutil.h"
28#include "mgppsearch.h"
29
30/////////////////////////////////
31// functions for mgppqueryfilterclass
32/////////////////////////////////
33
34
35mgppqueryfilterclass::mgppqueryfilterclass ()
36  : queryfilterclass() {
37
38 
39  FilterOption_t filtopt;
40 
41  // -- onePerTerm  Level          enumerated
42  // likely to be Doc, Sec, Para, but we dont assume anything now
43  filtopt.clear();
44  filtopt.name = "Level";
45  filtopt.type = FilterOption_t::enumeratedt;
46  filtopt.repeatable = FilterOption_t::onePerTerm;
47  filterOptions["Level"] = filtopt;
48
49  // --  IndexField, enumerated, used to list available fields
50  filtopt.clear();
51  filtopt.name = "IndexField";
52  filtopt.type = FilterOption_t::enumeratedt;
53  filtopt.repeatable = FilterOption_t::onePerTerm;
54  filtopt.defaultValue = "";
55  filterOptions["IndexField"] = filtopt;
56
57}
58
59mgppqueryfilterclass::~mgppqueryfilterclass () {
60}
61
62
63//whether a query is a full text browse
64bool mgppqueryfilterclass::full_text_browse (int filterRequestOptions) {
65  return (filterRequestOptions & FRfullTextBrowse);
66}
67
68void mgppqueryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
69  queryfilterclass::configure(key, cfgline);
70
71  if (key == "indexfieldmap") {
72    indexfieldmap.importmap (cfgline);
73   
74    // update the list of indexes in the filter information
75    text_tarray options;
76    indexfieldmap.gettoarray (options);
77    filterOptions["IndexField"].validValues = options;
78   
79  } else if (key == "indexlevels") {
80    text_tarray::const_iterator here = cfgline.begin();
81    text_tarray::const_iterator end = cfgline.end();
82    bool first=true;
83    filterOptions["Level"].validValues.erase(filterOptions["Level"].validValues.begin(), filterOptions["Level"].validValues.end());
84    while (here != end) {
85      if (!(*here).empty()) {
86    if (first) {
87      first = false;
88      // the default is the first value
89      filterOptions["Level"].defaultValue = *here;
90    }
91    filterOptions["Level"].validValues.push_back(*here);
92      }
93      ++here;
94    }
95  } else if (key == "textlevel") {
96      ((mgppsearchclass *)textsearchptr)->set_text_level(cfgline[0]);
97  } else if (key == "indexstem") {
98    ((mgppsearchclass *)textsearchptr)->set_indexstem (cfgline[0]);
99  } else if (key == "defaultindex") { // used for fields in mgpp
100    indexfieldmap.from2to (cfgline[0], filterOptions["IndexField"].defaultValue);
101  }
102 
103}
104
105bool mgppqueryfilterclass::init (ostream &logout) {
106 
107  if (!queryfilterclass::init(logout)) {
108    return false;
109  }
110 
111  if (filterOptions["IndexField"].defaultValue.empty()) {
112    // use first index in map as default if no default is set explicitly
113    text_tarray fromarray;
114    indexfieldmap.getfromarray(fromarray);
115    if (fromarray.size()) {
116      filterOptions["IndexField"].defaultValue = fromarray[0];
117    }
118  }
119  return true;
120}
121
122void mgppqueryfilterclass::filter(const FilterRequest_t &request,
123                  FilterResponse_t &response,
124                  comerror_t &err, ostream &logout) { 
125
126
127  outconvertclass text_t2ascii;
128
129  response.clear ();
130  err = noError;
131  if (db_ptr == NULL) {
132    // most likely a configuration problem
133    logout << text_t2ascii
134       << "configuration error: queryfilter contains a null dbclass\n\n";
135    err = configurationError;
136    return;
137  }
138  if (textsearchptr == NULL) {
139    // most likely a configuration problem
140    logout << text_t2ascii
141       << "configuration error: queryfilter contains a null textsearchclass for mgpp\n\n";
142    err = configurationError;
143    return;
144  }
145  if (full_text_browse(request.filterResultOptions)) {
146    browsefilter(request, response, err, logout);
147    return;
148  }
149  // open the database
150  db_ptr->setlogout(&logout);
151  if (!db_ptr->opendatabase (db_filename, DB_READER, 100, false)) {
152    // most likely a system problem (we have already checked that the database exists)
153    logout << text_t2ascii
154       << "system problem: open on database \"" << db_filename << "\" failed\n\n";
155    err = systemProblem;
156    return;
157  }
158
159
160  // get the query parameters
161  int startresults, endresults;
162  text_t phrasematch; // not used here any more
163  vector<queryparamclass> queryfilterparams;
164  parse_query_params (request, queryfilterparams, startresults,
165              endresults, phrasematch, logout); 
166 
167   
168  // do query
169  queryresultsclass queryresults;
170  do_multi_query (request, queryfilterparams, queryresults, err, logout);
171  if (err != noError) return;
172  // assemble document results
173  if (need_matching_docs (request.filterResultOptions)) {
174   
175    int resultnum = 1;
176    ResultDocInfo_t resultdoc;
177    text_t trans_OID;
178    vector<int>::iterator docorder_here = queryresults.docs.docorder.begin();
179    vector<int>::iterator docorder_end = queryresults.docs.docorder.end();
180
181    if (endresults == -1) endresults = MAXNUMDOCS;
182    while (docorder_here != docorder_end) {
183      if (resultnum > endresults) break;
184     
185      // translate the document number
186      if (!translate(db_ptr, *docorder_here, trans_OID)) {
187    logout << text_t2ascii
188           << "warning: could not translate mgpp document number \""
189           << *docorder_here << "\"to OID.\n\n";
190   
191      } else {
192    docresultmap::iterator docset_here = queryresults.docs.docset.find (*docorder_here);
193
194    // see if there is a result for this number,
195    // if it is in the request set (or the request set is empty)
196    if (docset_here != queryresults.docs.docset.end() &&
197        (request.docSet.empty() || in_set(request.docSet, trans_OID))) {
198      if (resultnum >= startresults) {
199        // add this document
200        resultdoc.OID = trans_OID;
201        resultdoc.result_num = resultnum;
202        resultdoc.ranking = (int)((*docset_here).second.docweight * 10000.0 + 0.5);
203
204        response.docInfo.push_back (resultdoc);
205      }
206     
207      ++resultnum;
208    }
209      } // else
210     
211      ++docorder_here;
212    }
213  } // if need matching docs
214
215  // assemble the term results
216  if (need_term_info(request.filterResultOptions)) {
217    // note: the terms have already been sorted and uniqued - ?? have they??
218
219    TermInfo_t terminfo;
220    bool terms_first = true;
221
222    termfreqclassarray::iterator terms_here = queryresults.terms.begin();
223    termfreqclassarray::iterator terms_end = queryresults.terms.end();
224
225    while (terms_here != terms_end) {
226      terminfo.clear();
227      terminfo.term = (*terms_here).termstr;
228      terminfo.freq = (*terms_here).termfreq;
229
230      // this bit gets the matchTerms ie the equivalent (stem/casefold) terms
231      if (terms_first) {
232    text_tset::iterator termvariants_here = queryresults.termvariants.begin();
233    text_tset::iterator termvariants_end = queryresults.termvariants.end();
234    while (termvariants_here != termvariants_end) {
235      terminfo.matchTerms.push_back (*termvariants_here);
236      ++termvariants_here;
237    }
238      }
239      terms_first = false;
240     
241      response.termInfo.push_back (terminfo);
242
243      ++terms_here;
244    }
245  }
246
247  db_ptr->closedatabase();  // Important that local library doesn't leave any files open
248  response.numDocs = queryresults.docs_matched;
249  response.isApprox = queryresults.is_approx;
250}
251
252void mgppqueryfilterclass::browsefilter(const FilterRequest_t &request,
253                    FilterResponse_t &response,
254                    comerror_t &err, ostream &logout) { 
255
256  outconvertclass text_t2ascii;
257
258  // get the query parameters
259  int startresults, endresults;
260  text_t phrasematch; // not used here any more, just have it so can use
261                      // parse_query_params function
262 
263  vector<queryparamclass> queryfilterparams;
264  parse_query_params (request, queryfilterparams, startresults,
265              endresults, phrasematch, logout); 
266
267    vector<queryparamclass>::const_iterator query_here = queryfilterparams.begin();
268   
269  // do query
270  queryresultsclass queryresults;
271  queryresults.clear();
272 
273  int numDocs = endresults-startresults;
274  textsearchptr->setcollectdir (collectdir);
275
276  if (!((mgppsearchclass*)textsearchptr)->browse_search((*query_here), startresults, numDocs, queryresults)) {
277    // most likely a system problem
278    logout << text_t2ascii
279       << "system problem: could not do full text browse with mgpp for index \""
280       << (*query_here).index << (*query_here).subcollection
281       << (*query_here).language << "\".\n\n";
282    err = systemProblem;
283    return;
284  }
285
286  // assemble the term results
287  TermInfo_t terminfo;
288 
289  termfreqclassarray::iterator terms_here = queryresults.terms.begin();
290  termfreqclassarray::iterator terms_end = queryresults.terms.end();
291
292  while (terms_here != terms_end) {
293    terminfo.clear();
294    terminfo.term = (*terms_here).termstr;
295    terminfo.freq = (*terms_here).termfreq;
296   
297    response.termInfo.push_back (terminfo);
298
299    ++terms_here;
300  }
301 
302
303}
304
305// textsearchptr and db_ptr are assumed to be valid
306void mgppqueryfilterclass::do_multi_query (const FilterRequest_t &request,
307                       const vector<queryparamclass> &query_params,
308                       queryresultsclass &multiresults,
309                       comerror_t &err, ostream &logout) {
310  outconvertclass text_t2ascii;
311
312  err = noError;
313  textsearchptr->setcollectdir (collectdir);
314  multiresults.clear();
315 
316  vector<queryparamclass>::const_iterator query_here = query_params.begin();
317  vector<queryparamclass>::const_iterator query_end = query_params.end();
318  while (query_here != query_end) {
319    queryresultsclass thisqueryresults;
320    text_t indx((*query_here).index);
321    if (!textsearchptr->search((*query_here), thisqueryresults)) {
322      // most likely a system problem
323      logout << text_t2ascii
324         << "system problem: could not do search with mgpp for index \""
325         << (*query_here).index << (*query_here).subcollection
326         << (*query_here).language << "\".\n\n";
327      err = systemProblem;
328      return;
329    }
330
331    // check for syntax error
332    if (thisqueryresults.syntax_error==true) {
333      logout << text_t2ascii
334         << "syntax problem: invalid query string \""
335         << (*query_here).querystring<<"\".\n";
336      err = syntaxError;
337      return;
338    }
339    // combine the results
340    if (need_matching_docs (request.filterResultOptions)) {
341           
342      if (query_params.size() == 1) {
343    multiresults.docs = thisqueryresults.docs; // just one set of results
344    multiresults.docs_matched = thisqueryresults.docs_matched;
345    multiresults.is_approx = thisqueryresults.is_approx;
346   
347      } else {
348    if ((*query_here).combinequery == "and") {
349      multiresults.docs.combine_and (thisqueryresults.docs);
350    } else if ((*query_here).combinequery == "or") {
351      multiresults.docs.combine_or (thisqueryresults.docs);
352    } else if ((*query_here).combinequery == "not") {
353      multiresults.docs.combine_not (thisqueryresults.docs);
354    }
355    multiresults.docs_matched = multiresults.docs.docset.size();
356    multiresults.is_approx = Exact;
357      }
358    }
359
360    // combine the term information
361    if (need_term_info (request.filterResultOptions)) {
362      // append the terms
363      multiresults.orgterms.insert(multiresults.orgterms.end(),
364                   thisqueryresults.orgterms.begin(),
365                   thisqueryresults.orgterms.end());
366
367     
368      // add the term variants -
369      text_tset::iterator termvar_here = thisqueryresults.termvariants.begin();
370      text_tset::iterator termvar_end = thisqueryresults.termvariants.end();
371      while (termvar_here != termvar_end) {
372    multiresults.termvariants.insert(*termvar_here);
373    ++termvar_here;
374      }
375    }
376   
377    ++query_here;
378  }
379
380  // sort and unique the query terms
381  multiresults.sortuniqqueryterms ();
382}
383
384
385
Note: See TracBrowser for help on using the browser.