/**********************************************************************
 *
 * queryinfo.cpp --
 * Copyright (C) 1999 The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: queryinfo.cpp 534 1999-09-07 04:57:43Z sjboddie $
 *
 *********************************************************************/

/*
   $Log$
   Revision 1.12 1999/09/07 04:57:24 sjboddie
   added gpl notice

   Revision 1.11 1999/08/31 22:47:57 rjmcnab
   Added MatchMode.

   Revision 1.10 1999/07/16 03:42:23 sjboddie
   changed isApprox

   Revision 1.9 1999/07/16 00:14:01 sjboddie
   added termfreqclassarray type

   Revision 1.8 1999/07/09 02:19:44 rjmcnab
   Fixed a couple of compiler conflicts

   Revision 1.7 1999/07/07 06:19:47 rjmcnab
   Added ability to combine two or more independant queries.

   Revision 1.6 1999/07/01 09:29:21 rjmcnab
   Changes for better reporting of number documents which match a query.
   Changes should still work as before with older versions of mg.

   Revision 1.5 1999/07/01 03:56:17 rjmcnab
   Added a set of utf8 encoded equivalent terms of a query term. I also
   added a flag for handling post-processing of the query.

   Revision 1.4 1999/06/30 04:04:13 rjmcnab
   made stemming functions available from mgsearch and made the stems
   for the query terms available in queryinfo

   Revision 1.3 1999/06/29 22:06:23 rjmcnab
   Added a couple of fields to queryinfo to handle a special version
   of mg.

   Revision 1.2 1999/01/12 01:51:02 rjmcnab
   Standard header.

   Revision 1.1 1999/01/08 09:02:18 rjmcnab
   Moved from src/library.
 */

#include "queryinfo.h"


// ---------------------------------------------------------------------
// query parameters
// ---------------------------------------------------------------------

// constructs a parameter set holding the defaults established by clear()
queryparamclass::queryparamclass () {
  clear ();
}

// resets every parameter to its default value
void queryparamclass::clear () {
  combinequery.clear();
  collection.clear();
  index.clear();
  subcollection.clear();
  language.clear();
  querystring.clear();
  search_type = 0; // 0 = boolean, 1 = ranked
  match_mode = 0;  // 0 = some, 1 = all
  casefolding = 0;
  stemming = 0;
  maxdocs = -1;    // all
}

// member-wise copy of all query parameters
queryparamclass &queryparamclass::operator=(const queryparamclass &q) {
  combinequery = q.combinequery;
  collection = q.collection;
  index = q.index;
  subcollection = q.subcollection;
  language = q.language;
  querystring = q.querystring;
  search_type = q.search_type;
  match_mode = q.match_mode;
  casefolding = q.casefolding;
  stemming = q.stemming;
  maxdocs = q.maxdocs;

  return *this;
}

// two parameter sets are equal only when every field matches
bool operator==(const queryparamclass &x, const queryparamclass &y) {
  return ((x.combinequery == y.combinequery) &&
          (x.collection == y.collection) &&
          (x.index == y.index) &&
          (x.subcollection == y.subcollection) &&
          (x.language == y.language) &&
          (x.querystring == y.querystring) &&
          (x.search_type == y.search_type) &&
          (x.match_mode == y.match_mode) &&
          (x.casefolding == y.casefolding) &&
          (x.stemming == y.stemming) &&
          (x.maxdocs == y.maxdocs));
}

bool operator!=(const queryparamclass &x, const queryparamclass &y) {
  return !(x == y);
}

// stream output for debugging purposes; the text fields are passed
// through an outconvertclass before being written
ostream &operator<< (ostream &outs, queryparamclass &q) {
  outconvertclass text_t2ascii;

  outs << "*** queryparamclass\n";
  outs << text_t2ascii << " combinequery = \"" << q.combinequery << "\"\n";
  outs << text_t2ascii << " collection = \"" << q.collection << "\"\n";
  outs << text_t2ascii << " index = \"" << q.index << "\"\n";
  outs << text_t2ascii << " subcollection = \"" << q.subcollection << "\"\n";
  outs << text_t2ascii << " language = \"" << q.language << "\"\n";
  outs << text_t2ascii << " querystring = \"" << q.querystring << "\"\n";
  outs << " search_type = \"" << q.search_type << "\"\n";
  outs << " match_mode = \"" << q.match_mode << "\"\n";
  outs << " casefolding = \"" << q.casefolding << "\"\n";
  outs << " stemming = \"" << q.stemming << "\"\n";
  outs << " maxdocs = \"" << q.maxdocs << "\"\n";
  outs << "\n";

  return outs;
}


// ---------------------------------------------------------------------
// term frequencies
// ---------------------------------------------------------------------

// constructs an empty term with a frequency of zero
termfreqclass::termfreqclass () {
  clear();
}

// empties the term, its stem and its equivalent terms and zeroes
// the frequency
void termfreqclass::clear() {
  termstr.clear();
  termstemstr.clear();
  utf8equivterms.erase(utf8equivterms.begin(), utf8equivterms.end());
  termfreq = 0;
}

// member-wise copy, including the set of equivalent terms
termfreqclass &termfreqclass::operator=(const termfreqclass &t) {
  termstr = t.termstr;
  termstemstr = t.termstemstr;
  utf8equivterms = t.utf8equivterms;
  termfreq = t.termfreq;

  return *this;
}

// note: utf8equivterms does not take part in the comparison
bool operator==(const termfreqclass &x, const termfreqclass &y) {
  return ((x.termstr == y.termstr) &&
          (x.termstemstr == y.termstemstr) &&
          (x.termfreq == y.termfreq));
}

bool operator!=(const termfreqclass &x, const termfreqclass &y) {
  return !(x == y);
}

// ordered by termfreq, then by termstemstr and finally by termstr
bool operator<(const termfreqclass &x, const termfreqclass &y) {
  return ((x.termfreq < y.termfreq) ||
          ((x.termfreq == y.termfreq) && (x.termstemstr < y.termstemstr)) ||
          ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) &&
           (x.termstr < y.termstr)));
}

// the mirror image of operator< (same keys, same priority)
bool operator>(const termfreqclass &x, const termfreqclass &y) {
  return ((x.termfreq > y.termfreq) ||
          ((x.termfreq == y.termfreq) && (x.termstemstr > y.termstemstr)) ||
          ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) &&
           (x.termstr > y.termstr)));
}

// stream output for debugging purposes
ostream &operator<< (ostream &outs, termfreqclass &t) {
  outconvertclass text_t2ascii;

  outs << text_t2ascii << " t:\"" << t.termstr << "\"";
  outs << text_t2ascii << " s:\"" << t.termstemstr << "\"";
  outs << " f:" << t.termfreq << "\n";

  return outs;
}


// ---------------------------------------------------------------------
// one query result
// ---------------------------------------------------------------------

docresultclass::docresultclass() {
  clear ();
}

// resets the result: docnum becomes -1, the weight and both match
// counters become zero
void docresultclass::clear () {
  docnum=-1;
  docweight=0.0;
  num_query_terms_matched=0;
  num_phrase_match=0;
}

// merges two result classes relating to a single docnum; docnum itself
// is left untouched, the weight and the match counters are summed
docresultclass &docresultclass::combine(const docresultclass &d) {
  docweight += d.docweight; // budget! (the weights are simply added)
  num_query_terms_matched += d.num_query_terms_matched;
  num_phrase_match += d.num_phrase_match;

  return *this;
}

// member-wise copy
docresultclass &docresultclass::operator=(const docresultclass &d) {
  docnum = d.docnum;
  docweight = d.docweight;
  num_query_terms_matched = d.num_query_terms_matched;
  num_phrase_match = d.num_phrase_match;

  return *this;
}

// equal only when every field matches (docweight is compared exactly)
bool operator==(const docresultclass &x, const docresultclass &y) {
  return ((x.docnum == y.docnum) &&
          (x.docweight == y.docweight) &&
          (x.num_query_terms_matched == y.num_query_terms_matched) &&
          (x.num_phrase_match == y.num_phrase_match));
}

// lexicographic order on docnum, docweight, num_query_terms_matched
// and num_phrase_match (in that priority)
bool operator<(const docresultclass &x, const docresultclass &y) {
  return ((x.docnum < y.docnum) ||
          ((x.docnum == y.docnum) &&
           ((x.docweight < y.docweight) ||
            ((x.docweight == y.docweight) &&
             ((x.num_query_terms_matched < y.num_query_terms_matched) ||
              ((x.num_query_terms_matched == y.num_query_terms_matched) &&
               ((x.num_phrase_match < y.num_phrase_match))))))));
}

// stream output for debugging purposes
ostream &operator<< (ostream &outs, docresultclass &a) {
  outs << " d:" << a.docnum << " w:" << a.docweight << "\n";
  return outs;
}


// ---------------------------------------------------------------------
// many document results
// ---------------------------------------------------------------------

docresultsclass::docresultsclass () {
  clear ();
}

// removes every document from both the set and the ordering
void docresultsclass::clear () {
  docset.erase(docset.begin(), docset.end());
  docorder.erase(docorder.begin(), docorder.end());
}

// rebuilds docorder so that it lists the keys of docset in the
// order in which docset iterates over them
void docresultsclass::docnum_order() {
  docorder.erase(docorder.begin(), docorder.end());

  docresultmap::iterator here = docset.begin();
  docresultmap::iterator end = docset.end();
  while (here != end) {
    docorder.push_back ((*here).first);
    ++here;
  }
}

// keeps only the documents present in both this set and d, combining
// the two results for each of them; docorder is emptied
void docresultsclass::combine_and (const docresultsclass &d) {
  docorder.erase(docorder.begin(), docorder.end());

  // put the resulting set in tempresults
  docresultmap tempresults;

  docresultmap::const_iterator d_here = d.docset.begin();
  docresultmap::const_iterator d_end = d.docset.end();
  docresultmap::iterator found = docset.end();
  while (d_here != d_end) {
    found = docset.find((*d_here).first);
    if (found != docset.end()) {
      (*found).second.combine ((*d_here).second);
      tempresults[(*found).first] = (*found).second;
    }
    ++d_here;
  }

  // then copy it back to docset
  docset = tempresults;
}

// adds the documents of d to this set; results for documents already
// present are combined; docorder is emptied
void docresultsclass::combine_or (const docresultsclass &d) {
  docorder.erase(docorder.begin(), docorder.end());

  docresultmap::const_iterator d_here = d.docset.begin();
  docresultmap::const_iterator d_end = d.docset.end();
  docresultmap::iterator found = docset.end();
  while (d_here != d_end) {
    found = docset.find((*d_here).first);
    if (found != docset.end()) {
      (*found).second.combine ((*d_here).second);
    } else {
      docset[(*d_here).first] = (*d_here).second;
    }
    ++d_here;
  }
}

// removes from this set every document which is present in d;
// docorder is emptied
void docresultsclass::combine_not (const docresultsclass &d) {
  docorder.erase(docorder.begin(), docorder.end());

  docresultmap::const_iterator d_here = d.docset.begin();
  docresultmap::const_iterator d_end = d.docset.end();
  docresultmap::iterator found = docset.end();
  while (d_here != d_end) {
    found = docset.find((*d_here).first);
    if (found != docset.end()) docset.erase (found);
    ++d_here;
  }
}

// member-wise copy of the document set and its ordering
docresultsclass &docresultsclass::operator=(const docresultsclass &d) {
  docset = d.docset;
  docorder = d.docorder;

  return *this;
}


// ---------------------------------------------------------------------
// query results
// ---------------------------------------------------------------------

// resets the results to an empty, exact, non-postprocessed state
void queryresultsclass::clear () {
  docs_matched = 0;
  is_approx = Exact;
  postprocessed = false;

  docs.clear();
  orgterms.erase(orgterms.begin(),orgterms.end());
  terms.erase(terms.begin(),terms.end());
  // termvariants is part of the state copied by operator=, so it has to
  // be reset here as well, otherwise a cleared object keeps stale variants
  termvariants.erase(termvariants.begin(),termvariants.end());
}

// member-wise copy of the complete result state
queryresultsclass &queryresultsclass::operator=(const queryresultsclass &q) {
  docs_matched = q.docs_matched;
  is_approx = q.is_approx;
  postprocessed = q.postprocessed;

  docs = q.docs;
  // orgterms must be copied too: sortuniqqueryterms() rebuilds terms
  // from orgterms, so leaving the old value behind would make the
  // copy inconsistent with its source
  orgterms = q.orgterms;
  terms = q.terms;
  termvariants = q.termvariants;

  return *this;
}

// rebuilds terms from orgterms: one entry per distinct termstr, taken
// from the occurrence which sorts highest (the maximum frequency), with
// the stem and the equivalent terms removed; terms ends up sorted in
// ascending order
void queryresultsclass::sortuniqqueryterms() {
  termfreqclassarray tempterms = orgterms;
  text_tset seenterms;

  terms.erase(terms.begin(), terms.end());

  // sort the terms to get the frequencies in ascending order
  sort (tempterms.begin(), tempterms.end());

  // insert first occurrence of each term (maximum)
  termfreqclassarray::reverse_iterator here = tempterms.rbegin();
  termfreqclassarray::reverse_iterator end = tempterms.rend();
  while (here != end) {
    if (seenterms.find((*here).termstr) == seenterms.end()) {
      // the termstemstr and utf8equivterms might be different for
      // different occurrences of the term
      (*here).termstemstr.clear();
      (*here).utf8equivterms.erase((*here).utf8equivterms.begin(),
                                   (*here).utf8equivterms.end());
      terms.push_back(*here);
      seenterms.insert((*here).termstr);
    }
    ++here;
  }

  // now re-sort in ascending order
  sort (terms.begin(), terms.end());
}

// stream output for debugging purposes; writes the documents, the
// original terms and the sorted/uniqued terms
ostream &operator<< (ostream &outs, queryresultsclass &q) {
  outs << "*** queryresultsclass\n";
  outs << "docs\n";

  docresultmap::iterator docshere = q.docs.docset.begin();
  docresultmap::iterator docsend = q.docs.docset.end();
  while (docshere != docsend) {
    outs << (*docshere).second;
    ++docshere;
  }

  outs << "orgterms\n";
  termfreqclassarray::iterator orgtermshere = q.orgterms.begin();
  termfreqclassarray::iterator orgtermsend = q.orgterms.end();
  while (orgtermshere != orgtermsend) {
    outs << (*orgtermshere);
    ++orgtermshere;
  }

  outs << "terms\n";
  termfreqclassarray::iterator termshere = q.terms.begin();
  termfreqclassarray::iterator termsend = q.terms.end();
  while (termshere != termsend) {
    outs << (*termshere);
    ++termshere;
  }

  outs << "\n";

  return outs;
}