/**********************************************************************
 *
 * queryinfo.cpp --
 * Copyright (C) 1999 The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "queryinfo.h"


// query parameters

// Constructs a parameter set holding the defaults established by clear().
queryparamclass::queryparamclass () {
  clear ();
}

// Resets every query parameter to its default value.
void queryparamclass::clear () {
  combinequery.clear();
  collection.clear();
  index.clear();
  subcollection.clear();
  language.clear();
  level.clear();
  querystring.clear();
  search_type = 0;    // 0 = boolean, 1 = ranked
  match_mode = 0;     // 0 = some, 1 = all
  casefolding = 0;
  stemming = 0;
  accentfolding = 0;
  maxdocs = -1;       // all
  maxnumeric = 4;     // must default to the same value as mg_passes
  filterstring.clear();
  sortfield.clear();
  fuzziness.clear();
  // NOTE(review): these two were previously commented "all", which looks
  // copied from maxdocs; the defaults are simply 1 and 10.
  startresults = 1;
  endresults = 10;
}

// Member-wise assignment of every query parameter from q.
queryparamclass &queryparamclass::operator=(const queryparamclass &q) {
  combinequery = q.combinequery;
  collection = q.collection;
  index = q.index;
  subcollection = q.subcollection;
  language = q.language;
  level = q.level;
  querystring = q.querystring;
  search_type = q.search_type;
  match_mode = q.match_mode;
  casefolding = q.casefolding;
  stemming = q.stemming;
  accentfolding = q.accentfolding;
  maxdocs = q.maxdocs;
  maxnumeric = q.maxnumeric;
  filterstring = q.filterstring;
  sortfield = q.sortfield;
  fuzziness = q.fuzziness;
  startresults = q.startresults;
  endresults = q.endresults;

  return *this;
}

// Two parameter sets are equal when every field assigned by operator=
// matches. endresults is included here; it used to be skipped because
// startresults was compared twice by mistake.
bool operator==(const queryparamclass &x, const queryparamclass &y) {
  return ((x.combinequery == y.combinequery) &&
          (x.collection == y.collection) &&
          (x.index == y.index) &&
          (x.subcollection == y.subcollection) &&
          (x.language == y.language) &&
          (x.level == y.level) &&
          (x.querystring == y.querystring) &&
          (x.search_type == y.search_type) &&
          (x.match_mode == y.match_mode) &&
          (x.casefolding == y.casefolding) &&
          (x.stemming == y.stemming) &&
          (x.accentfolding == y.accentfolding) &&
          (x.maxdocs == y.maxdocs) &&
          (x.maxnumeric == y.maxnumeric) &&
          (x.filterstring == y.filterstring) &&
          (x.sortfield == y.sortfield) &&
          (x.fuzziness == y.fuzziness) &&
          (x.startresults == y.startresults) &&
          (x.endresults == y.endresults));
}

// Negation of operator==.
bool operator!=(const queryparamclass &x, const queryparamclass &y) {
  return !(x == y);
}

// stream output for debugging purposes: writes one "name = "value"" line
// per parameter, followed by a blank line
ostream &operator<< (ostream &outs, queryparamclass &q) {
  outconvertclass text_t2ascii;

  outs << "*** queryparamclass\n";
  outs << text_t2ascii << " combinequery = \"" << q.combinequery << "\"\n";
  outs << text_t2ascii << " collection = \"" << q.collection << "\"\n";
  outs << text_t2ascii << " index = \"" << q.index << "\"\n";
  outs << text_t2ascii << " level = \"" << q.level << "\"\n";
  outs << text_t2ascii << " subcollection = \"" << q.subcollection << "\"\n";
  outs << text_t2ascii << " language = \"" << q.language << "\"\n";
  outs << text_t2ascii << " querystring = \"" << q.querystring << "\"\n";
  outs << " search_type = \"" << q.search_type << "\"\n";
  outs << " match_mode = \"" << q.match_mode << "\"\n";
  outs << " casefolding = \"" << q.casefolding << "\"\n";
  outs << " stemming = \"" << q.stemming << "\"\n";
  outs << " accentfolding = \"" << q.accentfolding << "\"\n";
  outs << " maxdocs = \"" << q.maxdocs << "\"\n";
  outs << " maxnumeric = \"" << q.maxnumeric << "\"\n";
  outs << " filterstring = \"" << q.filterstring << "\"\n";
  outs << " sortfield = \"" << q.sortfield << "\"\n";
  outs << " fuzziness = \"" << q.fuzziness << "\"\n";
  outs << " startresults = \"" << q.startresults << "\"\n";
  outs << " endresults = \"" << q.endresults << "\"\n";
  outs << "\n";

  return outs;
}


// term frequencies

// Constructs an empty term record (see clear()).
termfreqclass::termfreqclass () {
  clear();
}

// Empties the term, its stem and its equivalent terms, and zeroes the
// frequency.
void termfreqclass::clear() {
  termstr.clear();
  termstemstr.clear();
  utf8equivterms.erase(utf8equivterms.begin(), utf8equivterms.end());
  termfreq = 0;
}

// Member-wise assignment from t.
termfreqclass &termfreqclass::operator=(const termfreqclass &t) {
  termstr = t.termstr;
  termstemstr = t.termstemstr;
  utf8equivterms = t.utf8equivterms;
  termfreq = t.termfreq;

  return *this;
}

// Equality on termstr, termstemstr and termfreq.
// utf8equivterms is not part of the comparison.
bool operator==(const termfreqclass &x, const termfreqclass &y) {
  return ((x.termstr == y.termstr) &&
          (x.termstemstr == y.termstemstr) &&
          (x.termfreq == y.termfreq));
}

// Negation of operator==.
bool operator!=(const termfreqclass &x, const termfreqclass &y) {
  return !(x == y);
}

// ordered by termfreq, then by termstemstr and finally by termstr
bool operator<(const termfreqclass &x, const termfreqclass &y) {
  return ((x.termfreq < y.termfreq) ||
          ((x.termfreq == y.termfreq) && (x.termstemstr < y.termstemstr)) ||
          ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) &&
           (x.termstr < y.termstr)));
}

// mirror image of operator<: termfreq, then termstemstr, then termstr
bool operator>(const termfreqclass &x, const termfreqclass &y) {
  return ((x.termfreq > y.termfreq) ||
          ((x.termfreq == y.termfreq) && (x.termstemstr > y.termstemstr)) ||
          ((x.termfreq == y.termfreq) && (x.termstemstr == y.termstemstr) &&
           (x.termstr > y.termstr)));
}

// stream output for debugging purposes
ostream &operator<< (ostream &outs, termfreqclass &t) {
  outconvertclass text_t2ascii;

  outs << text_t2ascii << " t:\"" << t.termstr << "\"";
  outs << text_t2ascii << " s:\"" << t.termstemstr << "\"";
  outs << " f:" << t.termfreq << "\n";

  return outs;
}


// one query result

// Constructs a result in the state established by clear().
docresultclass::docresultclass() {
  clear ();
}

// Resets the result: docnum becomes -1, the weight and match counts zero.
void docresultclass::clear () {
  docnum=-1;
  docweight=0.0;
  num_query_terms_matched=0;
  num_phrase_match=0;
}

// merges two result classes relating to a single docnum
// (docnum itself is left untouched; weights and match counts are added)
docresultclass &docresultclass::combine(const docresultclass &d) {
  docweight += d.docweight; // budget! (the weights are simply summed)
  num_query_terms_matched += d.num_query_terms_matched;
  num_phrase_match += d.num_phrase_match;

  return *this;
}

// Member-wise assignment from d.
docresultclass &docresultclass::operator=(const docresultclass &d) {
  docnum = d.docnum;
  docweight = d.docweight;
  num_query_terms_matched = d.num_query_terms_matched;
  num_phrase_match = d.num_phrase_match;

  return *this;
}

// Equality on all four fields (docweight is compared exactly).
bool operator==(const docresultclass &x, const docresultclass &y) {
  return ((x.docnum == y.docnum) &&
          (x.docweight == y.docweight) &&
          (x.num_query_terms_matched == y.num_query_terms_matched) &&
          (x.num_phrase_match == y.num_phrase_match));
}

// Lexicographic ordering on docnum, docweight, num_query_terms_matched
// and num_phrase_match, in that order.
bool operator<(const docresultclass &x, const docresultclass &y) {
  return ((x.docnum < y.docnum) ||
          ((x.docnum == y.docnum) &&
           ((x.docweight < y.docweight) ||
            ((x.docweight == y.docweight) &&
             ((x.num_query_terms_matched < y.num_query_terms_matched) ||
              ((x.num_query_terms_matched == y.num_query_terms_matched) &&
               ((x.num_phrase_match < y.num_phrase_match))))))));
}

// stream output for debugging purposes
ostream &operator<< (ostream &outs, docresultclass &a) {
  outs << " d:" << a.docnum << " w:" << a.docweight << "\n";
  return outs;
}


// many document results

// Constructs an empty result set.
docresultsclass::docresultsclass () {
  clear ();
}

// Removes every document from both the set and the ordering.
void docresultsclass::clear () {
  docset.erase(docset.begin(), docset.end());
  docorder.erase(docorder.begin(), docorder.end());
}

// Rebuilds docorder so that it lists the keys of docset in the order in
// which docset iterates over them.
void docresultsclass::docnum_order() {
  docorder.erase(docorder.begin(), docorder.end());

  docresultmap::iterator here = docset.begin();
  docresultmap::iterator end = docset.end();
  while (here != end) {
    docorder.push_back ((*here).first);
    ++here;
  }
}

// Intersects this result set with d: only documents present in both are
// kept, and their entries are merged with docresultclass::combine.
// Any existing ordering is discarded.
void docresultsclass::combine_and (const docresultsclass &d) {
  docorder.erase(docorder.begin(), docorder.end());

  // put the resulting set in tempresults
  docresultmap tempresults;

  docresultmap::const_iterator d_here = d.docset.begin();
  docresultmap::const_iterator d_end = d.docset.end();
  docresultmap::iterator found = docset.end();
  while (d_here != d_end) {
    found = docset.find((*d_here).first);
    if (found != docset.end()) {
      (*found).second.combine ((*d_here).second);
      tempresults[(*found).first] = (*found).second;
    }
    ++d_here;
  }

  // then copy it back to docset
  docset = tempresults;
}

// Unions this result set with d: documents present in both are merged with
// docresultclass::combine, documents only in d are copied in.
// Any existing ordering is discarded.
void docresultsclass::combine_or (const docresultsclass &d) {
  docorder.erase(docorder.begin(), docorder.end());

  docresultmap::const_iterator d_here = d.docset.begin();
  docresultmap::const_iterator d_end = d.docset.end();
  docresultmap::iterator found = docset.end();
  while (d_here != d_end) {
    found = docset.find((*d_here).first);
    if (found != docset.end()) {
      (*found).second.combine ((*d_here).second);
    } else {
      docset[(*d_here).first] = (*d_here).second;
    }
    ++d_here;
  }
}

// Removes from this result set every document that also appears in d.
// Any existing ordering is discarded.
void docresultsclass::combine_not (const docresultsclass &d) {
  docorder.erase(docorder.begin(), docorder.end());

  docresultmap::const_iterator d_here = d.docset.begin();
  docresultmap::const_iterator d_end = d.docset.end();
  docresultmap::iterator found = docset.end();
  while (d_here != d_end) {
    found = docset.find((*d_here).first);
    if (found != docset.end()) docset.erase (found);
    ++d_here;
  }
}

// Copies both the document set and its ordering from d.
docresultsclass &docresultsclass::operator=(const docresultsclass &d) {
  docset = d.docset;
  docorder = d.docorder;

  return *this;
}


// query results

// Resets the status fields and empties the documents and both term lists.
// NOTE(review): termvariants is copied by operator= but is not reset here;
// confirm whether that is intentional.
void queryresultsclass::clear () {
  error_message = g_EmptyText;
  docs_matched = 0;
  is_approx = Exact;
  syntax_error = false;
  postprocessed = false;

  docs.clear();
  orgterms.erase(orgterms.begin(),orgterms.end());
  terms.erase(terms.begin(),terms.end());
}

// Member-wise assignment from q.
queryresultsclass &queryresultsclass::operator=(const queryresultsclass &q) {
  error_message = q.error_message;
  docs_matched = q.docs_matched;
  is_approx = q.is_approx;
  syntax_error = q.syntax_error;
  postprocessed = q.postprocessed;

  docs = q.docs;
  // orgterms must be copied as well: sortuniqqueryterms() rebuilds terms
  // from it, so leaving it out would keep the target's stale terms around
  orgterms = q.orgterms;
  terms = q.terms;
  termvariants = q.termvariants;

  return *this;
}

// Rebuilds terms from orgterms so that each distinct termstr appears once,
// keeping the occurrence that sorts highest (i.e. the maximum frequency),
// with the result in ascending order. orgterms itself is not modified.
void queryresultsclass::sortuniqqueryterms() {
  termfreqclassarray tempterms = orgterms;
  text_tset seenterms;

  terms.erase(terms.begin(), terms.end());

  // sort the terms to get the frequencies in ascending order
  sort (tempterms.begin(), tempterms.end());

  // insert first occurrence of each term (maximum)
  termfreqclassarray::reverse_iterator here = tempterms.rbegin();
  termfreqclassarray::reverse_iterator end = tempterms.rend();
  while (here != end) {
    if (seenterms.find((*here).termstr) == seenterms.end()) {
      // the termstemstr and utf8equivterms might be different for
      // different occurrences of the term
      (*here).termstemstr.clear();
      (*here).utf8equivterms.erase((*here).utf8equivterms.begin(),
                                   (*here).utf8equivterms.end());
      terms.push_back(*here);
      seenterms.insert((*here).termstr);
    }
    ++here;
  }

  // now re-sort in ascending order
  sort (terms.begin(), terms.end());
}

// stream output for debugging purposes: dumps the documents, the original
// terms and the sorted/uniqued terms
ostream &operator<< (ostream &outs, queryresultsclass &q) {
  outs << "*** queryresultsclass\n";
  outs << "docs\n";

  docresultmap::iterator docshere = q.docs.docset.begin();
  docresultmap::iterator docsend = q.docs.docset.end();
  while (docshere != docsend) {
    outs << (*docshere).second;
    ++docshere;
  }

  outs << "orgterms\n";
  termfreqclassarray::iterator orgtermshere = q.orgterms.begin();
  termfreqclassarray::iterator orgtermsend = q.orgterms.end();
  while (orgtermshere != orgtermsend) {
    outs << (*orgtermshere);
    ++orgtermshere;
  }

  outs << "terms\n";
  termfreqclassarray::iterator termshere = q.terms.begin();
  termfreqclassarray::iterator termsend = q.terms.end();
  while (termshere != termsend) {
    outs << (*termshere);
    ++termshere;
  }

  outs << "\n";

  return outs;
}