Ignore:
Timestamp:
1999-07-07T18:19:47+12:00 (25 years ago)
Author:
rjmcnab
Message:

Added ability to combine two or more independent queries.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/colservr/queryinfo.cpp

    r334 r351  
    1212/*
    1313   $Log$
     14   Revision 1.7  1999/07/07 06:19:47  rjmcnab
     15   Added ability to combine two or more independent queries.
     16
    1417   Revision 1.6  1999/07/01 09:29:21  rjmcnab
    1518   Changes for better reporting of number documents which match a query. Changes
     
    4447// query parameters
    4548
    46 queryparamclass &queryparamclass::operator=(const queryparamclass &q)
    47 {
     49queryparamclass::queryparamclass () {
     50  clear ();
     51}
     52
     53void queryparamclass::clear () {
     54  combinequery.clear();
     55  collection.clear();
     56  index.clear();
     57  subcollection.clear();
     58  language.clear();
     59  querystring.clear();
     60  search_type = 0; // 0 = boolean, 1 = ranked
     61  casefolding = 0;
     62  stemming = 0;
     63  maxdocs = -1;    // all
     64}
     65
     66
     67queryparamclass &queryparamclass::operator=(const queryparamclass &q) {
     68  combinequery = q.combinequery;
    4869  collection = q.collection;
    49   search_index = q.search_index;
     70  index = q.index;
     71  subcollection = q.subcollection;
     72  language = q.language;
    5073  querystring = q.querystring;
    5174  search_type = q.search_type;
     
    5881
    5982
    60 bool operator==(const queryparamclass &x, const queryparamclass &y)
    61 {
    62   return ((x.collection == y.collection) &&
    63       (x.search_index == y.search_index) &&
     83bool operator==(const queryparamclass &x, const queryparamclass &y) {
     84  return ((x.combinequery == y.combinequery) &&
     85      (x.collection == y.collection) &&
     86      (x.index == y.index) &&
     87      (x.subcollection == y.subcollection) &&
     88      (x.language == y.language) &&
    6489      (x.querystring == y.querystring) &&
    6590      (x.search_type == y.search_type) &&
     
    6994}
    7095
    71 bool operator!=(const queryparamclass &x, const queryparamclass &y)
    72 {
     96bool operator!=(const queryparamclass &x, const queryparamclass &y) {
    7397  return !(x == y);
    7498}
    7599
    76100
    77 ostream &operator<< (ostream &outs, queryparamclass &q)
    78 {
     101ostream &operator<< (ostream &outs, queryparamclass &q) {
    79102  outconvertclass text_t2ascii;
    80103
    81104  outs << "*** queryparamclass\n";
     105  outs << text_t2ascii << " combinequery = \"" << q.combinequery << "\"\n";
    82106  outs << text_t2ascii << " collection = \"" << q.collection << "\"\n";
    83   outs << text_t2ascii << " search_index = \"" << q.search_index << "\"\n";
     107  outs << text_t2ascii << " index = \"" << q.index << "\"\n";
     108  outs << text_t2ascii << " subcollection = \"" << q.subcollection << "\"\n";
     109  outs << text_t2ascii << " language = \"" << q.language << "\"\n";
    84110  outs << text_t2ascii << " querystring = \"" << q.querystring << "\"\n";
    85111  outs << " search_type = \"" << q.search_type << "\"\n";
     
    97123// term frequencies
    98124
    99 termfreqclass &termfreqclass::operator=(const termfreqclass &t)
    100 {
     125termfreqclass::termfreqclass () {
     126  clear();
     127}
     128
     129void termfreqclass::clear() {
     130  termstr.clear();
     131  termstemstr.clear();
     132  utf8equivterms.erase(utf8equivterms.begin(), utf8equivterms.end());
     133  termfreq = 0;
     134}
     135
     136termfreqclass &termfreqclass::operator=(const termfreqclass &t)  {
    101137  termstr = t.termstr;
    102138  termstemstr = t.termstemstr;
     
    107143}
    108144   
    109 bool operator==(const termfreqclass &x, const termfreqclass &y)
    110 {
     145bool operator==(const termfreqclass &x, const termfreqclass &y) {
    111146  return ((x.termstr == y.termstr) &&
    112147      (x.termstemstr == y.termstemstr) &&
     
    114149}
    115150
    116 bool operator!=(const termfreqclass &x, const termfreqclass &y)
    117 {
     151bool operator!=(const termfreqclass &x, const termfreqclass &y) {
    118152  return !(x == y);
    119153}
    120154
    121155// ordered by termfreq and then by termstr
    122 bool operator<(const termfreqclass &x, const termfreqclass &y)
    123 {
     156bool operator<(const termfreqclass &x, const termfreqclass &y) {
    124157  return ((x.termfreq < y.termfreq) ||
    125158      ((x.termfreq == y.termfreq) && (x.termstemstr < y.termstemstr)) ||
     
    127160}
    128161
    129 bool operator>(const termfreqclass &x, const termfreqclass &y)
    130 {
     162bool operator>(const termfreqclass &x, const termfreqclass &y) {
    131163  return ((x.termfreq > y.termfreq) ||
    132164      ((x.termfreq == y.termfreq) && (x.termstemstr > y.termstemstr)) ||
     
    135167
    136168// stream output for debugging purposes
    137 ostream &operator<< (ostream &outs, termfreqclass &t)
    138 {
     169ostream &operator<< (ostream &outs, termfreqclass &t) {
    139170  outconvertclass text_t2ascii;
    140171
     
    150181// one query result
    151182
     183docresultclass::docresultclass() {
     184  clear ();
     185}
     186
     187void docresultclass::clear () {
     188  docnum=-1;
     189  docweight=0.0;
     190  num_query_terms_matched=0;
     191  num_phrase_match=0;
     192}
     193
     194// merges two result classes relating to a single docnum
     195docresultclass &docresultclass::combine(const docresultclass &d) {
     196  docweight += d.docweight; // budget!
     197  num_query_terms_matched += d.num_query_terms_matched;
     198  num_phrase_match += d.num_phrase_match;
     199
     200  return *this;
     201}
     202
     203docresultclass &docresultclass::operator=(const docresultclass &d) {
     204  docnum = d.docnum;
     205  docweight = d.docweight;
     206  num_query_terms_matched = d.num_query_terms_matched;
     207  num_phrase_match = d.num_phrase_match;
     208
     209  return *this;
     210}
     211
     212
    152213// stream output for debugging purposes
    153 ostream &operator<< (ostream &outs, docresultclass &a)
    154 {
     214ostream &operator<< (ostream &outs, docresultclass &a) {
    155215  outs << " d:" << a.docnum << " w:" << a.docweight << "\n";
    156216  return outs;
    157217}
     218
     219
     220
     221// many document results
     222
     223docresultsclass::docresultsclass () {
     224  clear ();
     225}
     226
     227void docresultsclass::clear () {
     228  docset.erase(docset.begin(), docset.end());
     229  docorder.erase(docorder.begin(), docorder.end());
     230}
     231
     232void docresultsclass::docnum_order() {
     233  docorder.erase(docorder.begin(), docorder.end());
     234 
     235  docresultmap::iterator here = docset.begin();
     236  docresultmap::iterator end = docset.end();
     237  while (here != end) {
     238    docorder.push_back ((*here).first);
     239    here++;
     240  }
     241}
     242
     243void docresultsclass::combine_and (const docresultsclass &d) {
     244  docorder.erase(docorder.begin(), docorder.end());
     245
     246  // put the resulting set in tempresults
     247  docresultmap tempresults;
     248 
     249  docresultmap::const_iterator d_here = d.docset.begin();
     250  docresultmap::const_iterator d_end = d.docset.end();
     251  docresultmap::iterator found = docset.end();
     252  while (d_here != d_end) {
     253    found = docset.find((*d_here).first);
     254    if (found != docset.end()) {
     255      (*found).second.combine ((*d_here).second);
     256      tempresults[(*found).first] = (*found).second;
     257    }
     258    d_here++;
     259  }
     260
     261  // then copy it back to docset
     262  docset = tempresults;
     263}
     264
     265void docresultsclass::combine_or (const docresultsclass &d) {
     266  docorder.erase(docorder.begin(), docorder.end());
     267   
     268  docresultmap::const_iterator d_here = d.docset.begin();
     269  docresultmap::const_iterator d_end = d.docset.end();
     270  docresultmap::iterator found = docset.end();
     271  while (d_here != d_end) {
     272    found = docset.find((*d_here).first);
     273    if (found != docset.end()) {
     274      (*found).second.combine ((*d_here).second);
     275    } else {
     276      docset[(*d_here).first] = (*d_here).second;
     277    }
     278    d_here++;
     279  }
     280}
     281
     282void docresultsclass::combine_not (const docresultsclass &d) {
     283  docorder.erase(docorder.begin(), docorder.end());
     284
     285 docresultmap::const_iterator d_here = d.docset.begin();
     286 docresultmap::const_iterator d_end = d.docset.end();
     287 docresultmap::iterator found = docset.end();
     288 while (d_here != d_end) {
     289   found = docset.find((*d_here).first);
     290   if (found != docset.end()) docset.erase (found);
     291   d_here++;
     292 }
     293}
     294
     295docresultsclass &docresultsclass::operator=(const docresultsclass &d) {
     296  docset = d.docset;
     297  docorder = d.docorder;
     298
     299  return *this;
     300}
     301
    158302
    159303
     
    167311  postprocessed = false;
    168312
    169   docs.erase(docs.begin(),docs.end());
     313  docs.clear();
    170314  orgterms.erase(orgterms.begin(),orgterms.end());
    171315  terms.erase(terms.begin(),terms.end());
     
    186330
    187331void queryresultsclass::sortuniqqueryterms() {
    188   terms = orgterms;
    189 
    190   // sort the terms
     332  vector<termfreqclass> tempterms = orgterms;
     333  text_tset seenterms;
     334  terms.clear();
     335
     336  // sort the terms to get the frequencies in ascending order
     337  sort (tempterms.begin(), tempterms.end());
     338
     339  // insert first occurrence of each term (maximum)
     340  vector<termfreqclass>::reverse_iterator here = tempterms.rbegin();
     341  vector<termfreqclass>::reverse_iterator end = tempterms.rend();
     342  while (here != end) {
     343    if (seenterms.find((*here).termstr) == seenterms.end()) {
     344      // the termstemstr and utf8equivterms might be different for
     345      // different occurrences of the term
     346      (*here).termstemstr.clear();
     347      (*here).utf8equivterms.erase((*here).utf8equivterms.begin(),
     348                   (*here).utf8equivterms.end());
     349      terms.push_back(*here);
     350      seenterms.insert((*here).termstr);
     351    }
     352    here++;
     353  }
     354
     355  // now re-sort in ascending order
    191356  sort (terms.begin(), terms.end());
    192 
    193   // and then unique them
    194   vector<termfreqclass>::iterator new_end = unique (terms.begin(), terms.end());
    195   terms.erase(new_end, terms.end());
    196357}
    197358
    198359
    199360// stream output for debugging purposes
    200 ostream &operator<< (ostream &outs, queryresultsclass &q)
    201 {
     361ostream &operator<< (ostream &outs, queryresultsclass &q) {
    202362  outs << "*** queryresultsclass\n";
    203363  outs << "docs\n";
    204364
    205   vector<docresultclass>::iterator docshere = q.docs.begin();
    206   vector<docresultclass>::iterator docsend = q.docs.end();
     365  docresultmap::iterator docshere = q.docs.docset.begin();
     366  docresultmap::iterator docsend = q.docs.docset.end();
    207367  while (docshere != docsend) {
    208     outs << (*docshere);
     368    outs << (*docshere).second;
    209369    docshere++;
    210370  }
Note: See TracChangeset for help on using the changeset viewer.