/**********************************************************************
 *
 * queryinfo.cpp -- 
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: queryinfo.cpp 534 1999-09-07 04:57:43Z sjboddie $
 *
 *********************************************************************/

/*
   $Log$
   Revision 1.12  1999/09/07 04:57:24  sjboddie
   added gpl notice

   Revision 1.11  1999/08/31 22:47:57  rjmcnab
   Added MatchMode.

   Revision 1.10  1999/07/16 03:42:23  sjboddie
   changed isApprox

   Revision 1.9  1999/07/16 00:14:01  sjboddie
   added termfreqclassarray type

   Revision 1.8  1999/07/09 02:19:44  rjmcnab
   Fixed a couple of compiler conflicts

   Revision 1.7  1999/07/07 06:19:47  rjmcnab
   Added ability to combine two or more independent queries.

   Revision 1.6  1999/07/01 09:29:21  rjmcnab
   Changes for better reporting of the number of documents which match a query.
   Changes should still work as before with older versions of mg.

   Revision 1.5  1999/07/01 03:56:17  rjmcnab
   Added a set of utf8 encoded equivalent terms of a query term. I also
   added a flag for handling post-processing of the query.

   Revision 1.4  1999/06/30 04:04:13  rjmcnab
   made stemming functions available from mgsearch and made the stems
   for the query terms available in queryinfo

   Revision 1.3  1999/06/29 22:06:23  rjmcnab
   Added a couple of fields to queryinfo to handle a special version
   of mg.

   Revision 1.2  1999/01/12 01:51:02  rjmcnab

   Standard header.

   Revision 1.1  1999/01/08 09:02:18  rjmcnab

   Moved from src/library.

 */


#include "queryinfo.h"


// query parameters

// Default constructor: starts the object in the state that clear()
// establishes.
queryparamclass::queryparamclass () {
  this->clear ();
}

// Resets every query parameter: the numeric options go back to their
// default values and all text fields are emptied.
void queryparamclass::clear () {
  // numeric options
  maxdocs = -1;     // -1 = all
  stemming = 0;
  casefolding = 0;
  match_mode = 0;   // 0 = some, 1 = all
  search_type = 0;  // 0 = boolean, 1 = ranked

  // text fields
  querystring.clear();
  language.clear();
  subcollection.clear();
  index.clear();
  collection.clear();
  combinequery.clear();
}


// Assignment: copies each query parameter from q, member by member,
// and returns a reference to this object.
queryparamclass &queryparamclass::operator=(const queryparamclass &q) {
  // numeric options
  search_type = q.search_type;
  match_mode  = q.match_mode;
  casefolding = q.casefolding;
  stemming    = q.stemming;
  maxdocs     = q.maxdocs;

  // text fields
  combinequery  = q.combinequery;
  collection    = q.collection;
  index         = q.index;
  subcollection = q.subcollection;
  language      = q.language;
  querystring   = q.querystring;

  return *this;
}


// Two parameter sets are equal when every one of their fields is equal.
// The fields are checked one after another and the first mismatch ends
// the comparison.
bool operator==(const queryparamclass &x, const queryparamclass &y) {
  if (!(x.combinequery == y.combinequery)) return false;
  if (!(x.collection == y.collection)) return false;
  if (!(x.index == y.index)) return false;
  if (!(x.subcollection == y.subcollection)) return false;
  if (!(x.language == y.language)) return false;
  if (!(x.querystring == y.querystring)) return false;
  if (!(x.search_type == y.search_type)) return false;
  if (!(x.match_mode == y.match_mode)) return false;
  if (!(x.casefolding == y.casefolding)) return false;
  if (!(x.stemming == y.stemming)) return false;

  return (x.maxdocs == y.maxdocs);
}

// Inequality, defined as the negation of operator== for queryparamclass.
bool operator!=(const queryparamclass &x, const queryparamclass &y) {
  const bool same = (x == y);
  return !same;
}


// Writes a human-readable dump of q to outs, one field per line, and
// returns outs.  The text fields are written through a default-constructed
// outconvertclass (presumably a text_t to ASCII conversion -- confirm
// against outconvertclass); the numeric fields go straight to the stream.
ostream &operator<< (ostream &outs, queryparamclass &q) {
  outconvertclass converter;

  outs << "*** queryparamclass\n";

  // text fields
  outs << converter << " combinequery = \"" << q.combinequery << "\"\n";
  outs << converter << " collection = \"" << q.collection << "\"\n";
  outs << converter << " index = \"" << q.index << "\"\n";
  outs << converter << " subcollection = \"" << q.subcollection << "\"\n";
  outs << converter << " language = \"" << q.language << "\"\n";
  outs << converter << " querystring = \"" << q.querystring << "\"\n";

  // numeric fields, followed by a blank separator line
  outs << " search_type = \"" << q.search_type << "\"\n"
       << " match_mode = \"" << q.match_mode << "\"\n"
       << " casefolding = \"" << q.casefolding << "\"\n"
       << " stemming = \"" << q.stemming << "\"\n"
       << " maxdocs = \"" << q.maxdocs << "\"\n"
       << "\n";

  return outs;
}


// term frequencies

// Default constructor: starts the object in the state that clear()
// establishes.
termfreqclass::termfreqclass () {
  this->clear();
}

// Resets the term: frequency back to zero, both strings emptied and the
// collection of utf8 equivalent terms emptied.
void termfreqclass::clear() {
  termfreq = 0;
  utf8equivterms.erase(utf8equivterms.begin(), utf8equivterms.end());
  termstemstr.clear();
  termstr.clear();
}

// Assignment: copies the frequency, both term strings and the utf8
// equivalent terms from t, and returns a reference to this object.
termfreqclass &termfreqclass::operator=(const termfreqclass &t)  {
  termfreq       = t.termfreq;
  utf8equivterms = t.utf8equivterms;
  termstemstr    = t.termstemstr;
  termstr        = t.termstr;

  return *this;
}
	
// Two terms are equal when termstr, termstemstr and termfreq all match.
// Note that utf8equivterms does not take part in the comparison.
bool operator==(const termfreqclass &x, const termfreqclass &y) {
  if (!(x.termstr == y.termstr)) return false;
  if (!(x.termstemstr == y.termstemstr)) return false;

  return (x.termfreq == y.termfreq);
}

// Inequality, defined as the negation of operator== for termfreqclass.
bool operator!=(const termfreqclass &x, const termfreqclass &y) {
  const bool same = (x == y);
  return !same;
}

// Ordering used when sorting terms: primarily by termfreq, then by
// termstemstr, and finally by termstr.
bool operator<(const termfreqclass &x, const termfreqclass &y) {
  // primary key: frequency
  if (x.termfreq < y.termfreq) return true;
  if (!(x.termfreq == y.termfreq)) return false;

  // secondary key: stemmed form of the term
  if (x.termstemstr < y.termstemstr) return true;
  if (!(x.termstemstr == y.termstemstr)) return false;

  // final tie-break: the term itself
  return (x.termstr < y.termstr);
}

// Mirror image of operator<: compares by termfreq first, then by
// termstemstr, and finally by termstr.
bool operator>(const termfreqclass &x, const termfreqclass &y) {
  // primary key: frequency
  if (x.termfreq > y.termfreq) return true;
  if (!(x.termfreq == y.termfreq)) return false;

  // secondary key: stemmed form of the term
  if (x.termstemstr > y.termstemstr) return true;
  if (!(x.termstemstr == y.termstemstr)) return false;

  // final tie-break: the term itself
  return (x.termstr > y.termstr);
}

// Debugging aid: writes the term (t:), its stem (s:) and its frequency
// (f:) to outs on a single line and returns outs.  The two strings are
// written through a default-constructed outconvertclass (presumably a
// text_t to ASCII conversion -- confirm against outconvertclass).
ostream &operator<< (ostream &outs, termfreqclass &t) {
  outconvertclass converter;

  // the two text fields
  outs << converter << " t:\"" << t.termstr << "\"";
  outs << converter << " s:\"" << t.termstemstr << "\"";

  // the frequency ends the line
  outs << " f:" << t.termfreq << "\n";

  return outs;
}


// one query result

// Default constructor: starts the object in the state that clear()
// establishes.
docresultclass::docresultclass() {
  this->clear ();
}

// Resets the result: docnum becomes -1, and the weight and both match
// counters become zero.
void docresultclass::clear () {
  num_phrase_match = 0;
  num_query_terms_matched = 0;
  docweight = 0.0;
  docnum = -1;
}

// Folds another result for the same document into this one by adding
// d's weight and match counters to this object's.  docnum is left
// untouched.  Returns a reference to this object.
docresultclass &docresultclass::combine(const docresultclass &d) {
  num_phrase_match += d.num_phrase_match;
  num_query_terms_matched += d.num_query_terms_matched;

  // the weights are simply summed (marked "budget!" by the original author)
  docweight += d.docweight;

  return *this;
}

// Assignment: copies docnum, the weight and both match counters from d,
// and returns a reference to this object.
docresultclass &docresultclass::operator=(const docresultclass &d) {
  num_phrase_match        = d.num_phrase_match;
  num_query_terms_matched = d.num_query_terms_matched;
  docweight               = d.docweight;
  docnum                  = d.docnum;

  return *this;
}


// Two results are equal when docnum, docweight and both match counters
// are all equal.  docweight is compared with an exact ==.
bool operator==(const docresultclass &x, const docresultclass &y) {
  if (!(x.docnum == y.docnum)) return false;
  if (!(x.docweight == y.docweight)) return false;
  if (!(x.num_query_terms_matched == y.num_query_terms_matched)) return false;

  return (x.num_phrase_match == y.num_phrase_match);
}

// Lexicographic ordering over (docnum, docweight,
// num_query_terms_matched, num_phrase_match): a later field is only
// consulted when all earlier fields compare equal.
bool operator<(const docresultclass &x, const docresultclass &y) {
  if (x.docnum < y.docnum) return true;
  if (!(x.docnum == y.docnum)) return false;

  if (x.docweight < y.docweight) return true;
  if (!(x.docweight == y.docweight)) return false;

  if (x.num_query_terms_matched < y.num_query_terms_matched) return true;
  if (!(x.num_query_terms_matched == y.num_query_terms_matched)) return false;

  return (x.num_phrase_match < y.num_phrase_match);
}


// Debugging aid: writes the document number (d:) and weight (w:) of a
// result on one line and returns outs.  The match counters are not
// printed.
ostream &operator<< (ostream &outs, docresultclass &a) {
  outs << " d:" << a.docnum;
  outs << " w:" << a.docweight << "\n";

  return outs;
}


// many document results

// Default constructor: starts the object in the state that clear()
// establishes.
docresultsclass::docresultsclass () {
  this->clear ();
}

// Empties both the ordering list (docorder) and the set of document
// results (docset).
void docresultsclass::clear () {
  docorder.erase(docorder.begin(), docorder.end());
  docset.erase(docset.begin(), docset.end());
}

void docresultsclass::docnum_order() {
  docorder.erase(docorder.begin(), docorder.end());
  
  docresultmap::iterator here = docset.begin();
  docresultmap::iterator end = docset.end();
  while (here != end) {
    docorder.push_back ((*here).first);
    here++;
  }
}

void docresultsclass::combine_and (const docresultsclass &d) {
  docorder.erase(docorder.begin(), docorder.end());

  // put the resulting set in tempresults
  docresultmap tempresults;
  
  docresultmap::const_iterator d_here = d.docset.begin();
  docresultmap::const_iterator d_end = d.docset.end();
  docresultmap::iterator found = docset.end();
  while (d_here != d_end) {
    found = docset.find((*d_here).first);
    if (found != docset.end()) {
      (*found).second.combine ((*d_here).second);
      tempresults[(*found).first] = (*found).second;
    }
    d_here++;
  }

  // then copy it back to docset
  docset = tempresults;
}

void docresultsclass::combine_or (const docresultsclass &d) {
  docorder.erase(docorder.begin(), docorder.end());
    
  docresultmap::const_iterator d_here = d.docset.begin();
  docresultmap::const_iterator d_end = d.docset.end();
  docresultmap::iterator found = docset.end();
  while (d_here != d_end) {
    found = docset.find((*d_here).first);
    if (found != docset.end()) {
      (*found).second.combine ((*d_here).second);
    } else {
      docset[(*d_here).first] = (*d_here).second;
    }
    d_here++;
  }
}

void docresultsclass::combine_not (const docresultsclass &d) {
  docorder.erase(docorder.begin(), docorder.end());

 docresultmap::const_iterator d_here = d.docset.begin();
 docresultmap::const_iterator d_end = d.docset.end();
 docresultmap::iterator found = docset.end();
 while (d_here != d_end) {
   found = docset.find((*d_here).first);
   if (found != docset.end()) docset.erase (found);
   d_here++;
 }
}

// Assignment: copies both the ordering list and the set of document
// results from d, and returns a reference to this object.
docresultsclass &docresultsclass::operator=(const docresultsclass &d) {
  docorder = d.docorder;
  docset = d.docset;

  return *this;
}


// query results

void queryresultsclass::clear () {
  docs_matched = 0;
  is_approx = Exact;

  postprocessed = false;

  docs.clear();
  orgterms.erase(orgterms.begin(),orgterms.end());
  terms.erase(terms.begin(),terms.end());
}

// Assignment: copies the match count, the approximation flag, the
// post-processing flag, the document results and all term collections
// from q, and returns a reference to this object.
queryresultsclass &queryresultsclass::operator=(const queryresultsclass &q) {
  docs_matched = q.docs_matched;
  is_approx = q.is_approx;

  postprocessed = q.postprocessed;
  
  docs = q.docs;
  // orgterms is the input of sortuniqqueryterms() and is printed by
  // operator<<, so a copy has to carry it as well; without this the
  // target would silently keep its own previous orgterms
  orgterms = q.orgterms;
  terms = q.terms;
  termvariants = q.termvariants;

  return *this;
}

void queryresultsclass::sortuniqqueryterms() {
  termfreqclassarray tempterms = orgterms;
  text_tset seenterms;
  terms.erase(terms.begin(), terms.end());

  // sort the terms to get the frequencies in ascending order
  sort (tempterms.begin(), tempterms.end());

  // insert first occurance of each term (maximum)
  termfreqclassarray::reverse_iterator here = tempterms.rbegin();
  termfreqclassarray::reverse_iterator end = tempterms.rend();
  while (here != end) {
    if (seenterms.find((*here).termstr) == seenterms.end()) {
      // the termstemstr and utf8equivterms might be different for
      // different occurances of the term
      (*here).termstemstr.clear();
      (*here).utf8equivterms.erase((*here).utf8equivterms.begin(),
				   (*here).utf8equivterms.end());
      terms.push_back(*here);
      seenterms.insert((*here).termstr);
    }
    here++;
  }

  // now re-sort in ascending order
  sort (terms.begin(), terms.end());
}


// Debugging aid: dumps a queryresultsclass to outs as three labelled
// sections -- every document result, the original terms, and the
// sorted/unique terms -- followed by a blank line.  Returns outs.
ostream &operator<< (ostream &outs, queryresultsclass &q) {
  outs << "*** queryresultsclass\n";

  // document results, in docset order
  outs << "docs\n";
  const docresultmap::iterator docstop = q.docs.docset.end();
  for (docresultmap::iterator doc = q.docs.docset.begin();
       doc != docstop; ++doc) {
    outs << (*doc).second;
  }

  // the terms as originally recorded
  outs << "orgterms\n";
  const termfreqclassarray::iterator orgstop = q.orgterms.end();
  for (termfreqclassarray::iterator org = q.orgterms.begin();
       org != orgstop; ++org) {
    outs << (*org);
  }

  // the terms after sortuniqqueryterms()
  outs << "terms\n";
  const termfreqclassarray::iterator termstop = q.terms.end();
  for (termfreqclassarray::iterator term = q.terms.begin();
       term != termstop; ++term) {
    outs << (*term);
  }

  outs << "\n";

  return outs;
}