/**********************************************************************
 *
 * queryaction.cpp -- 
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the 
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "queryaction.h"
#include "querytools.h"
#include "formattools.h"
#include "cgiutils.h"
#include "OIDtools.h"
#include "fileutil.h"
#include "text_t.h"
#include "historydb.h"
#include "htmlutils.h" // for html_safe in do_action
#include "gsdltools.h"
#include "phrases.h" // for get_phrases
#include <stdlib.h> // for strtol
// NOTE(review): one further #include directive followed here, but its header
// name is missing from this copy of the file -- restore it from the pristine
// source before building.

namespace {

  // Describes one cgi argument registered by the queryaction constructor.
  // Every argument uses defaultstatus = cgiarginfo::weak and
  // savedarginfo = cgiarginfo::must, so only the varying fields are listed.
  struct query_argspec {
    const char *shortname;
    const char *longname;
    bool multiplechar;
    bool multiplevalue;
    const char *argdefault;  // NULL means "use g_EmptyText"
  };

  // The arguments, in registration order.
  const query_argspec query_argspecs[] = {
    // this action uses cgi variable "a"
    { "a",    "action",                                true,  false, "q"  },
    // "ct" - 0 = mg, 1 = mgpp, 2 = lucene (can be empty or single char)
    { "ct",   "collection type",                       true,  false, NULL },
    // "b" - 0 = simple, 1 = advanced
    { "b",    "query mode",                            false, false, "0"  },
    { "h",    "main index",                            true,  false, NULL },
    { "h2",   "main index for second query",           true,  false, NULL },
    { "j",    "sub collection index",                  true,  false, NULL },
    { "j2",   "sub collection index for second query", true,  false, NULL },
    { "n",    "language index",                        true,  false, NULL },
    { "n2",   "language index for second query",       true,  false, NULL },
    // "t" - 1 = ranked 0 = boolean
    { "t",    "search type",                           false, false, "1"  },
    { "k",    "casefolding",                           false, false, "1"  },
    { "ks",   "casefolding support",                   false, false, "0"  },
    { "s",    "stemming",                              false, false, "0"  },
    { "ss",   "stemming support",                      false, false, "0"  },
    { "af",   "accentfolding",                         false, false, "0"  },
    { "afs",  "accentfolding support",                 false, false, "0"  },
    { "ccs",  "cross collection searching",            false, false, "0"  },
    { "ccp",  "cross collection page",                 false, false, "0"  },
    // "g" - new arg for granularity, for mgpp collections
    { "g",    "granularity",                           true,  false, NULL },
    // "ds" - start date
    { "ds",   "start date",                            true,  false, NULL },
    // "de" - end date
    { "de",   "end date",                              true,  false, NULL },
    // "dsbc" - whether or not start date is prechristian
    { "dsbc", "start date bc",                         false, false, "0"  },
    // "debc" - whether or not end date is prechristian
    { "debc", "end date bc",                           false, false, "0"  },
    // "qt" - 0 = text, 1 = form (can be empty or single char)
    { "qt",   "query type",                            true,  false, NULL },
    // "qto" - 1 = text only, 2 = form only, 3 = text and form
    // (can be empty or single char)
    { "qto",  "query type options",                    true,  false, NULL },
    // "qb" - 0 = regular, 1 = large
    { "qb",   "query box type",                        false, false, "0"  },
    // "fqs" - the list of stemming options in the form query
    //       - a comma separated list
    { "fqs",  "form query stems",                      true,  false, NULL },
    // "fqk" - the list of casefolding options in the form query
    //       - a comma separated list
    { "fqk",  "form query casefolds",                  true,  false, NULL },
    // "cc" - the only argument here that accepts multiple values
    { "cc",   "collections to search",                 true,  true,  NULL },
    // ****
    // should this even be here???
    // seems to be mixed up between "sf" and "sqlsf"
    // "sf" - Sort field. Set to field to be used for sorting search result
    // set (only implemented for lucene collections at present).
    { "sqlsf", "sql sort field",                       true,  false, NULL }
  };

  // Logs a warning about an invalid integer cgi argument and, when the
  // argument is registered in argsinfo, resets it to its registered default.
  // problem is the phrase placed between "argument " and the offending value.
  void queryaction_reset_to_default (cgiargsinfoclass &argsinfo, cgiargsclass &args,
				     const char *name, const char *problem,
				     int badvalue, ostream &logout) {
    logout << "Warning: \"" << name << "\" argument " << problem
	   << " (" << badvalue << ")\n";
    cgiarginfo *info = argsinfo.getarginfo (name);
    if (info != NULL) args[name] = info->argdefault;
  }

  // Resets the integer cgi argument called name to its default (with a
  // warning) unless its value lies within [lowest, highest].
  void queryaction_check_int_range (cgiargsinfoclass &argsinfo, cgiargsclass &args,
				    const char *name, int lowest, int highest,
				    ostream &logout) {
    int value = args.getintarg (name);
    if (value < lowest || value > highest) {
      queryaction_reset_to_default (argsinfo, args, name, "out of range", value, logout);
    }
  }

}

// Registers every cgi argument listed in query_argspecs with this action's
// argsinfo and clears the phrase counter.
queryaction::queryaction () 
  : basequeryaction()
{
  num_phrases = 0;

  cgiarginfo arg_ainfo;
  const size_t numspecs = sizeof (query_argspecs) / sizeof (query_argspecs[0]);
  for (size_t i = 0; i < numspecs; ++i) {
    const query_argspec &spec = query_argspecs[i];

    arg_ainfo.shortname = spec.shortname;
    arg_ainfo.longname = spec.longname;
    arg_ainfo.multiplechar = spec.multiplechar;
    arg_ainfo.multiplevalue = spec.multiplevalue;
    arg_ainfo.defaultstatus = cgiarginfo::weak;
    if (spec.argdefault == NULL) arg_ainfo.argdefault = g_EmptyText;
    else arg_ainfo.argdefault = spec.argdefault;
    arg_ainfo.savedarginfo = cgiarginfo::must;
    argsinfo.addarginfo (NULL, arg_ainfo);
  }
}

queryaction::~queryaction () 
{
}

// Passes the configuration line straight through to basequeryaction.
void queryaction::configure (const text_t &key, const text_tarray &cfgline) 
{
  basequeryaction::configure (key, cfgline);
}

// Initialisation is delegated entirely to basequeryaction.
bool queryaction::init (ostream &logout) 
{
  return basequeryaction::init (logout);
}

// Validates the integer-valued cgi arguments used by this action. Any value
// outside its permitted range is reported on logout and replaced by the
// default registered in argsinfo (when the argument is registered there).
// The remaining checks, and the return value, come from basequeryaction.
bool queryaction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args, 
				 recptprotolistclass* protos, ostream &logout) 
{
  // two-valued flags: t, k, s
  queryaction_check_int_range (argsinfo, args, "t", 0, 1, logout);
  queryaction_check_int_range (argsinfo, args, "k", 0, 1, logout);
  queryaction_check_int_range (argsinfo, args, "s", 0, 1, logout);

  // ct and qt accept 0, 1 or 2
  queryaction_check_int_range (argsinfo, args, "ct", 0, 2, logout);
  queryaction_check_int_range (argsinfo, args, "qt", 0, 2, logout);

  // two-valued flags: qb, fqa
  queryaction_check_int_range (argsinfo, args, "qb", 0, 1, logout);
  queryaction_check_int_range (argsinfo, args, "fqa", 0, 1, logout);

  // fqn only has a lower bound
  int arg_fqn = args.getintarg ("fqn");
  if (arg_fqn < -1) {
    queryaction_reset_to_default (argsinfo, args, "fqn", "less than -1", arg_fqn, logout);
  }

  return basequeryaction::check_cgiargs (argsinfo, args, protos, logout);
}

// Defines the base class's internal macros, then the query interface macros.
void queryaction::define_internal_macros (displayclass &disp, cgiargsclass &args, 
					  recptprotolistclass * protos, 
					  ostream &logout) 
{
  basequeryaction::define_internal_macros (disp, args, protos, logout);

  define_query_interface (disp, args, protos, logout);
}

// Sets the "optdatesearch" macro (package "query") to "_datesearch_" when the
// collection's "QueryInterface" format entry equals "DateSearch". Logs an
// error and returns without setting anything if either the collection
// protocol or the collection info cannot be obtained.
void queryaction::define_query_interface (displayclass &disp, cgiargsclass &args, 
					  recptprotolistclass * protos, 
					  ostream &logout)
{
  text_t collection = args["c"];

  // NOTE(review): if text_t::getcstr() hands back a caller-owned buffer, the
  // two log statements below leak it -- confirm against text_t.h.

  // check that the protocol is alive
  recptproto *colproto = protos->getrecptproto (collection, logout);
  if (colproto == NULL) {
    logout << "ERROR: Null collection protocol trying to query " 
	   << collection.getcstr() << "\n";
    return;
  }

  // check the collection is responding/in place
  ColInfoResponse_t *colinfo = recpt->get_collectinfo_ptr (colproto, collection, logout);
  if (colinfo == NULL) {
    logout << "ERROR: Null returned for get_collectinfo_ptr on " 
	   << collection.getcstr() << " in queryaction::define_query_interface\n";
    return;
  }

  text_tmap::iterator check = colinfo->format.find ("QueryInterface");
  if (check == colinfo->format.end()) return;

  if ((*check).second == "DateSearch") {
    text_t current = "_datesearch_";
    disp.setmacro ("optdatesearch", "query", current);
  }
}
void queryaction::define_external_macros (displayclass &disp, cgiargsclass &args, recptprotolistclass *protos, ostream &logout) { // define_external_macros sets the following macros: // some or all of these may not be required to be set // _hselection_, _h2selection_ the selection box for the main part of the index // _jselection_, _j2selection_ the selection box for the subcollection part of the index // _nselection_, _n2selection_ the selection box for the language part of the index // _cq2selection the selection box for combining two queries // _gselection_, the selection box forlevels (mgpp) // _fqfselection_, the selection box for index/fields (mgpp) // can't do anything if collectproto is null (i.e. no collection was specified) recptproto *collectproto = protos->getrecptproto (args["c"], logout); if (collectproto == NULL) return; ColInfoResponse_t *colinfo = recpt->get_collectinfo_ptr(collectproto, args["c"], logout); set_query_type_args(colinfo, args); set_stem_index_args(colinfo, args); comerror_t err; InfoFilterOptionsResponse_t response; InfoFilterOptionsRequest_t request; request.filterName = "QueryFilter"; collectproto->get_filteroptions (args["c"], request, response, err, logout); if (err == noError) { FilterOption_tmap::const_iterator it; FilterOption_tmap::const_iterator end = response.filterOptions.end(); // _hselection_ and _h2selection_ (Index) it = response.filterOptions.find ("Index"); if (it != end) set_option_macro ("h", args["h"], true, false, (*it).second, disp); if (it != end) set_option_macro ("h2", args["h2"], true,false, (*it).second, disp); // _jselection_ and _j2selection_ (Subcollection) it = response.filterOptions.find ("Subcollection"); if (it != end) set_option_macro ("j", args["j"], true,false, (*it).second, disp); if (it != end) set_option_macro ("j2", args["j2"], true,false, (*it).second, disp); // _nselection_ and _n2selection_ (Language) it = response.filterOptions.find ("Language"); if (it != end) set_option_macro ("n", 
args["n"], true,false, (*it).second, disp); if (it != end) set_option_macro ("n2", args["n2"], true,false, (*it).second, disp); // _cq2selection_ (CombineQuery) it = response.filterOptions.find ("CombineQuery"); if (it != end) set_option_macro ("cq2", args["cq2"], true,false, (*it).second, disp); if ((args["ct"] == "1") || (args["ct"] == "2")) { // mgpp/lucene collections // _gselection_ (Level) it = response.filterOptions.find("Level"); if (it!=end) { set_option_macro("g", args["g"], false, false, (*it).second, disp); if (args["qt"]=="1") { // form search set_gformselection_macro(args["g"], (*it).second, disp); } } // _fqfselection_ field list it = response.filterOptions.find("IndexField"); if (it!=end) { bool form_search = false; if (args["qto"]=="2" || args["qt"]=="1") { form_search = true; } set_option_macro ("fqf", args["fqf"], true, form_search, (*it).second, disp); if (args["ct"] == "2") {// lucene // set the sort field macro set_sfselection_macro(args["sf"], (*it).second, disp); } } } // add a queryterms macro for plain version of search terms if (!args["q"].empty()|| !args["fqv"].empty()) { text_t query_arg = ""; if (args["qt"]=="0" && args["qto"] != "2") { // normal text search query_arg = args["q"]; } else if (args["qt"]=="1" || args["qto"]=="2"){ // form search if (args["b"]=="1" && args["fqa"]=="1") { // explicit query query_arg = args["q"]; } else { // form search query_arg = args["fqv"]; } } disp.setmacro ("queryterms", displayclass::defaultpackage, get_plain_query_terms(query_arg, args["ct"])); } } } // define external macros void queryaction::set_sfselection_macro(text_t current_value, const FilterOption_t &option, displayclass &disp) { // we need at least one option here to continue if (option.validValues.size() < 1) { return; } text_t macrovalue = "\n"; if (current_value.empty()) current_value = option.defaultValue; thisvalue = option.validValues.begin(); while (thisvalue != endvalue) { if (*thisvalue != "Para") { macrovalue += "