/********************************************************************** * * queryaction.cpp -- * Copyright (C) 1999 The New Zealand Digital Library Project * * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * *********************************************************************/ #include "queryaction.h" #include "querytools.h" #include "formattools.h" #include "cgiutils.h" #include "OIDtools.h" #include "fileutil.h" #include "text_t.h" #include "historydb.h" #include "htmlutils.h" // for html_safe in do_action #include "gsdltools.h" #include "phrases.h" // for get_phrases #include // for strtol #include queryaction::queryaction () : basequeryaction() { num_phrases = 0; cgiarginfo arg_ainfo; // this action uses cgi variable "a" arg_ainfo.shortname = "a"; arg_ainfo.longname = "action"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "q"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "ct" - 0 = mg, 1 = mgpp, 2=lucene arg_ainfo.shortname = "ct"; arg_ainfo.longname = "collection type"; arg_ainfo.multiplechar = true; // can be empty or single char arg_ainfo.multiplevalue = false; 
arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "b" - 0 = simple, 1 = advanced arg_ainfo.shortname = "b"; arg_ainfo.longname = "query mode"; arg_ainfo.multiplechar = false; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "h" arg_ainfo.shortname = "h"; arg_ainfo.longname = "main index"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "h2" arg_ainfo.shortname = "h2"; arg_ainfo.longname = "main index for second query"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "j" arg_ainfo.shortname = "j"; arg_ainfo.longname = "sub collection index"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "j2" arg_ainfo.shortname = "j2"; arg_ainfo.longname = "sub collection index for second query"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "n" arg_ainfo.shortname = "n"; arg_ainfo.longname = "language index"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo 
(NULL, arg_ainfo); // "n2" arg_ainfo.shortname = "n2"; arg_ainfo.longname = "language index for second query"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "t" - 1 = ranked 0 = boolean arg_ainfo.shortname = "t"; arg_ainfo.longname = "search type"; arg_ainfo.multiplechar = false; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "1"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "k" arg_ainfo.shortname = "k"; arg_ainfo.longname = "casefolding"; arg_ainfo.multiplechar = false; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "1"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "ks" arg_ainfo.shortname = "ks"; arg_ainfo.longname = "casefolding support"; arg_ainfo.multiplechar = false; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "s" arg_ainfo.shortname = "s"; arg_ainfo.longname = "stemming"; arg_ainfo.multiplechar = false; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "ss" arg_ainfo.shortname = "ss"; arg_ainfo.longname = "stemming support"; arg_ainfo.multiplechar = false; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "af" arg_ainfo.shortname = "af"; arg_ainfo.longname = "accentfolding"; arg_ainfo.multiplechar = false; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = 
cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "afs" arg_ainfo.shortname = "afs"; arg_ainfo.longname = "accentfolding support"; arg_ainfo.multiplechar = false; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "ccs" arg_ainfo.shortname = "ccs"; arg_ainfo.longname = "cross collection searching"; arg_ainfo.multiplechar = false; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "ccp" arg_ainfo.shortname = "ccp"; arg_ainfo.longname = "cross collection page"; arg_ainfo.multiplechar = false; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "g" - new arg for granularity, for mgpp collections arg_ainfo.shortname = "g"; arg_ainfo.longname = "granularity"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "ds" - start date arg_ainfo.shortname = "ds"; arg_ainfo.longname = "start date"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "de" - end date arg_ainfo.shortname = "de"; arg_ainfo.longname = "end date"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, 
arg_ainfo); // "dsbc" - whether or not start date is prechristian arg_ainfo.shortname = "dsbc"; arg_ainfo.longname = "start date bc"; arg_ainfo.multiplechar = false; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "debc" - whether or not end date is prechristian arg_ainfo.shortname = "debc"; arg_ainfo.longname = "end date bc"; arg_ainfo.multiplechar = false; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "qt" - 0 = text, 1 = form arg_ainfo.shortname = "qt"; arg_ainfo.longname = "query type"; arg_ainfo.multiplechar = true; // can be empty or single char arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "qto" - 1 = text only, 2 = form only, 3 = text and form arg_ainfo.shortname = "qto"; arg_ainfo.longname = "query type options"; arg_ainfo.multiplechar = true; // can be empty or single char arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "qb" - 0 = regular, 1 = large arg_ainfo.shortname = "qb"; arg_ainfo.longname = "query box type"; arg_ainfo.multiplechar = false; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = "0"; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "fqs" - the list of stemming options in the form query // - a comma separated list arg_ainfo.shortname = "fqs"; arg_ainfo.longname = "form query stems"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = 
cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "fqk" - the list of casefolding options in the form query // - a comma separated list arg_ainfo.shortname = "fqk"; arg_ainfo.longname = "form query casefolds"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "fqaf" - the list of accentfolding options in the form query // - a comma separated list arg_ainfo.shortname = "fqaf"; arg_ainfo.longname = "form query accentfold"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // "cc" arg_ainfo.shortname = "cc"; arg_ainfo.longname = "collections to search"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = true; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); // **** // should this even be here??? // seems to be mixed up between "sf" and "sqlsf" // "sf" - Sort field. Set to field to be used for sorting search reult // set (only implemented for lucene collections at present). 
arg_ainfo.shortname = "sqlsf"; arg_ainfo.longname = "sql sort field"; arg_ainfo.multiplechar = true; arg_ainfo.multiplevalue = false; arg_ainfo.defaultstatus = cgiarginfo::weak; arg_ainfo.argdefault = g_EmptyText; arg_ainfo.savedarginfo = cgiarginfo::must; argsinfo.addarginfo (NULL, arg_ainfo); } queryaction::~queryaction () { } void queryaction::configure (const text_t &key, const text_tarray &cfgline) { basequeryaction::configure (key, cfgline); } bool queryaction::init (ostream &logout) { return basequeryaction::init (logout); } bool queryaction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args, recptprotolistclass* protos, ostream &logout) { // check t argument int arg_t = args.getintarg("t"); if (arg_t != 0 && arg_t != 1) { logout << "Warning: \"t\" argument out of range (" << arg_t << ")\n"; cgiarginfo *tinfo = argsinfo.getarginfo ("t"); if (tinfo != NULL) args["t"] = tinfo->argdefault; } // check k argument int arg_k = args.getintarg("k"); if (arg_k != 0 && arg_k != 1) { logout << "Warning: \"k\" argument out of range (" << arg_k << ")\n"; cgiarginfo *kinfo = argsinfo.getarginfo ("k"); if (kinfo != NULL) args["k"] = kinfo->argdefault; } // check s argument int arg_s = args.getintarg("s"); if (arg_s != 0 && arg_s != 1) { logout << "Warning: \"s\" argument out of range (" << arg_s << ")\n"; cgiarginfo *sinfo = argsinfo.getarginfo ("s"); if (sinfo != NULL) args["s"] = sinfo->argdefault; } // check ct argument int arg_ct = args.getintarg("ct"); if (arg_ct < 0 || arg_ct > 2) { logout << "Warning: \"ct\" argument out of range (" << arg_ct << ")\n"; cgiarginfo *ctinfo = argsinfo.getarginfo ("ct"); if (ctinfo != NULL) args["ct"] = ctinfo->argdefault; } // check qt argument int arg_qt = args.getintarg("qt"); if (arg_qt<0 || arg_qt>2) { logout << "Warning: \"qt\" argument out of range (" << arg_qt << ")\n"; cgiarginfo *qtinfo = argsinfo.getarginfo ("qt"); if (qtinfo != NULL) args["qt"] = qtinfo->argdefault; } // check qb argument int arg_qb = 
args.getintarg("qb"); if (arg_qb !=0 && arg_qb !=1) { logout << "Warning: \"qb\" argument out of range (" << arg_qb << ")\n"; cgiarginfo *qbinfo = argsinfo.getarginfo ("qb"); if (qbinfo != NULL) args["qb"] = qbinfo->argdefault; } // check fqa argument int arg_fqa = args.getintarg("fqa"); if (arg_fqa !=0 && arg_fqa !=1) { logout << "Warning: \"fqa\" argument out of range (" << arg_fqa << ")\n"; cgiarginfo *fqainfo = argsinfo.getarginfo ("fqa"); if (fqainfo != NULL) args["fqa"] = fqainfo->argdefault; } // check fqn argument int arg_fqn = args.getintarg("fqn"); if (arg_fqn < -1) { logout << "Warning: \"fqn\" argument less than -1 (" << arg_fqn << ")\n"; cgiarginfo *fqninfo = argsinfo.getarginfo ("fqn"); if (fqninfo != NULL) args["fqn"] = fqninfo->argdefault; } return basequeryaction::check_cgiargs(argsinfo,args,protos,logout); } void queryaction::define_internal_macros (displayclass &disp, cgiargsclass &args, recptprotolistclass * protos, ostream &logout) { basequeryaction::define_internal_macros(disp,args,protos,logout); define_query_interface(disp, args, protos, logout); } void queryaction::define_query_interface(displayclass &disp, cgiargsclass &args, recptprotolistclass * protos, ostream &logout){ text_t collection = args["c"]; //check that the protocol is alive recptproto* colproto = protos->getrecptproto (collection, logout); if(colproto == NULL) { logout << "ERROR: Null collection protocol trying to query" << collection.getcstr() << "\n"; return; } //check the collection is responding/in place ColInfoResponse_t *colinfo = recpt->get_collectinfo_ptr(colproto, collection, logout); if(colinfo == NULL){ logout << "ERROR: Null returned for get_collectinfo_ptr on " << collection.getcstr() << "in queryaction::define_query_interface\n"; return; } text_tmap::iterator check = colinfo->format.find("QueryInterface"); if(check != colinfo->format.end()){ if((*check).second=="DateSearch"){ text_t current = "_datesearch_"; disp.setmacro("optdatesearch","query",current); } } } 
void queryaction::define_external_macros (displayclass &disp, cgiargsclass &args, recptprotolistclass *protos, ostream &logout) { // define_external_macros sets the following macros: // some or all of these may not be required to be set // _hselection_, _h2selection_ the selection box for the main part of the index // _jselection_, _j2selection_ the selection box for the subcollection part of the index // _nselection_, _n2selection_ the selection box for the language part of the index // _cq2selection the selection box for combining two queries // _gselection_, the selection box forlevels (mgpp) // _fqfselection_, the selection box for index/fields (mgpp) // can't do anything if collectproto is null (i.e. no collection was specified) recptproto *collectproto = protos->getrecptproto (args["c"], logout); if (collectproto == NULL) return; ColInfoResponse_t *colinfo = recpt->get_collectinfo_ptr(collectproto, args["c"], logout); set_query_type_args(colinfo, args); set_stem_index_args(colinfo, args); comerror_t err; InfoFilterOptionsResponse_t response; InfoFilterOptionsRequest_t request; request.filterName = "QueryFilter"; collectproto->get_filteroptions (args["c"], request, response, err, logout); if (err == noError) { FilterOption_tmap::const_iterator it; FilterOption_tmap::const_iterator end = response.filterOptions.end(); // _hselection_ and _h2selection_ (Index) it = response.filterOptions.find ("Index"); if (it != end) set_option_macro ("h", args["h"], true, false, (*it).second, disp); if (it != end) set_option_macro ("h2", args["h2"], true,false, (*it).second, disp); // _jselection_ and _j2selection_ (Subcollection) it = response.filterOptions.find ("Subcollection"); if (it != end) set_option_macro ("j", args["j"], true,false, (*it).second, disp); if (it != end) set_option_macro ("j2", args["j2"], true,false, (*it).second, disp); // _nselection_ and _n2selection_ (Language) it = response.filterOptions.find ("Language"); if (it != end) set_option_macro ("n", 
args["n"], true,false, (*it).second, disp); if (it != end) set_option_macro ("n2", args["n2"], true,false, (*it).second, disp); // _cq2selection_ (CombineQuery) it = response.filterOptions.find ("CombineQuery"); if (it != end) set_option_macro ("cq2", args["cq2"], true,false, (*it).second, disp); if ((args["ct"] == "1") || (args["ct"] == "2")) { // mgpp/lucene collections // _gselection_ (Level) it = response.filterOptions.find("Level"); if (it!=end) { set_option_macro("g", args["g"], false, false, (*it).second, disp); if (args["qt"]=="1") { // form search set_gformselection_macro(args["g"], (*it).second, disp); } } // _fqfselection_ field list it = response.filterOptions.find("IndexField"); if (it!=end) { bool form_search = false; if (args["qto"]=="2" || args["qt"]=="1") { form_search = true; } set_option_macro ("fqf", args["fqf"], true, form_search, (*it).second, disp); } if (args["ct"] == "2") {// lucene it = response.filterOptions.find("SortField"); // set the sort field macro set_sfselection_macro(args["sf"], (*it).second, disp); } } // add a queryterms macro for plain version of search terms if (!args["q"].empty()|| !args["fqv"].empty()) { text_t query_arg = ""; if (args["qt"]=="0" && args["qto"] != "2") { // normal text search query_arg = args["q"]; } else if (args["qt"]=="1" || args["qto"]=="2"){ // form search if (args["b"]=="1" && args["fqa"]=="1") { // explicit query query_arg = args["q"]; } else { // form search query_arg = args["fqv"]; } } disp.setmacro ("queryterms", displayclass::defaultpackage, get_plain_query_terms(query_arg, args["ct"])); } } } // define external macros void queryaction::set_sfselection_macro(text_t current_value, const FilterOption_t &option, displayclass &disp) { // we need at one sort option here to continue if (option.validValues.size() < 1) { return; } if (option.validValues.size() == 1) { // we don't need a drop down list, just the value text_t value = option.defaultValue; text_t macrovalue = ""; if (value == "rank") { 
macrovalue = "_query:textsortbyrank_"; } else if (value == "none") { return; // no sorting is the only option, so don't display anything } else { macrovalue = "_"+value+"_"; } disp.setmacro ("sfselection", displayclass::defaultpackage, macrovalue); return; } // if we have more than two options, make a drop down list text_t macrovalue = "\n" << "\n" << "\n" << "
\n" << "\n" << "\n" << "
Select collections to search for \"" << encodeForHTML(args["q"]) << "\" (index=" << encodeForHTML(index) << " subcollection=" << encodeForHTML(subcollection) << " language=" << encodeForHTML(language) << ")
\n" << "
\n" << "
\n"; recptprotolistclass::iterator rprotolist_here = protos->begin(); recptprotolistclass::iterator rprotolist_end = protos->end(); while (rprotolist_here != rprotolist_end) { if ((*rprotolist_here).p != NULL) { text_tarray collist; (*rprotolist_here).p->get_collection_list (collist, err, logout); if (err == noError) { text_tarray::iterator collist_here = collist.begin(); text_tarray::iterator collist_end = collist.end(); while (collist_here != collist_end) { cinfo = recpt->get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, logout); // if (err == noError && cinfo.isPublic && (cinfo.buildDate > 0)) { if (cinfo != NULL && (cinfo->buildDate > 0)) { (*rprotolist_here).p->get_filteroptions (*collist_here, frequest, fresponse, err, logout); if (err == noError) { FilterOption_tmap::const_iterator it; FilterOption_tmap::const_iterator end = fresponse.filterOptions.end(); if (!index.empty()) { it = fresponse.filterOptions.find ("Index"); if (it == end) {++collist_here; continue;} text_tarray::const_iterator there = (*it).second.validValues.begin(); text_tarray::const_iterator tend = (*it).second.validValues.end(); while (there != tend) { if (*there == index) break; ++there; } if (there == tend) {++collist_here; continue;} } if (!subcollection.empty()) { it = fresponse.filterOptions.find ("Subcollection"); if (it == end) {++collist_here; continue;} text_tarray::const_iterator there = (*it).second.validValues.begin(); text_tarray::const_iterator tend = (*it).second.validValues.end(); while (there != tend) { if (*there == subcollection) break; ++there; } if (there == tend) {++collist_here; continue;} } if (!language.empty()) { it = fresponse.filterOptions.find ("Language"); if (it == end) {++collist_here; continue;} text_tarray::const_iterator there = (*it).second.validValues.begin(); text_tarray::const_iterator tend = (*it).second.validValues.end(); while (there != tend) { if (*there == language) break; ++there; } if (there == tend) {++collist_here; continue;} } // 
we've got a matching collection textout << outconvert << "get_collectionmeta("collectionname", args["l"]); if (collectionname.empty()) { collectionname = *collist_here; } textout << outconvert << disp << " name=\"cc\" value=\"" << *collist_here << "\">" << collectionname << "
\n"; } } ++collist_here; } } } ++rprotolist_here; } textout << outconvert << disp << "
\n" << "\n" << "_query:footer_\n"; } bool queryaction::user_groups_match(const text_t &collection_groups, const text_t &user_groups) { text_tset splitgrps; text_t::const_iterator split_here = collection_groups.begin(); text_t::const_iterator split_end = collection_groups.end(); splitchar(split_here,split_end,',',splitgrps); text_t::const_iterator ugroup_here = user_groups.begin(); text_t::const_iterator ugroup_end = user_groups.end(); text_t thisugroup; while (ugroup_here != ugroup_end) { ugroup_here = getdelimitstr (ugroup_here, ugroup_end, ',', thisugroup); if (splitgrps.find(thisugroup) != splitgrps.end() ) { // we have permission! return true; } } return false; } // If we are currently authenticated to be in this collection, then check all // collections in the list against the groups of the current user - if there is an overlap of groups, then add the collection into ccs list // If there had been no authentication needed to get to this collection, then // we'll ignore any collections that have collection level authentication void queryaction::validate_ccs_collection_list(cgiargsclass &args, recptprotolistclass *protos, ostream &logout) { text_tarray collections; text_t arg_cc = args["cc"]; text_t arg_c = args["c"]; decode_cgi_arg (arg_cc); splitchar (arg_cc.begin(), arg_cc.end(), ',', collections); bool currently_authenticated = false; if (!args["uan"].empty()) { // uan=1 means needs authentication. 
We'll only get here if we have passed authentication, otherwise the page would have been redirected to login page currently_authenticated = true; } args["cc"] = ""; // we will add colls in one by one if they are valid text_tarray::iterator col_here = collections.begin(); text_tarray::iterator col_end = collections.end(); bool first = true; text_t current_user_name = args["un"]; userinfo_t thisuser; if (currently_authenticated) { int status = user_database->get_user_info (current_user_name, thisuser); if (status != ERRNO_SUCCEED) { // something has gone wrong, so assume not // authenticated currently_authenticated = false; } } while (col_here != col_end) { bool include_coll = false; if (*col_here == arg_c) { // current collection must be accessible otherwise we wouldn't be here. include_coll = true; } else { recptproto *collectproto = protos->getrecptproto (*col_here, logout); if (collectproto != NULL) { ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, *col_here, logout); text_t authenticate = cinfo->authenticate; if (authenticate == "collection") { if (currently_authenticated) { text_t collection_groups = cinfo->auth_group; if (user_groups_match(collection_groups, thisuser.groups)) { include_coll = true; } } // else we'll not include it } else { // not authenticated, or document level authentication - can include in the list include_coll = true; } } } if (include_coll) { if (!first) args["cc"].push_back (','); args["cc"] += *col_here; first = false; } ++col_here; } } bool queryaction::do_action (cgiargsclass &args, recptprotolistclass *protos, browsermapclass *browsers, displayclass &disp, outconvertclass &outconvert, ostream &textout, ostream &logout) { if (recpt == NULL) { logout << "ERROR (queryaction::do_action): This action does not contain information\n" << " about any receptionists. 
The method set_receptionist was probably\n" << " not called from the module which instantiated this action.\n"; return true; } if (args["ccs"] == "1") { if (!args["cc"].empty()) { validate_ccs_collection_list(args, protos, logout); // include only those which current user has access to // query the selected collections text_t::const_iterator b = args["cc"].begin(); text_t::const_iterator e = args["cc"].end(); if (findchar (b, e, ',') != e) { if (!search_multiple_collections (args, protos, browsers, disp, outconvert, textout, logout)) return false; return true; } else { if (!search_single_collection (args, args["cc"], protos, browsers, disp, outconvert, textout, logout)) return false; return true; } } } // simply query the current collection if (!search_single_collection (args, args["c"], protos, browsers, disp, outconvert, textout, logout)) return false; return true; } // request.filterResultOptions and request.fields (if required) should // be set from the calling code void queryaction::set_queryfilter_options (FilterRequest_t &request, const text_t &querystring, cgiargsclass &args) { set_fulltext_queryfilter_options(request,querystring,args); } void queryaction::set_queryfilter_options (FilterRequest_t &request, const text_t &querystring1, const text_t &querystring2, cgiargsclass &args) { set_fulltext_queryfilter_options(request,querystring1,querystring2,args); } bool queryaction::search_multiple_collections (cgiargsclass &args, recptprotolistclass *protos, browsermapclass *browsers, displayclass &disp, outconvertclass &outconvert, ostream &textout, ostream &logout) { text_tarray collections; text_t arg_cc = args["cc"]; decode_cgi_arg (arg_cc); splitchar (arg_cc.begin(), arg_cc.end(), ',', collections); if (collections.empty()) { logout << "queryaction::search_multiple_collections: No collections " << "set for doing multiple query - will search current collection\n"; textout << outconvert << disp << "_query:textwarningnocollections_\n"; return 
search_single_collection (args, args["c"], protos, browsers, disp, outconvert, textout, logout); } // check the main coll text_t main_collection = args["c"]; recptproto *collectproto = protos->getrecptproto (main_collection, logout); if (collectproto == NULL) { logout << outconvert << "queryaction::search_multiple_collection: " << main_collection << " collection has a NULL collectproto\n"; // Display the "this collection is not installed on this system" page disp.setmacro("cvariable", displayclass::defaultpackage, encodeForHTML(main_collection)); disp.setmacro("content", "query", "

_textbadcollection_

"); textout << outconvert << disp << "_query:header_\n" << "_query:content_\n" << "_query:footer_\n"; return true; } ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr (collectproto, main_collection, logout); if (cinfo == NULL) { logout << "ERROR (query_action::search_multiple_collections): get_collectinfo_ptr returned NULL for '"< colinfomap; comerror_t err; FilterRequest_t request; FilterResponse_t response; request.filterResultOptions = FROID | FRmetadata | FRtermFreq | FRranking; text_t freqmsg = "_textfreqmsg1_"; int numdocs = 0; isapprox isApprox = Exact; bool syntax_error = false; set_queryfilter_options (request, formattedstring, args); // need to retrieve maxdocs matches for each collection // (will eventually want to tidy this up, do so caching etc.) OptionValue_t option; option.name = "StartResults"; option.value = "1"; request.filterOptions.push_back (option); option.name = "EndResults"; option.value = args["m"]; request.filterOptions.push_back (option); // check the main collection for uniform formatting info - do we use // individual format statements, or just the main one? 
bool use_main_col_format = false; if (cinfo->ccsOptions & CCSUniformSearchResultsFormatting) { use_main_col_format = true; } browserclass *bptr = browsers->getbrowser (browsertype); request.fields.erase (request.fields.begin(), request.fields.end()); request.getParents = false; bptr->load_metadata_defaults (request.fields); text_t formatstring; format_t *formatlistptr = new format_t(); if (use_main_col_format) { // just get one format for main coll and use it for each subcol if (!get_formatstring (classification, browsertype, cinfo->format, formatstring)) { formatstring = bptr->get_default_formatstring(); } parse_formatstring (formatstring, formatlistptr, request.fields, request.getParents); } text_tarray::iterator col_here = collections.begin(); text_tarray::iterator col_end = collections.end(); map termfreqs; ColInfoResponse_t *tmp_cinfo; while (col_here != col_end) { collectproto = protos->getrecptproto (*col_here, logout); if (collectproto == NULL) { // skip this collection logout << outconvert << "queryaction::search_multiple_collections: " << *col_here << " collection has a NULL collectproto, ignoring\n"; ++col_here; continue; } tmp_cinfo = recpt->get_collectinfo_ptr (collectproto, *col_here, logout); if (tmp_cinfo == NULL) { // skip this collection logout << "ERROR (query_action::search_multiple_collections): get_collectinfo_ptr returned NULL\n"; ++col_here; continue; } if (!use_main_col_format) { request.fields.erase (request.fields.begin(), request.fields.end()); request.getParents = false; bptr->load_metadata_defaults (request.fields); // get the formatstring if there is one if (!get_formatstring (classification, browsertype, tmp_cinfo->format, formatstring)) { formatstring = bptr->get_default_formatstring(); } formatlistptr = new format_t(); parse_formatstring (formatstring, formatlistptr, request.fields, request.getParents); } colinfo_t thiscolinfo; thiscolinfo.formatlistptr = formatlistptr; thiscolinfo.browserptr = bptr; colinfomap[*col_here] = 
thiscolinfo; // do the query collectproto->filter (*col_here, request, response, err, logout); if (err != noError && err != syntaxError) { outconvertclass text_t2ascii; logout << text_t2ascii << "queryaction::search_multiple_collections: call to QueryFilter failed " << "for " << *col_here << " collection (" << get_comerror_string (err) << ")\n"; return false; } if (err == syntaxError) { syntax_error = true; freqmsg = "_textinvalidquery_"; // assume the syntax will be invalid for all colls break; } if (response.error_message == "TOO_MANY_CLAUSES") { freqmsg = "_textlucenetoomanyclauses_"; break; } if (isApprox == Exact) isApprox = response.isApprox; else if (isApprox == MoreThan) if (response.isApprox == Approximate) isApprox = response.isApprox; TermInfo_tarray::const_iterator this_term = response.termInfo.begin(); TermInfo_tarray::const_iterator end_term = response.termInfo.end(); while (this_term != end_term) { termfreqs[(*this_term).term] += (*this_term).freq; if ((col_here+1) == col_end) { freqmsg += (*this_term).term + ": " + termfreqs[(*this_term).term]; if ((this_term+1) != end_term) freqmsg += ", "; } ++this_term; } if (response.numDocs > 0) { numdocs += response.numDocs; QueryResult_t thisresult; thisresult.collection = *col_here; ResultDocInfo_tarray::iterator doc_here = response.docInfo.begin(); ResultDocInfo_tarray::iterator doc_end = response.docInfo.end(); while (doc_here != doc_end) { thisresult.doc = *doc_here; results.insert (thisresult); // this is ordering based on doc rank ++doc_here; } } ++col_here; } // for each coll // now we have an ordered list of results. 
If ifl (I feel lucky) is set, then pick out the one we want if (args["ifl"] == 1 || (args["ifl"] == 2 && numdocs == 1)) { //Find whether DocumentSearchResultLinks is enabled bool show_links = false; text_tmap::const_iterator format_here = cinfo->format.begin(); text_tmap::const_iterator format_end = cinfo->format.end(); while (format_here != format_end) { if (((*format_here).first == "DocumentSearchResultLinks") && ((*format_here).second == "true")){ show_links = true; break; } ++format_here; } // which doc do we want? int docnum; int ifl; int srn = 0; int srp = 0; if (args["ifl"] == 1) { ifl = args["ifln"].getint(); docnum = ifl - 1; if (show_links) { // set the values for next and prev search result number srn = ifl + 1; if (srn > numdocs) { srn = 0; } srp = ifl - 1; if (srp < 0) { srp = 0; } } } else { // we just want the first (and only) result docnum = 0; } if (docnum >= 0 && docnum < numdocs) { // get the docnum'th item from the results QueryResult_tset::iterator res_here = results.begin(); for (int i=0; i< docnum; i++) { ++res_here; } textout << outconvert << disp << "Location: _gwcgi_?e=_compressedoptions_&a=d&c=" << (*res_here).collection << "&cl=search&d=" << (*res_here).doc.OID << "&srn=" << srn << "&srp=" << srp << "\n\n"; textout << flush; return true; } } if (!args["ifl"].empty()) { // if we get here, and ifl was set but we haven't output a document, then we'll carry on as if ifl wasn't set. 
The only catch is that get_cgihead_info won't have // done the right thing (because ifl was set), so we need to make sure the output is html textout << "Content-type: text/html\n\n"; } text_t numdocs_t = numdocs; args["nmd"] = numdocs_t; disp.setmacro ("freqmsg", "query", freqmsg); define_query_macros( args, disp, numdocs, isApprox); // save the query if appropriate save_search_history(args, numdocs, isApprox); define_history_macros (disp, args, protos, logout); textout << outconvert << disp << "_query:header_\n" << "_query:content_"; if (!syntax_error) { // now go through each result and output it QueryResult_tset::iterator res_here = results.begin(); QueryResult_tset::iterator res_end = results.end(); text_tset metadata; // empty !! bool getParents = false; // don't care !! bool use_table; ResultDocInfo_t thisdoc; format_t *formatlistptr = NULL; browserclass *browserptr = NULL; int count = 1; int firstdoc = args.getintarg("r"); int hitsperpage = args.getintarg("o"); int thislast = firstdoc + (hitsperpage - 1); // output results while (res_here != res_end) { if (count < firstdoc) {++count; ++res_here; continue;} if (count > thislast) break; formatlistptr = colinfomap[(*res_here).collection].formatlistptr; browserptr = colinfomap[(*res_here).collection].browserptr; thisdoc = (*res_here).doc; use_table = is_table_content (formatlistptr); collectproto = protos->getrecptproto ((*res_here).collection, logout); if (collectproto == NULL) { logout << outconvert << "queryaction::search_multiple_collections: " << (*res_here).collection << " collection has a NULL collectproto, ignoring results\n"; ++res_here; continue; } browserptr->output_section_group (thisdoc, args, (*res_here).collection, 0, formatlistptr, use_table, metadata, getParents, collectproto, disp, outconvert, textout, logout); // textout << outconvert << "(ranking: " << (*res_here).doc.ranking << ")\n"; ++res_here; ++count; } } textout << outconvert << disp << "_query:footer_"; // clean up the format_t 
pointers map::iterator here = colinfomap.begin(); map::iterator end = colinfomap.end(); while (here != end) { delete ((*here).second.formatlistptr); ++here; } return true; } // does the formatting of the query string - either uses q for a text search // or the form values for an form search // also adds dates if appropriate in text search void queryaction::get_formatted_query_string (text_t &formattedstring, bool segment, cgiargsclass &args, displayclass &disp, ostream &logout) { if (args["qt"]=="0" && args["qto"] != "2") { // normal text search formattedstring = args["q"]; // remove & | ! for simple search,do segmentation if necessary // To url-decode the '&', format_querystring() will call unsafe_cgi_arg() first format_querystring (formattedstring, args.getintarg("b"), segment); if (args["ct"]!=0) { // mgpp and lucene - need to add in tag info if appropriate format_field_info(formattedstring, args["fqf"], args.getintarg("ct"), args.getintarg("t"), args.getintarg("b")); } add_dates(formattedstring, args.getintarg("ds"), args.getintarg("de"), args.getintarg("dsbc"), args.getintarg("debc"), args.getintarg("ct")); args["q"] = formattedstring; } else if (args["qt"]=="1" || args["qto"]=="2"){ // form search if (args["b"]=="1" && args["fqa"]=="1") { // explicit query formattedstring = args["q"]; // Replace %22 and %26 with " and & respectively, since these characters have meaning // in queries: " are used in phrases and & is used in boolean advanced searches. 
// For form searches below, unsafe_cgi_arg is called in the parse_..._form() functions unsafe_cgi_arg("ALL", formattedstring); } else { // form search if (args["b"]=="0") { // regular form parse_reg_query_form(formattedstring, args, segment); // will call unsafe_cgi_arg to decode url encoding } else { // advanced form parse_adv_query_form(formattedstring, args, segment); // will call unsafe_cgi_arg to decode url encoding } args["q"] = formattedstring; // reset the cgiargfqv macro - need to escape any quotes in it disp.setmacro("cgiargfqv", "query", escape_quotes(args["fqv"])); // also reset the _cgiargq_ macro as it has changed now disp.setmacro("cgiargq", displayclass::defaultpackage, html_safe(args["q"])); // reset the compressed options to include the q arg text_t compressedoptions = recpt->get_compressed_arg(args, logout); if (!compressedoptions.empty()) { disp.setmacro ("compressedoptions", displayclass::defaultpackage, dm_safe(compressedoptions)); // need a decoded version of compressedoptions for use within forms // as browsers encode values from forms before sending to server // (e.g. %25 becomes %2525) decode_cgi_arg (compressedoptions); if (args["w"] == "utf-8") { // if the encoding was utf-8, then compressed options was utf-8, and we need unicode. // if encoding wasn't utf-8, then compressed options may be screwed up, but seems to work for 8 bit encodings? compressedoptions = to_uni(compressedoptions); } text_t macrovalue = dm_safe(compressedoptions); disp.setmacro ("decodedcompressedoptions", displayclass::defaultpackage, macrovalue); disp.setmacro ("decodedcompressedoptionsAttrsafe", displayclass::defaultpackage, encodeForHTMLAttr(macrovalue)); } } // form search } // args["qt"]=1 else { logout << "ERROR (queryaction::get_formatted_query_string): querytype not defined\n"; } } // define_query_macros sets the macros that couldn't be set until the // query had been done. 
Those macros are // _resultline_, _nextfirst_, _nextlast_, _prevfirst_, _prevlast_, // _thisfirst_, and _thislast_ and _quotedquery_ // this has been simplified so it can be used with both search_single_coll // and search_multiple_coll void queryaction::define_query_macros (cgiargsclass &args, displayclass &disp, int numdocs, isapprox isApprox) { // The following 'if' statatment is placed here to be keep the semantics // the same as the version before basequeryaction was introduced if (num_phrases > 0) isApprox = Exact; basequeryaction::define_query_macros(args,disp,numdocs,isApprox); if (args["ct"]==0) { // mg queries only, not mgpp // get the quoted bits of the query string and set _quotedquery_ text_tarray phrases; get_phrases (args["q"], phrases); num_phrases = phrases.size(); text_tarray::const_iterator phere = phrases.begin(); text_tarray::const_iterator pend = phrases.end(); bool first = true; text_t quotedquery; while (phere != pend) { if (!first) if ((phere +1) == pend) quotedquery += " and "; else quotedquery += ", "; quotedquery += "\"" + *phere + "\""; first = false; ++phere; } if (args.getintarg("s") && !quotedquery.empty()) quotedquery += "_textstemon_"; disp.setmacro ("quotedquery", "query", quotedquery); } } // should this change for cross coll search?? 
// Stores the query that was just run in the search history of the user
// identified by the "z" argument.
//
// The history entry is built as: the number of matching documents,
// a '+' when isApprox is MoreThan (there were more docs found), and then
// the c, h, t, b, j, n, s, k, g and q arguments as name=value pairs
// separated by ';'.
//
// Returns true without saving anything when the query is empty or when
// "hs" is "0"; otherwise returns the result of set_history_info.
bool queryaction::save_search_history (cgiargsclass &args, int numdocs, 
				       isapprox isApprox)
{
  // null query, dont save; also only save when submit query pressed
  if (args["q"].empty() || args["hs"]=="0") {
    return true;
  }

  text_t userid = args["z"];

  // the number of docs goes on the front of the query string
  text_t entry = text_t(numdocs);
  if (isApprox==MoreThan) {
    entry.push_back('+');
  }

  // arguments recorded in the entry, in the order they are written out
  static const char *const saved_args[] = {
    "c", "h", "t", "b", "j", "n", "s", "k", "g", "q"
  };
  const int num_saved_args = sizeof(saved_args) / sizeof(saved_args[0]);

  for (int i = 0; i < num_saved_args; ++i) {
    if (i > 0) {
      entry.push_back(';');
    }
    entry += saved_args[i];
    entry.push_back('=');
    entry += args[saved_args[i]];
  }

  // "hd" greater than zero sets the display flag handed to set_history_info
  bool display = (args.getintarg("hd") > 0);

  return set_history_info(userid, entry, dbhome, display);
}