/********************************************************************** * * cstrquerytools.cpp -- * Copyright (C) 1999 The New Zealand Digital Library Project * * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * $Id: cstrquerytools.cpp 891 2000-02-01 22:32:36Z sjboddie $ * *********************************************************************/ /* $Log$ Revision 1.1 2000/02/01 22:32:37 sjboddie Initial revision Revision 1.1 1999/09/07 21:49:44 sjboddie new cstr receptionist */ // this is overridden for cstr as we're always going to get // maxdocs matches. They can then be sorted in the // cstrqueryaction #include "cstrquerytools.h" // do_query sets the filter options and makes the protocol call to // do a query. The results are returned in response. // request.filterResultOptions and request.fields (if required) // should be set by the calling function. bool do_query (FilterRequest_t &request, cgiargsclass &args, recptproto *collectproto, FilterResponse_t &response, ostream &logout) { request.filterName = "QueryFilter"; comerror_t err; OptionValue_t option; text_t formattedstring = args["q"]; format_querystring (formattedstring, args.getintarg("b")); option.name = "Term"; option.value = formattedstring; request.filterOptions.push_back (option); option.name = "QueryType"; option.value = (args.getintarg("t")) ? "ranked" : "boolean"; request.filterOptions.push_back (option); option.name = "Casefold"; option.value = (args.getintarg("k")) ? "true" : "false"; request.filterOptions.push_back (option); option.name = "Stem"; option.value = (args.getintarg("s")) ? "true" : "false"; request.filterOptions.push_back (option); if (!args["h"].empty()) { option.name = "Index"; option.value = args["h"]; request.filterOptions.push_back (option); } if (!args["j"].empty()) { option.name = "Subcollection"; option.value = args["j"]; request.filterOptions.push_back (option); } if (!args["n"].empty()) { option.name = "Language"; option.value = args["n"]; request.filterOptions.push_back (option); } // fill in the second query if needed if (!args["cq2"].empty()) { option.name = "CombineQuery"; option.value = args["cq2"]; request.filterOptions.push_back (option); text_t formattedstring2 = args["q2"]; format_querystring (formattedstring2, args.getintarg("b")); option.name = "Term"; option.value = formattedstring2; request.filterOptions.push_back (option); option.name = "QueryType"; option.value = (args.getintarg("t")) ? "ranked" : "boolean"; request.filterOptions.push_back (option); option.name = "Casefold"; option.value = (args.getintarg("k")) ? "true" : "false"; request.filterOptions.push_back (option); option.name = "Stem"; option.value = (args.getintarg("s")) ? "true" : "false"; request.filterOptions.push_back (option); if (!args["h2"].empty()) { option.name = "Index"; option.value = args["h2"]; request.filterOptions.push_back (option); } if (!args["j2"].empty()) { option.name = "Subcollection"; option.value = args["j2"]; request.filterOptions.push_back (option); } if (!args["n2"].empty()) { option.name = "Language"; option.value = args["n2"]; request.filterOptions.push_back (option); } } // we always want to retrieve all documents (i.e. 1 to maxdocs) // for this collection option.name = "Maxdocs"; option.value = args["m"]; request.filterOptions.push_back (option); option.name = "StartResults"; option.value = 1; request.filterOptions.push_back (option); option.name = "EndResults"; option.value = args["m"]; request.filterOptions.push_back (option); collectproto->filter (args["c"], request, response, err, logout); if (err != noError) { outconvertclass text_t2ascii; logout << text_t2ascii << "Error: call to QueryFilter failed in queryaction (" << get_comerror_string (err) << ")\n"; return false; } return true; } void format_querystring (text_t &querystring, int querymode) { text_t formattedstring; text_t::const_iterator here = querystring.begin(); text_t::const_iterator end = querystring.end(); // space is used to insert spaces between Chinese // characters. No space is needed before the first // Chinese character. bool space = false; // want to remove ()|!& from querystring so boolean queries are just // "all the words" queries (unless querymode is advanced) while (here != end) { if ((querymode == 0) && (*here == '(' || *here == ')' || *here == '|' || *here == '!' || *here == '&')) { formattedstring.push_back(' '); } else { if ((*here >= 0x4e00 && *here <= 0x9fa5) || (*here >= 0xf900 && *here <= 0xfa2d)) { // Chinese character if (space) formattedstring.push_back (0x200b); formattedstring.push_back (*here); formattedstring.push_back (0x200b); space = true; } else { // non-Chinese character formattedstring.push_back (*here); space = false; } } here ++; } querystring = formattedstring; } void get_phrases (const text_t &querystring, text_tarray &phrases) { phrases.erase (phrases.begin(), phrases.end()); if (!querystring.empty()) { text_t::const_iterator end = querystring.end(); text_t::const_iterator here = findchar (querystring.begin(), end, '"'); if (here != end) { text_t tmptext; bool foundquote = false; while (here != end) { if (*here == '"') { if (foundquote) { if (!tmptext.empty()) { phrases.push_back(tmptext); tmptext.clear(); } foundquote = false; } else foundquote = true; } else { if (foundquote) tmptext.push_back (*here); } here ++; } } } }