[227] | 1 | /**********************************************************************
|
---|
| 2 | *
|
---|
[1324] | 3 | * queryfilter.cpp -- base class for queryfilters
|
---|
[227] | 4 | * Copyright (C) 1999 The New Zealand Digital Library Project
|
---|
| 5 | *
|
---|
[534] | 6 | * A component of the Greenstone digital library software
|
---|
| 7 | * from the New Zealand Digital Library Project at the
|
---|
| 8 | * University of Waikato, New Zealand.
|
---|
[227] | 9 | *
|
---|
[534] | 10 | * This program is free software; you can redistribute it and/or modify
|
---|
| 11 | * it under the terms of the GNU General Public License as published by
|
---|
| 12 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 13 | * (at your option) any later version.
|
---|
| 14 | *
|
---|
| 15 | * This program is distributed in the hope that it will be useful,
|
---|
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 18 | * GNU General Public License for more details.
|
---|
| 19 | *
|
---|
| 20 | * You should have received a copy of the GNU General Public License
|
---|
| 21 | * along with this program; if not, write to the Free Software
|
---|
| 22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 23 | *
|
---|
[227] | 24 | *********************************************************************/
|
---|
| 25 |
|
---|
| 26 | #include "queryfilter.h"
|
---|
| 27 | #include "fileutil.h"
|
---|
| 28 |
|
---|
| 29 |
|
---|
[235] | 30 |
|
---|
[351] | 31 | /////////////////////////////////
|
---|
| 32 | // functions for queryfilterclass
|
---|
| 33 | /////////////////////////////////
|
---|
| 34 |
|
---|
[327] | 35 |
|
---|
[227] | 36 | queryfilterclass::queryfilterclass () {
|
---|
[15558] | 37 | db_ptr = NULL;
|
---|
[8024] | 38 | textsearchptr = NULL;
|
---|
[12314] | 39 | maxnumeric = 4;
|
---|
[227] | 40 |
|
---|
[351] | 41 | FilterOption_t filtopt;
|
---|
| 42 | filtopt.name = "CombineQuery";
|
---|
| 43 | filtopt.type = FilterOption_t::enumeratedt;
|
---|
| 44 | filtopt.repeatable = FilterOption_t::onePerQuery;
|
---|
| 45 | filtopt.defaultValue = "and";
|
---|
| 46 | filtopt.validValues.push_back("and");
|
---|
| 47 | filtopt.validValues.push_back("or");
|
---|
| 48 | filtopt.validValues.push_back("not");
|
---|
| 49 | filterOptions["CombineQuery"] = filtopt;
|
---|
| 50 |
|
---|
[227] | 51 | // -- onePerQuery StartResults integer
|
---|
[351] | 52 | filtopt.clear();
|
---|
[227] | 53 | filtopt.name = "StartResults";
|
---|
| 54 | filtopt.type = FilterOption_t::integert;
|
---|
| 55 | filtopt.repeatable = FilterOption_t::onePerQuery;
|
---|
| 56 | filtopt.defaultValue = "1";
|
---|
| 57 | filtopt.validValues.push_back("1");
|
---|
| 58 | filtopt.validValues.push_back("1000");
|
---|
| 59 | filterOptions["StartResults"] = filtopt;
|
---|
| 60 |
|
---|
| 61 | // -- onePerQuery EndResults integer
|
---|
| 62 | filtopt.clear();
|
---|
| 63 | filtopt.name = "EndResults";
|
---|
| 64 | filtopt.type = FilterOption_t::integert;
|
---|
| 65 | filtopt.repeatable = FilterOption_t::onePerQuery;
|
---|
| 66 | filtopt.defaultValue = "10";
|
---|
[621] | 67 | filtopt.validValues.push_back("-1");
|
---|
[227] | 68 | filtopt.validValues.push_back("1000");
|
---|
| 69 | filterOptions["EndResults"] = filtopt;
|
---|
| 70 |
|
---|
| 71 | // -- onePerQuery QueryType enumerated (boolean, ranked)
|
---|
| 72 | filtopt.clear();
|
---|
| 73 | filtopt.name = "QueryType";
|
---|
| 74 | filtopt.type = FilterOption_t::enumeratedt;
|
---|
| 75 | filtopt.repeatable = FilterOption_t::onePerQuery;
|
---|
| 76 | filtopt.defaultValue = "ranked";
|
---|
| 77 | filtopt.validValues.push_back("boolean");
|
---|
| 78 | filtopt.validValues.push_back("ranked");
|
---|
| 79 | filterOptions["QueryType"] = filtopt;
|
---|
| 80 |
|
---|
[501] | 81 | // -- onePerQuery MatchMode enumerated (some, all)
|
---|
| 82 | filtopt.clear();
|
---|
| 83 | filtopt.name = "MatchMode";
|
---|
| 84 | filtopt.type = FilterOption_t::enumeratedt;
|
---|
| 85 | filtopt.repeatable = FilterOption_t::onePerQuery;
|
---|
| 86 | filtopt.defaultValue = "some";
|
---|
| 87 | filtopt.validValues.push_back("some");
|
---|
| 88 | filtopt.validValues.push_back("all");
|
---|
[613] | 89 | filterOptions["MatchMode"] = filtopt;
|
---|
[501] | 90 |
|
---|
[227] | 91 | // -- onePerTerm Term string ???
|
---|
| 92 | filtopt.clear();
|
---|
| 93 | filtopt.name = "Term";
|
---|
| 94 | filtopt.type = FilterOption_t::stringt;
|
---|
| 95 | filtopt.repeatable = FilterOption_t::onePerTerm;
|
---|
| 96 | filtopt.defaultValue = "";
|
---|
| 97 | filterOptions["Term"] = filtopt;
|
---|
| 98 |
|
---|
| 99 | // -- onePerTerm Casefold boolean
|
---|
| 100 | filtopt.clear();
|
---|
| 101 | filtopt.name = "Casefold";
|
---|
| 102 | filtopt.type = FilterOption_t::booleant;
|
---|
| 103 | filtopt.repeatable = FilterOption_t::onePerTerm;
|
---|
| 104 | filtopt.defaultValue = "true";
|
---|
| 105 | filtopt.validValues.push_back("false");
|
---|
| 106 | filtopt.validValues.push_back("true");
|
---|
| 107 | filterOptions["Casefold"] = filtopt;
|
---|
| 108 |
|
---|
| 109 | // -- onePerTerm Stem boolean
|
---|
| 110 | filtopt.clear();
|
---|
| 111 | filtopt.name = "Stem";
|
---|
| 112 | filtopt.type = FilterOption_t::booleant;
|
---|
| 113 | filtopt.repeatable = FilterOption_t::onePerTerm;
|
---|
| 114 | filtopt.defaultValue = "false";
|
---|
| 115 | filtopt.validValues.push_back("false");
|
---|
| 116 | filtopt.validValues.push_back("true");
|
---|
| 117 | filterOptions["Stem"] = filtopt;
|
---|
| 118 |
|
---|
[12871] | 119 | // -- onePerTerm AccentFold boolean
|
---|
| 120 | filtopt.clear();
|
---|
| 121 | filtopt.name = "AccentFold";
|
---|
| 122 | filtopt.type = FilterOption_t::booleant;
|
---|
| 123 | filtopt.repeatable = FilterOption_t::onePerTerm;
|
---|
| 124 | filtopt.defaultValue = "false";
|
---|
| 125 | filtopt.validValues.push_back("false");
|
---|
| 126 | filtopt.validValues.push_back("true");
|
---|
| 127 | filterOptions["AccentFold"] = filtopt;
|
---|
| 128 |
|
---|
[227] | 129 | // -- onePerTerm Index enumerated
|
---|
| 130 | filtopt.clear();
|
---|
| 131 | filtopt.name = "Index";
|
---|
| 132 | filtopt.type = FilterOption_t::enumeratedt;
|
---|
| 133 | filtopt.repeatable = FilterOption_t::onePerTerm;
|
---|
| 134 | filtopt.defaultValue = "";
|
---|
| 135 | filterOptions["Index"] = filtopt;
|
---|
| 136 |
|
---|
| 137 | // -- onePerTerm Subcollection enumerated
|
---|
| 138 | filtopt.clear();
|
---|
| 139 | filtopt.name = "Subcollection";
|
---|
| 140 | filtopt.type = FilterOption_t::enumeratedt;
|
---|
| 141 | filtopt.repeatable = FilterOption_t::onePerTerm;
|
---|
| 142 | filtopt.defaultValue = "";
|
---|
| 143 | filterOptions["Subcollection"] = filtopt;
|
---|
| 144 |
|
---|
| 145 | // -- onePerTerm Language enumerated
|
---|
| 146 | filtopt.clear();
|
---|
| 147 | filtopt.name = "Language";
|
---|
| 148 | filtopt.type = FilterOption_t::enumeratedt;
|
---|
| 149 | filtopt.repeatable = FilterOption_t::onePerTerm;
|
---|
| 150 | filtopt.defaultValue = "";
|
---|
| 151 | filterOptions["Language"] = filtopt;
|
---|
[613] | 152 |
|
---|
| 153 | // -- onePerQuery Maxdocs integer
|
---|
| 154 | filtopt.clear();
|
---|
| 155 | filtopt.name = "Maxdocs";
|
---|
| 156 | filtopt.type = FilterOption_t::integert;
|
---|
| 157 | filtopt.repeatable = FilterOption_t::onePerQuery;
|
---|
| 158 | filtopt.defaultValue = "200";
|
---|
| 159 | filtopt.validValues.push_back("-1");
|
---|
| 160 | filtopt.validValues.push_back("1000");
|
---|
| 161 | filterOptions["Maxdocs"] = filtopt;
|
---|
[766] | 162 |
|
---|
[227] | 163 | }
|
---|
| 164 |
|
---|
| 165 | queryfilterclass::~queryfilterclass () {
|
---|
[15595] | 166 | // don't delete db_ptr or textsearchptr here, they'll be cleaned up by the source
|
---|
[227] | 167 | }
|
---|
| 168 |
|
---|
| 169 | void queryfilterclass::configure (const text_t &key, const text_tarray &cfgline) {
|
---|
| 170 | filterclass::configure (key, cfgline);
|
---|
| 171 |
|
---|
| 172 | if (key == "indexmap") {
|
---|
| 173 | indexmap.importmap (cfgline);
|
---|
| 174 |
|
---|
| 175 | // update the list of indexes in the filter information
|
---|
| 176 | text_tarray options;
|
---|
| 177 | indexmap.gettoarray (options);
|
---|
| 178 | filterOptions["Index"].validValues = options;
|
---|
| 179 |
|
---|
[302] | 180 | } else if (key == "defaultindex") {
|
---|
| 181 | indexmap.from2to (cfgline[0], filterOptions["Index"].defaultValue);
|
---|
| 182 |
|
---|
[227] | 183 | } else if (key == "subcollectionmap") {
|
---|
| 184 | subcollectionmap.importmap (cfgline);
|
---|
| 185 |
|
---|
| 186 | // update the list of subcollections in the filter information
|
---|
| 187 | text_tarray options;
|
---|
| 188 | subcollectionmap.gettoarray (options);
|
---|
| 189 | filterOptions["Subcollection"].validValues = options;
|
---|
| 190 |
|
---|
[302] | 191 | } else if (key == "defaultsubcollection") {
|
---|
| 192 | subcollectionmap.from2to (cfgline[0], filterOptions["Subcollection"].defaultValue);
|
---|
| 193 |
|
---|
[227] | 194 | } else if (key == "languagemap") {
|
---|
| 195 | languagemap.importmap (cfgline);
|
---|
| 196 |
|
---|
| 197 | // update the list of languages in the filter information
|
---|
| 198 | text_tarray options;
|
---|
| 199 | languagemap.gettoarray (options);
|
---|
| 200 | filterOptions["Language"].validValues = options;
|
---|
[302] | 201 |
|
---|
[4738] | 202 | } else if (key == "defaultlanguage") {
|
---|
[302] | 203 | languagemap.from2to (cfgline[0], filterOptions["Language"].defaultValue);
|
---|
[9937] | 204 | } else if (key == "indexstem") {
|
---|
| 205 | indexstem = cfgline[0];
|
---|
[12314] | 206 | } else if (key == "maxnumeric") {
|
---|
| 207 | maxnumeric = cfgline[0].getint();
|
---|
[4738] | 208 | }
|
---|
[12314] | 209 |
|
---|
[227] | 210 | }
|
---|
| 211 |
|
---|
| 212 | bool queryfilterclass::init (ostream &logout) {
|
---|
| 213 | outconvertclass text_t2ascii;
|
---|
| 214 |
|
---|
| 215 | if (!filterclass::init(logout)) return false;
|
---|
| 216 |
|
---|
[4738] | 217 | if (filterOptions["Index"].defaultValue.empty()) {
|
---|
| 218 | // use first index in map as default if no default is set explicitly
|
---|
| 219 | text_tarray fromarray;
|
---|
| 220 | indexmap.getfromarray(fromarray);
|
---|
| 221 | if (fromarray.size()) {
|
---|
| 222 | filterOptions["Index"].defaultValue = fromarray[0];
|
---|
| 223 | }
|
---|
| 224 | }
|
---|
| 225 |
|
---|
| 226 | if (filterOptions["Subcollection"].defaultValue.empty()) {
|
---|
| 227 | // use first subcollection in map as default if no default is set explicitly
|
---|
| 228 | text_tarray fromarray;
|
---|
| 229 | subcollectionmap.getfromarray(fromarray);
|
---|
| 230 | if (fromarray.size()) {
|
---|
| 231 | filterOptions["Subcollection"].defaultValue = fromarray[0];
|
---|
| 232 | }
|
---|
| 233 | }
|
---|
| 234 |
|
---|
| 235 | if (filterOptions["Language"].defaultValue.empty()) {
|
---|
| 236 | // use first language in map as default if no default is set explicitly
|
---|
| 237 | text_tarray fromarray;
|
---|
| 238 | languagemap.getfromarray(fromarray);
|
---|
| 239 | if (fromarray.size()) {
|
---|
| 240 | filterOptions["Language"].defaultValue = fromarray[0];
|
---|
| 241 | }
|
---|
| 242 | }
|
---|
| 243 |
|
---|
[15680] | 244 | if (db_ptr == NULL) {
|
---|
| 245 | // most likely a configuration problem
|
---|
| 246 | logout << text_t2ascii
|
---|
| 247 | << "configuration error: queryfilter contains a null dbclass\n\n";
|
---|
| 248 | return false;
|
---|
| 249 | }
|
---|
| 250 |
|
---|
[227] | 251 | // get the filename for the database and make sure it exists
|
---|
[9937] | 252 | if (indexstem.empty()) {
|
---|
| 253 | indexstem = collection;
|
---|
| 254 | }
|
---|
[16310] | 255 | db_filename = resolve_db_filename(indexstem,db_ptr->getfileextension());
|
---|
[15558] | 256 | if (!file_exists(db_filename)) {
|
---|
[227] | 257 | logout << text_t2ascii
|
---|
[15558] | 258 | << "warning: database \"" << db_filename << "\" does not exist\n\n";
|
---|
| 259 | //return false;
|
---|
[227] | 260 | }
|
---|
| 261 |
|
---|
| 262 | return true;
|
---|
| 263 | }
|
---|
| 264 |
|
---|
[27064] | 265 | void queryfilterclass::set_queryparam_defaults(queryparamclass &query ) {
|
---|
| 266 |
|
---|
| 267 | query.collection = collection;
|
---|
| 268 | query.index = filterOptions["Index"].defaultValue;
|
---|
| 269 | query.subcollection = filterOptions["Subcollection"].defaultValue;
|
---|
| 270 | query.language = filterOptions["Language"].defaultValue;
|
---|
| 271 | query.querystring.clear();
|
---|
| 272 | query.search_type = (filterOptions["QueryType"].defaultValue == "ranked");
|
---|
| 273 | query.match_mode = (filterOptions["MatchMode"].defaultValue == "all");
|
---|
| 274 | query.casefolding = (filterOptions["Casefold"].defaultValue == "true");
|
---|
| 275 | query.stemming = (filterOptions["Stem"].defaultValue == "true");
|
---|
| 276 | query.accentfolding = (filterOptions["AccentFold"].defaultValue == "true");
|
---|
| 277 | query.maxdocs = filterOptions["Maxdocs"].defaultValue.getint();
|
---|
| 278 | query.level = filterOptions["Level"].defaultValue;
|
---|
| 279 | query.maxnumeric = maxnumeric;
|
---|
| 280 |
|
---|
| 281 | }
|
---|
| 282 |
|
---|
| 283 | bool queryfilterclass::set_queryparam_field(OptionValue_t option, queryparamclass &query) {
|
---|
| 284 |
|
---|
| 285 | if (option.name == "QueryType") {
|
---|
| 286 | query.search_type = (option.value == "ranked");
|
---|
| 287 | } else if (option.name == "MatchMode") {
|
---|
| 288 | query.match_mode = (option.value == "all");
|
---|
| 289 | if (query.match_mode == 1) query.maxdocs = -1;
|
---|
| 290 | } else if (option.name == "Term") {
|
---|
| 291 | query.querystring = option.value;
|
---|
| 292 | } else if (option.name == "Casefold") {
|
---|
| 293 | query.casefolding = (option.value == "true");
|
---|
| 294 | } else if (option.name == "Stem") {
|
---|
| 295 | query.stemming = (option.value == "true");
|
---|
| 296 | } else if (option.name == "AccentFold") {
|
---|
| 297 | query.accentfolding = (option.value == "true");
|
---|
| 298 | } else if (option.name == "Index"&& option.value !="") {
|
---|
| 299 | query.index = option.value;
|
---|
| 300 | } else if (option.name == "Subcollection") {
|
---|
| 301 | query.subcollection = option.value;
|
---|
| 302 | } else if (option.name == "Language") {
|
---|
| 303 | query.language = option.value;
|
---|
| 304 | } else if (option.name == "Maxdocs") {
|
---|
| 305 | query.maxdocs = option.value.getint();
|
---|
| 306 | // } else if (option.name == "PhraseMatch") {
|
---|
| 307 | // phrasematch = option.value;
|
---|
| 308 | } else if (option.name == "Level") {
|
---|
| 309 | query.level = option.value;
|
---|
| 310 | } else if (option.name == "FilterString") {
|
---|
| 311 | query.filterstring = option.value;
|
---|
| 312 | } else if (option.name == "SortField") {
|
---|
| 313 | query.sortfield = option.value;
|
---|
| 314 | } else if (option.name == "SortOrder") {
|
---|
| 315 | query.sortorder = (option.value == "descending");
|
---|
| 316 | } else if (option.name == "Fuzziness") {
|
---|
| 317 | query.fuzziness = option.value;
|
---|
| 318 | }
|
---|
| 319 | }
|
---|
| 320 | // get the query parameters
|
---|
| 321 | void queryfilterclass::parse_query_params (const FilterRequest_t &request,
|
---|
| 322 | vector<queryparamclass> &query_params,
|
---|
| 323 | int &startresults, int &endresults,
|
---|
| 324 | ostream &logout) {
|
---|
| 325 | outconvertclass text_t2ascii;
|
---|
| 326 |
|
---|
| 327 | // set defaults for the return parameters
|
---|
| 328 | query_params.erase(query_params.begin(), query_params.end());
|
---|
| 329 | startresults = filterOptions["StartResults"].defaultValue.getint();
|
---|
| 330 | endresults = filterOptions["EndResults"].defaultValue.getint();
|
---|
| 331 |
|
---|
| 332 | // set defaults for query parameters
|
---|
| 333 | queryparamclass query;
|
---|
| 334 | query.combinequery = "or"; // first one must be "or"
|
---|
| 335 | set_queryparam_defaults(query);
|
---|
| 336 | OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
|
---|
| 337 | OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
|
---|
| 338 | while (options_here != options_end) {
|
---|
| 339 | if ((*options_here).name == "CombineQuery") {
|
---|
| 340 | // add this query
|
---|
| 341 |
|
---|
| 342 | // "all", needed when combining queries where the document results are needed
|
---|
| 343 | if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
|
---|
| 344 | query_params.push_back (query);
|
---|
| 345 |
|
---|
| 346 | // start on next query
|
---|
| 347 | query.clear();
|
---|
| 348 | query.combinequery = (*options_here).value;
|
---|
| 349 |
|
---|
| 350 | // set defaults for query parameters
|
---|
| 351 | set_queryparam_defaults(query);
|
---|
| 352 |
|
---|
| 353 | // "all", needed when combining queries where the document results are needed
|
---|
| 354 | if (need_matching_docs (request.filterResultOptions)) query.maxdocs = -1;
|
---|
| 355 |
|
---|
| 356 | } else if ((*options_here).name == "StartResults") {
|
---|
| 357 | startresults = (*options_here).value.getint();
|
---|
| 358 | } else if ((*options_here).name == "EndResults") {
|
---|
| 359 | endresults = (*options_here).value.getint();
|
---|
| 360 | } else if (!set_queryparam_field(*options_here, query)) {
|
---|
| 361 | logout << text_t2ascii
|
---|
| 362 | << "warning: unknown queryfilter option \""
|
---|
| 363 | << (*options_here).name
|
---|
| 364 | << "\" ignored.\n\n";
|
---|
| 365 | }
|
---|
| 366 |
|
---|
| 367 | ++options_here;
|
---|
| 368 | }
|
---|
| 369 |
|
---|
| 370 | // Store the start and end results in the query too, as lucene now needs to
|
---|
| 371 | // pass them through to the Java
|
---|
| 372 | query.startresults = startresults;
|
---|
| 373 | query.endresults = endresults;
|
---|
| 374 |
|
---|
| 375 | // add the last query
|
---|
| 376 | query_params.push_back (query);
|
---|
| 377 | }
|
---|
| 378 |
|
---|
| 379 |
|
---|
| 380 |
|
---|
| 381 | // translate will return true if successful
|
---|
| 382 | bool queryfilterclass::translate (dbclass *db_ptr, text_t& docnum, text_t &trans_OID) {
|
---|
| 383 | infodbclass info;
|
---|
| 384 |
|
---|
| 385 | trans_OID.clear();
|
---|
| 386 |
|
---|
| 387 | // get the info
|
---|
| 388 | if (db_ptr == NULL) return false;
|
---|
| 389 | if (!db_ptr->getinfo(docnum, info)) return false;
|
---|
| 390 |
|
---|
| 391 | // translate
|
---|
| 392 | if (info["section"].empty()) return false;
|
---|
| 393 |
|
---|
| 394 | trans_OID = info["section"];
|
---|
| 395 | return true;
|
---|
| 396 | }
|
---|
| 397 |
|
---|
| 398 |
|
---|
| 399 | // whether document results are needed
|
---|
| 400 | bool queryfilterclass::need_matching_docs (int filterResultOptions) {
|
---|
| 401 | return ((filterResultOptions & FROID) || (filterResultOptions & FRranking) ||
|
---|
| 402 | (filterResultOptions & FRmetadata));
|
---|
| 403 | }
|
---|
| 404 |
|
---|
| 405 | // whether term information is needed
|
---|
| 406 | bool queryfilterclass::need_term_info (int filterResultOptions) {
|
---|
| 407 | return ((filterResultOptions & FRtermFreq) || (filterResultOptions & FRmatchTerms));
|
---|
| 408 | }
|
---|