/********************************************************************** * * lucenesearch.cpp -- * Copyright (C) 1999-2002 The New Zealand Digital Library Project * * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * *********************************************************************/ #if defined(GSDL_USE_OBJECTSPACE) # include #elif defined(GSDL_USE_IOS_H) # include #else # include #endif #include #include #include "gsdlconf.h" #include "gsdltools.h" #include "lucenesearch.h" #include "fileutil.h" #include "queryinfo.h" #include "gsdlunicode.h" #include "expat_resultset.h" text_t lucenesearchclass::getindexsuffix(const queryparamclass &qp) { text_t indexsuffix = "index"; // get the first char of the level to be the start of the index name text_t suffix = substr(qp.level.begin(), qp.level.begin()+1); lc(suffix); text_t ind = qp.index; text_t sub = qp.subcollection; text_t lang = qp.language; // collection name not added for Lucene indexsuffix = filename_cat(indexsuffix, suffix +ind + sub + lang); return indexsuffix; } //////////////////// // lucenesearch class // //////////////////// lucenesearchclass::lucenesearchclass () : searchclass() { gdbm_level = "Doc"; } lucenesearchclass::~lucenesearchclass () { if (cache != NULL) { delete cache; cache = NULL; } } void lucenesearchclass::set_gdbm_level(const text_t &level) { gdbm_level = level; } bool lucenesearchclass::search(const queryparamclass &queryparams, queryresultsclass &queryresult) { #ifdef __WIN32__ char basepath[]=""; #else char basepath[] = "/"; #endif char *indexname = (filename_cat(collectdir, getindexsuffix(queryparams))).getcstr(); // set default stem method from values originally set on prefs page int defaultStemMethod = 0; if (queryparams.casefolding) { defaultStemMethod |= 1; } if (queryparams.stemming) { defaultStemMethod |= 2; } text_t utf8querystring = to_utf8(queryparams.querystring); text_t escaped_utf8querystring = ""; text_t::const_iterator here = utf8querystring.begin(); while (here != utf8querystring.end()) { if (*here == '"') escaped_utf8querystring.push_back('\\'); escaped_utf8querystring.push_back(*here); ++here; } // Use this to skip lucene_query.pl and access GS2LuceneQuery directly (Java must be on path) // text_t cmd = "java -classpath \"" + filename_cat(gsdlhome, "bin", "java", "LuceneWrap.jar") + "\" org.nzdl.gsdl.LuceneWrap.GS2LuceneQuery "; text_t cmd = "\"" + filename_cat(gsdlhome, "bin", "script", "lucene_query.pl") + "\""; cmd += (text_t)" \""+indexname + (text_t)"\" \"" + escaped_utf8querystring + (text_t)"\""; if (!queryparams.filterstring.empty()) { cmd += " -filter \"" + queryparams.filterstring + "\""; } if (!queryparams.sortfield.empty()) { cmd += " -sort \"" + queryparams.sortfield + "\""; } if (!queryparams.fuzziness.empty()) { cmd += " -fuzziness " + queryparams.fuzziness; } // New code to support configuration of the default conjuction operator // set default Boolean combiner from all/some setting // if match_mode == 1, ie all, default=1 ie AND // if match_mode == 0, ie some, default=0, ie OR if (queryparams.match_mode) { cmd += " -dco AND"; } // New code to allow Lucene to do paging of search results. This should // substantially improve performance as we don't have to return all 12000 // hits if we only need the first 20! if (queryparams.startresults && queryparams.endresults) { cmd += (text_t)" -startresults " + queryparams.startresults; cmd += (text_t)" -endresults " + queryparams.endresults; } cerr << "Lucene command: " << cmd << endl; text_t xml_text = ""; // I don't want to do this, but I have to. text_t gsdlhome_env = "GSDLHOME=" + gsdlhome; putenv(gsdlhome_env.getcstr()); #ifdef __WIN32__ putenv("GSDLOS=windows"); //FILE *PIN = _popen(cmd.getcstr(), "r"); // didn't seem to work cmd = (text_t)"perl -S "+cmd; // we write the result to a file clock_t this_time = clock(); text_t filename = "luc"; filename.append(this_time); filename.append(".txt"); text_t out_file = filename_cat(collectdir, filename); cmd += " -out \"" + out_file + "\""; int rv = gsdl_system(cmd, true, cerr); if (rv != 0) { cerr << "tried to run command \""<