/********************************************************************** * * lucenesearch.cpp -- * Copyright (C) 1999-2002 The New Zealand Digital Library Project * * A component of the Greenstone digital library software * from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * *********************************************************************/ #if defined(GSDL_USE_OBJECTSPACE) # include #elif defined(GSDL_USE_IOS_H) # include #else # include #endif #include #include #include "gsdlconf.h" #include "gsdltools.h" #include "lucenesearch.h" #include "fileutil.h" #include "queryinfo.h" #include "gsdlunicode.h" #include "expat_resultset.h" text_t lucenesearchclass::getindexsuffix(const queryparamclass &qp) { text_t indexsuffix = "index"; // get the first char of the level to be the start of the index name text_t suffix = substr(qp.level.begin(), qp.level.begin()+1); lc(suffix); text_t ind = qp.index; text_t sub = qp.subcollection; text_t lang = qp.language; // collection name not added for Lucene indexsuffix = filename_cat(indexsuffix, suffix +ind + sub + lang); return indexsuffix; } //////////////////// // lucenesearch class // //////////////////// lucenesearchclass::lucenesearchclass () : searchclass() { gdbm_level = "Doc"; } 
// Destructor: deletes the object pointed to by `cache` (if any) and resets
// the pointer to NULL.
// NOTE(review): where `cache` is declared and allocated is not visible in
// this file section -- confirm this class is its sole owner.
lucenesearchclass::~lucenesearchclass ()
{
  if (cache != NULL) {
    delete cache;
    cache = NULL;
  }
}

// Replaces the stored gdbm_level (initialised to "Doc" by the constructor)
// with `level`.
void lucenesearchclass::set_gdbm_level(const text_t &level) {
  gdbm_level = level;
}

// Runs a query by building a command line for the lucene_query.pl script.
// Only the first part of this function appears in this span; the statements
// continue below.
bool lucenesearchclass::search(const queryparamclass &queryparams, queryresultsclass &queryresult) {

#ifdef __WIN32__
  char basepath[]="";
#else
  char basepath[] = "/";
#endif

  cerr << "**** in lucene search" << endl;
  // NOTE(review): check whether getcstr() returns a buffer the caller must
  // release; no release of indexname is visible in this part of the function.
  char *indexname = (filename_cat(collectdir, getindexsuffix(queryparams))).getcstr();

  // set default stem method from values originally set on prefs page
  int defaultStemMethod = 0;
  if (queryparams.casefolding) {
    defaultStemMethod |= 1;
  }
  if (queryparams.stemming) {
    defaultStemMethod |= 2;
  }

  // set default Boolean combiner from all/some setting
  // if match_mode == 1, ie all, default=1 ie AND
  // if match_mode == 0, ie some, default=0, ie OR
  int defaultBoolCombine = 0;
  if (queryparams.match_mode){
    defaultBoolCombine = 1;
  }

  text_t utf8querystring = to_utf8(queryparams.querystring);
  cerr << "**** query string = " << utf8querystring << endl;

  // Prefix every double quote in the query with a backslash, because the
  // query is wrapped in double quotes when appended to the command below.
  // NOTE(review): only '"' is escaped; backslashes and other characters that
  // may be special to a command interpreter pass through unchanged -- confirm
  // this is safe for however cmd is executed.
  text_t escaped_utf8querystring = "";
  text_t::const_iterator here = utf8querystring.begin();
  while (here != utf8querystring.end()) {
    if (*here == '"') escaped_utf8querystring.push_back('\\');
    escaped_utf8querystring.push_back(*here);
    here++;
  }
  cerr << "**** escaped query string = " << escaped_utf8querystring << endl;
  cerr << "***** index name = " << indexname << endl;

  // Command line: "<gsdlhome>/bin/script/lucene_query.pl" "<indexname>" "<query>"
  text_t cmd = "\"" + filename_cat(gsdlhome, "bin", "script", "lucene_query.pl") + "\"";
  cmd += (text_t)" \""+indexname + (text_t)"\" \"" + escaped_utf8querystring + (text_t)"\"";
  cerr << "Lucene command: " << cmd << endl;

  text_t xml_text = "";
  // I don't want to do this, but I have to.
text_t gsdlhome_env = "GSDLHOME=" + gsdlhome; putenv(gsdlhome_env.getcstr()); #ifdef __WIN32__ putenv("GSDLOS=windows"); //FILE *PIN = _popen(cmd.getcstr(), "r"); // didn't seem to work cmd = (text_t)"perl -S "+cmd; // we write the result to a file clock_t this_time = clock(); text_t filename = "luc"; filename.append(this_time); filename.append(".txt"); text_t out_file = filename_cat(collectdir, filename); cmd += (text_t)" \""+out_file+ (text_t)"\""; int rv = gsdl_system(cmd, true, cerr); if (rv != 0) { cerr << "tried to run command \""<