source: gsdl/trunk/src/colservr/lucenesearch.cpp@ 16445

Last change on this file since 16445 was 16310, checked in by davidb, 16 years ago

Introduction of 'collecthome' which parallels 'gsdlhome' to allow the toplevel collect folder to be outside of the gsdlhome area

  • Property svn:keywords set to Author Date Id Revision
File size: 6.8 KB
Line 
1/**********************************************************************
2 *
3 * lucenesearch.cpp --
4 * Copyright (C) 1999-2002 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#if defined(GSDL_USE_OBJECTSPACE)
28# include <ospace\std\iostream>
29#elif defined(GSDL_USE_IOS_H)
30# include <iostream.h>
31#else
32# include <iostream>
33#endif
34
35#include <stdio.h>
36#include <time.h>
37
38#include "gsdlconf.h"
39#include "gsdltools.h"
40#include "lucenesearch.h"
41#include "fileutil.h"
42#include "queryinfo.h"
43#include "gsdlunicode.h"
44
45#include "expat_resultset.h"
46
47text_t lucenesearchclass::getindexsuffix(const queryparamclass &qp) {
48 text_t indexsuffix = "index";
49 // get the first char of the level to be the start of the index name
50 text_t suffix = substr(qp.level.begin(), qp.level.begin()+1);
51 lc(suffix);
52 text_t ind = qp.index;
53 text_t sub = qp.subcollection;
54 text_t lang = qp.language;
55
56 // collection name not added for Lucene
57 indexsuffix = filename_cat(indexsuffix, suffix +ind + sub + lang);
58 return indexsuffix;
59
60}
61
62////////////////////
63// lucenesearch class //
64////////////////////
65
66lucenesearchclass::lucenesearchclass ()
67 : searchclass() {
68
69 textlevel = "Doc";
70}
71
72lucenesearchclass::~lucenesearchclass ()
73{
74 if (cache != NULL)
75 {
76 delete cache;
77 cache = NULL;
78 }
79}
80
81void lucenesearchclass::set_text_level(const text_t &textlevel_arg)
82{
83 textlevel = textlevel_arg;
84}
85
86
87bool lucenesearchclass::search(const queryparamclass &queryparams,
88 queryresultsclass &queryresult) {
89
90#ifdef __WIN32__
91 char basepath[]="";
92#else
93 char basepath[] = "/";
94#endif
95
96 char *indexname = (filename_cat(collectdir, getindexsuffix(queryparams))).getcstr();
97
98 // set default stem method from values originally set on prefs page
99 int defaultStemMethod = 0;
100 if (queryparams.casefolding) {
101 defaultStemMethod |= 1;
102 }
103 if (queryparams.stemming) {
104 defaultStemMethod |= 2;
105 }
106
107 text_t utf8querystring = to_utf8(queryparams.querystring);
108
109 text_t escaped_utf8querystring = "";
110 text_t::const_iterator here = utf8querystring.begin();
111 while (here != utf8querystring.end()) {
112 if (*here == '"') escaped_utf8querystring.push_back('\\');
113 escaped_utf8querystring.push_back(*here);
114 ++here;
115 }
116
117 // Use this to skip lucene_query.pl and access GS2LuceneQuery directly (Java must be on path)
118 // text_t cmd = "java -classpath \"" + filename_cat(gsdlhome, "bin", "java", "LuceneWrapper.jar") + "\" org.greenstone.LuceneWrapper.GS2LuceneQuery ";
119 text_t cmd = "\"" + filename_cat(gsdlhome, "bin", "script", "lucene_query.pl") + "\"";
120 cmd += (text_t)" \""+indexname + (text_t)"\" \"" + escaped_utf8querystring + (text_t)"\"";
121
122 if (!queryparams.filterstring.empty()) {
123 cmd += " -filter \"" + queryparams.filterstring + "\"";
124 }
125 if (!queryparams.sortfield.empty()) {
126 cmd += " -sort \"" + queryparams.sortfield + "\"";
127 }
128 if (!queryparams.fuzziness.empty()) {
129 cmd += " -fuzziness " + queryparams.fuzziness;
130 }
131
132 // New code to support configuration of the default conjuction operator
133 // set default Boolean combiner from all/some setting
134 // if match_mode == 1, ie all, default=1 ie AND
135 // if match_mode == 0, ie some, default=0, ie OR
136 if (queryparams.match_mode)
137 {
138 cmd += " -dco AND";
139 }
140
141 // New code to allow Lucene to do paging of search results. This should
142 // substantially improve performance as we don't have to return all 12000
143 // hits if we only need the first 20!
144 if (queryparams.startresults && queryparams.endresults)
145 {
146 cmd += (text_t)" -startresults " + queryparams.startresults;
147 cmd += (text_t)" -endresults " + queryparams.endresults;
148 }
149
150 text_t xml_text = "";
151
152 // I don't want to do this, but I have to.
153 text_t gsdlhome_env = "GSDLHOME=" + gsdlhome;
154 putenv(gsdlhome_env.getcstr());
155
156#ifdef __WIN32__
157 putenv("GSDLOS=windows");
158
159 //FILE *PIN = _popen(cmd.getcstr(), "r"); // didn't seem to work
160 cmd = (text_t)"perl -S "+cmd;
161 // we write the result to a file
162 clock_t this_time = clock();
163 text_t filename = "luc";
164 filename.append(this_time);
165 filename.append(".txt");
166
167 text_t out_file = filename_cat(collectdir, filename);
168 cmd += " -out \"" + out_file + "\"";
169 int rv = gsdl_system(cmd, true, cerr);
170 if (rv != 0) {
171 cerr << "tried to run command \""<<cmd<<"\", but it failed\n";
172 } else {
173 read_file(out_file, xml_text);
174 remove(out_file.getcstr()); // now delete it
175 }
176#else
177 putenv("GSDLOS=linux");
178
179 FILE *PIN = popen(cmd.getcstr(), "r");
180
181 if (PIN==NULL) {
182 perror("PIPE");
183 cerr << "Error: unable to open pipe to " << cmd << endl;
184
185 return false;
186 }
187 while (!feof(PIN)) {
188 char buffer[256];
189 int num_bytes = fread(buffer,1,256,PIN);
190 xml_text.appendcarr(buffer,num_bytes);
191 }
192
193#endif
194
195 expat_resultset(xml_text,queryresult);
196
197#ifdef __WIN32__
198 // _pclose(PIN);
199#else
200 pclose(PIN);
201#endif
202
203 return true;
204}
205
206
207bool lucenesearchclass::browse_search(const queryparamclass &queryparams,
208 int start, int numDocs,
209 queryresultsclass &queryresult) {
210
211 cerr << "**** Not sure what this function does!" << endl;
212 return false;
213}
214
215// the document text for 'docnum' is placed in 'output'
216// docTargetDocument returns 'true' if it was able to
217// try to get a document
218// collection is needed to see if an index from the
219// collection is loaded. THe default index bits are just there cos
220// the mg version needs them
221
222bool lucenesearchclass::docTargetDocument(const text_t &/*defaultindex*/,
223 const text_t &/*defaultsubcollection*/,
224 const text_t &/*defaultlanguage*/,
225 const text_t &collection,
226 int docnum,
227 text_t &output) {
228
229 // we now get the document directly by lucenesource, so don't use this
230 // method
231 return false;
232}
233
234// used to clear any cached databases for persistent versions of
235// Greenstone like the Windows local library
236void lucenesearchclass::unload_database () {
237}
238
239void lucenesearchclass::set_gsdlhome (const text_t &gh)
240{
241 gsdlhome = gh;
242}
Note: See TracBrowser for help on using the repository browser.