Context Navigation

source: trunk/gsdl/src/colservr/lucenesearch.cpp@ 9174

Last change on this file since 9174 was 9115, checked in by mdewsnip, 19 years ago
Added #ifdefs for slightly different popen and pclose on Windows.
Property svn:keywords set to `Author Date Id Revision`
File size: 9.2 KB

Line
1	/**********************************************************************
2	*
3	* lucenesearch.cpp --
4	* Copyright (C) 1999-2002 The New Zealand Digital Library Project
5	*
6	* A component of the Greenstone digital library software
7	* from the New Zealand Digital Library Project at the
8	* University of Waikato, New Zealand.
9	*
10	* This program is free software; you can redistribute it and/or modify
11	* it under the terms of the GNU General Public License as published by
12	* the Free Software Foundation; either version 2 of the License, or
13	* (at your option) any later version.
14	*
15	* This program is distributed in the hope that it will be useful,
16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	* GNU General Public License for more details.
19	*
20	* You should have received a copy of the GNU General Public License
21	* along with this program; if not, write to the Free Software
22	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	*
24	*********************************************************************/
25
26
27	#if defined(GSDL_USE_OBJECTSPACE)
28	# include <ospace\std\iostream>
29	#elif defined(GSDL_USE_IOS_H)
30	# include <iostream.h>
31	#else
32	# include <iostream>
33	#endif
34
35	#include <stdio.h>
36
37	#include "gsdlconf.h"
38	#include "lucenesearch.h"
39	#include "fileutil.h"
40	#include "queryinfo.h"
41	#include "gsdlunicode.h"
42
43	#include "expat_resultset.h"
44
45
46	static text_t getindexsuffix(const queryparamclass &qp) {
47	text_t indexsuffix = "index";
48	// get the first char of the level to be the start of the index name
49	text_t suffix = substr(qp.level.begin(), qp.level.begin()+1);
50	lc(suffix);
51	text_t ind = qp.index;
52	text_t sub = qp.subcollection;
53	text_t lang = qp.language;
54
55	// collection name not added for Lucene
56	indexsuffix = filename_cat(indexsuffix, suffix +ind + sub + lang);
57	return indexsuffix;
58
59	}
60
61	////////////////////
62	// lucenesearch class //
63	////////////////////
64
65	lucenesearchclass::lucenesearchclass ()
66	: searchclass() {
67
68	gdbm_level = "Doc";
69	}
70
71	lucenesearchclass::~lucenesearchclass ()
72	{
73	if (cache != NULL)
74	{
75	delete cache;
76	cache = NULL;
77	}
78	}
79
80	void lucenesearchclass::set_gdbm_level(const text_t &level) {
81	gdbm_level = level;
82
83	}
84
85
86	bool lucenesearchclass::search(const queryparamclass &queryparams,
87	queryresultsclass &queryresult) {
88
89	#ifdef __WIN32__
90	char basepath[]="";
91	#else
92	char basepath[] = "/";
93	#endif
94
95	cerr << "**** in luecen search" << endl;
96
97	char *indexname = (filename_cat(collectdir, getindexsuffix(queryparams))).getcstr();
98
99	// set default stem method from values originally set on prefs page
100	int defaultStemMethod = 0;
101	if (queryparams.casefolding) {
102	defaultStemMethod \|= 1;
103	}
104	if (queryparams.stemming) {
105	defaultStemMethod \|= 2;
106	}
107
108	// set default Boolean combiner from all/some setting
109	// if match_mode == 1, ie all, default=1 ie AND
110	// if match_mode == 0, ie some, default=0, ie OR
111	int defaultBoolCombine = 0;
112	if (queryparams.match_mode){
113	defaultBoolCombine = 1;
114	}
115	char* utf8querystring=to_utf8(queryparams.querystring).getcstr();
116	cerr << "**** query string = " << utf8querystring << endl;
117	cerr << "***** index name = " << indexname << endl;
118
119	text_t cmd = "lucene_query.pl ";
120	cmd += indexname + (text_t)" \"" + to_utf8(queryparams.querystring) + (text_t)"\"";
121
122	#ifdef __WIN32__
123	FILE *PIN = _popen(cmd.getcstr(), "r");
124	#else
125	FILE *PIN = popen(cmd.getcstr(), "r");
126	#endif
127	if (PIN==NULL) {
128	cerr << "Error: unable to open pipe to " << cmd << endl;
129	return false;
130	}
131
132	text_t xml_text = "";
133
134	while (!feof(PIN)) {
135	char buffer[256];
136	int num_bytes = fread(buffer,1,256,PIN);
137	xml_text.appendcarr(buffer,num_bytes);
138	}
139
140	expat_resultset(xml_text,queryresult);
141
142	#ifdef __WIN32__
143	_pclose(PIN);
144	#else
145	pclose(PIN);
146	#endif
147
148	return true;
149
150	/*
151	// use default query info settings - change to reflect user preferences??
152	QueryInfo queryInfo;
153
154	SetCStr (queryInfo.docLevel, (queryparams.level.getcstr()));
155	queryInfo.maxDocs = (unsigned long)queryparams.maxdocs;
156	queryInfo.sortByRank = (queryparams.search_type == 1);
157	queryInfo.exactWeights = false;
158	queryInfo.needRankInfo = true; // used for overall term freq as well as ranking
159	queryInfo.needTermFreqs = true;
160
161	ExtQueryResult queryResult;
162
163	UCArray queryArray;
164	// greenstone gives us the query encoded in unicode. We want utf8.
165	char* utf8querystring=to_utf8(queryparams.querystring).getcstr();
166	SetCStr(queryArray, utf8querystring);
167	delete utf8querystring;
168
169	UCArray level;
170	UCArrayClear(level);
171
172	//set the level for results
173	SetCStr(level, gdbm_level.getcstr());
174
175
176	// do the query
177	// LuceneQuery(indexData, queryInfo, queryTree, queryResult, level); // ***
178
179
180	// convert ExtQueryResult to queryresultclass
181
182	queryresult.docs_matched = (int)queryResult.docs.size();
183
184	if (queryresult.docs_matched == (int)queryResult.actualNumDocs) {
185	queryresult.is_approx = Exact;
186	}
187	else if (queryresult.docs_matched < (int)queryResult.actualNumDocs) {
188	queryresult.is_approx = MoreThan;
189	}
190	else {
191	queryresult.is_approx = Approximate;
192	}
193
194	docresultclass doc;
195	for (int i=0; i<(int)queryResult.docs.size(); i++) {
196	doc.clear();
197	doc.docnum = (int)queryResult.levels[i];
198	doc.docweight = queryResult.ranks[i];
199	queryresult.docs.docset[doc.docnum] = doc;
200	queryresult.docs.docorder.push_back(doc.docnum);
201
202	}
203
204	// term info
205	termfreqclass term;
206	for (int k=0; k<(int)queryResult.termFreqs.size(); k++) {
207	term.clear();
208	char* termfreq_cstr=GetCStr(queryResult.termFreqs[k].term);
209	term.termstr = to_uni(termfreq_cstr);
210	delete termfreq_cstr;
211	term.termstemstr = term.termstr;
212	// we don't set term.utf8equivterms ?? - jrm21
213	term.termfreq = queryResult.termFreqs[k].termFreq;
214	queryresult.terms.push_back(term);
215	queryresult.orgterms.push_back(term); // should this change??
216
217	for (int j=0; j<(int)queryResult.termFreqs[k].equivTerms.size(); j++) {
218	char* equivterm_cstr=GetCStr(queryResult.termFreqs[k].equivTerms[j]);
219	queryresult.termvariants.insert(to_uni(equivterm_cstr));
220	delete equivterm_cstr;
221	}
222
223	}
224	// clean up
225	delete indexname;
226	return true;
227	*/
228
229	return false;
230
231	}
232
233
234	bool lucenesearchclass::browse_search(const queryparamclass &queryparams,
235	int start, int numDocs,
236	queryresultsclass &queryresult) {
237
238	cerr << "**** Not sure what this function does!" << endl;
239
240	/*
241	#ifdef __WIN32__
242	char basepath[]="";
243	#else
244	char basepath[] = "/";
245	#endif
246
247	char *indexname = (filename_cat(collectdir, getindexsuffix(queryparams))).getcstr();
248
249	UCArray level;
250	UCArrayClear(level);
251
252	//browse always at top level
253	SetCStr(level, "Doc"); // this name may change.
254
255
256	BrowseQueryNode browseNode;
257	browseNode.startPosition = start;
258	browseNode.numTerms = numDocs;
259
260	BrowseQueryResult browseResult;
261
262
263	UCArrayClear(browseNode.term);
264	// greenstone gives us the query encoded in unicode. We want utf8.
265	char* utf8querystring=to_utf8(queryparams.querystring).getcstr();
266	SetCStr(browseNode.term, utf8querystring);
267	delete utf8querystring;
268
269	// do the actual query
270	// LuceneBrowseQuery(indexData, level, browseNode, browseResult); // ***
271
272	// load results into term info
273	termfreqclass term;
274	for (int i=0; i<(int)browseResult.termFreqs.size(); i++) {
275	term.clear();
276	char* term_cstr = GetCStr(browseResult.termFreqs[i].term);
277	term.termstr = to_uni(term_cstr);
278	delete term_cstr;
279	term.termstemstr = term.termstr;
280	term.termfreq = browseResult.termFreqs[i].termFreq;
281	queryresult.terms.push_back(term);
282	queryresult.orgterms.push_back(term);
283
284	}
285	// clean up
286	delete indexname;
287
288	return true;
289
290	*/
291
292	return false;
293	}
294
295	// the document text for 'docnum' is placed in 'output'
296	// docTargetDocument returns 'true' if it was able to
297	// try to get a document
298	// collection is needed to see if an index from the
299	// collection is loaded. THe default index bits are just there cos
300	// the mg version needs them
301
302	bool lucenesearchclass::docTargetDocument(const text_t &/defaultindex/,
303	const text_t &/defaultsubcollection/,
304	const text_t &/defaultlanguage/,
305	const text_t &collection,
306	int docnum,
307	text_t &output) {
308
309	cerr << "**** Should return document text here!" << endl;
310
311	/*
312	#ifdef __WIN32__
313	char basepath[]="";
314	#else
315	char basepath[] = "/";
316	#endif
317	char *textname = (filename_cat(collectdir, "index", "text", collection)).getcstr();;
318
319
320	TextData textdata;
321	if(!textdata.LoadData(basepath, textname)) {
322	cout<<"couldn't load text data\n"<<endl;
323	return false;
324	}
325
326	UCArray doctext;
327	UCArray level;
328	SetCStr(level, gdbm_level.getcstr());
329	if (!GetDocText(textdata, level, (unsigned long)docnum, doctext)) {
330	cout<<"couldn't retrieve document text\n";
331	return false;
332	}
333
334	// convert UCArray to text_t
335	output.clear();
336	char* doctext_cstr = GetCStr(doctext);
337	output = to_uni(doctext_cstr); // convert from utf-8 to unicode
338	delete doctext_cstr;
339
340	// here need to remove the <Document>, <Section>, <Paragraph> tags
341
342
343	//clean up
344	textdata.UnloadData ();
345	delete textname;
346
347	return true;
348
349	*/
350
351	return false;
352	}
353
354	// used to clear any cached databases for persistent versions of
355	// Greenstone like the Windows local library
356	void lucenesearchclass::unload_database () {
357	}
358
359
360
361
362

Note: See TracBrowser for help on using the repository browser.

Download in other formats: