Context Navigation

source: trunk/gsdl/src/colservr/mgppsearch.cpp@ 2701

Last change on this file since 2701 was 2701, checked in by kjm18, 23 years ago
previous changes broke linux version. all fixed now hopefully :-)
Property svn:keywords set to `Author Date Id Revision`
File size: 7.8 KB

Line
1	/**********************************************************************
2	*
3	* mgppsearch.cpp --
4	* Copyright (C) 1999 The New Zealand Digital Library Project
5	*
6	* A component of the Greenstone digital library software
7	* from the New Zealand Digital Library Project at the
8	* University of Waikato, New Zealand.
9	*
10	* This program is free software; you can redistribute it and/or modify
11	* it under the terms of the GNU General Public License as published by
12	* the Free Software Foundation; either version 2 of the License, or
13	* (at your option) any later version.
14	*
15	* This program is distributed in the hope that it will be useful,
16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18	* GNU General Public License for more details.
19	*
20	* You should have received a copy of the GNU General Public License
21	* along with this program; if not, write to the Free Software
22	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23	*
24	*********************************************************************/
25
26
27	#include "gsdlconf.h"
28	#include "mgppsearch.h"
29	#include "fileutil.h"
30	#include "GSDLQueryParser.h"
31	#include "MGQuery.h"
32	#include "TextGet.h"
33	#include "queryinfo.h"
34
35
36
37	static text_t getindexsuffix(const queryparamclass &qp) {
38	text_t indexsuffix = "index";
39	text_t ind = qp.index;
40	text_t sub = qp.subcollection;
41	text_t lang = qp.language;
42
43	indexsuffix = filename_cat(indexsuffix, ind + sub + lang, qp.collection);
44	return indexsuffix;
45
46	}
47
48	////////////////////
49	// mgppsearch class //
50	////////////////////
51
52	mgppsearchclass::mgppsearchclass ()
53	: searchclass() {
54
55	gdbm_level = "Document";
56	}
57
58	mgppsearchclass::~mgppsearchclass ()
59	{
60	if (cache != NULL)
61	{
62	delete cache;
63	cache = NULL;
64	}
65
66	if (indexData !=NULL) {
67	indexData->UnloadData();
68	delete indexData;
69	indexData = NULL;
70	}
71
72	}
73
74	void mgppsearchclass::set_gdbm_level(text_t &level) {
75	gdbm_level = level;
76
77	}
78
79	bool mgppsearchclass::search(const queryparamclass &queryparams,
80	queryresultsclass &queryresult) {
81
82	#ifdef __WIN32__
83	char basepath[]="";
84	#else
85	char basepath[] = "/";
86	#endif
87
88	char *indexname = (filename_cat(collectdir, getindexsuffix(queryparams))).getcstr();
89
90	// load index data
91	if (indexData == NULL) {
92	indexData = new IndexData();
93	}
94	if (!indexData->LoadData (basepath, indexname)) {
95	cerr<<"couldn't load index data\n"<<endl;
96	return false;
97	}
98
99	// set default stem method from values originally set on prefs page
100	int defaultStemMethod = 0;
101	if (queryparams.casefolding) {
102	defaultStemMethod \|= 1;
103	}
104	if (queryparams.stemming) {
105	defaultStemMethod \|= 2;
106	}
107
108	// set default Boolean combiner from all/some setting
109	// if match_mode == 1, ie all, default=1 ie AND
110	// if match_mode == 0, ie some, default=0, ie OR
111	int defaultBoolCombine = 0;
112	if (queryparams.match_mode){
113	defaultBoolCombine = 1;
114	}
115
116	// use default query info settings - change to reflect user preferences??
117	QueryInfo queryInfo;
118	SetCStr (queryInfo.docLevel, (queryparams.level.getcstr()));
119	queryInfo.maxDocs = (unsigned long)queryparams.maxdocs;
120	queryInfo.sortByRank = (queryparams.search_type == 1);
121	queryInfo.exactWeights = false;
122	queryInfo.needRankInfo = true; // used for overall term freq as well as ranking
123	queryInfo.needTermFreqs = true;
124
125	ExtQueryResult queryResult;
126
127	UCArray queryArray;
128	SetCStr(queryArray, (queryparams.querystring.getcstr()));
129
130	// create the mgpp query tree
131	QueryNode *queryTree = NULL;
132	queryTree = ParseQuery(queryArray, defaultBoolCombine, defaultStemMethod);
133
134	UCArray level;
135	UCArrayClear(level);
136
137	//set the level for results
138	SetCStr(level, gdbm_level.getcstr());
139
140
141	// do the query
142	MGQuery(*indexData, queryInfo, queryTree, queryResult, level);
143
144
145	// convert ExtQueryResult to queryresultclass
146
147	queryresult.docs_matched = (int)queryResult.docs.size();
148
149	if (queryresult.docs_matched == (int)queryResult.actualNumDocs) {
150	queryresult.is_approx = Exact;
151	}
152	else if (queryresult.docs_matched < (int)queryResult.actualNumDocs) {
153	queryresult.is_approx = MoreThan;
154	}
155	else {
156	queryresult.is_approx = Approximate;
157	}
158
159	docresultclass doc;
160	for (int i=0; i<(int)queryResult.docs.size(); i++) {
161	doc.clear();
162	doc.docnum = (int)queryResult.levels[i];
163	doc.docweight = queryResult.ranks[i];
164	queryresult.docs.docset[doc.docnum] = doc;
165	queryresult.docs.docorder.push_back(doc.docnum);
166
167	}
168
169	// term info
170	termfreqclass term;
171	for (int k=0; k<(int)queryResult.termFreqs.size(); k++) {
172	term.clear();
173	term.termstr = GetCStr(queryResult.termFreqs[k].term);
174	term.termstemstr = term.termstr;
175	term.termfreq = queryResult.termFreqs[k].termFreq;
176	queryresult.terms.push_back(term);
177	queryresult.orgterms.push_back(term); // should this change??
178
179	for (int j=0; j<(int)queryResult.termFreqs[k].equivTerms.size(); j++) {
180	queryresult.termvariants.insert(GetCStr(queryResult.termFreqs[k].equivTerms[j]));
181	}
182
183	}
184	// clean up
185	delete indexname;
186	return true;
187
188	}
189
190
191	bool mgppsearchclass::browse_search(const queryparamclass &queryparams, int start, int numDocs,
192	queryresultsclass &queryresult) {
193
194	#ifdef __WIN32__
195	char basepath[]="";
196	#else
197	char basepath[] = "/";
198	#endif
199
200	char *indexname = (filename_cat(collectdir, getindexsuffix(queryparams))).getcstr();
201
202	if (indexData == NULL) {
203	indexData = new IndexData();
204	}
205	if (!indexData->LoadData (basepath, indexname)) {
206	cerr<<"couldn't load index data\n"<<endl;
207	return false;
208	}
209
210	UCArray level;
211	UCArrayClear(level);
212
213	//browse always at top level
214	SetCStr(level, "Document");
215
216
217	BrowseQueryNode browseNode;
218	browseNode.startPosition = start;
219	browseNode.numTerms = numDocs;
220
221	BrowseQueryResult browseResult;
222
223	UCArrayClear(browseNode.term);
224	SetCStr(browseNode.term, (queryparams.querystring.getcstr()));
225
226	// do the actual query
227	MGBrowseQuery(*indexData, level, browseNode, browseResult);
228
229	// load results into term info
230	termfreqclass term;
231	for (int i=0; i<(int)browseResult.termFreqs.size(); i++) {
232	term.clear();
233	term.termstr = GetCStr(browseResult.termFreqs[i].term);
234	term.termstemstr = term.termstr;
235	term.termfreq = browseResult.termFreqs[i].termFreq;
236	queryresult.terms.push_back(term);
237	queryresult.orgterms.push_back(term);
238
239	}
240	// clean up
241	delete indexname;
242
243	return true;
244	}
245
246	// the document text for 'docnum' is placed in 'output'
247	// docTargetDocument returns 'true' if it was able to
248	// try to get a document
249	// collection is needed to see if an index from the
250	// collection is loaded. THe default index bits are just there cos
251	// the mg version needs them
252
253	bool mgppsearchclass::docTargetDocument(const text_t &/defaultindex/,
254	const text_t &/defaultsubcollection/,
255	const text_t &/defaultlanguage/,
256	const text_t &collection,
257	int docnum,
258	text_t &output) {
259
260	#ifdef __WIN32__
261	char basepath[]="";
262	#else
263	char basepath[] = "/";
264	#endif
265	char *textname = (filename_cat(collectdir, "index", "text", collection)).getcstr();;
266
267	TextData textdata;
268	if(!textdata.LoadData(basepath, textname)) {
269	cout<<"couldn't load text data\n"<<endl;
270	return false;
271	}
272	UCArray doctext;
273	UCArray level;
274	SetCStr(level, gdbm_level.getcstr());
275	if (!GetDocText(textdata, level, (unsigned long)docnum, doctext)) {
276	cout<<"couldn't retrieve document text\n";
277	return false;
278	}
279
280	// convert UCArray to text_t
281	output.clear();
282	output = GetCStr(doctext);
283
284	// here need to remove the <Document>, <Section>, <Paragraph> tags
285
286	// mg converts to unicode, this may need to be added here???
287
288	//clean up
289	textdata.UnloadData ();
290	delete textname;
291
292	return true;
293
294	}
295
296	// used to clear any cached databases for persistent versions of
297	// Greenstone like the Windows local library
298	void mgppsearchclass::unload_database () {
299
300	if (indexData !=NULL) {
301	indexData->UnloadData();
302	}
303	}
304
305
306
307
308

Note: See TracBrowser for help on using the repository browser.

Download in other formats: