source: trunk/gsdl/src/colservr/mgppsearch.cpp@ 9631

Last change on this file since 9631 was 9631, checked in by kjdon, 19 years ago

delete -> delete [] change submitted by Emanuel Dejanu

  • Property svn:keywords set to Author Date Id Revision
File size: 8.6 KB
Line 
1/**********************************************************************
2 *
3 * mgppsearch.cpp --
4 * Copyright (C) 1999-2002 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "gsdlconf.h"
28#include "mgppsearch.h"
29#include "fileutil.h"
30#include "GSDLQueryParser.h"
31#include "MGQuery.h"
32#include "TextGet.h"
33#include "queryinfo.h"
34#include "gsdlunicode.h"
35
36
37static text_t getindexsuffix(const queryparamclass &qp) {
38 text_t indexsuffix = "index";
39 text_t ind = qp.index;
40 text_t sub = qp.subcollection;
41 text_t lang = qp.language;
42
43 indexsuffix = filename_cat(indexsuffix, ind + sub + lang, qp.collection);
44 return indexsuffix;
45
46}
47
48////////////////////
49// mgppsearch class //
50////////////////////
51
52mgppsearchclass::mgppsearchclass ()
53 : searchclass() {
54
55 gdbm_level = "Doc";
56 indexData = NULL;
57}
58
59mgppsearchclass::~mgppsearchclass ()
60{
61 if (cache != NULL)
62 {
63 delete cache;
64 cache = NULL;
65 }
66
67 if (indexData !=NULL) {
68 indexData->UnloadData();
69 delete indexData;
70 indexData = NULL;
71 }
72
73}
74
75void mgppsearchclass::set_gdbm_level(const text_t &level) {
76 gdbm_level = level;
77
78}
79
80
81bool mgppsearchclass::search(const queryparamclass &queryparams,
82 queryresultsclass &queryresult) {
83
84#ifdef __WIN32__
85 char basepath[]="";
86#else
87 char basepath[] = "/";
88#endif
89
90 char *indexname = (filename_cat(collectdir, getindexsuffix(queryparams))).getcstr();
91
92 // load index data
93 if (indexData == NULL) {
94 indexData = new IndexData();
95 }
96 if (!indexData->LoadData (basepath, indexname)) {
97 cerr<<"couldn't load index data\n"<<endl;
98 return false;
99 }
100
101 // set default stem method from values originally set on prefs page
102 int defaultStemMethod = 0;
103 if (queryparams.casefolding) {
104 defaultStemMethod |= 1;
105 }
106 if (queryparams.stemming) {
107 defaultStemMethod |= 2;
108 }
109
110 // set default Boolean combiner from all/some setting
111 // if match_mode == 1, ie all, default=1 ie AND
112 // if match_mode == 0, ie some, default=0, ie OR
113 int defaultBoolCombine = 0;
114 if (queryparams.match_mode){
115 defaultBoolCombine = 1;
116 }
117
118 // use default query info settings - change to reflect user preferences??
119 QueryInfo queryInfo;
120
121 SetCStr (queryInfo.docLevel, (queryparams.level.getcstr()));
122 queryInfo.maxDocs = (unsigned long)queryparams.maxdocs;
123 queryInfo.sortByRank = (queryparams.search_type == 1);
124 queryInfo.exactWeights = false;
125 queryInfo.needRankInfo = true; // used for overall term freq as well as ranking
126 queryInfo.needTermFreqs = true;
127
128 ExtQueryResult queryResult;
129
130 UCArray queryArray;
131 // greenstone gives us the query encoded in unicode. We want utf8.
132 char* utf8querystring=to_utf8(queryparams.querystring).getcstr();
133 SetCStr(queryArray, utf8querystring);
134 delete []utf8querystring;
135
136 // create the mgpp query tree
137 QueryNode *queryTree = NULL;
138 queryTree = ParseQuery(queryArray, defaultBoolCombine, defaultStemMethod);
139 if (queryTree == NULL) { // syntax error
140 queryresult.syntax_error = true;
141 return true; // should we return true or false?
142 }
143 UCArray level;
144 UCArrayClear(level);
145
146 //set the level for results
147 SetCStr(level, gdbm_level.getcstr());
148
149
150 // do the query
151 MGQuery(*indexData, queryInfo, queryTree, queryResult, level);
152
153
154 // convert ExtQueryResult to queryresultclass
155
156 queryresult.docs_matched = (int)queryResult.docs.size();
157
158 if (queryresult.docs_matched == (int)queryResult.actualNumDocs) {
159 queryresult.is_approx = Exact;
160 }
161 else if (queryresult.docs_matched < (int)queryResult.actualNumDocs) {
162 queryresult.is_approx = MoreThan;
163 }
164 else {
165 queryresult.is_approx = Approximate;
166 }
167
168 docresultclass doc;
169 for (int i=0; i<(int)queryResult.docs.size(); ++i) {
170 doc.clear();
171 doc.docnum = (int)queryResult.levels[i];
172 doc.docweight = queryResult.ranks[i];
173 queryresult.docs.docset[doc.docnum] = doc;
174 queryresult.docs.docorder.push_back(doc.docnum);
175
176 }
177
178 // term info
179 termfreqclass term;
180 for (int k=0; k<(int)queryResult.termFreqs.size(); ++k) {
181 term.clear();
182 char* termfreq_cstr=GetCStr(queryResult.termFreqs[k].term);
183 term.termstr = to_uni(termfreq_cstr);
184 delete []termfreq_cstr;
185 term.termstemstr = term.termstr;
186 // we don't set term.utf8equivterms ?? - jrm21
187 term.termfreq = queryResult.termFreqs[k].termFreq;
188 queryresult.terms.push_back(term);
189 queryresult.orgterms.push_back(term); // should this change??
190
191 for (int j=0; j<(int)queryResult.termFreqs[k].equivTerms.size(); ++j) {
192 char* equivterm_cstr=GetCStr(queryResult.termFreqs[k].equivTerms[j]);
193 queryresult.termvariants.insert(to_uni(equivterm_cstr));
194 delete []equivterm_cstr;
195 }
196
197 }
198 // clean up
199 delete []indexname;
200 return true;
201
202}
203
204
205bool mgppsearchclass::browse_search(const queryparamclass &queryparams, int start, int numDocs,
206 queryresultsclass &queryresult) {
207
208#ifdef __WIN32__
209 char basepath[]="";
210#else
211 char basepath[] = "/";
212#endif
213
214 char *indexname = (filename_cat(collectdir, getindexsuffix(queryparams))).getcstr();
215
216 if (indexData == NULL) {
217 indexData = new IndexData();
218 }
219 if (!indexData->LoadData (basepath, indexname)) {
220 cerr<<"couldn't load index data\n"<<endl;
221 return false;
222 }
223
224 UCArray level;
225 UCArrayClear(level);
226
227 //browse always at top level
228 SetCStr(level, "Doc"); // this name may change.
229
230
231 BrowseQueryNode browseNode;
232 browseNode.startPosition = start;
233 browseNode.numTerms = numDocs;
234
235 BrowseQueryResult browseResult;
236
237 UCArrayClear(browseNode.term);
238 // greenstone gives us the query encoded in unicode. We want utf8.
239 char* utf8querystring=to_utf8(queryparams.querystring).getcstr();
240 SetCStr(browseNode.term, utf8querystring);
241 delete []utf8querystring;
242
243 // do the actual query
244 MGBrowseQuery(*indexData, level, browseNode, browseResult);
245
246 // load results into term info
247 termfreqclass term;
248 for (int i=0; i<(int)browseResult.termFreqs.size(); ++i) {
249 term.clear();
250 char* term_cstr = GetCStr(browseResult.termFreqs[i].term);
251 term.termstr = to_uni(term_cstr);
252 delete []term_cstr;
253 term.termstemstr = term.termstr;
254 term.termfreq = browseResult.termFreqs[i].termFreq;
255 queryresult.terms.push_back(term);
256 queryresult.orgterms.push_back(term);
257
258 }
259 // clean up
260 delete []indexname;
261
262 return true;
263}
264
265// the document text for 'docnum' is placed in 'output'
266// docTargetDocument returns 'true' if it was able to
267// try to get a document
268// collection is needed to see if an index from the
269// collection is loaded. THe default index bits are just there cos
270// the mg version needs them
271
272bool mgppsearchclass::docTargetDocument(const text_t &/*defaultindex*/,
273 const text_t &/*defaultsubcollection*/,
274 const text_t &/*defaultlanguage*/,
275 const text_t &collection,
276 int docnum,
277 text_t &output) {
278
279#ifdef __WIN32__
280 char basepath[]="";
281#else
282 char basepath[] = "/";
283#endif
284 char *textname = (filename_cat(collectdir, "index", "text", collection)).getcstr();;
285
286 TextData textdata;
287 if(!textdata.LoadData(basepath, textname)) {
288 cout<<"couldn't load text data\n"<<endl;
289 return false;
290 }
291 UCArray doctext;
292 UCArray level;
293 SetCStr(level, gdbm_level.getcstr());
294 if (!GetDocText(textdata, level, (unsigned long)docnum, doctext)) {
295 cout<<"couldn't retrieve document text\n";
296 return false;
297 }
298
299 // convert UCArray to text_t
300 output.clear();
301 char* doctext_cstr = GetCStr(doctext);
302 output = to_uni(doctext_cstr); // convert from utf-8 to unicode
303 delete []doctext_cstr;
304
305 // here need to remove the <Document>, <Section>, <Paragraph> tags
306
307
308 //clean up
309 textdata.UnloadData ();
310 delete []textname;
311
312 return true;
313
314}
315
316// used to clear any cached databases for persistent versions of
317// Greenstone like the Windows local library
318void mgppsearchclass::unload_database () {
319
320 if (indexData !=NULL) {
321 indexData->UnloadData();
322 }
323}
324
325
326
327
328
Note: See TracBrowser for help on using the repository browser.