source: trunk/gsdl/src/colservr/mgppsearch.cpp@ 4742

Last change on this file since 4742 was 4217, checked in by kjdon, 21 years ago

now we pass on syntax_error from mgpp parsing back to the mgppqueryfilter via queryresultsclass

  • Property svn:keywords set to Author Date Id Revision
File size: 8.6 KB
Line 
1/**********************************************************************
2 *
3 * mgppsearch.cpp --
4 * Copyright (C) 1999-2002 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "gsdlconf.h"
28#include "mgppsearch.h"
29#include "fileutil.h"
30#include "GSDLQueryParser.h"
31#include "MGQuery.h"
32#include "TextGet.h"
33#include "queryinfo.h"
34#include "gsdlunicode.h"
35
36
37static text_t getindexsuffix(const queryparamclass &qp) {
38 text_t indexsuffix = "index";
39 text_t ind = qp.index;
40 text_t sub = qp.subcollection;
41 text_t lang = qp.language;
42
43 indexsuffix = filename_cat(indexsuffix, ind + sub + lang, qp.collection);
44 return indexsuffix;
45
46}
47
48////////////////////
49// mgppsearch class //
50////////////////////
51
52mgppsearchclass::mgppsearchclass ()
53 : searchclass() {
54
55 gdbm_level = "Document";
56 indexData = NULL;
57}
58
59mgppsearchclass::~mgppsearchclass ()
60{
61 if (cache != NULL)
62 {
63 delete cache;
64 cache = NULL;
65 }
66
67 if (indexData !=NULL) {
68 indexData->UnloadData();
69 delete indexData;
70 indexData = NULL;
71 }
72
73}
74
75void mgppsearchclass::set_gdbm_level(text_t &level) {
76 gdbm_level = level;
77
78}
79
80bool mgppsearchclass::search(const queryparamclass &queryparams,
81 queryresultsclass &queryresult) {
82
83#ifdef __WIN32__
84 char basepath[]="";
85#else
86 char basepath[] = "/";
87#endif
88
89 char *indexname = (filename_cat(collectdir, getindexsuffix(queryparams))).getcstr();
90
91 // load index data
92 if (indexData == NULL) {
93 indexData = new IndexData();
94 }
95 if (!indexData->LoadData (basepath, indexname)) {
96 cerr<<"couldn't load index data\n"<<endl;
97 return false;
98 }
99
100 // set default stem method from values originally set on prefs page
101 int defaultStemMethod = 0;
102 if (queryparams.casefolding) {
103 defaultStemMethod |= 1;
104 }
105 if (queryparams.stemming) {
106 defaultStemMethod |= 2;
107 }
108
109 // set default Boolean combiner from all/some setting
110 // if match_mode == 1, ie all, default=1 ie AND
111 // if match_mode == 0, ie some, default=0, ie OR
112 int defaultBoolCombine = 0;
113 if (queryparams.match_mode){
114 defaultBoolCombine = 1;
115 }
116
117 // use default query info settings - change to reflect user preferences??
118 QueryInfo queryInfo;
119 SetCStr (queryInfo.docLevel, (queryparams.level.getcstr()));
120 queryInfo.maxDocs = (unsigned long)queryparams.maxdocs;
121 queryInfo.sortByRank = (queryparams.search_type == 1);
122 queryInfo.exactWeights = false;
123 queryInfo.needRankInfo = true; // used for overall term freq as well as ranking
124 queryInfo.needTermFreqs = true;
125
126 ExtQueryResult queryResult;
127
128 UCArray queryArray;
129 // greenstone gives us the query encoded in unicode. We want utf8.
130 char* utf8querystring=to_utf8(queryparams.querystring).getcstr();
131 SetCStr(queryArray, utf8querystring);
132 delete utf8querystring;
133
134 // create the mgpp query tree
135 QueryNode *queryTree = NULL;
136 queryTree = ParseQuery(queryArray, defaultBoolCombine, defaultStemMethod);
137 if (queryTree == NULL) { // syntax error
138 queryresult.syntax_error = true;
139 return true; // should we return true or false?
140 }
141 UCArray level;
142 UCArrayClear(level);
143
144 //set the level for results
145 SetCStr(level, gdbm_level.getcstr());
146
147
148 // do the query
149 MGQuery(*indexData, queryInfo, queryTree, queryResult, level);
150
151
152 // convert ExtQueryResult to queryresultclass
153
154 queryresult.docs_matched = (int)queryResult.docs.size();
155
156 if (queryresult.docs_matched == (int)queryResult.actualNumDocs) {
157 queryresult.is_approx = Exact;
158 }
159 else if (queryresult.docs_matched < (int)queryResult.actualNumDocs) {
160 queryresult.is_approx = MoreThan;
161 }
162 else {
163 queryresult.is_approx = Approximate;
164 }
165
166 docresultclass doc;
167 for (int i=0; i<(int)queryResult.docs.size(); i++) {
168 doc.clear();
169 doc.docnum = (int)queryResult.levels[i];
170 doc.docweight = queryResult.ranks[i];
171 queryresult.docs.docset[doc.docnum] = doc;
172 queryresult.docs.docorder.push_back(doc.docnum);
173
174 }
175
176 // term info
177 termfreqclass term;
178 for (int k=0; k<(int)queryResult.termFreqs.size(); k++) {
179 term.clear();
180 char* termfreq_cstr=GetCStr(queryResult.termFreqs[k].term);
181 term.termstr = to_uni(termfreq_cstr);
182 delete termfreq_cstr;
183 term.termstemstr = term.termstr;
184 // we don't set term.utf8equivterms ?? - jrm21
185 term.termfreq = queryResult.termFreqs[k].termFreq;
186 queryresult.terms.push_back(term);
187 queryresult.orgterms.push_back(term); // should this change??
188
189 for (int j=0; j<(int)queryResult.termFreqs[k].equivTerms.size(); j++) {
190 char* equivterm_cstr=GetCStr(queryResult.termFreqs[k].equivTerms[j]);
191 queryresult.termvariants.insert(to_uni(equivterm_cstr));
192 delete equivterm_cstr;
193 }
194
195 }
196 // clean up
197 delete indexname;
198 return true;
199
200}
201
202
203bool mgppsearchclass::browse_search(const queryparamclass &queryparams, int start, int numDocs,
204 queryresultsclass &queryresult) {
205
206#ifdef __WIN32__
207 char basepath[]="";
208#else
209 char basepath[] = "/";
210#endif
211
212 char *indexname = (filename_cat(collectdir, getindexsuffix(queryparams))).getcstr();
213
214 if (indexData == NULL) {
215 indexData = new IndexData();
216 }
217 if (!indexData->LoadData (basepath, indexname)) {
218 cerr<<"couldn't load index data\n"<<endl;
219 return false;
220 }
221
222 UCArray level;
223 UCArrayClear(level);
224
225 //browse always at top level
226 SetCStr(level, "Document");
227
228
229 BrowseQueryNode browseNode;
230 browseNode.startPosition = start;
231 browseNode.numTerms = numDocs;
232
233 BrowseQueryResult browseResult;
234
235 UCArrayClear(browseNode.term);
236 // greenstone gives us the query encoded in unicode. We want utf8.
237 char* utf8querystring=to_utf8(queryparams.querystring).getcstr();
238 SetCStr(browseNode.term, utf8querystring);
239 delete utf8querystring;
240
241 // do the actual query
242 MGBrowseQuery(*indexData, level, browseNode, browseResult);
243
244 // load results into term info
245 termfreqclass term;
246 for (int i=0; i<(int)browseResult.termFreqs.size(); i++) {
247 term.clear();
248 char* term_cstr = GetCStr(browseResult.termFreqs[i].term);
249 term.termstr = to_uni(term_cstr);
250 delete term_cstr;
251 term.termstemstr = term.termstr;
252 term.termfreq = browseResult.termFreqs[i].termFreq;
253 queryresult.terms.push_back(term);
254 queryresult.orgterms.push_back(term);
255
256 }
257 // clean up
258 delete indexname;
259
260 return true;
261}
262
263// the document text for 'docnum' is placed in 'output'
264// docTargetDocument returns 'true' if it was able to
265// try to get a document
266// collection is needed to see if an index from the
267// collection is loaded. THe default index bits are just there cos
268// the mg version needs them
269
270bool mgppsearchclass::docTargetDocument(const text_t &/*defaultindex*/,
271 const text_t &/*defaultsubcollection*/,
272 const text_t &/*defaultlanguage*/,
273 const text_t &collection,
274 int docnum,
275 text_t &output) {
276
277#ifdef __WIN32__
278 char basepath[]="";
279#else
280 char basepath[] = "/";
281#endif
282 char *textname = (filename_cat(collectdir, "index", "text", collection)).getcstr();;
283
284 TextData textdata;
285 if(!textdata.LoadData(basepath, textname)) {
286 cout<<"couldn't load text data\n"<<endl;
287 return false;
288 }
289 UCArray doctext;
290 UCArray level;
291 SetCStr(level, gdbm_level.getcstr());
292 if (!GetDocText(textdata, level, (unsigned long)docnum, doctext)) {
293 cout<<"couldn't retrieve document text\n";
294 return false;
295 }
296
297 // convert UCArray to text_t
298 output.clear();
299 char* doctext_cstr = GetCStr(doctext);
300 output = to_uni(doctext_cstr); // convert from utf-8 to unicode
301 delete doctext_cstr;
302
303 // here need to remove the <Document>, <Section>, <Paragraph> tags
304
305
306 //clean up
307 textdata.UnloadData ();
308 delete textname;
309
310 return true;
311
312}
313
314// used to clear any cached databases for persistent versions of
315// Greenstone like the Windows local library
316void mgppsearchclass::unload_database () {
317
318 if (indexData !=NULL) {
319 indexData->UnloadData();
320 }
321}
322
323
324
325
326
Note: See TracBrowser for help on using the repository browser.