source: trunk/gsdl/src/colservr/mgppsearch.cpp@ 2701

Last change on this file since 2701 was 2701, checked in by kjm18, 23 years ago

previous changes broke linux version. all fixed now hopefully :-)

  • Property svn:keywords set to Author Date Id Revision
File size: 7.8 KB
Line 
1/**********************************************************************
2 *
3 * mgppsearch.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "gsdlconf.h"
28#include "mgppsearch.h"
29#include "fileutil.h"
30#include "GSDLQueryParser.h"
31#include "MGQuery.h"
32#include "TextGet.h"
33#include "queryinfo.h"
34
35
36
37static text_t getindexsuffix(const queryparamclass &qp) {
38 text_t indexsuffix = "index";
39 text_t ind = qp.index;
40 text_t sub = qp.subcollection;
41 text_t lang = qp.language;
42
43 indexsuffix = filename_cat(indexsuffix, ind + sub + lang, qp.collection);
44 return indexsuffix;
45
46}
47
48////////////////////
49// mgppsearch class //
50////////////////////
51
52mgppsearchclass::mgppsearchclass ()
53 : searchclass() {
54
55 gdbm_level = "Document";
56}
57
58mgppsearchclass::~mgppsearchclass ()
59{
60 if (cache != NULL)
61 {
62 delete cache;
63 cache = NULL;
64 }
65
66 if (indexData !=NULL) {
67 indexData->UnloadData();
68 delete indexData;
69 indexData = NULL;
70 }
71
72}
73
74void mgppsearchclass::set_gdbm_level(text_t &level) {
75 gdbm_level = level;
76
77}
78
79bool mgppsearchclass::search(const queryparamclass &queryparams,
80 queryresultsclass &queryresult) {
81
82#ifdef __WIN32__
83 char basepath[]="";
84#else
85 char basepath[] = "/";
86#endif
87
88 char *indexname = (filename_cat(collectdir, getindexsuffix(queryparams))).getcstr();
89
90 // load index data
91 if (indexData == NULL) {
92 indexData = new IndexData();
93 }
94 if (!indexData->LoadData (basepath, indexname)) {
95 cerr<<"couldn't load index data\n"<<endl;
96 return false;
97 }
98
99 // set default stem method from values originally set on prefs page
100 int defaultStemMethod = 0;
101 if (queryparams.casefolding) {
102 defaultStemMethod |= 1;
103 }
104 if (queryparams.stemming) {
105 defaultStemMethod |= 2;
106 }
107
108 // set default Boolean combiner from all/some setting
109 // if match_mode == 1, ie all, default=1 ie AND
110 // if match_mode == 0, ie some, default=0, ie OR
111 int defaultBoolCombine = 0;
112 if (queryparams.match_mode){
113 defaultBoolCombine = 1;
114 }
115
116 // use default query info settings - change to reflect user preferences??
117 QueryInfo queryInfo;
118 SetCStr (queryInfo.docLevel, (queryparams.level.getcstr()));
119 queryInfo.maxDocs = (unsigned long)queryparams.maxdocs;
120 queryInfo.sortByRank = (queryparams.search_type == 1);
121 queryInfo.exactWeights = false;
122 queryInfo.needRankInfo = true; // used for overall term freq as well as ranking
123 queryInfo.needTermFreqs = true;
124
125 ExtQueryResult queryResult;
126
127 UCArray queryArray;
128 SetCStr(queryArray, (queryparams.querystring.getcstr()));
129
130 // create the mgpp query tree
131 QueryNode *queryTree = NULL;
132 queryTree = ParseQuery(queryArray, defaultBoolCombine, defaultStemMethod);
133
134 UCArray level;
135 UCArrayClear(level);
136
137 //set the level for results
138 SetCStr(level, gdbm_level.getcstr());
139
140
141 // do the query
142 MGQuery(*indexData, queryInfo, queryTree, queryResult, level);
143
144
145 // convert ExtQueryResult to queryresultclass
146
147 queryresult.docs_matched = (int)queryResult.docs.size();
148
149 if (queryresult.docs_matched == (int)queryResult.actualNumDocs) {
150 queryresult.is_approx = Exact;
151 }
152 else if (queryresult.docs_matched < (int)queryResult.actualNumDocs) {
153 queryresult.is_approx = MoreThan;
154 }
155 else {
156 queryresult.is_approx = Approximate;
157 }
158
159 docresultclass doc;
160 for (int i=0; i<(int)queryResult.docs.size(); i++) {
161 doc.clear();
162 doc.docnum = (int)queryResult.levels[i];
163 doc.docweight = queryResult.ranks[i];
164 queryresult.docs.docset[doc.docnum] = doc;
165 queryresult.docs.docorder.push_back(doc.docnum);
166
167 }
168
169 // term info
170 termfreqclass term;
171 for (int k=0; k<(int)queryResult.termFreqs.size(); k++) {
172 term.clear();
173 term.termstr = GetCStr(queryResult.termFreqs[k].term);
174 term.termstemstr = term.termstr;
175 term.termfreq = queryResult.termFreqs[k].termFreq;
176 queryresult.terms.push_back(term);
177 queryresult.orgterms.push_back(term); // should this change??
178
179 for (int j=0; j<(int)queryResult.termFreqs[k].equivTerms.size(); j++) {
180 queryresult.termvariants.insert(GetCStr(queryResult.termFreqs[k].equivTerms[j]));
181 }
182
183 }
184 // clean up
185 delete indexname;
186 return true;
187
188}
189
190
191bool mgppsearchclass::browse_search(const queryparamclass &queryparams, int start, int numDocs,
192 queryresultsclass &queryresult) {
193
194#ifdef __WIN32__
195 char basepath[]="";
196#else
197 char basepath[] = "/";
198#endif
199
200 char *indexname = (filename_cat(collectdir, getindexsuffix(queryparams))).getcstr();
201
202 if (indexData == NULL) {
203 indexData = new IndexData();
204 }
205 if (!indexData->LoadData (basepath, indexname)) {
206 cerr<<"couldn't load index data\n"<<endl;
207 return false;
208 }
209
210 UCArray level;
211 UCArrayClear(level);
212
213 //browse always at top level
214 SetCStr(level, "Document");
215
216
217 BrowseQueryNode browseNode;
218 browseNode.startPosition = start;
219 browseNode.numTerms = numDocs;
220
221 BrowseQueryResult browseResult;
222
223 UCArrayClear(browseNode.term);
224 SetCStr(browseNode.term, (queryparams.querystring.getcstr()));
225
226 // do the actual query
227 MGBrowseQuery(*indexData, level, browseNode, browseResult);
228
229 // load results into term info
230 termfreqclass term;
231 for (int i=0; i<(int)browseResult.termFreqs.size(); i++) {
232 term.clear();
233 term.termstr = GetCStr(browseResult.termFreqs[i].term);
234 term.termstemstr = term.termstr;
235 term.termfreq = browseResult.termFreqs[i].termFreq;
236 queryresult.terms.push_back(term);
237 queryresult.orgterms.push_back(term);
238
239 }
240 // clean up
241 delete indexname;
242
243 return true;
244}
245
246// the document text for 'docnum' is placed in 'output'
247// docTargetDocument returns 'true' if it was able to
248// try to get a document
249// collection is needed to see if an index from the
250// collection is loaded. THe default index bits are just there cos
251// the mg version needs them
252
253bool mgppsearchclass::docTargetDocument(const text_t &/*defaultindex*/,
254 const text_t &/*defaultsubcollection*/,
255 const text_t &/*defaultlanguage*/,
256 const text_t &collection,
257 int docnum,
258 text_t &output) {
259
260#ifdef __WIN32__
261 char basepath[]="";
262#else
263 char basepath[] = "/";
264#endif
265 char *textname = (filename_cat(collectdir, "index", "text", collection)).getcstr();;
266
267 TextData textdata;
268 if(!textdata.LoadData(basepath, textname)) {
269 cout<<"couldn't load text data\n"<<endl;
270 return false;
271 }
272 UCArray doctext;
273 UCArray level;
274 SetCStr(level, gdbm_level.getcstr());
275 if (!GetDocText(textdata, level, (unsigned long)docnum, doctext)) {
276 cout<<"couldn't retrieve document text\n";
277 return false;
278 }
279
280 // convert UCArray to text_t
281 output.clear();
282 output = GetCStr(doctext);
283
284 // here need to remove the <Document>, <Section>, <Paragraph> tags
285
286 // mg converts to unicode, this may need to be added here???
287
288 //clean up
289 textdata.UnloadData ();
290 delete textname;
291
292 return true;
293
294}
295
296// used to clear any cached databases for persistent versions of
297// Greenstone like the Windows local library
298void mgppsearchclass::unload_database () {
299
300 if (indexData !=NULL) {
301 indexData->UnloadData();
302 }
303}
304
305
306
307
308
Note: See TracBrowser for help on using the repository browser.