source: trunk/gsdl/src/colservr/mgppsearch.cpp@ 1908

Last change on this file since 1908 was 1908, checked in by kjm18, 23 years ago

get_mg_type() added. New variable: mgppsearchclass::gdbm_level. It defaults to
Document and can be changed by set_gdbm_level(level). Docnums from a search are
returned at this level (which corresponds to the level the gdbm file is built
at, usually Section).

  • Property svn:keywords set to Author Date Id Revision
File size: 7.4 KB
Line 
1/**********************************************************************
2 *
3 * mgppsearch.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "gsdlconf.h"
28#include "mgppsearch.h"
29#include "fileutil.h"
30#include "GSDLQueryParser.h"
31#include "MGQuery.h"
32#include "TextGet.h"
33#include "queryinfo.h"
34
35
36
37static text_t getindexsuffix(const queryparamclass &qp) {
38 text_t indexsuffix = "index";
39 text_t ind = qp.index;
40 text_t sub = qp.subcollection;
41 text_t lang = qp.language;
42
43 indexsuffix = filename_cat(indexsuffix, ind + sub + lang, qp.collection);
44 return indexsuffix;
45
46}
47
48////////////////////
49// mgppsearch class //
50////////////////////
51
52mgppsearchclass::mgppsearchclass ()
53 : searchclass() {
54
55 gdbm_level = "Document";
56}
57
58mgppsearchclass::~mgppsearchclass ()
59{
60 if (cache != NULL)
61 {
62 delete cache;
63 cache = NULL;
64 }
65}
66
67void mgppsearchclass::set_gdbm_level(text_t &level) {
68 gdbm_level = level;
69
70}
71
72bool mgppsearchclass::search(const queryparamclass &queryparams,
73 queryresultsclass &queryresult) {
74
75 char *basepath = collectdir.getcstr(); //like ...gsdl/collect/demo
76 char *indexname = (getindexsuffix(queryparams)).getcstr(); // like ...demo/mt/demo
77
78 // load index data
79 IndexData indexData;
80 if (!indexData.LoadData (basepath, indexname)) {
81 cerr<<"couldn't load index data\n"<<endl;
82 return false;
83 }
84
85 // set default stem method from values originally set on prefs page
86 int defaultStemMethod = 0;
87 if (queryparams.casefolding) {
88 defaultStemMethod |= 1;
89 }
90 if (queryparams.stemming) {
91 defaultStemMethod |= 2;
92 }
93
94 // set default Boolean combiner from all/some setting
95 // if match_mode == 1, ie all, default=1 ie AND
96 // if match_mode == 0, ie some, default=0, ie OR
97 int defaultBoolCombine = 0;
98 if (queryparams.match_mode){
99 defaultBoolCombine = 1;
100 }
101
102 // use default query info settings - change to reflect user preferences??
103 QueryInfo queryInfo;
104 SetCStr (queryInfo.docLevel, (queryparams.level.getcstr()));
105 queryInfo.maxDocs = (unsigned long)queryparams.maxdocs;
106 queryInfo.sortByRank = (queryparams.search_type == 1);
107 queryInfo.exactWeights = false;
108 queryInfo.needRankInfo = true; // used for overall term freq as well as ranking
109 queryInfo.needTermFreqs = true;
110
111 ExtQueryResult queryResult;
112
113 UCArray queryArray;
114 SetCStr(queryArray, (queryparams.querystring.getcstr()));
115
116 // create the mgpp query tree
117 QueryNode *queryTree = NULL;
118 queryTree = ParseQuery(queryArray, defaultBoolCombine, defaultStemMethod);
119
120 UCArray level;
121 UCArrayClear(level);
122
123 //set the level for results
124 SetCStr(level, gdbm_level.getcstr());
125
126
127 // do the query
128 MGQuery(indexData, queryInfo, queryTree, queryResult, level);
129
130
131 // convert ExtQueryResult to queryresultclass
132
133 queryresult.docs_matched = (int)queryResult.docs.size();
134
135 if (queryresult.docs_matched == (int)queryResult.actualNumDocs) {
136 queryresult.is_approx = Exact;
137 }
138 else if (queryresult.docs_matched < (int)queryResult.actualNumDocs) {
139 queryresult.is_approx = MoreThan;
140 }
141 else {
142 queryresult.is_approx = Approximate;
143 }
144
145 docresultclass doc;
146 for (int i=0; i<(int)queryResult.docs.size(); i++) {
147 doc.clear();
148 doc.docnum = (int)queryResult.levels[i];
149 doc.docweight = queryResult.ranks[i];
150 queryresult.docs.docset[doc.docnum] = doc;
151 queryresult.docs.docorder.push_back(doc.docnum);
152
153 }
154
155 // term info
156 termfreqclass term;
157 for (int i=0; i<(int)queryResult.termFreqs.size(); i++) {
158 term.clear();
159 term.termstr = GetCStr(queryResult.termFreqs[i].term);
160 term.termstemstr = term.termstr;
161 term.termfreq = queryResult.termFreqs[i].termFreq;
162 queryresult.terms.push_back(term);
163 queryresult.orgterms.push_back(term); // should this change??
164
165 for (int j=0; j<(int)queryResult.termFreqs[i].equivTerms.size(); j++) {
166 queryresult.termvariants.insert(GetCStr(queryResult.termFreqs[i].equivTerms[j]));
167 }
168
169 }
170 // clean up
171 indexData.UnloadData();
172 delete indexname;
173 return true;
174
175}
176
177
178bool mgppsearchclass::browse_search(const queryparamclass &queryparams, int start, int numDocs,
179 queryresultsclass &queryresult) {
180
181 char *basepath = collectdir.getcstr(); //like ...gsdl/collect/demo
182 char *indexname = (getindexsuffix(queryparams)).getcstr();
183
184 IndexData indexData;
185 if (!indexData.LoadData (basepath, indexname)) {
186 cerr<<"couldn't load index data\n"<<endl;
187 return false;
188 }
189
190 UCArray level;
191 UCArrayClear(level);
192
193 //browse always at top level
194 SetCStr(level, "Document");
195
196
197 BrowseQueryNode browseNode;
198 browseNode.startPosition = start;
199 browseNode.numTerms = numDocs;
200
201 BrowseQueryResult browseResult;
202
203 UCArrayClear(browseNode.term);
204 SetCStr(browseNode.term, (queryparams.querystring.getcstr()));
205
206 // do the actual query
207 MGBrowseQuery(indexData, level, browseNode, browseResult);
208
209 // load results into term info
210 termfreqclass term;
211 for (int i=0; i<(int)browseResult.termFreqs.size(); i++) {
212 term.clear();
213 term.termstr = GetCStr(browseResult.termFreqs[i].term);
214 term.termstemstr = term.termstr;
215 term.termfreq = browseResult.termFreqs[i].termFreq;
216 queryresult.terms.push_back(term);
217 queryresult.orgterms.push_back(term);
218
219 }
220 // clean up
221 indexData.UnloadData();
222 delete indexname;
223
224 return true;
225}
226
227// the document text for 'docnum' is placed in 'output'
228// docTargetDocument returns 'true' if it was able to
229// try to get a document
230// collection is needed to see if an index from the
231// collection is loaded. THe default index bits are just there cos
232// the mg version needs them
233
234bool mgppsearchclass::docTargetDocument(const text_t &/*defaultindex*/,
235 const text_t &/*defaultsubcollection*/,
236 const text_t &/*defaultlanguage*/,
237 const text_t &collection,
238 int docnum,
239 text_t &output) {
240
241 char *basepath = collectdir.getcstr(); //like ...gsdl/collect/demo
242
243 text_t textfilename = "/index/text/"+collection;
244 TextData textdata;
245 if(!textdata.LoadData(basepath, textfilename.getcstr())) {
246 //error
247 return false;
248 }
249 UCArray doctext;
250 UCArray level;
251 //SetCStr(level, "Section");
252 SetCStr(level, gdbm_level.getcstr());
253 if (!GetDocText(textdata, level, (unsigned long)docnum, doctext)) {
254 //error
255 return false;
256 }
257
258 // convert UCArray to text_t
259 output.clear();
260 output = GetCStr(doctext);
261
262 // here need to remove the <Document>, <Section>, <Paragraph> tags
263
264 // mg converts to unicode, this may need to be added here???
265
266 //clean up
267 textdata.UnloadData ();
268 delete basepath;
269
270 return true;
271
272}
273
274
275
276
277
278
Note: See TracBrowser for help on using the repository browser.