source: trunk/gsdl/src/colservr/mgppsearch.cpp@ 1834

Last change on this file since 1834 was 1834, checked in by kjm18, 23 years ago

queryresult.termvariants now contains all the equivalent terms to the
query terms (used for highlighting)

  • Property svn:keywords set to Author Date Id Revision
File size: 7.2 KB
Line 
1/**********************************************************************
2 *
3 * mgppsearch.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26
27#include "gsdlconf.h"
28#include "mgppsearch.h"
29#include "fileutil.h"
30#include "GSDLQueryParser.h"
31#include "MGQuery.h"
32#include "TextGet.h"
33#include "queryinfo.h"
34
35
36
37static text_t getindexsuffix(const queryparamclass &qp) {
38 text_t indexsuffix = "index";
39 text_t ind = qp.index;
40 text_t sub = qp.subcollection;
41 text_t lang = qp.language;
42
43 indexsuffix = filename_cat(indexsuffix, ind + sub + lang, qp.collection);
44 return indexsuffix;
45
46}
47
48////////////////////
49// mgppsearch class //
50////////////////////
51
52mgppsearchclass::mgppsearchclass ()
53 : searchclass() {
54
55}
56
57mgppsearchclass::~mgppsearchclass ()
58{
59 if (cache != NULL)
60 {
61 delete cache;
62 cache = NULL;
63 }
64}
65
66bool mgppsearchclass::search(const queryparamclass &queryparams,
67 queryresultsclass &queryresult) {
68
69 char *basepath = collectdir.getcstr(); //like ...gsdl/collect/demo
70 char *indexname = (getindexsuffix(queryparams)).getcstr(); // like ...demo/mt/demo
71
72 // load index data
73 IndexData indexData;
74 if (!indexData.LoadData (basepath, indexname)) {
75 cerr<<"couldn't load index data\n"<<endl;
76 return false;
77 }
78
79 // set default stem method from values originally set on prefs page
80 int defaultStemMethod = 0;
81 if (queryparams.casefolding) {
82 defaultStemMethod |= 1;
83 }
84 if (queryparams.stemming) {
85 defaultStemMethod |= 2;
86 }
87
88 // set default Boolean combiner from all/some setting
89 // if match_mode == 1, ie all, default=1 ie AND
90 // if match_mode == 0, ie some, default=0, ie OR
91 int defaultBoolCombine = 0;
92 if (queryparams.match_mode){
93 defaultBoolCombine = 1;
94 }
95
96 // use default query info settings - change to reflect user preferences??
97 QueryInfo queryInfo;
98 SetCStr (queryInfo.docLevel, (queryparams.level.getcstr()));
99 queryInfo.maxDocs = (unsigned long)queryparams.maxdocs;
100 queryInfo.sortByRank = (queryparams.search_type == 1);
101 queryInfo.exactWeights = false;
102 queryInfo.needRankInfo = true; // used for overall term freq as well as ranking
103 queryInfo.needTermFreqs = true;
104
105 ExtQueryResult queryResult;
106
107 UCArray queryArray;
108 SetCStr(queryArray, (queryparams.querystring.getcstr()));
109
110 // create the mgpp query tree
111 QueryNode *queryTree = NULL;
112 queryTree = ParseQuery(queryArray, defaultBoolCombine, defaultStemMethod);
113
114 UCArray level;
115 UCArrayClear(level);
116
117 //always return sections - default for GSDL
118 SetCStr(level, "Section");
119
120 // do the query
121 MGQuery(indexData, queryInfo, queryTree, queryResult, level);
122
123
124 // convert ExtQueryResult to queryresultclass
125
126 queryresult.docs_matched = (int)queryResult.docs.size();
127
128 if (queryresult.docs_matched == (int)queryResult.actualNumDocs) {
129 queryresult.is_approx = Exact;
130 }
131 else if (queryresult.docs_matched < (int)queryResult.actualNumDocs) {
132 queryresult.is_approx = MoreThan;
133 }
134 else {
135 queryresult.is_approx = Approximate;
136 }
137
138 docresultclass doc;
139 for (int i=0; i<(int)queryResult.docs.size(); i++) {
140 doc.clear();
141 doc.docnum = (int)queryResult.levels[i];
142 doc.docweight = queryResult.ranks[i];
143 queryresult.docs.docset[doc.docnum] = doc;
144 queryresult.docs.docorder.push_back(doc.docnum);
145
146 }
147
148 // term info
149 termfreqclass term;
150 for (int i=0; i<(int)queryResult.termFreqs.size(); i++) {
151 term.clear();
152 term.termstr = GetCStr(queryResult.termFreqs[i].term);
153 term.termstemstr = term.termstr;
154 term.termfreq = queryResult.termFreqs[i].termFreq;
155 queryresult.terms.push_back(term);
156 queryresult.orgterms.push_back(term); // should this change??
157
158 for (int j=0; j<(int)queryResult.termFreqs[i].equivTerms.size(); j++) {
159 queryresult.termvariants.insert(GetCStr(queryResult.termFreqs[i].equivTerms[j]));
160 }
161
162 }
163 // clean up
164 indexData.UnloadData();
165 delete indexname;
166 return true;
167
168}
169
170
171bool mgppsearchclass::browse_search(const queryparamclass &queryparams, int start, int numDocs,
172 queryresultsclass &queryresult) {
173
174 char *basepath = collectdir.getcstr(); //like ...gsdl/collect/demo
175 char *indexname = (getindexsuffix(queryparams)).getcstr();
176
177 IndexData indexData;
178 if (!indexData.LoadData (basepath, indexname)) {
179 cerr<<"couldn't load index data\n"<<endl;
180 return false;
181 }
182
183 UCArray level;
184 UCArrayClear(level);
185
186 //browse always at top level
187 SetCStr(level, "Document");
188
189
190 BrowseQueryNode browseNode;
191 browseNode.startPosition = start;
192 browseNode.numTerms = numDocs;
193
194 BrowseQueryResult browseResult;
195
196 UCArrayClear(browseNode.term);
197 SetCStr(browseNode.term, (queryparams.querystring.getcstr()));
198
199 // do the actual query
200 MGBrowseQuery(indexData, level, browseNode, browseResult);
201
202 // load results into term info
203 termfreqclass term;
204 for (int i=0; i<(int)browseResult.termFreqs.size(); i++) {
205 term.clear();
206 term.termstr = GetCStr(browseResult.termFreqs[i].term);
207 term.termstemstr = term.termstr;
208 term.termfreq = browseResult.termFreqs[i].termFreq;
209 queryresult.terms.push_back(term);
210 queryresult.orgterms.push_back(term);
211
212 }
213 // clean up
214 indexData.UnloadData();
215 delete indexname;
216
217 return true;
218}
219
220// the document text for 'docnum' is placed in 'output'
221// docTargetDocument returns 'true' if it was able to
222// try to get a document
223// collection is needed to see if an index from the
224// collection is loaded. THe default index bits are just there cos
225// the mg version needs them
226
227bool mgppsearchclass::docTargetDocument(const text_t &/*defaultindex*/,
228 const text_t &/*defaultsubcollection*/,
229 const text_t &/*defaultlanguage*/,
230 const text_t &collection,
231 int docnum,
232 text_t &output) {
233
234 char *basepath = collectdir.getcstr(); //like ...gsdl/collect/demo
235
236 text_t textfilename = "/index/text/"+collection;
237 TextData textdata;
238 if(!textdata.LoadData(basepath, textfilename.getcstr())) {
239 //error
240 return false;
241 }
242 UCArray doctext;
243 UCArray level;
244 SetCStr(level, "Section");
245 if (!GetDocText(textdata, level, (unsigned long)docnum, doctext)) {
246 //error
247 return false;
248 }
249
250 // convert UCArray to text_t
251 output.clear();
252 output = GetCStr(doctext);
253
254 // here need to remove the <Document>, <Section>, <Paragraph> tags
255
256 // mg converts to unicode, this may need to be added here???
257
258 //clean up
259 textdata.UnloadData ();
260 delete basepath;
261
262 return true;
263
264}
265
266
267
268
269
270
Note: See TracBrowser for help on using the repository browser.