source: trunk/gsdl/src/colservr/browsefilter.cpp@ 9937

Last change on this file since 9937 was 9937, checked in by kjdon, 19 years ago

Modified the filters/sources etc. so that if an indexstem is specified in the build.cfg file, it is used as the root of the index/gdbm filenames instead of the collection name. The collection name is still used by default. This means that a collection directory can be renamed without rebuilding.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.2 KB
Line 
1/**********************************************************************
2 *
3 * browsefilter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "browsefilter.h"
27#include "fileutil.h"
28#include "gsdltools.h"
29
30
31browsefilterclass::browsefilterclass () {
32 gdbmptr = NULL;
33
34 // -- onePerQuery StartResults integer
35 FilterOption_t filtopt;
36 filtopt.name = "StartResults";
37 filtopt.type = FilterOption_t::integert;
38 filtopt.repeatable = FilterOption_t::onePerQuery;
39 filtopt.defaultValue = "1";
40 filtopt.validValues.push_back("1");
41 filtopt.validValues.push_back("10000");
42 filterOptions["StartResults"] = filtopt;
43
44 // -- onePerQuery EndResults integer
45 filtopt.clear();
46 filtopt.name = "EndResults";
47 filtopt.type = FilterOption_t::integert;
48 filtopt.repeatable = FilterOption_t::onePerQuery;
49 filtopt.defaultValue = "-1";
50 filtopt.validValues.push_back("-1");
51 filtopt.validValues.push_back("10000");
52 filterOptions["EndResults"] = filtopt;
53
54 // -- onePerQuery ParentNode string ("" will return the browsing available)
55 filtopt.clear();
56 filtopt.name = "ParentNode";
57 filtopt.type = FilterOption_t::stringt;
58 filtopt.repeatable = FilterOption_t::onePerQuery;
59 filtopt.defaultValue = "";
60 filterOptions["ParentNode"] = filtopt;
61}
62
63browsefilterclass::~browsefilterclass () {
64}
65
66void browsefilterclass::configure (const text_t &key, const text_tarray &cfgline) {
67 filterclass::configure (key, cfgline);
68 if (key == "indexstem") {
69 indexstem = cfgline[0];
70 }
71}
72
73bool browsefilterclass::init (ostream &logout) {
74 outconvertclass text_t2ascii;
75
76 if (!filterclass::init(logout)) return false;
77
78 if (indexstem.empty()) {
79 indexstem = collection;
80 }
81 // get the filename for the database and make sure it exists
82 gdbm_filename = filename_cat(gdbmhome, "collect", collection, "index", "text", indexstem);
83
84 if (littleEndian()) gdbm_filename += ".ldb";
85 else gdbm_filename += ".bdb";
86
87 if (!file_exists(gdbm_filename)) {
88 logout << text_t2ascii
89 << "warning: gdbm database \"" //****
90 << gdbm_filename << "\" does not exist\n\n";
91 // return false; //****
92 }
93
94 return true;
95}
96
97void browsefilterclass::filter (const FilterRequest_t &request,
98 FilterResponse_t &response,
99 comerror_t &err, ostream &logout) {
100 int numDocs = 0;
101 outconvertclass text_t2ascii;
102
103 response.clear ();
104 err = noError;
105 if (gdbmptr == NULL) {
106 // most likely a configuration problem
107 logout << text_t2ascii
108 << "configuration error: browsefilter contains a null gdbmclass\n\n";
109 err = configurationError;
110 return;
111 }
112
113 // open the database
114 gdbmptr->setlogout(&logout);
115 if (!gdbmptr->opendatabase (gdbm_filename, GDBM_READER, 100, false)) {
116 // most likely a system problem (we have already checked that the
117 // gdbm database exists)
118 logout << text_t2ascii
119 << "system problem: open on gdbm database \""
120 << gdbm_filename << "\" failed\n\n";
121 err = systemProblem;
122 return;
123 }
124
125 // get the browse parameters
126 int startresults = filterOptions["StartResults"].defaultValue.getint();
127 int endresults = filterOptions["EndResults"].defaultValue.getint();
128 text_t parentnode = filterOptions["ParentNode"].defaultValue;
129 OptionValue_tarray::const_iterator options_here = request.filterOptions.begin();
130 OptionValue_tarray::const_iterator options_end = request.filterOptions.end();
131 while (options_here != options_end) {
132 if ((*options_here).name == "StartResults")
133 startresults = (*options_here).value.getint();
134 else if ((*options_here).name == "EndResults")
135 endresults = (*options_here).value.getint();
136 else if ((*options_here).name == "ParentNode")
137 parentnode = (*options_here).value;
138 else {
139 logout << text_t2ascii
140 << "warning: unknown browsefilter option \""
141 << (*options_here).name
142 << "\" ignored.\n\n";
143 }
144
145 ++options_here;
146 }
147
148 infodbclass info;
149
150 // translate any ".fc", ".pr" etc. stuff in the parentnode
151 parentnode = gdbmptr->translate_OID (parentnode, info);
152
153 // adjust topmost browsing node
154 if (parentnode.empty()) parentnode = "browse";
155
156 // get the node
157 if ((request.filterResultOptions & FROID) ||
158 (request.filterResultOptions & FRmetadata)) {
159 if (!gdbmptr->getinfo(parentnode, info)) {
160 // didn't find the node
161 logout << text_t2ascii
162 << "warning: lookup for node \"" << parentnode
163 << "\" failed for browsefilter.\n\n";
164 } else {
165 // found the node
166
167 // replace " with the parent node name and split the contains string
168 // into the result set
169 text_tarray resultset;
170 text_t tmptext;
171 text_t &contains = info["contains"];
172 text_t::iterator contains_here = contains.begin();
173 text_t::iterator contains_end = contains.end();
174 while (contains_here != contains_end) {
175 if (*contains_here == '"') tmptext += parentnode;
176 else if (*contains_here == ';') {
177 if (!tmptext.empty()) resultset.push_back (tmptext);
178 tmptext.clear();
179 } else tmptext.push_back(*contains_here);
180
181 ++contains_here;
182 }
183 // insert the last result in the set
184 if (!tmptext.empty()) resultset.push_back (tmptext);
185
186 text_tarray offset_resultset;
187 text_t &md_type = info["mdtype"];
188 if (!md_type.empty())
189 {
190 text_t &md_offset = info["mdoffset"];
191 if (!md_offset.empty())
192 {
193 text_t offsettext;
194
195 text_t::iterator offset_here = md_offset.begin();
196 text_t::iterator offset_end = md_offset.end();
197 while (offset_here != offset_end)
198 {
199 if (*offset_here == ';')
200 {
201 if (offsettext.empty())
202 {
203 offset_resultset.push_back ("0");
204 }
205 else
206 {
207 offset_resultset.push_back (offsettext);
208 }
209 offsettext.clear();
210 }
211 else
212 {
213 offsettext.push_back(*offset_here);
214 }
215
216 ++offset_here;
217 }
218 // insert the last result in the set
219 if (offsettext.empty())
220 {
221 offset_resultset.push_back ("0");
222 }
223 else
224 {
225 offset_resultset.push_back (offsettext);
226 }
227 }
228 else
229 {
230 // add 0 offset for each 'contains' entry
231 text_tarray::iterator result_here = resultset.begin();
232 text_tarray::iterator result_end = resultset.end();
233 while (result_here != result_end) {
234 offset_resultset.push_back("0");
235 ++result_here;
236 }
237 }
238
239 // do an intersection with the input set
240 if (!request.docSet.empty()) {
241
242 text_tarray intersect_resultset;
243 text_tarray intersect_offset_resultset;
244
245 text_tarray::const_iterator resultset_here = resultset.begin();
246 text_tarray::const_iterator resultset_end = resultset.end();
247 text_tarray::const_iterator offset_resultset_here = offset_resultset.begin();
248
249 while (resultset_here != resultset_end) {
250 if (in_set (request.docSet, *resultset_here))
251 {
252 intersect_resultset.push_back (*resultset_here);
253 intersect_offset_resultset.push_back (*offset_resultset_here);
254 }
255 ++resultset_here;
256 ++offset_resultset_here;
257 }
258 resultset = intersect_resultset;
259 offset_resultset = intersect_offset_resultset;
260 }
261 }
262 else
263 {
264 // do an intersection with the input set
265 if (!request.docSet.empty()) {
266 intersect (resultset, request.docSet);
267 }
268
269 // add 0 offset for each 'contains' entry
270 text_tarray::iterator result_here = resultset.begin();
271 text_tarray::iterator result_end = resultset.end();
272 while (result_here != result_end) {
273 offset_resultset.push_back("0");
274 ++result_here;
275 }
276 }
277
278 // create the response
279 numDocs = resultset.size();
280 int resultnum = 1;
281 ResultDocInfo_t resultdoc;
282 text_tarray::iterator result_here = resultset.begin();
283 text_tarray::iterator result_end = resultset.end();
284 text_tarray::iterator offset_result_here = offset_resultset.begin();
285
286 while (result_here != result_end) {
287 // if endresults is -1 get all results
288 if ((endresults != -1) && (resultnum > endresults)) break;
289 if (resultnum >= startresults) {
290 resultdoc.OID = (*result_here);
291 if (!md_type.empty())
292 {
293 resultdoc.classifier_metadata_type = md_type;
294 resultdoc.classifier_metadata_offset = offset_result_here->getint();
295 }
296 response.docInfo.push_back(resultdoc);
297 }
298
299 ++resultnum;
300 ++result_here;
301 if (!md_type.empty()) ++offset_result_here;
302 }
303 }
304 }
305
306 response.numDocs = numDocs;
307 response.isApprox = Exact;
308}
Note: See TracBrowser for help on using the repository browser.