source: main/trunk/greenstone2/runtime-src/src/colservr/filter.cpp@ 31387

Last change on this file since 31387 was 31387, checked in by ak19, 7 years ago

Round 1 of commits for getting OAI deletion policy to work with GS2 (server end). The perl code writing out the OAI db and the GS3 server code implementing the deletion policy had already been completed earlier (end 2016).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 10.3 KB
Line 
1/**********************************************************************
2 *
3 * filter.cpp --
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "filter.h"
27#include "fileutil.h"
28#include <assert.h>
29
30#include <iostream>
31using namespace std;
32
33
34// default constructor does nothing
35filterclass::filterclass () {
36}
37
38// default destructor does nothing
39filterclass::~filterclass () {
40}
41
42// configure should be called once for each configuration line
43// default configures the default filter options
44void filterclass::configure (const text_t &key, const text_tarray &cfgline) {
45 if (cfgline.size() >= 1) {
46 const text_t &value = cfgline[0];
47
48 if (key == "collection") collection = value;
49 else if (key == "collectdir") collectdir = value;
50 else if (key == "gsdlhome") gsdlhome = value;
51 else if (key == "collecthome") collecthome = value;
52 else if (key == "gdbmhome") dbhome = value;
53 else if ((key == "filteroptdefault") && (cfgline.size() == 2)) {
54 // see if this filter has an option with this name
55 FilterOption_tmap::iterator thisfilteroption =
56 filterOptions.find(cfgline[0]);
57 if (thisfilteroption != filterOptions.end())
58 (*thisfilteroption).second.defaultValue = cfgline[1];
59 }
60 }
61}
62
63text_t filterclass::getcollectionpath()
64{
65 text_t resolved_filename;
66
67 if (gsdlhome==dbhome) {
68 // dbhome has defaulted to gsdlhome which we take to means the
69 // database has been specifically moved out of gsdlhome area.
70 // => it should be whereever collecthome is set to
71 resolved_filename = filename_cat(collecthome, collection);
72 }
73 else {
74 // dbhome is explicitly set to something other than gsdlhome
75 // => use dbhome
76 resolved_filename = filename_cat(dbhome, "collect", collection);
77 }
78
79 return resolved_filename;
80}
81
82text_t filterclass::resolve_db_filename(const text_t& idx,
83 const text_t& file_ext)
84{
85 /*
86 text_t resolved_filename;
87
88 if (gsdlhome==dbhome) {
89 // dbhome has defaulted to gsdlhome which we take to means the
90 // database has been specifically moved out of gsdlhome area.
91 // => it should be whereever collecthome is set to
92 resolved_filename = filename_cat(collecthome, collection, "index", "text", idx);
93 }
94 else {
95 // dbhome is explicitly set to something other than gsdlhome
96 // => use dbhome
97 resolved_filename = filename_cat(dbhome, "collect", collection, "index", "text", idx);
98 }
99 */
100
101 text_t resolved_filename = filename_cat(getcollectionpath(), "index", "text", idx);
102 resolved_filename += file_ext;
103
104 return resolved_filename;
105}
106
107text_t filterclass::resolve_oaidb_filename(const text_t& file_ext)
108{
109 text_t resolved_filename = filename_cat(getcollectionpath(), "etc", "oai-inf");
110
111 resolved_filename += file_ext;
112
113 return resolved_filename;
114}
115
116
117// init should be called after all the configuration is done but
118// before any other methods are called
119// default checks all the filter option defaults
120bool filterclass::init (ostream &/*logout*/) {
121 // check all the filter defaults
122 FilterOption_tmap::iterator filteroption_here = filterOptions.begin();
123 FilterOption_tmap::iterator filteroption_end = filterOptions.end();
124 while (filteroption_here != filteroption_end) {
125 (*filteroption_here).second.check_defaultValue ();
126
127 ++filteroption_here;
128 }
129
130 if (collecthome.empty()) collecthome = filename_cat(gsdlhome,"collect");
131 if (dbhome.empty()) dbhome = gsdlhome;
132
133 // get the collection directory name
134 if (collectdir.empty()) {
135 collectdir = filename_cat (collecthome, collection);
136 }
137
138 return true;
139}
140
141// returns the name of this filter
142// default returns "NullFilter"
143text_t filterclass::get_filter_name () {
144 return "NullFilter";
145}
146
147// returns the current filter options
148void filterclass::get_filteroptions (InfoFilterOptionsResponse_t &response,
149 comerror_t &err, ostream &/*logout*/) {
150 response.clear();
151 response.filterOptions = filterOptions;
152 err = noError;
153}
154
155// default returns nothing
156void filterclass::filter (const FilterRequest_t &request,
157 FilterResponse_t &response,
158 comerror_t &err, ostream &/*logout*/) {
159 ResultDocInfo_t resultdoc;
160
161 response.clear();
162
163 if ((request.filterResultOptions & FROID) ||
164 (request.filterResultOptions & FRmetadata)) {
165 // copy the OIDs from the request to the response
166 text_tarray::const_iterator here = request.docSet.begin();
167 text_tarray::const_iterator end = request.docSet.end();
168 while (here != end) {
169 resultdoc.OID = (*here);
170 response.docInfo.push_back(resultdoc);
171
172 ++here;
173 }
174 }
175
176 response.numDocs = response.docInfo.size();
177 response.isApprox = Exact;
178 err = noError;
179}
180
181
182bool operator==(const filterptr &x, const filterptr &y) {
183 return (x.f == y.f);
184}
185
186bool operator<(const filterptr &x, const filterptr &y) {
187 return (x.f < y.f);
188}
189
190
191// thefilter remains the property of the calling code but
192// should not be deleted until it is removed from this list.
193void filtermapclass::addfilter (filterclass *thefilter) {
194 // can't add a null filter
195 assert (thefilter != NULL);
196 if (thefilter == NULL) return;
197
198 // can't add an filter with no name
199 assert (!(thefilter->get_filter_name()).empty());
200 if ((thefilter->get_filter_name()).empty()) return;
201
202 filterptr fptr;
203 fptr.f = thefilter;
204 filterptrs[thefilter->get_filter_name()] = fptr;
205}
206
207// getfilter will return NULL if the filter could not be found
208filterclass *filtermapclass::getfilter (const text_t &key) {
209 // can't find an filter with no name
210 assert (!key.empty());
211 if (key.empty()) return NULL;
212
213 iterator here = filterptrs.find (key);
214 if (here == filterptrs.end()) return NULL;
215
216 return (*here).second.f;
217}
218
219
220
221
222// some useful functions for dealing with document sets
223
224// returns -1 if t1 is a child of t2
225// returns 0 if t1 and t2 are not parent-child related
226// returns 1 if t1 is a parent of t2
227int child_compare (const text_t &t1, const text_t &t2) {
228 text_t::const_iterator t1_here = t1.begin();
229 text_t::const_iterator t1_end = t1.end();
230 text_t::const_iterator t2_here = t2.begin();
231 text_t::const_iterator t2_end = t2.end();
232
233 while ((t1_here != t1_end) && (t2_here != t2_end)) {
234 if (*t1_here != *t2_here) return 0; // unrelated
235 ++t1_here;
236 ++t2_here;
237 }
238
239 if ((t1_here == t1_end) && (t2_here == t2_end)) return 0; // equal
240 if (t1_here != t1_end) {
241 if (*t1_here == '.') return -1; // t1 is child
242 else return 0; // unrelated
243 }
244
245 if (t2_here != t2_end) {
246 if (*t2_here == '.') return 1; // t2 is child
247 else return 0; // unrelated
248 }
249
250 return 0; // shouldn't get here...
251}
252
253// intersect places the result in set1
254void intersect (text_tset &set1, const text_tset &set2) {
255 text_tset resultset;
256 int childcomp = 0;
257
258 text_tset::const_iterator set1_here = set1.begin();
259 text_tset::const_iterator set1_end = set1.end();
260 text_tset::const_iterator set2_here = set2.begin();
261 text_tset::const_iterator set2_end = set2.end();
262 while ((set1_here != set1_end) && (set2_here != set2_end)) {
263 if (*set1_here == *set2_here) {
264 // equal
265 resultset.insert (*set1_here);
266 ++set1_here;
267 ++set2_here;
268
269 } else if ((childcomp=child_compare(*set1_here, *set2_here)) != 0) {
270 if (childcomp < 0) {
271 // set1_here is child
272 resultset.insert (*set1_here);
273 ++set1_here;
274 } else {
275 // set2_here is child
276 resultset.insert (*set2_here);
277 ++set2_here;
278 }
279
280 } else if (*set1_here < *set2_here) {
281 // set1 is less
282 ++set1_here;
283
284 } else {
285 // set2 is less
286 ++set2_here;
287 }
288 }
289
290 set1 = resultset;
291}
292
293void intersect (text_tarray &set1, const text_tset &set2) {
294 text_tarray resultset;
295
296 text_tarray::const_iterator set1_here = set1.begin();
297 text_tarray::const_iterator set1_end = set1.end();
298
299 while (set1_here != set1_end) {
300 if (in_set (set2, *set1_here))
301 resultset.push_back (*set1_here);
302 ++set1_here;
303 }
304 set1 = resultset;
305}
306
307void intersect (text_tarray &set1, const text_tarray &set2) {
308 text_tarray resultset;
309
310 text_tarray::const_iterator set1_here = set1.begin();
311 text_tarray::const_iterator set1_end = set1.end();
312
313 while (set1_here != set1_end) {
314 if (in_set (set2, *set1_here))
315 resultset.push_back (*set1_here);
316 ++set1_here;
317 }
318 set1 = resultset;
319}
320
321// tests to see if el is in set
322bool in_set (const text_tset &set1, const text_t &el) {
323 text_t::const_iterator here = el.begin();
324 text_t::const_iterator end = el.end();
325 text_t tryel, tryel_add;
326 bool first = true;
327
328 // the element is in the set if any of its parents are
329 // in the set
330 do {
331 // get next possible element to try
332 here = getdelimitstr (here, end, '.', tryel_add);
333 if (!first) tryel += ".";
334 first = false;
335 tryel += tryel_add;
336
337 // see if this element is in the set
338 if (set1.find(tryel) != set1.end()) return true;
339 } while (here != end);
340
341 return false;
342}
343
344bool in_set (const text_tarray &set1, const text_t &el) {
345 text_t::const_iterator here = el.begin();
346 text_t::const_iterator end = el.end();
347 text_t tryel, tryel_add;
348 bool first = true;
349
350 // the element is in the set if any of its parents are
351 // in the set
352 do {
353 // get next possible element to try
354 here = getdelimitstr (here, end, '.', tryel_add);
355 if (!first) tryel += ".";
356 first = false;
357 tryel += tryel_add;
358
359 // see if this element is in the set
360 text_tarray::const_iterator h = set1.begin();
361 text_tarray::const_iterator e = set1.end();
362 while (h != e) {
363 if (*h == tryel) return true;
364 ++h;
365 }
366 } while (here != end);
367
368 return false;
369}
Note: See TracBrowser for help on using the repository browser.