1 |
|
---|
2 | /**********************************************************************
|
---|
3 | *
|
---|
4 | * collectserver.cpp --
|
---|
5 | * Copyright (C) 1999 The New Zealand Digital Library Project
|
---|
6 | *
|
---|
7 | * A component of the Greenstone digital library software
|
---|
8 | * from the New Zealand Digital Library Project at the
|
---|
9 | * University of Waikato, New Zealand.
|
---|
10 | *
|
---|
11 | * This program is free software; you can redistribute it and/or modify
|
---|
12 | * it under the terms of the GNU General Public License as published by
|
---|
13 | * the Free Software Foundation; either version 2 of the License, or
|
---|
14 | * (at your option) any later version.
|
---|
15 | *
|
---|
16 | * This program is distributed in the hope that it will be useful,
|
---|
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
19 | * GNU General Public License for more details.
|
---|
20 | *
|
---|
21 | * You should have received a copy of the GNU General Public License
|
---|
22 | * along with this program; if not, write to the Free Software
|
---|
23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
24 | *
|
---|
25 | *********************************************************************/
|
---|
26 |
|
---|
27 | #include "collectserver.h"
|
---|
28 | #include "OIDtools.h"
|
---|
29 | #include <assert.h>
|
---|
30 | #include "display.h"
|
---|
31 |
|
---|
32 | void check_if_valid_buildtype(const text_t& buildtype)
|
---|
33 | {
|
---|
34 | if (buildtype=="mg") {
|
---|
35 | #ifndef ENABLE_MG
|
---|
36 | cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mg'." << endl;
|
---|
37 | #endif
|
---|
38 | }
|
---|
39 |
|
---|
40 | else if (buildtype=="mgpp") {
|
---|
41 | #ifndef ENABLE_MGPP
|
---|
42 | cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'mgpp'." << endl;
|
---|
43 | #endif
|
---|
44 | }
|
---|
45 |
|
---|
46 | else if (buildtype=="lucene") {
|
---|
47 | #ifndef ENABLE_LUCENE
|
---|
48 | cerr << "Warning: Greenstone installation has not been compiled to support buildtype 'lucene'." << endl;
|
---|
49 | #endif
|
---|
50 | }
|
---|
51 |
|
---|
52 | else {
|
---|
53 | cerr << "Error: buildtype '" << buildtype << "' is not a recognized indexer for Greenstone." << endl;
|
---|
54 | }
|
---|
55 |
|
---|
56 | }
|
---|
57 |
|
---|
58 |
|
---|
59 | void check_if_valid_infodbtype(const text_t& infodbtype)
|
---|
60 | {
|
---|
61 | if (infodbtype=="gdbm") {
|
---|
62 | #ifndef USE_GDBM
|
---|
63 | cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm'." << endl;
|
---|
64 | #endif
|
---|
65 | }
|
---|
66 | else if (infodbtype=="gdbm-txtgz") {
|
---|
67 | #ifndef USE_GDBM
|
---|
68 | cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'gdbm-txtgz'." << endl;
|
---|
69 | #endif
|
---|
70 | }
|
---|
71 | else if (infodbtype=="jdbm") {
|
---|
72 | #ifndef USE_JDBM
|
---|
73 | cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'jdbm'." << endl;
|
---|
74 | #endif
|
---|
75 | }
|
---|
76 | else if (infodbtype=="sqlite") {
|
---|
77 | #ifndef USE_SQLITE
|
---|
78 | cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'sqlite'." << endl;
|
---|
79 | #endif
|
---|
80 | }
|
---|
81 | else if (infodbtype=="mssql") {
|
---|
82 | #ifndef USE_MSSQL
|
---|
83 | cerr << "Warning: Greenstone installation has not been compiled to support infodbtype 'mssql'." << endl;
|
---|
84 | #endif
|
---|
85 | }
|
---|
86 |
|
---|
87 | else {
|
---|
88 | cerr << "Error: infodbtype '" << infodbtype << "' is not a recognized database type for Greenstone." << endl;
|
---|
89 | }
|
---|
90 |
|
---|
91 | }
|
---|
92 |
|
---|
93 |
|
---|
94 |
|
---|
95 | collectserver::collectserver ()
|
---|
96 | : collectinfo()
|
---|
97 | {
|
---|
98 | configinfo.collection = "null";
|
---|
99 | }
|
---|
100 |
|
---|
101 | collectserver::~collectserver () {
|
---|
102 |
|
---|
103 | // clean up the sources
|
---|
104 | sourcelistclass::iterator source_here = sources.begin();
|
---|
105 | sourcelistclass::iterator source_end = sources.end();
|
---|
106 | while (source_here != source_end) {
|
---|
107 | if ((*source_here).s != NULL)
|
---|
108 | delete (*source_here).s;
|
---|
109 | ++source_here;
|
---|
110 | }
|
---|
111 | sources.clear();
|
---|
112 |
|
---|
113 | // clean up the filters
|
---|
114 | filtermapclass::iterator filter_here = filters.begin();
|
---|
115 | filtermapclass::iterator filter_end = filters.end();
|
---|
116 | while (filter_here != filter_end) {
|
---|
117 | if ((*filter_here).second.f != NULL)
|
---|
118 | delete (*filter_here).second.f;
|
---|
119 | ++filter_here;
|
---|
120 | }
|
---|
121 | filters.clear();
|
---|
122 | }
|
---|
123 |
|
---|
124 | // configure should be called for each line in the
|
---|
125 | // configuration files to configure the collection server and everything
|
---|
126 | // it contains. The configuration should take place just before initialisation
|
---|
127 | void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
|
---|
128 | if (cfgline.size() >= 1) {
|
---|
129 | const text_t &value = cfgline[0];
|
---|
130 | if (key == "plugin")
|
---|
131 | {
|
---|
132 | //get the plugin name
|
---|
133 | const text_t &name = cfgline[0];
|
---|
134 |
|
---|
135 | if (name == "HTMLPlugin" || name== "PDFPlugin")
|
---|
136 | {
|
---|
137 | for (int hI = 1; hI < cfgline.size(); hI++)
|
---|
138 | {
|
---|
139 | const text_t &plugOption = cfgline[hI];
|
---|
140 |
|
---|
141 | if (plugOption == "-use_realistic_book")
|
---|
142 | {
|
---|
143 | collectinfo.useBook = true;
|
---|
144 | break;
|
---|
145 | }
|
---|
146 | }
|
---|
147 | }
|
---|
148 | }
|
---|
149 | else if (key == "gsdlhome") configinfo.gsdlhome = value;
|
---|
150 | else if (key == "gdbmhome") configinfo.dbhome = value;
|
---|
151 | else if (key == "collecthome") configinfo.collecthome = value;
|
---|
152 | else if (key == "collection") {
|
---|
153 | configinfo.collection = value;
|
---|
154 | collectinfo.shortInfo.name = value;
|
---|
155 | }
|
---|
156 | else if (key == "collectdir") configinfo.collectdir = value;
|
---|
157 | else if (key == "host") collectinfo.shortInfo.host = value;
|
---|
158 | else if (key == "port") collectinfo.shortInfo.port = value.getint();
|
---|
159 | else if (key == "public") {
|
---|
160 | if (value == "true") collectinfo.isPublic = true;
|
---|
161 | else collectinfo.isPublic = false;
|
---|
162 | } else if (key == "beta") {
|
---|
163 | if (value == "true") collectinfo.isBeta = true;
|
---|
164 | else collectinfo.isBeta = false;
|
---|
165 | } else if (key == "collectgroup") {
|
---|
166 | if (value == "true") collectinfo.isCollectGroup = true;
|
---|
167 | else collectinfo.isCollectGroup = false;
|
---|
168 | } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
|
---|
169 | else if (key == "supercollectionoptions") {
|
---|
170 | text_tarray::const_iterator begin = cfgline.begin();
|
---|
171 | text_tarray::const_iterator end = cfgline.end();
|
---|
172 | while(begin != end) {
|
---|
173 |
|
---|
174 | if (*begin == "uniform_search_results_formatting") {
|
---|
175 | collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
|
---|
176 | }
|
---|
177 | begin++;
|
---|
178 | }
|
---|
179 | }
|
---|
180 | else if (key == "builddate") collectinfo.buildDate = value.getint();
|
---|
181 | else if (key == "languages") collectinfo.languages = cfgline;
|
---|
182 | else if (key == "numdocs") collectinfo.numDocs = value.getint();
|
---|
183 | else if (key == "numsections") collectinfo.numSections = value.getint();
|
---|
184 | else if (key == "numwords") collectinfo.numWords = value.getint();
|
---|
185 | else if (key == "numbytes") collectinfo.numBytes = value.getint();
|
---|
186 | else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
|
---|
187 | else if (key == "collectionmeta") {
|
---|
188 | // genuine collmeta get added as collectionmeta and collection_macros
|
---|
189 | // .collmeta just get added as collection_macros
|
---|
190 | text_t params;
|
---|
191 | if (cfgline.size() == 3) {
|
---|
192 | // get the params for later
|
---|
193 | text_t::const_iterator first=cfgline[1].begin()+1;
|
---|
194 | text_t::const_iterator last=cfgline[1].end()-1;
|
---|
195 | params=substr(first, last);
|
---|
196 | }
|
---|
197 |
|
---|
198 | text_t meta_name = cfgline[0];
|
---|
199 | if (*(meta_name.begin())=='.') {
|
---|
200 | // a .xxx collectionmeta. strip off the . and
|
---|
201 | // look it up in the indexmap to get the actual value
|
---|
202 |
|
---|
203 | text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
|
---|
204 | text_t new_name;
|
---|
205 |
|
---|
206 | // Now that GLI has been fixed to deal with ex. prefixes, and modelcol's collect.cfg does not contain
|
---|
207 | // Greenstone ex.* meta in the "collectionmeta" section, we won't encounter ex.* in collectionmeta here.
|
---|
208 | // So we should not remove any "ex." prefixes here, since collectionmeta does not contain ex.* but it can
|
---|
209 | // contain ex.dc.* type metadata, which will need to have their ex. prefix preserved for matching below.
|
---|
210 |
|
---|
211 | if (indexmap.from2to(name, new_name)) {
|
---|
212 | meta_name = new_name;
|
---|
213 | }
|
---|
214 | } else {
|
---|
215 | // add them to collectionmeta
|
---|
216 | text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
|
---|
217 | if (cfgline.size() == 2) {
|
---|
218 | lang_map[g_EmptyText] = cfgline[1];
|
---|
219 | } else if (cfgline.size() == 3 ) {
|
---|
220 | // get the lang out of params
|
---|
221 | paramhashtype params_hash;
|
---|
222 | splitparams(params, params_hash);
|
---|
223 |
|
---|
224 | text_t lang = params_hash["l"];
|
---|
225 | lang_map[lang] = cfgline[2];
|
---|
226 | if (lang_map[g_EmptyText].empty()) {
|
---|
227 | // want the first one as the default if no default specified
|
---|
228 | lang_map[g_EmptyText] = cfgline[2];
|
---|
229 | }
|
---|
230 | }
|
---|
231 | collectinfo.collectionmeta[cfgline[0]] = lang_map;
|
---|
232 |
|
---|
233 | }
|
---|
234 |
|
---|
235 | // add all collectionmeta to macro list
|
---|
236 | text_tmap params_map = collectinfo.collection_macros[meta_name];
|
---|
237 |
|
---|
238 | if (cfgline.size() == 2) {// no params for this macro
|
---|
239 | params_map[g_EmptyText] = cfgline[1];
|
---|
240 | }
|
---|
241 | else if (cfgline.size() == 3) {// has params
|
---|
242 | params_map[params] = cfgline[2];
|
---|
243 | if (params_map[g_EmptyText].empty()) {
|
---|
244 | params_map[g_EmptyText] = cfgline[2];
|
---|
245 | }
|
---|
246 | }
|
---|
247 | collectinfo.collection_macros[meta_name] = params_map;
|
---|
248 | }
|
---|
249 | else if (key == "collectionmacro") {
|
---|
250 | text_t nobrackets;
|
---|
251 | text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
|
---|
252 | // add all to macro list
|
---|
253 | if (cfgline.size() == 2) { // no params for this macro
|
---|
254 | params_map[g_EmptyText] = cfgline[1];
|
---|
255 | }
|
---|
256 | else if (cfgline.size() == 3) {// has params
|
---|
257 | // strip [ ] brackets from params
|
---|
258 | text_t::const_iterator first=cfgline[1].begin()+1;
|
---|
259 | text_t::const_iterator last=cfgline[1].end()-1;
|
---|
260 | nobrackets=substr(first, last);
|
---|
261 | params_map[nobrackets] = cfgline[2];
|
---|
262 | }
|
---|
263 | collectinfo.collection_macros[cfgline[0]] = params_map;
|
---|
264 |
|
---|
265 | } else if (key == "format" && cfgline.size() == 2)
|
---|
266 | collectinfo.format[cfgline[0]] = cfgline[1];
|
---|
267 | else if (key == "building" && cfgline.size() == 2)
|
---|
268 | collectinfo.building[cfgline[0]] = cfgline[1];
|
---|
269 | else if (key == "httpdomain") collectinfo.httpdomain = value;
|
---|
270 | else if (key == "httpprefix") collectinfo.httpprefix = value;
|
---|
271 | else if (key == "receptionist") collectinfo.receptionist = value;
|
---|
272 | else if (key == "buildtype") {
|
---|
273 | check_if_valid_buildtype(value); // prints warning if value (indexer) is invalid
|
---|
274 | collectinfo.buildType = value;
|
---|
275 | }
|
---|
276 | // backwards compatibility - searchytpes is now a format statement
|
---|
277 | else if (key == "searchtype") { // means buildtype is mgpp
|
---|
278 | if (collectinfo.buildType.empty()) {
|
---|
279 | check_if_valid_buildtype("mgpp"); // prints warning if value (indexer) is invalid
|
---|
280 | collectinfo.buildType = "mgpp";
|
---|
281 | }
|
---|
282 | joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
|
---|
283 | //collectinfo.searchTypes = cfgline;
|
---|
284 | }
|
---|
285 | else if (key == "infodbtype") {
|
---|
286 | check_if_valid_infodbtype(value); // prints warning if value (database type) is invalid
|
---|
287 | collectinfo.infodbType = value;
|
---|
288 | }
|
---|
289 | else if (key == "separate_cjk") {
|
---|
290 | if (value == "true") collectinfo.isSegmented = true;
|
---|
291 | else collectinfo.isSegmented = false;
|
---|
292 | }
|
---|
293 | // What have we set in our collect.cfg file : document or collection ?
|
---|
294 | else if (key == "authenticate") collectinfo.authenticate = value;
|
---|
295 |
|
---|
296 | // What have we set for our group list
|
---|
297 | else if ((key == "auth_group") || (key == "auth_groups")) joinchar(cfgline,',',collectinfo.auth_group);
|
---|
298 |
|
---|
299 | // build.cfg, earliestDatestamp of this collection needed for
|
---|
300 | // OAIServer to work out earliestDatestamp of this repository
|
---|
301 | else if (key == "earliestdatestamp") {
|
---|
302 | collectinfo.earliestDatestamp = cfgline[0]; // get it from build.cfg
|
---|
303 | }
|
---|
304 |
|
---|
305 | // store all the mappings for use when collection meta is read later
|
---|
306 | // (build.cfg read before collect.cfg)
|
---|
307 | else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
|
---|
308 | indexmap.importmap (cfgline, true);
|
---|
309 |
|
---|
310 | }
|
---|
311 | // In the map the key-value pair contain the same
|
---|
312 | // data i.e key == data, if key is 2 then data is 2
|
---|
313 |
|
---|
314 | // What have we set for our public_documents ACL
|
---|
315 | else if (key == "public_documents")
|
---|
316 | {
|
---|
317 | text_tarray::const_iterator begin = cfgline.begin();
|
---|
318 | text_tarray::const_iterator end = cfgline.end();
|
---|
319 | while(begin != end)
|
---|
320 | {
|
---|
321 | // key = data i.e if key is 2 then data is 2
|
---|
322 | // collectinfo.public_documents[*begin] is the key
|
---|
323 | // *begin is the data value
|
---|
324 |
|
---|
325 | collectinfo.public_documents[*begin] = *begin;
|
---|
326 | ++begin;
|
---|
327 | }
|
---|
328 | }
|
---|
329 |
|
---|
330 | // What have we set for our private_documents ACL
|
---|
331 | else if (key == "private_documents")
|
---|
332 | {
|
---|
333 | text_tarray::const_iterator begin = cfgline.begin();
|
---|
334 | text_tarray::const_iterator end = cfgline.end();
|
---|
335 | while(begin != end)
|
---|
336 | {
|
---|
337 | // key = data i.e if key is 2 then data is 2
|
---|
338 | // collectinfo.public_documents[*begin] is the key
|
---|
339 | // *begin is the data value
|
---|
340 |
|
---|
341 | collectinfo.private_documents[*begin] = *begin;
|
---|
342 | ++begin;
|
---|
343 | }
|
---|
344 | }
|
---|
345 |
|
---|
346 | // dynamic_classifier <UniqueID> "<Options>"
|
---|
347 | else if (key == "dynamic_classifier")
|
---|
348 | {
|
---|
349 | collectinfo.dynamic_classifiers[cfgline[0]] = cfgline[1];
|
---|
350 | }
|
---|
351 | }
|
---|
352 |
|
---|
353 | // configure the filters
|
---|
354 | filtermapclass::iterator filter_here = filters.begin();
|
---|
355 | filtermapclass::iterator filter_end = filters.end();
|
---|
356 | while (filter_here != filter_end) {
|
---|
357 | assert ((*filter_here).second.f != NULL);
|
---|
358 | if ((*filter_here).second.f != NULL)
|
---|
359 | (*filter_here).second.f->configure(key, cfgline);
|
---|
360 |
|
---|
361 | ++filter_here;
|
---|
362 | }
|
---|
363 |
|
---|
364 | // configure the sources
|
---|
365 | sourcelistclass::iterator source_here = sources.begin();
|
---|
366 | sourcelistclass::iterator source_end = sources.end();
|
---|
367 | while (source_here != source_end) {
|
---|
368 | assert ((*source_here).s != NULL);
|
---|
369 | if ((*source_here).s != NULL)
|
---|
370 | (*source_here).s->configure(key, cfgline);
|
---|
371 |
|
---|
372 | ++source_here;
|
---|
373 | }
|
---|
374 | }
|
---|
375 |
|
---|
376 |
|
---|
377 | void collectserver::configure (const text_t &key, const text_t &value) {
|
---|
378 | text_tarray cfgline;
|
---|
379 | cfgline.push_back (value);
|
---|
380 | configure(key, cfgline);
|
---|
381 | }
|
---|
382 |
|
---|
383 | void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
|
---|
384 | // if we've not been properly configured, then it is a foregone
|
---|
385 | // conclusion that we cannot be active
|
---|
386 | if (this->configinfo.collection == "null")
|
---|
387 | {
|
---|
388 | wasSuccess = false;
|
---|
389 | }
|
---|
390 | // if no build date exists, then the collection was probably not built;
|
---|
391 | // ditto if the number of documents is zero, then something is pretty
|
---|
392 | // wrong
|
---|
393 | else if (this->collectinfo.buildDate == 0 ||
|
---|
394 | this->collectinfo.numDocs == 0)
|
---|
395 | {
|
---|
396 | wasSuccess = false;
|
---|
397 | }
|
---|
398 | // it is probably okay
|
---|
399 | else
|
---|
400 | wasSuccess = true;
|
---|
401 | }
|
---|
402 |
|
---|
403 |
|
---|
404 | bool collectserver::init (ostream &logout) {
|
---|
405 | // delete the indexmap
|
---|
406 | indexmap.clear();
|
---|
407 |
|
---|
408 | // init the filters
|
---|
409 | filtermapclass::iterator filter_here = filters.begin();
|
---|
410 | filtermapclass::iterator filter_end = filters.end();
|
---|
411 | while (filter_here != filter_end) {
|
---|
412 | assert ((*filter_here).second.f != NULL);
|
---|
413 | if (((*filter_here).second.f != NULL) &&
|
---|
414 | !(*filter_here).second.f->init(logout)) return false;
|
---|
415 |
|
---|
416 | ++filter_here;
|
---|
417 | }
|
---|
418 |
|
---|
419 | // init the sources
|
---|
420 | sourcelistclass::iterator source_here = sources.begin();
|
---|
421 | sourcelistclass::iterator source_end = sources.end();
|
---|
422 | while (source_here != source_end) {
|
---|
423 | assert ((*source_here).s != NULL);
|
---|
424 | if (((*source_here).s != NULL) &&
|
---|
425 | !(*source_here).s->init(logout)) return false;
|
---|
426 |
|
---|
427 | ++source_here;
|
---|
428 | }
|
---|
429 |
|
---|
430 | return true;
|
---|
431 | }
|
---|
432 |
|
---|
433 |
|
---|
434 | void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
|
---|
435 | comerror_t &err, ostream &/*logout*/) {
|
---|
436 | reponse = collectinfo;
|
---|
437 | err = noError;
|
---|
438 | }
|
---|
439 |
|
---|
440 | void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
|
---|
441 | comerror_t &err, ostream &/*logout*/) {
|
---|
442 | response.clear ();
|
---|
443 |
|
---|
444 | // get a list of filter names
|
---|
445 | filtermapclass::iterator filter_here = filters.begin();
|
---|
446 | filtermapclass::iterator filter_end = filters.end();
|
---|
447 | while (filter_here != filter_end) {
|
---|
448 | response.filterNames.insert ((*filter_here).first);
|
---|
449 | ++filter_here;
|
---|
450 | }
|
---|
451 |
|
---|
452 | err = noError;
|
---|
453 | }
|
---|
454 |
|
---|
455 | void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
|
---|
456 | InfoFilterOptionsResponse_t &response,
|
---|
457 | comerror_t &err, ostream &logout) {
|
---|
458 | outconvertclass text_t2ascii;
|
---|
459 |
|
---|
460 | filterclass *thisfilter = filters.getfilter(request.filterName);
|
---|
461 | if (thisfilter != NULL) {
|
---|
462 | thisfilter->get_filteroptions (response, err, logout);
|
---|
463 | } else {
|
---|
464 | response.clear ();
|
---|
465 | err = protocolError;
|
---|
466 | text_t& infodbtype = collectinfo.infodbType;
|
---|
467 |
|
---|
468 | // Don't print out the warning if were's asking about SQLQueryFilter
|
---|
469 | // when we know the infodbtype is something other than .*sql.*
|
---|
470 |
|
---|
471 | if ((request.filterName != "SQLQueryFilter")
|
---|
472 | || (findword(infodbtype.begin(),infodbtype.end(),"sql") != infodbtype.end())) {
|
---|
473 | logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
|
---|
474 | << "filter \"" << request.filterName << "\".\n\n";
|
---|
475 | }
|
---|
476 | }
|
---|
477 | }
|
---|
478 |
|
---|
479 | void collectserver::filter (FilterRequest_t &request,
|
---|
480 | FilterResponse_t &response,
|
---|
481 | comerror_t &err, ostream &logout) {
|
---|
482 | outconvertclass text_t2ascii;
|
---|
483 |
|
---|
484 | // translate any ".fc", ".pr" etc. stuff in the docSet
|
---|
485 | text_t translatedOID;
|
---|
486 | text_tarray translatedOIDs;
|
---|
487 | text_tarray::iterator doc_here = request.docSet.begin();
|
---|
488 | text_tarray::iterator doc_end = request.docSet.end();
|
---|
489 | while (doc_here != doc_end) {
|
---|
490 | if (needs_translating (*doc_here)) {
|
---|
491 | sourcelistclass::iterator source_here = sources.begin();
|
---|
492 | sourcelistclass::iterator source_end = sources.end();
|
---|
493 | while (source_here != source_end) {
|
---|
494 | assert ((*source_here).s != NULL);
|
---|
495 | if (((*source_here).s != NULL) &&
|
---|
496 | ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
|
---|
497 | if (err != noError) return;
|
---|
498 | break;
|
---|
499 | }
|
---|
500 | ++source_here;
|
---|
501 | }
|
---|
502 | translatedOIDs.push_back (translatedOID);
|
---|
503 | } else {
|
---|
504 | translatedOIDs.push_back (*doc_here);
|
---|
505 | }
|
---|
506 | ++doc_here;
|
---|
507 | }
|
---|
508 | request.docSet = translatedOIDs;
|
---|
509 |
|
---|
510 | response.clear();
|
---|
511 |
|
---|
512 | filterclass *thisfilter = filters.getfilter(request.filterName);
|
---|
513 | if (thisfilter != NULL) {
|
---|
514 | // filter the data
|
---|
515 | thisfilter->filter (request, response, err, logout);
|
---|
516 | if (err != noError) return;
|
---|
517 |
|
---|
518 | // fill in the metadata for each of the OIDs (if it is requested)
|
---|
519 | if (request.filterResultOptions & FRmetadata) {
|
---|
520 |
|
---|
521 | bool processed = false;
|
---|
522 | ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
|
---|
523 | ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
|
---|
524 | while (resultdoc_here != resultdoc_end) {
|
---|
525 |
|
---|
526 | text_t deleted_status = "";
|
---|
527 | bool append_metadata = (request.filterResultOptions & FROAI) ? true : false;
|
---|
528 |
|
---|
529 | // try each of the sources in turn
|
---|
530 | sourcelistclass::iterator source_here = sources.begin();
|
---|
531 | sourcelistclass::iterator source_end = sources.end();
|
---|
532 | while (source_here != source_end) {
|
---|
533 | assert ((*source_here).s != NULL);
|
---|
534 |
|
---|
535 | // first check for oai metadata from the oai_db, if asked for it (if FROAI is set)
|
---|
536 | if(((*source_here).s != NULL) &&
|
---|
537 | request.filterResultOptions & FROAI &&
|
---|
538 | ((*source_here).s->get_oai_metadata(request.requestParams, request.refParams,
|
---|
539 | request.getParents, request.fields,
|
---|
540 | (*resultdoc_here).OID, deleted_status, (*resultdoc_here).metadata,
|
---|
541 | err, logout))) {
|
---|
542 |
|
---|
543 | if (err != noError) return;
|
---|
544 |
|
---|
545 | processed = true;
|
---|
546 | }
|
---|
547 |
|
---|
548 | // We may or may not have got oai_meta (depends on if FROAI was set).
|
---|
549 | // If we didn't get oai_meta, then deleted_status would still be "".
|
---|
550 | // If we did get oai_meta, and if the deleted_status for the OID was D for deleted entry,
|
---|
551 | // don't bother getting any other metadata, as there will be no entry for that OID in index db.
|
---|
552 |
|
---|
553 | // Note that if we did get oai_meta and OID marked as existing, we're in append_mode:
|
---|
554 | // don't let get_metadata() clear the metadata list, as there's already stuff in there
|
---|
555 | //if(deleted_status == "E") append_metadata = true;
|
---|
556 |
|
---|
557 | if (((*source_here).s != NULL) &&
|
---|
558 | deleted_status != "D" &&
|
---|
559 | ((*source_here).s->get_metadata(request.requestParams, request.refParams,
|
---|
560 | request.getParents, request.fields,
|
---|
561 | (*resultdoc_here).OID, (*resultdoc_here).metadata,
|
---|
562 | err, logout, append_metadata))) {
|
---|
563 | if (err != noError) return; // check for errors again
|
---|
564 |
|
---|
565 | processed = processed || true; // processed would not have been set yet if not doing FROAI. Set now.
|
---|
566 | // OR-ing isn't necessary, but indicates some consideration of both get oai meta & get meta success
|
---|
567 | }
|
---|
568 |
|
---|
569 | if(processed) break;
|
---|
570 |
|
---|
571 | ++source_here;
|
---|
572 | }
|
---|
573 | if (!processed) {
|
---|
574 |
|
---|
575 | logout << text_t2ascii << "Protocol Error: nothing processed for "
|
---|
576 | << "filter \"" << request.filterName << "\".\n\n";
|
---|
577 |
|
---|
578 | err = protocolError;
|
---|
579 | return;
|
---|
580 | }
|
---|
581 | ++resultdoc_here;
|
---|
582 | }
|
---|
583 | }
|
---|
584 |
|
---|
585 | err = noError;
|
---|
586 | }
|
---|
587 | else
|
---|
588 | {
|
---|
589 | response.clear ();
|
---|
590 | err = protocolError;
|
---|
591 | logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
|
---|
592 | << "filter \"" << request.filterName << "\".\n\n";
|
---|
593 | }
|
---|
594 | }
|
---|
595 |
|
---|
596 | void collectserver::get_document (const DocumentRequest_t &request,
|
---|
597 | DocumentResponse_t &response,
|
---|
598 | comerror_t &err, ostream &logout) {
|
---|
599 |
|
---|
600 | sourcelistclass::iterator source_here = sources.begin();
|
---|
601 | sourcelistclass::iterator source_end = sources.end();
|
---|
602 | while (source_here != source_end) {
|
---|
603 | assert ((*source_here).s != NULL);
|
---|
604 | if (((*source_here).s != NULL) &&
|
---|
605 | ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
|
---|
606 | if (err != noError) return;
|
---|
607 | break;
|
---|
608 | }
|
---|
609 | ++source_here;
|
---|
610 | }
|
---|
611 | }
|
---|
612 |
|
---|
613 | void collectserver::is_searchable (bool &issearchable, comerror_t &err,
|
---|
614 | ostream &logout) {
|
---|
615 |
|
---|
616 | sourcelistclass::iterator source_here = sources.begin();
|
---|
617 | sourcelistclass::iterator source_end = sources.end();
|
---|
618 | while (source_here != source_end) {
|
---|
619 | assert ((*source_here).s != NULL);
|
---|
620 | if (((*source_here).s != NULL) &&
|
---|
621 | ((*source_here).s->is_searchable (issearchable, err, logout))) {
|
---|
622 | if (err != noError) return;
|
---|
623 | break;
|
---|
624 | }
|
---|
625 | ++source_here;
|
---|
626 | }
|
---|
627 | }
|
---|
628 |
|
---|
629 |
|
---|
630 | bool operator==(const collectserverptr &x, const collectserverptr &y) {
|
---|
631 | return (x.c == y.c);
|
---|
632 | }
|
---|
633 |
|
---|
634 | bool operator<(const collectserverptr &x, const collectserverptr &y) {
|
---|
635 | return (x.c < y.c);
|
---|
636 | }
|
---|
637 |
|
---|
638 |
|
---|
639 | // thecollectserver remains the property of the calling code but
|
---|
640 | // should not be deleted until it is removed from this list.
|
---|
641 | void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
|
---|
642 | // can't add a null collection server
|
---|
643 | assert (thecollectserver != NULL);
|
---|
644 | if (thecollectserver == NULL) return;
|
---|
645 |
|
---|
646 | // can't add an collection server with no collection name
|
---|
647 | assert (!(thecollectserver->get_collection_name()).empty());
|
---|
648 | if ((thecollectserver->get_collection_name()).empty()) return;
|
---|
649 |
|
---|
650 | collectserverptr cptr;
|
---|
651 | cptr.c = thecollectserver;
|
---|
652 | collectserverptrs[thecollectserver->get_collection_name()] = cptr;
|
---|
653 | }
|
---|
654 |
|
---|
655 | // getcollectserver will return NULL if the collectserver could not be found
|
---|
656 | collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
|
---|
657 | // can't find a collection with no name
|
---|
658 | if (collection.empty()) return NULL;
|
---|
659 |
|
---|
660 | iterator here = collectserverptrs.find (collection);
|
---|
661 | if (here == collectserverptrs.end()) return NULL;
|
---|
662 |
|
---|
663 | return (*here).second.c;
|
---|
664 | }
|
---|