source: main/tags/2.71/gsdl/src/colservr/collectserver.cpp@ 25382

Last change on this file since 25382 was 12867, checked in by kjdon, 18 years ago

use stemindexes from build.cfg to set colinfo stemIndexes value

  • Property svn:keywords set to Author Date Id Revision
File size: 16.7 KB
Line 
1
2/**********************************************************************
3 *
4 * collectserver.cpp --
5 * Copyright (C) 1999 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "collectserver.h"
28#include "infodbclass.h"
29#include "OIDtools.h"
30#include <assert.h>
31#include "display.h"
32
33
34collectserver::collectserver () {
35 configinfo.collection = "null";
36}
37
38collectserver::~collectserver () {
39
40 // clean up the sources
41 sourcelistclass::iterator source_here = sources.begin();
42 sourcelistclass::iterator source_end = sources.end();
43 while (source_here != source_end) {
44 if ((*source_here).s != NULL)
45 delete (*source_here).s;
46 ++source_here;
47 }
48 sources.clear();
49
50 // clean up the filters
51 filtermapclass::iterator filter_here = filters.begin();
52 filtermapclass::iterator filter_end = filters.end();
53 while (filter_here != filter_end) {
54 if ((*filter_here).second.f != NULL)
55 delete (*filter_here).second.f;
56 ++filter_here;
57 }
58 filters.clear();
59}
60
61// configure should be called for each line in the
62// configuration files to configure the collection server and everything
63// it contains. The configuration should take place just before initialisation.
64void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
65 if (cfgline.size() >= 1) {
66 const text_t &value = cfgline[0];
67 if (key == "gsdlhome") configinfo.gsdlhome = value;
68 else if (key == "gdbmhome") configinfo.gdbmhome = value;
69 else if (key == "collection") {
70 configinfo.collection = value;
71 collectinfo.shortInfo.name = value;
72 } else if (key == "collectdir") configinfo.collectdir = value;
73 else if (key == "host") collectinfo.shortInfo.host = value;
74 else if (key == "port") collectinfo.shortInfo.port = value.getint();
75 else if (key == "public") {
76 if (value == "true") collectinfo.isPublic = true;
77 else collectinfo.isPublic = false;
78 } else if (key == "beta") {
79 if (value == "true") collectinfo.isBeta = true;
80 else collectinfo.isBeta = false;
81 } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
82 else if (key == "supercollectionoptions") {
83 text_tarray::const_iterator begin = cfgline.begin();
84 text_tarray::const_iterator end = cfgline.end();
85 while(begin != end) {
86
87 if (*begin == "uniform_search_results_formatting") {
88 collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
89 }
90 begin++;
91 }
92 }
93 else if (key == "builddate") collectinfo.buildDate = value.getint();
94 else if (key == "languages") collectinfo.languages = cfgline;
95 else if (key == "numdocs") collectinfo.numDocs = value.getint();
96 else if (key == "numsections") collectinfo.numSections = value.getint();
97 else if (key == "numwords") collectinfo.numWords = value.getint();
98 else if (key == "numbytes") collectinfo.numBytes = value.getint();
99 else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
100 else if (key == "collectionmeta") {
101 // genuine collmeta get added as collectionmeta and collection_macros
102 // .collmeta just get added as collection_macros
103 text_t params;
104 if (cfgline.size() == 3) {
105 // get the params for later
106 text_t::const_iterator first=cfgline[1].begin()+1;
107 text_t::const_iterator last=cfgline[1].end()-1;
108 params=substr(first, last);
109 }
110
111 text_t meta_name = cfgline[0];
112 if (*(meta_name.begin())=='.') {
113 // a .xxx collectionmeta. strip off the . and
114 // look it up in the indexmap to get the actual value
115
116 text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
117 text_t new_name;
118 if (indexmap.from2to(name, new_name)) {
119 meta_name = new_name;
120 }
121 } else {
122 // add them to collectionmeta
123 text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
124 if (cfgline.size() == 2) {
125 lang_map[g_EmptyText] = cfgline[1];
126 } else if (cfgline.size() == 3 ) {
127 // get the lang out of params
128 paramhashtype params_hash;
129 splitparams(params, params_hash);
130
131 text_t lang = params_hash["l"];
132 lang_map[lang] = cfgline[2];
133 if (lang_map[g_EmptyText].empty()) {
134 // want the first one as the default if no default specified
135 lang_map[g_EmptyText] = cfgline[2];
136 }
137 }
138 collectinfo.collectionmeta[cfgline[0]] = lang_map;
139
140 }
141
142 // add all collectionmeta to macro list
143 text_tmap params_map = collectinfo.collection_macros[meta_name];
144
145 if (cfgline.size() == 2) {// no params for this macro
146 params_map[g_EmptyText] = cfgline[1];
147 }
148 else if (cfgline.size() == 3) {// has params
149 params_map[params] = cfgline[2];
150 if (params_map[g_EmptyText].empty()) {
151 params_map[g_EmptyText] = cfgline[2];
152 }
153 }
154 collectinfo.collection_macros[meta_name] = params_map;
155 }
156 else if (key == "collectionmacro") {
157 text_t nobrackets;
158 text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
159 // add all to macro list
160 if (cfgline.size() == 2) { // no params for this macro
161 params_map[g_EmptyText] = cfgline[1];
162 }
163 else if (cfgline.size() == 3) {// has params
164 // strip [ ] brackets from params
165 text_t::const_iterator first=cfgline[1].begin()+1;
166 text_t::const_iterator last=cfgline[1].end()-1;
167 nobrackets=substr(first, last);
168 params_map[nobrackets] = cfgline[2];
169 }
170 collectinfo.collection_macros[cfgline[0]] = params_map;
171
172 } else if (key == "format" && cfgline.size() == 2)
173 collectinfo.format[cfgline[0]] = cfgline[1];
174 else if (key == "building" && cfgline.size() == 2)
175 collectinfo.building[cfgline[0]] = cfgline[1];
176 else if (key == "httpdomain") collectinfo.httpdomain = value;
177 else if (key == "httpprefix") collectinfo.httpprefix = value;
178 else if (key == "receptionist") collectinfo.receptionist = value;
179 else if (key == "buildtype") collectinfo.buildType = value;
180 // backwards compatibility - searchytpes is now a format statement
181 else if (key == "searchtype") { // means buildtype is mgpp
182 if (collectinfo.buildType.empty()) {
183 collectinfo.buildType = "mgpp";
184 }
185 joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
186 //collectinfo.searchTypes = cfgline;
187 }
188 else if (key == "separate_cjk") {
189 if (value == "true") collectinfo.isSegmented = true;
190 else collectinfo.isSegmented = false;
191 }
192 // What have we set in our collect.cfg file : document or collection ?
193 else if (key == "authenticate") collectinfo.authenticate = value;
194
195 // What have we set for our group list
196 else if (key == "auth_group") joinchar(cfgline,',',collectinfo.auth_group);
197
198 // store all the mappings for use when collection meta is read later
199 // (build.cfg read before collect.cfg)
200 else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
201 indexmap.importmap (cfgline, true);
202
203 }
204 // In the map the key-value pair contain the same
205 // data i.e key == data, if key is 2 then data is 2
206
207 // What have we set for our public_documents ACL
208 else if (key == "public_documents")
209 {
210 text_tarray::const_iterator begin = cfgline.begin();
211 text_tarray::const_iterator end = cfgline.end();
212 while(begin != end)
213 {
214 // key = data i.e if key is 2 then data is 2
215 // collectinfo.public_documents[*begin] is the key
216 // *begin is the data value
217
218 collectinfo.public_documents[*begin] = *begin;
219 ++begin;
220 }
221 }
222
223 // What have we set for our private_documents ACL
224 else if (key == "private_documents")
225 {
226 text_tarray::const_iterator begin = cfgline.begin();
227 text_tarray::const_iterator end = cfgline.end();
228 while(begin != end)
229 {
230 // key = data i.e if key is 2 then data is 2
231 // collectinfo.public_documents[*begin] is the key
232 // *begin is the data value
233
234 collectinfo.private_documents[*begin] = *begin;
235 ++begin;
236 }
237 }
238 }
239
240 // configure the filters
241 filtermapclass::iterator filter_here = filters.begin();
242 filtermapclass::iterator filter_end = filters.end();
243 while (filter_here != filter_end) {
244 assert ((*filter_here).second.f != NULL);
245 if ((*filter_here).second.f != NULL)
246 (*filter_here).second.f->configure(key, cfgline);
247
248 ++filter_here;
249 }
250
251 // configure the sources
252 sourcelistclass::iterator source_here = sources.begin();
253 sourcelistclass::iterator source_end = sources.end();
254 while (source_here != source_end) {
255 assert ((*source_here).s != NULL);
256 if ((*source_here).s != NULL)
257 (*source_here).s->configure(key, cfgline);
258
259 ++source_here;
260 }
261}
262
263
264void collectserver::configure (const text_t &key, const text_t &value) {
265 text_tarray cfgline;
266 cfgline.push_back (value);
267 configure(key, cfgline);
268}
269
270void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
271 // if we've not been properly configured, then it is a foregone
272 // conclusion that we cannot be active
273 if (this->configinfo.collection == "null")
274 {
275 wasSuccess = false;
276 }
277 // if no build date exists, then the collection was probably not built;
278 // ditto if the number of documents is zero, then something is pretty
279 // wrong
280 else if (this->collectinfo.buildDate == 0 ||
281 this->collectinfo.numDocs == 0)
282 {
283 wasSuccess = false;
284 }
285 // it is probably okay
286 else
287 wasSuccess = true;
288}
289
290
291bool collectserver::init (ostream &logout) {
292 // delete the indexmap
293 indexmap.clear();
294
295 // init the filters
296 filtermapclass::iterator filter_here = filters.begin();
297 filtermapclass::iterator filter_end = filters.end();
298 while (filter_here != filter_end) {
299 assert ((*filter_here).second.f != NULL);
300 if (((*filter_here).second.f != NULL) &&
301 !(*filter_here).second.f->init(logout)) return false;
302
303 ++filter_here;
304 }
305
306 // init the sources
307 sourcelistclass::iterator source_here = sources.begin();
308 sourcelistclass::iterator source_end = sources.end();
309 while (source_here != source_end) {
310 assert ((*source_here).s != NULL);
311 if (((*source_here).s != NULL) &&
312 !(*source_here).s->init(logout)) return false;
313
314 ++source_here;
315 }
316
317 return true;
318}
319
320
321void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
322 comerror_t &err, ostream &/*logout*/) {
323 reponse = collectinfo;
324 err = noError;
325}
326
327void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
328 comerror_t &err, ostream &/*logout*/) {
329 response.clear ();
330
331 // get a list of filter names
332 filtermapclass::iterator filter_here = filters.begin();
333 filtermapclass::iterator filter_end = filters.end();
334 while (filter_here != filter_end) {
335 response.filterNames.insert ((*filter_here).first);
336 ++filter_here;
337 }
338
339 err = noError;
340}
341
342void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
343 InfoFilterOptionsResponse_t &response,
344 comerror_t &err, ostream &logout) {
345 outconvertclass text_t2ascii;
346
347 filterclass *thisfilter = filters.getfilter(request.filterName);
348 if (thisfilter != NULL) {
349 thisfilter->get_filteroptions (response, err, logout);
350 } else {
351 response.clear ();
352 err = protocolError;
353 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
354 << "filter \"" << request.filterName << "\".\n\n";
355 }
356}
357
358void collectserver::filter (FilterRequest_t &request,
359 FilterResponse_t &response,
360 comerror_t &err, ostream &logout) {
361 outconvertclass text_t2ascii;
362
363 // translate any ".fc", ".pr" etc. stuff in the docSet
364 text_t translatedOID;
365 text_tarray translatedOIDs;
366 text_tarray::iterator doc_here = request.docSet.begin();
367 text_tarray::iterator doc_end = request.docSet.end();
368 while (doc_here != doc_end) {
369 if (needs_translating (*doc_here)) {
370 sourcelistclass::iterator source_here = sources.begin();
371 sourcelistclass::iterator source_end = sources.end();
372 while (source_here != source_end) {
373 assert ((*source_here).s != NULL);
374 if (((*source_here).s != NULL) &&
375 ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
376 if (err != noError) return;
377 break;
378 }
379 ++source_here;
380 }
381 translatedOIDs.push_back (translatedOID);
382 } else {
383 translatedOIDs.push_back (*doc_here);
384 }
385 ++doc_here;
386 }
387 request.docSet = translatedOIDs;
388
389 response.clear();
390
391 filterclass *thisfilter = filters.getfilter(request.filterName);
392 if (thisfilter != NULL) {
393 // filter the data
394 thisfilter->filter (request, response, err, logout);
395 if (err != noError) return;
396 // fill in the metadata for each of the OIDs (if it is requested)
397 if (request.filterResultOptions & FRmetadata) {
398 bool processed = false;
399 ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
400 ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
401 while (resultdoc_here != resultdoc_end) {
402 // try each of the sources in turn
403 sourcelistclass::iterator source_here = sources.begin();
404 sourcelistclass::iterator source_end = sources.end();
405 while (source_here != source_end) {
406 assert ((*source_here).s != NULL);
407 if (((*source_here).s != NULL) &&
408 ((*source_here).s->get_metadata(request.requestParams, request.refParams,
409 request.getParents, request.fields,
410 (*resultdoc_here).OID, (*resultdoc_here).metadata,
411 err, logout))) {
412 if (err != noError) return;
413 processed = true;
414 break;
415 }
416 ++source_here;
417 }
418 if (!processed) {
419
420 logout << text_t2ascii << "Protocol Error: nothing processed for "
421 << "filter \"" << request.filterName << "\".\n\n";
422
423 err = protocolError;
424 return;
425 }
426 ++resultdoc_here;
427 }
428 }
429
430 } else {
431 response.clear ();
432 err = protocolError;
433 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
434 << "filter \"" << request.filterName << "\".\n\n";
435 }
436
437 err = noError;
438}
439
440void collectserver::get_document (const DocumentRequest_t &request,
441 DocumentResponse_t &response,
442 comerror_t &err, ostream &logout) {
443
444 sourcelistclass::iterator source_here = sources.begin();
445 sourcelistclass::iterator source_end = sources.end();
446 while (source_here != source_end) {
447 assert ((*source_here).s != NULL);
448 if (((*source_here).s != NULL) &&
449 ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
450 if (err != noError) return;
451 break;
452 }
453 ++source_here;
454 }
455}
456
457void collectserver::is_searchable (bool &issearchable, comerror_t &err,
458 ostream &logout) {
459
460 sourcelistclass::iterator source_here = sources.begin();
461 sourcelistclass::iterator source_end = sources.end();
462 while (source_here != source_end) {
463 assert ((*source_here).s != NULL);
464 if (((*source_here).s != NULL) &&
465 ((*source_here).s->is_searchable (issearchable, err, logout))) {
466 if (err != noError) return;
467 break;
468 }
469 ++source_here;
470 }
471}
472
473
474bool operator==(const collectserverptr &x, const collectserverptr &y) {
475 return (x.c == y.c);
476}
477
478bool operator<(const collectserverptr &x, const collectserverptr &y) {
479 return (x.c < y.c);
480}
481
482
483// thecollectserver remains the property of the calling code but
484// should not be deleted until it is removed from this list.
485void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
486 // can't add a null collection server
487 assert (thecollectserver != NULL);
488 if (thecollectserver == NULL) return;
489
490 // can't add an collection server with no collection name
491 assert (!(thecollectserver->get_collection_name()).empty());
492 if ((thecollectserver->get_collection_name()).empty()) return;
493
494 collectserverptr cptr;
495 cptr.c = thecollectserver;
496 collectserverptrs[thecollectserver->get_collection_name()] = cptr;
497}
498
499// getcollectserver will return NULL if the collectserver could not be found
500collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
501 // can't find a collection with no name
502 if (collection.empty()) return NULL;
503
504 iterator here = collectserverptrs.find (collection);
505 if (here == collectserverptrs.end()) return NULL;
506
507 return (*here).second.c;
508}
Note: See TracBrowser for help on using the repository browser.