source: gsdl/trunk/runtime-src/src/colservr/collectserver.cpp@ 17863

Last change on this file since 17863 was 17863, checked in by anna, 15 years ago

change the name tidy_html switch in HTMLPlug into use_realistic_book in HTMLPlugin

  • Property svn:keywords set to Author Date Id Revision
File size: 17.4 KB
Line 
1
2/**********************************************************************
3 *
4 * collectserver.cpp --
5 * Copyright (C) 1999 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "collectserver.h"
28#include "OIDtools.h"
29#include <assert.h>
30#include "display.h"
31
32
33collectserver::collectserver () {
34 configinfo.collection = "null";
35}
36
37collectserver::~collectserver () {
38
39 // clean up the sources
40 sourcelistclass::iterator source_here = sources.begin();
41 sourcelistclass::iterator source_end = sources.end();
42 while (source_here != source_end) {
43 if ((*source_here).s != NULL)
44 delete (*source_here).s;
45 ++source_here;
46 }
47 sources.clear();
48
49 // clean up the filters
50 filtermapclass::iterator filter_here = filters.begin();
51 filtermapclass::iterator filter_end = filters.end();
52 while (filter_here != filter_end) {
53 if ((*filter_here).second.f != NULL)
54 delete (*filter_here).second.f;
55 ++filter_here;
56 }
57 filters.clear();
58}
59
60// configure should be called for each line in the
61// configuration files to configure the collection server and everything
62// it contains. The configuration should take place just before initialisation
63void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
64 if (cfgline.size() >= 1) {
65 const text_t &value = cfgline[0];
66 if (key == "plugin")
67 {
68 //get the plugin name
69 const text_t &name = cfgline[0];
70
71 if (name == "HTMLPlugin")
72 {
73 for (int hI = 1; hI < cfgline.size(); hI++)
74 {
75 const text_t &plugOption = cfgline[hI];
76
77 if (plugOption == "-use_realistic_book")
78 {
79 collectinfo.useBook = true;
80 break;
81 }
82 }
83 }
84 }
85 else if (key == "gsdlhome") configinfo.gsdlhome = value;
86 else if (key == "gdbmhome") configinfo.dbhome = value;
87 else if (key == "collecthome") configinfo.collecthome = value;
88 else if (key == "collection") {
89 configinfo.collection = value;
90 collectinfo.shortInfo.name = value;
91 }
92 else if (key == "collectdir") configinfo.collectdir = value;
93 else if (key == "host") collectinfo.shortInfo.host = value;
94 else if (key == "port") collectinfo.shortInfo.port = value.getint();
95 else if (key == "public") {
96 if (value == "true") collectinfo.isPublic = true;
97 else collectinfo.isPublic = false;
98 } else if (key == "beta") {
99 if (value == "true") collectinfo.isBeta = true;
100 else collectinfo.isBeta = false;
101 } else if (key == "collectgroup") {
102 if (value == "true") collectinfo.isCollectGroup = true;
103 else collectinfo.isCollectGroup = false;
104 } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
105 else if (key == "supercollectionoptions") {
106 text_tarray::const_iterator begin = cfgline.begin();
107 text_tarray::const_iterator end = cfgline.end();
108 while(begin != end) {
109
110 if (*begin == "uniform_search_results_formatting") {
111 collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
112 }
113 begin++;
114 }
115 }
116 else if (key == "builddate") collectinfo.buildDate = value.getint();
117 else if (key == "languages") collectinfo.languages = cfgline;
118 else if (key == "numdocs") collectinfo.numDocs = value.getint();
119 else if (key == "numsections") collectinfo.numSections = value.getint();
120 else if (key == "numwords") collectinfo.numWords = value.getint();
121 else if (key == "numbytes") collectinfo.numBytes = value.getint();
122 else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
123 else if (key == "collectionmeta") {
124 // genuine collmeta get added as collectionmeta and collection_macros
125 // .collmeta just get added as collection_macros
126 text_t params;
127 if (cfgline.size() == 3) {
128 // get the params for later
129 text_t::const_iterator first=cfgline[1].begin()+1;
130 text_t::const_iterator last=cfgline[1].end()-1;
131 params=substr(first, last);
132 }
133
134 text_t meta_name = cfgline[0];
135 if (*(meta_name.begin())=='.') {
136 // a .xxx collectionmeta. strip off the . and
137 // look it up in the indexmap to get the actual value
138
139 text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
140 text_t new_name;
141 if (indexmap.from2to(name, new_name)) {
142 meta_name = new_name;
143 }
144 } else {
145 // add them to collectionmeta
146 text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
147 if (cfgline.size() == 2) {
148 lang_map[g_EmptyText] = cfgline[1];
149 } else if (cfgline.size() == 3 ) {
150 // get the lang out of params
151 paramhashtype params_hash;
152 splitparams(params, params_hash);
153
154 text_t lang = params_hash["l"];
155 lang_map[lang] = cfgline[2];
156 if (lang_map[g_EmptyText].empty()) {
157 // want the first one as the default if no default specified
158 lang_map[g_EmptyText] = cfgline[2];
159 }
160 }
161 collectinfo.collectionmeta[cfgline[0]] = lang_map;
162
163 }
164
165 // add all collectionmeta to macro list
166 text_tmap params_map = collectinfo.collection_macros[meta_name];
167
168 if (cfgline.size() == 2) {// no params for this macro
169 params_map[g_EmptyText] = cfgline[1];
170 }
171 else if (cfgline.size() == 3) {// has params
172 params_map[params] = cfgline[2];
173 if (params_map[g_EmptyText].empty()) {
174 params_map[g_EmptyText] = cfgline[2];
175 }
176 }
177 collectinfo.collection_macros[meta_name] = params_map;
178 }
179 else if (key == "collectionmacro") {
180 text_t nobrackets;
181 text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
182 // add all to macro list
183 if (cfgline.size() == 2) { // no params for this macro
184 params_map[g_EmptyText] = cfgline[1];
185 }
186 else if (cfgline.size() == 3) {// has params
187 // strip [ ] brackets from params
188 text_t::const_iterator first=cfgline[1].begin()+1;
189 text_t::const_iterator last=cfgline[1].end()-1;
190 nobrackets=substr(first, last);
191 params_map[nobrackets] = cfgline[2];
192 }
193 collectinfo.collection_macros[cfgline[0]] = params_map;
194
195 } else if (key == "format" && cfgline.size() == 2)
196 collectinfo.format[cfgline[0]] = cfgline[1];
197 else if (key == "building" && cfgline.size() == 2)
198 collectinfo.building[cfgline[0]] = cfgline[1];
199 else if (key == "httpdomain") collectinfo.httpdomain = value;
200 else if (key == "httpprefix") collectinfo.httpprefix = value;
201 else if (key == "receptionist") collectinfo.receptionist = value;
202 else if (key == "buildtype") collectinfo.buildType = value;
203 // backwards compatibility - searchytpes is now a format statement
204 else if (key == "searchtype") { // means buildtype is mgpp
205 if (collectinfo.buildType.empty()) {
206 collectinfo.buildType = "mgpp";
207 }
208 joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
209 //collectinfo.searchTypes = cfgline;
210 }
211 else if (key == "separate_cjk") {
212 if (value == "true") collectinfo.isSegmented = true;
213 else collectinfo.isSegmented = false;
214 }
215 // What have we set in our collect.cfg file : document or collection ?
216 else if (key == "authenticate") collectinfo.authenticate = value;
217
218 // What have we set for our group list
219 else if (key == "auth_group") joinchar(cfgline,',',collectinfo.auth_group);
220
221 // store all the mappings for use when collection meta is read later
222 // (build.cfg read before collect.cfg)
223 else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
224 indexmap.importmap (cfgline, true);
225
226 }
227 // In the map the key-value pair contain the same
228 // data i.e key == data, if key is 2 then data is 2
229
230 // What have we set for our public_documents ACL
231 else if (key == "public_documents")
232 {
233 text_tarray::const_iterator begin = cfgline.begin();
234 text_tarray::const_iterator end = cfgline.end();
235 while(begin != end)
236 {
237 // key = data i.e if key is 2 then data is 2
238 // collectinfo.public_documents[*begin] is the key
239 // *begin is the data value
240
241 collectinfo.public_documents[*begin] = *begin;
242 ++begin;
243 }
244 }
245
246 // What have we set for our private_documents ACL
247 else if (key == "private_documents")
248 {
249 text_tarray::const_iterator begin = cfgline.begin();
250 text_tarray::const_iterator end = cfgline.end();
251 while(begin != end)
252 {
253 // key = data i.e if key is 2 then data is 2
254 // collectinfo.public_documents[*begin] is the key
255 // *begin is the data value
256
257 collectinfo.private_documents[*begin] = *begin;
258 ++begin;
259 }
260 }
261
262 // dynamic_classifier <UniqueID> "<Options>"
263 else if (key == "dynamic_classifier")
264 {
265 collectinfo.dynamic_classifiers[cfgline[0]] = cfgline[1];
266 }
267 }
268
269 // configure the filters
270 filtermapclass::iterator filter_here = filters.begin();
271 filtermapclass::iterator filter_end = filters.end();
272 while (filter_here != filter_end) {
273 assert ((*filter_here).second.f != NULL);
274 if ((*filter_here).second.f != NULL)
275 (*filter_here).second.f->configure(key, cfgline);
276
277 ++filter_here;
278 }
279
280 // configure the sources
281 sourcelistclass::iterator source_here = sources.begin();
282 sourcelistclass::iterator source_end = sources.end();
283 while (source_here != source_end) {
284 assert ((*source_here).s != NULL);
285 if ((*source_here).s != NULL)
286 (*source_here).s->configure(key, cfgline);
287
288 ++source_here;
289 }
290}
291
292
293void collectserver::configure (const text_t &key, const text_t &value) {
294 text_tarray cfgline;
295 cfgline.push_back (value);
296 configure(key, cfgline);
297}
298
299void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
300 // if we've not been properly configured, then it is a foregone
301 // conclusion that we cannot be active
302 if (this->configinfo.collection == "null")
303 {
304 wasSuccess = false;
305 }
306 // if no build date exists, then the collection was probably not built;
307 // ditto if the number of documents is zero, then something is pretty
308 // wrong
309 else if (this->collectinfo.buildDate == 0 ||
310 this->collectinfo.numDocs == 0)
311 {
312 wasSuccess = false;
313 }
314 // it is probably okay
315 else
316 wasSuccess = true;
317}
318
319
320bool collectserver::init (ostream &logout) {
321 // delete the indexmap
322 indexmap.clear();
323
324 // init the filters
325 filtermapclass::iterator filter_here = filters.begin();
326 filtermapclass::iterator filter_end = filters.end();
327 while (filter_here != filter_end) {
328 assert ((*filter_here).second.f != NULL);
329 if (((*filter_here).second.f != NULL) &&
330 !(*filter_here).second.f->init(logout)) return false;
331
332 ++filter_here;
333 }
334
335 // init the sources
336 sourcelistclass::iterator source_here = sources.begin();
337 sourcelistclass::iterator source_end = sources.end();
338 while (source_here != source_end) {
339 assert ((*source_here).s != NULL);
340 if (((*source_here).s != NULL) &&
341 !(*source_here).s->init(logout)) return false;
342
343 ++source_here;
344 }
345
346 return true;
347}
348
349
350void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
351 comerror_t &err, ostream &/*logout*/) {
352 reponse = collectinfo;
353 err = noError;
354}
355
356void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
357 comerror_t &err, ostream &/*logout*/) {
358 response.clear ();
359
360 // get a list of filter names
361 filtermapclass::iterator filter_here = filters.begin();
362 filtermapclass::iterator filter_end = filters.end();
363 while (filter_here != filter_end) {
364 response.filterNames.insert ((*filter_here).first);
365 ++filter_here;
366 }
367
368 err = noError;
369}
370
371void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
372 InfoFilterOptionsResponse_t &response,
373 comerror_t &err, ostream &logout) {
374 outconvertclass text_t2ascii;
375
376 filterclass *thisfilter = filters.getfilter(request.filterName);
377 if (thisfilter != NULL) {
378 thisfilter->get_filteroptions (response, err, logout);
379 } else {
380 response.clear ();
381 err = protocolError;
382 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
383 << "filter \"" << request.filterName << "\".\n\n";
384 }
385}
386
387void collectserver::filter (FilterRequest_t &request,
388 FilterResponse_t &response,
389 comerror_t &err, ostream &logout) {
390 outconvertclass text_t2ascii;
391
392 // translate any ".fc", ".pr" etc. stuff in the docSet
393 text_t translatedOID;
394 text_tarray translatedOIDs;
395 text_tarray::iterator doc_here = request.docSet.begin();
396 text_tarray::iterator doc_end = request.docSet.end();
397 while (doc_here != doc_end) {
398 if (needs_translating (*doc_here)) {
399 sourcelistclass::iterator source_here = sources.begin();
400 sourcelistclass::iterator source_end = sources.end();
401 while (source_here != source_end) {
402 assert ((*source_here).s != NULL);
403 if (((*source_here).s != NULL) &&
404 ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
405 if (err != noError) return;
406 break;
407 }
408 ++source_here;
409 }
410 translatedOIDs.push_back (translatedOID);
411 } else {
412 translatedOIDs.push_back (*doc_here);
413 }
414 ++doc_here;
415 }
416 request.docSet = translatedOIDs;
417
418 response.clear();
419
420 filterclass *thisfilter = filters.getfilter(request.filterName);
421 if (thisfilter != NULL) {
422 // filter the data
423 thisfilter->filter (request, response, err, logout);
424 if (err != noError) return;
425 // fill in the metadata for each of the OIDs (if it is requested)
426 if (request.filterResultOptions & FRmetadata) {
427 bool processed = false;
428 ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
429 ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
430 while (resultdoc_here != resultdoc_end) {
431 // try each of the sources in turn
432 sourcelistclass::iterator source_here = sources.begin();
433 sourcelistclass::iterator source_end = sources.end();
434 while (source_here != source_end) {
435 assert ((*source_here).s != NULL);
436 if (((*source_here).s != NULL) &&
437 ((*source_here).s->get_metadata(request.requestParams, request.refParams,
438 request.getParents, request.fields,
439 (*resultdoc_here).OID, (*resultdoc_here).metadata,
440 err, logout))) {
441 if (err != noError) return;
442 processed = true;
443 break;
444 }
445 ++source_here;
446 }
447 if (!processed) {
448
449 logout << text_t2ascii << "Protocol Error: nothing processed for "
450 << "filter \"" << request.filterName << "\".\n\n";
451
452 err = protocolError;
453 return;
454 }
455 ++resultdoc_here;
456 }
457 }
458
459 err = noError;
460 }
461 else
462 {
463 response.clear ();
464 err = protocolError;
465 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
466 << "filter \"" << request.filterName << "\".\n\n";
467 }
468}
469
470void collectserver::get_document (const DocumentRequest_t &request,
471 DocumentResponse_t &response,
472 comerror_t &err, ostream &logout) {
473
474 sourcelistclass::iterator source_here = sources.begin();
475 sourcelistclass::iterator source_end = sources.end();
476 while (source_here != source_end) {
477 assert ((*source_here).s != NULL);
478 if (((*source_here).s != NULL) &&
479 ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
480 if (err != noError) return;
481 break;
482 }
483 ++source_here;
484 }
485}
486
487void collectserver::is_searchable (bool &issearchable, comerror_t &err,
488 ostream &logout) {
489
490 sourcelistclass::iterator source_here = sources.begin();
491 sourcelistclass::iterator source_end = sources.end();
492 while (source_here != source_end) {
493 assert ((*source_here).s != NULL);
494 if (((*source_here).s != NULL) &&
495 ((*source_here).s->is_searchable (issearchable, err, logout))) {
496 if (err != noError) return;
497 break;
498 }
499 ++source_here;
500 }
501}
502
503
504bool operator==(const collectserverptr &x, const collectserverptr &y) {
505 return (x.c == y.c);
506}
507
508bool operator<(const collectserverptr &x, const collectserverptr &y) {
509 return (x.c < y.c);
510}
511
512
513// thecollectserver remains the property of the calling code but
514// should not be deleted until it is removed from this list.
515void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
516 // can't add a null collection server
517 assert (thecollectserver != NULL);
518 if (thecollectserver == NULL) return;
519
520 // can't add an collection server with no collection name
521 assert (!(thecollectserver->get_collection_name()).empty());
522 if ((thecollectserver->get_collection_name()).empty()) return;
523
524 collectserverptr cptr;
525 cptr.c = thecollectserver;
526 collectserverptrs[thecollectserver->get_collection_name()] = cptr;
527}
528
529// getcollectserver will return NULL if the collectserver could not be found
530collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
531 // can't find a collection with no name
532 if (collection.empty()) return NULL;
533
534 iterator here = collectserverptrs.find (collection);
535 if (here == collectserverptrs.end()) return NULL;
536
537 return (*here).second.c;
538}
Note: See TracBrowser for help on using the repository browser.