source: gsdl/trunk/src/colservr/collectserver.cpp@ 16310

Last change on this file since 16310 was 16310, checked in by davidb, 16 years ago

Introduction of 'collecthome' which parallels 'gsdlhome' to allow the toplevel collect folder to be outside of the gsdlhome area

  • Property svn:keywords set to Author Date Id Revision
File size: 17.6 KB
Line 
1
2/**********************************************************************
3 *
4 * collectserver.cpp --
5 * Copyright (C) 1999 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "collectserver.h"
28#include "OIDtools.h"
29#include <assert.h>
30#include "display.h"
31
32
33collectserver::collectserver () {
34 configinfo.collection = "null";
35}
36
37collectserver::~collectserver () {
38
39 // clean up the sources
40 sourcelistclass::iterator source_here = sources.begin();
41 sourcelistclass::iterator source_end = sources.end();
42 while (source_here != source_end) {
43 if ((*source_here).s != NULL)
44 delete (*source_here).s;
45 ++source_here;
46 }
47 sources.clear();
48
49 // clean up the filters
50 filtermapclass::iterator filter_here = filters.begin();
51 filtermapclass::iterator filter_end = filters.end();
52 while (filter_here != filter_end) {
53 if ((*filter_here).second.f != NULL)
54 delete (*filter_here).second.f;
55 ++filter_here;
56 }
57 filters.clear();
58}
59
60// configure should be called for each line in the
61// configuration files to configure the collection server and everything
62// it contains. The configuration should take place just before initialisationhttps://www.google.com/accounts/ServiceLogin?service=mail&passive=true&rm=false&continue=http%3A%2F%2Fmail.google.com%2Fmail%3Fui%3Dhtml%26zy%3Dl&ltmpl=ca_tlsosm_video&ltmplcache=2&hl=en.
63void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
64 if (cfgline.size() >= 1) {
65 const text_t &value = cfgline[0];
66 if (key == "plugin")
67 {
68 //get the plugin name
69 const text_t &name = cfgline[0];
70
71 if (name == "HTMLPlug")
72 {
73 for (int hI = 1; hI < cfgline.size(); hI++)
74 {
75 const text_t &plugOption = cfgline[hI];
76
77 if (plugOption == "-tidy_html")
78 {
79 collectinfo.useBook = true;
80 break;
81 }
82 }
83 }
84 }
85 else if (key == "gsdlhome") configinfo.gsdlhome = value;
86 else if (key == "gdbmhome") configinfo.dbhome = value;
87 else if (key == "collecthome") configinfo.collecthome = value;
88 else if (key == "collection") {
89 configinfo.collection = value;
90 collectinfo.shortInfo.name = value;
91 }
92 else if (key == "collectdir") configinfo.collectdir = value;
93 else if (key == "host") collectinfo.shortInfo.host = value;
94 else if (key == "port") collectinfo.shortInfo.port = value.getint();
95 else if (key == "public") {
96 if (value == "true") collectinfo.isPublic = true;
97 else collectinfo.isPublic = false;
98 } else if (key == "beta") {
99 if (value == "true") collectinfo.isBeta = true;
100 else collectinfo.isBeta = false;
101 } else if (key == "collectgroup") {
102 if (value == "true") collectinfo.isCollectGroup = true;
103 else collectinfo.isCollectGroup = false;
104 } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
105 else if (key == "supercollectionoptions") {
106 text_tarray::const_iterator begin = cfgline.begin();
107 text_tarray::const_iterator end = cfgline.end();
108 while(begin != end) {
109
110 if (*begin == "uniform_search_results_formatting") {
111 collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
112 }
113 begin++;
114 }
115 }
116 else if (key == "builddate") collectinfo.buildDate = value.getint();
117 else if (key == "languages") collectinfo.languages = cfgline;
118 else if (key == "numdocs") collectinfo.numDocs = value.getint();
119 else if (key == "numsections") collectinfo.numSections = value.getint();
120 else if (key == "numwords") collectinfo.numWords = value.getint();
121 else if (key == "numbytes") collectinfo.numBytes = value.getint();
122 else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
123 else if (key == "collectionmeta") {
124 // genuine collmeta get added as collectionmeta and collection_macros
125 // .collmeta just get added as collection_macros
126 text_t params;
127 if (cfgline.size() == 3) {
128 // get the params for later
129 text_t::const_iterator first=cfgline[1].begin()+1;
130 text_t::const_iterator last=cfgline[1].end()-1;
131 params=substr(first, last);
132 }
133
134 text_t meta_name = cfgline[0];
135 if (*(meta_name.begin())=='.') {
136 // a .xxx collectionmeta. strip off the . and
137 // look it up in the indexmap to get the actual value
138
139 text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
140 text_t new_name;
141 if (indexmap.from2to(name, new_name)) {
142 meta_name = new_name;
143 }
144 } else {
145 // add them to collectionmeta
146 text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
147 if (cfgline.size() == 2) {
148 lang_map[g_EmptyText] = cfgline[1];
149 } else if (cfgline.size() == 3 ) {
150 // get the lang out of params
151 paramhashtype params_hash;
152 splitparams(params, params_hash);
153
154 text_t lang = params_hash["l"];
155 lang_map[lang] = cfgline[2];
156 if (lang_map[g_EmptyText].empty()) {
157 // want the first one as the default if no default specified
158 lang_map[g_EmptyText] = cfgline[2];
159 }
160 }
161 collectinfo.collectionmeta[cfgline[0]] = lang_map;
162
163 }
164
165 // add all collectionmeta to macro list
166 text_tmap params_map = collectinfo.collection_macros[meta_name];
167
168 if (cfgline.size() == 2) {// no params for this macrohttp://uk.sports.yahoo.com/13032007/13/news-brief.html
169 params_map[g_EmptyText] = cfgline[1];
170 }
171 else if (cfgline.size() == 3) {// has params
172 params_map[params] = cfgline[2];
173 if (params_map[g_EmptyText].empty()) {
174 params_map[g_EmptyText] = cfgline[2];
175 }
176 }
177 collectinfo.collection_macros[meta_name] = params_map;
178 }
179 else if (key == "collectionmacro") {
180 text_t nobrackets;
181 text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
182 // add all to macro list
183 if (cfgline.size() == 2) { // no params for this macro
184 params_map[g_EmptyText] = cfgline[1];
185 }
186 else if (cfgline.size() == 3) {// has params
187 // strip [ ] brackets from params
188 text_t::const_iterator first=cfgline[1].begin()+1;
189 text_t::const_iterator last=cfgline[1].end()-1;
190 nobrackets=substr(first, last);
191 params_map[nobrackets] = cfgline[2];
192 }
193 collectinfo.collection_macros[cfgline[0]] = params_map;
194
195 } else if (key == "format" && cfgline.size() == 2)
196 collectinfo.format[cfgline[0]] = cfgline[1];
197 else if (key == "building" && cfgline.size() == 2)
198 collectinfo.building[cfgline[0]] = cfgline[1];
199 else if (key == "httpdomain") collectinfo.httpdomain = value;
200 else if (key == "httpprefix") collectinfo.httpprefix = value;
201 else if (key == "receptionist") collectinfo.receptionist = value;
202 else if (key == "buildtype") collectinfo.buildType = value;
203 // backwards compatibility - searchytpes is now a format statement
204 else if (key == "searchtype") { // means buildtype is mgpp
205 if (collectinfo.buildType.empty()) {
206 collectinfo.buildType = "mgpp";
207 }
208 joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
209 //collectinfo.searchTypes = cfgline;
210 }
211 else if (key == "separate_cjk") {
212 if (value == "true") collectinfo.isSegmented = true;
213 else collectinfo.isSegmented = false;
214 }
215 // What have we set in our collect.cfg file : document or collection ?
216 else if (key == "authenticate") collectinfo.authenticate = value;
217
218 // What have we set for our group list
219 else if (key == "auth_group") joinchar(cfgline,',',collectinfo.auth_group);
220
221 // store all the mappings for use when collection meta is read later
222 // (build.cfg read before collect.cfg)
223 else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
224 indexmap.importmap (cfgline, true);
225
226 }
227 // In the map the key-value pair contain the same
228 // data i.e key == data, if key is 2 then data is 2
229
230 // What have we set for our public_documents ACL
231 else if (key == "public_documents")
232 {
233 text_tarray::const_iterator begin = cfgline.begin();
234 text_tarray::const_iterator end = cfgline.end();
235 while(begin != end)
236 {
237 // key = data i.e if key is 2 then data is 2
238 // collectinfo.public_documents[*begin] is the key
239 // *begin is the data value
240
241 collectinfo.public_documents[*begin] = *begin;
242 ++begin;
243 }
244 }
245
246 // What have we set for our private_documents ACL
247 else if (key == "private_documents")
248 {
249 text_tarray::const_iterator begin = cfgline.begin();
250 text_tarray::const_iterator end = cfgline.end();
251 while(begin != end)
252 {
253 // key = data i.e if key is 2 then data is 2
254 // collectinfo.public_documents[*begin] is the key
255 // *begin is the data value
256
257 collectinfo.private_documents[*begin] = *begin;
258 ++begin;
259 }
260 }
261
262 // dynamic_classifier <UniqueID> "<Options>"
263 else if (key == "dynamic_classifier")
264 {
265 collectinfo.dynamic_classifiers[cfgline[0]] = cfgline[1];
266 }
267 }
268
269 // configure the filters
270 filtermapclass::iterator filter_here = filters.begin();
271 filtermapclass::iterator filter_end = filters.end();
272 while (filter_here != filter_end) {
273 assert ((*filter_here).second.f != NULL);
274 if ((*filter_here).second.f != NULL)
275 (*filter_here).second.f->configure(key, cfgline);
276
277 ++filter_here;
278 }
279
280 // configure the sources
281 sourcelistclass::iterator source_here = sources.begin();
282 sourcelistclass::iterator source_end = sources.end();
283 while (source_here != source_end) {
284 assert ((*source_here).s != NULL);
285 if ((*source_here).s != NULL)
286 (*source_here).s->configure(key, cfgline);
287
288 ++source_here;
289 }
290}
291
292
293void collectserver::configure (const text_t &key, const text_t &value) {
294 text_tarray cfgline;
295 cfgline.push_back (value);
296 configure(key, cfgline);
297}
298
299void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
300 // if we've not been properly configured, then it is a foregone
301 // conclusion that we cannot be active
302 if (this->configinfo.collection == "null")
303 {
304 wasSuccess = false;
305 }
306 // if no build date exists, then the collection was probably not built;
307 // ditto if the number of documents is zero, then something is pretty
308 // wrong
309 else if (this->collectinfo.buildDate == 0 ||
310 this->collectinfo.numDocs == 0)
311 {
312 wasSuccess = false;
313 }
314 // it is probably okay
315 else
316 wasSuccess = true;
317}
318
319
320bool collectserver::init (ostream &logout) {
321 // delete the indexmap
322 indexmap.clear();
323
324 // init the filters
325 filtermapclass::iterator filter_here = filters.begin();
326 filtermapclass::iterator filter_end = filters.end();
327 while (filter_here != filter_end) {
328 assert ((*filter_here).second.f != NULL);
329 if (((*filter_here).second.f != NULL) &&
330 !(*filter_here).second.f->init(logout)) return false;
331
332 ++filter_here;
333 }
334
335 // init the sources
336 sourcelistclass::iterator source_here = sources.begin();
337 sourcelistclass::iterator source_end = sources.end();
338 while (source_here != source_end) {
339 assert ((*source_here).s != NULL);
340 if (((*source_here).s != NULL) &&
341 !(*source_here).s->init(logout)) return false;
342
343 ++source_here;
344 }
345
346 return true;
347}
348
349
350void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
351 comerror_t &err, ostream &/*logout*/) {
352 reponse = collectinfo;
353 err = noError;
354}
355
356void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
357 comerror_t &err, ostream &/*logout*/) {
358 response.clear ();
359
360 // get a list of filter names
361 filtermapclass::iterator filter_here = filters.begin();
362 filtermapclass::iterator filter_end = filters.end();
363 while (filter_here != filter_end) {
364 response.filterNames.insert ((*filter_here).first);
365 ++filter_here;
366 }
367
368 err = noError;
369}
370
371void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
372 InfoFilterOptionsResponse_t &response,
373 comerror_t &err, ostream &logout) {
374 outconvertclass text_t2ascii;
375
376 filterclass *thisfilter = filters.getfilter(request.filterName);
377 if (thisfilter != NULL) {
378 thisfilter->get_filteroptions (response, err, logout);
379 } else {
380 response.clear ();
381 err = protocolError;
382 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
383 << "filter \"" << request.filterName << "\".\n\n";
384 }
385}
386
387void collectserver::filter (FilterRequest_t &request,
388 FilterResponse_t &response,
389 comerror_t &err, ostream &logout) {
390 outconvertclass text_t2ascii;
391
392 // translate any ".fc", ".pr" etc. stuff in the docSet
393 text_t translatedOID;
394 text_tarray translatedOIDs;
395 text_tarray::iterator doc_here = request.docSet.begin();
396 text_tarray::iterator doc_end = request.docSet.end();
397 while (doc_here != doc_end) {
398 if (needs_translating (*doc_here)) {
399 sourcelistclass::iterator source_here = sources.begin();
400 sourcelistclass::iterator source_end = sources.end();
401 while (source_here != source_end) {
402 assert ((*source_here).s != NULL);
403 if (((*source_here).s != NULL) &&
404 ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
405 if (err != noError) return;
406 break;
407 }
408 ++source_here;
409 }
410 translatedOIDs.push_back (translatedOID);
411 } else {
412 translatedOIDs.push_back (*doc_here);
413 }
414 ++doc_here;
415 }
416 request.docSet = translatedOIDs;
417
418 response.clear();
419
420 filterclass *thisfilter = filters.getfilter(request.filterName);
421 if (thisfilter != NULL) {
422 // filter the data
423 thisfilter->filter (request, response, err, logout);
424 if (err != noError) return;
425 // fill in the metadata for each of the OIDs (if it is requested)
426 if (request.filterResultOptions & FRmetadata) {
427 bool processed = false;
428 ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
429 ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
430 while (resultdoc_here != resultdoc_end) {
431 // try each of the sources in turn
432 sourcelistclass::iterator source_here = sources.begin();
433 sourcelistclass::iterator source_end = sources.end();
434 while (source_here != source_end) {
435 assert ((*source_here).s != NULL);
436 if (((*source_here).s != NULL) &&
437 ((*source_here).s->get_metadata(request.requestParams, request.refParams,
438 request.getParents, request.fields,
439 (*resultdoc_here).OID, (*resultdoc_here).metadata,
440 err, logout))) {
441 if (err != noError) return;
442 processed = true;
443 break;
444 }
445 ++source_here;
446 }
447 if (!processed) {
448
449 logout << text_t2ascii << "Protocol Error: nothing processed for "
450 << "filter \"" << request.filterName << "\".\n\n";
451
452 err = protocolError;
453 return;
454 }
455 ++resultdoc_here;
456 }
457 }
458
459 err = noError;
460 }
461 else
462 {
463 response.clear ();
464 err = protocolError;
465 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
466 << "filter \"" << request.filterName << "\".\n\n";
467 }
468}
469
470void collectserver::get_document (const DocumentRequest_t &request,
471 DocumentResponse_t &response,
472 comerror_t &err, ostream &logout) {
473
474 sourcelistclass::iterator source_here = sources.begin();
475 sourcelistclass::iterator source_end = sources.end();
476 while (source_here != source_end) {
477 assert ((*source_here).s != NULL);
478 if (((*source_here).s != NULL) &&
479 ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
480 if (err != noError) return;
481 break;
482 }
483 ++source_here;
484 }
485}
486
487void collectserver::is_searchable (bool &issearchable, comerror_t &err,
488 ostream &logout) {
489
490 sourcelistclass::iterator source_here = sources.begin();
491 sourcelistclass::iterator source_end = sources.end();
492 while (source_here != source_end) {
493 assert ((*source_here).s != NULL);
494 if (((*source_here).s != NULL) &&
495 ((*source_here).s->is_searchable (issearchable, err, logout))) {
496 if (err != noError) return;
497 break;
498 }
499 ++source_here;
500 }
501}
502
503
504bool operator==(const collectserverptr &x, const collectserverptr &y) {
505 return (x.c == y.c);
506}
507
508bool operator<(const collectserverptr &x, const collectserverptr &y) {
509 return (x.c < y.c);
510}
511
512
513// thecollectserver remains the property of the calling code but
514// should not be deleted until it is removed from this list.
515void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
516 // can't add a null collection server
517 assert (thecollectserver != NULL);
518 if (thecollectserver == NULL) return;
519
520 // can't add an collection server with no collection name
521 assert (!(thecollectserver->get_collection_name()).empty());
522 if ((thecollectserver->get_collection_name()).empty()) return;
523
524 collectserverptr cptr;
525 cptr.c = thecollectserver;
526 collectserverptrs[thecollectserver->get_collection_name()] = cptr;
527}
528
529// getcollectserver will return NULL if the collectserver could not be found
530collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
531 // can't find a collection with no name
532 if (collection.empty()) return NULL;
533
534 iterator here = collectserverptrs.find (collection);
535 if (here == collectserverptrs.end()) return NULL;
536
537 return (*here).second.c;
538}
Note: See TracBrowser for help on using the repository browser.