source: gsdl/tags/gsdl-2_75-distribution/src/colservr/collectserver.cpp@ 18481

Last change on this file since 18481 was 13982, checked in by lh92, 17 years ago

Added UseBook variable for Realistic Book

  • Property svn:keywords set to Author Date Id Revision
File size: 17.3 KB
Line 
1
2/**********************************************************************
3 *
4 * collectserver.cpp --
5 * Copyright (C) 1999 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "collectserver.h"
28#include "infodbclass.h"
29#include "OIDtools.h"
30#include <assert.h>
31#include "display.h"
32
33
34collectserver::collectserver () {
35 configinfo.collection = "null";
36}
37
38collectserver::~collectserver () {
39
40 // clean up the sources
41 sourcelistclass::iterator source_here = sources.begin();
42 sourcelistclass::iterator source_end = sources.end();
43 while (source_here != source_end) {
44 if ((*source_here).s != NULL)
45 delete (*source_here).s;
46 ++source_here;
47 }
48 sources.clear();
49
50 // clean up the filters
51 filtermapclass::iterator filter_here = filters.begin();
52 filtermapclass::iterator filter_end = filters.end();
53 while (filter_here != filter_end) {
54 if ((*filter_here).second.f != NULL)
55 delete (*filter_here).second.f;
56 ++filter_here;
57 }
58 filters.clear();
59}
60
61// configure should be called for each line in the
62// configuration files to configure the collection server and everything
63// it contains. The configuration should take place just before initialisationhttps://www.google.com/accounts/ServiceLogin?service=mail&passive=true&rm=false&continue=http%3A%2F%2Fmail.google.com%2Fmail%3Fui%3Dhtml%26zy%3Dl&ltmpl=ca_tlsosm_video&ltmplcache=2&hl=en.
64void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
65 if (cfgline.size() >= 1) {
66 const text_t &value = cfgline[0];
67 if (key == "plugin")
68 {
69 //get the plugin name
70 const text_t &name = cfgline[0];
71
72 if (name == "HTMLPlug")
73 {
74 for (int hI = 1; hI < cfgline.size(); hI++)
75 {
76 const text_t &plugOption = cfgline[hI];
77
78 if (plugOption == "-tidy_html")
79 {
80 collectinfo.useBook = true;
81 break;
82 }
83 }
84 }
85 }
86 else if (key == "gsdlhome") configinfo.gsdlhome = value;
87 else if (key == "gdbmhome") configinfo.gdbmhome = value;
88 else if (key == "collection") {
89 configinfo.collection = value;
90 collectinfo.shortInfo.name = value;
91 } else if (key == "collectdir") configinfo.collectdir = value;
92 else if (key == "host") collectinfo.shortInfo.host = value;
93 else if (key == "port") collectinfo.shortInfo.port = value.getint();
94 else if (key == "public") {
95 if (value == "true") collectinfo.isPublic = true;
96 else collectinfo.isPublic = false;
97 } else if (key == "beta") {
98 if (value == "true") collectinfo.isBeta = true;
99 else collectinfo.isBeta = false;
100 } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
101 else if (key == "supercollectionoptions") {
102 text_tarray::const_iterator begin = cfgline.begin();
103 text_tarray::const_iterator end = cfgline.end();
104 while(begin != end) {
105
106 if (*begin == "uniform_search_results_formatting") {
107 collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
108 }
109 begin++;
110 }
111 }
112 else if (key == "builddate") collectinfo.buildDate = value.getint();
113 else if (key == "languages") collectinfo.languages = cfgline;
114 else if (key == "numdocs") collectinfo.numDocs = value.getint();
115 else if (key == "numsections") collectinfo.numSections = value.getint();
116 else if (key == "numwords") collectinfo.numWords = value.getint();
117 else if (key == "numbytes") collectinfo.numBytes = value.getint();
118 else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
119 else if (key == "collectionmeta") {
120 // genuine collmeta get added as collectionmeta and collection_macros
121 // .collmeta just get added as collection_macros
122 text_t params;
123 if (cfgline.size() == 3) {
124 // get the params for later
125 text_t::const_iterator first=cfgline[1].begin()+1;
126 text_t::const_iterator last=cfgline[1].end()-1;
127 params=substr(first, last);
128 }
129
130 text_t meta_name = cfgline[0];
131 if (*(meta_name.begin())=='.') {
132 // a .xxx collectionmeta. strip off the . and
133 // look it up in the indexmap to get the actual value
134
135 text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
136 text_t new_name;
137 if (indexmap.from2to(name, new_name)) {
138 meta_name = new_name;
139 }
140 } else {
141 // add them to collectionmeta
142 text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
143 if (cfgline.size() == 2) {
144 lang_map[g_EmptyText] = cfgline[1];
145 } else if (cfgline.size() == 3 ) {
146 // get the lang out of params
147 paramhashtype params_hash;
148 splitparams(params, params_hash);
149
150 text_t lang = params_hash["l"];
151 lang_map[lang] = cfgline[2];
152 if (lang_map[g_EmptyText].empty()) {
153 // want the first one as the default if no default specified
154 lang_map[g_EmptyText] = cfgline[2];
155 }
156 }
157 collectinfo.collectionmeta[cfgline[0]] = lang_map;
158
159 }
160
161 // add all collectionmeta to macro list
162 text_tmap params_map = collectinfo.collection_macros[meta_name];
163
164 if (cfgline.size() == 2) {// no params for this macrohttp://uk.sports.yahoo.com/13032007/13/news-brief.html
165 params_map[g_EmptyText] = cfgline[1];
166 }
167 else if (cfgline.size() == 3) {// has params
168 params_map[params] = cfgline[2];
169 if (params_map[g_EmptyText].empty()) {
170 params_map[g_EmptyText] = cfgline[2];
171 }
172 }
173 collectinfo.collection_macros[meta_name] = params_map;
174 }
175 else if (key == "collectionmacro") {
176 text_t nobrackets;
177 text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
178 // add all to macro list
179 if (cfgline.size() == 2) { // no params for this macro
180 params_map[g_EmptyText] = cfgline[1];
181 }
182 else if (cfgline.size() == 3) {// has params
183 // strip [ ] brackets from params
184 text_t::const_iterator first=cfgline[1].begin()+1;
185 text_t::const_iterator last=cfgline[1].end()-1;
186 nobrackets=substr(first, last);
187 params_map[nobrackets] = cfgline[2];
188 }
189 collectinfo.collection_macros[cfgline[0]] = params_map;
190
191 } else if (key == "format" && cfgline.size() == 2)
192 collectinfo.format[cfgline[0]] = cfgline[1];
193 else if (key == "building" && cfgline.size() == 2)
194 collectinfo.building[cfgline[0]] = cfgline[1];
195 else if (key == "httpdomain") collectinfo.httpdomain = value;
196 else if (key == "httpprefix") collectinfo.httpprefix = value;
197 else if (key == "receptionist") collectinfo.receptionist = value;
198 else if (key == "buildtype") collectinfo.buildType = value;
199 // backwards compatibility - searchytpes is now a format statement
200 else if (key == "searchtype") { // means buildtype is mgpp
201 if (collectinfo.buildType.empty()) {
202 collectinfo.buildType = "mgpp";
203 }
204 joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
205 //collectinfo.searchTypes = cfgline;
206 }
207 else if (key == "separate_cjk") {
208 if (value == "true") collectinfo.isSegmented = true;
209 else collectinfo.isSegmented = false;
210 }
211 // What have we set in our collect.cfg file : document or collection ?
212 else if (key == "authenticate") collectinfo.authenticate = value;
213
214 // What have we set for our group list
215 else if (key == "auth_group") joinchar(cfgline,',',collectinfo.auth_group);
216
217 // store all the mappings for use when collection meta is read later
218 // (build.cfg read before collect.cfg)
219 else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
220 indexmap.importmap (cfgline, true);
221
222 }
223 // In the map the key-value pair contain the same
224 // data i.e key == data, if key is 2 then data is 2
225
226 // What have we set for our public_documents ACL
227 else if (key == "public_documents")
228 {
229 text_tarray::const_iterator begin = cfgline.begin();
230 text_tarray::const_iterator end = cfgline.end();
231 while(begin != end)
232 {
233 // key = data i.e if key is 2 then data is 2
234 // collectinfo.public_documents[*begin] is the key
235 // *begin is the data value
236
237 collectinfo.public_documents[*begin] = *begin;
238 ++begin;
239 }
240 }
241
242 // What have we set for our private_documents ACL
243 else if (key == "private_documents")
244 {
245 text_tarray::const_iterator begin = cfgline.begin();
246 text_tarray::const_iterator end = cfgline.end();
247 while(begin != end)
248 {
249 // key = data i.e if key is 2 then data is 2
250 // collectinfo.public_documents[*begin] is the key
251 // *begin is the data value
252
253 collectinfo.private_documents[*begin] = *begin;
254 ++begin;
255 }
256 }
257 }
258
259 // configure the filters
260 filtermapclass::iterator filter_here = filters.begin();
261 filtermapclass::iterator filter_end = filters.end();
262 while (filter_here != filter_end) {
263 assert ((*filter_here).second.f != NULL);
264 if ((*filter_here).second.f != NULL)
265 (*filter_here).second.f->configure(key, cfgline);
266
267 ++filter_here;
268 }
269
270 // configure the sources
271 sourcelistclass::iterator source_here = sources.begin();
272 sourcelistclass::iterator source_end = sources.end();
273 while (source_here != source_end) {
274 assert ((*source_here).s != NULL);
275 if ((*source_here).s != NULL)
276 (*source_here).s->configure(key, cfgline);
277
278 ++source_here;
279 }
280}
281
282
283void collectserver::configure (const text_t &key, const text_t &value) {
284 text_tarray cfgline;
285 cfgline.push_back (value);
286 configure(key, cfgline);
287}
288
289void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
290 // if we've not been properly configured, then it is a foregone
291 // conclusion that we cannot be active
292 if (this->configinfo.collection == "null")
293 {
294 wasSuccess = false;
295 }
296 // if no build date exists, then the collection was probably not built;
297 // ditto if the number of documents is zero, then something is pretty
298 // wrong
299 else if (this->collectinfo.buildDate == 0 ||
300 this->collectinfo.numDocs == 0)
301 {
302 wasSuccess = false;
303 }
304 // it is probably okay
305 else
306 wasSuccess = true;
307}
308
309
310bool collectserver::init (ostream &logout) {
311 // delete the indexmap
312 indexmap.clear();
313
314 // init the filters
315 filtermapclass::iterator filter_here = filters.begin();
316 filtermapclass::iterator filter_end = filters.end();
317 while (filter_here != filter_end) {
318 assert ((*filter_here).second.f != NULL);
319 if (((*filter_here).second.f != NULL) &&
320 !(*filter_here).second.f->init(logout)) return false;
321
322 ++filter_here;
323 }
324
325 // init the sources
326 sourcelistclass::iterator source_here = sources.begin();
327 sourcelistclass::iterator source_end = sources.end();
328 while (source_here != source_end) {
329 assert ((*source_here).s != NULL);
330 if (((*source_here).s != NULL) &&
331 !(*source_here).s->init(logout)) return false;
332
333 ++source_here;
334 }
335
336 return true;
337}
338
339
340void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
341 comerror_t &err, ostream &/*logout*/) {
342 reponse = collectinfo;
343 err = noError;
344}
345
346void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
347 comerror_t &err, ostream &/*logout*/) {
348 response.clear ();
349
350 // get a list of filter names
351 filtermapclass::iterator filter_here = filters.begin();
352 filtermapclass::iterator filter_end = filters.end();
353 while (filter_here != filter_end) {
354 response.filterNames.insert ((*filter_here).first);
355 ++filter_here;
356 }
357
358 err = noError;
359}
360
361void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
362 InfoFilterOptionsResponse_t &response,
363 comerror_t &err, ostream &logout) {
364 outconvertclass text_t2ascii;
365
366 filterclass *thisfilter = filters.getfilter(request.filterName);
367 if (thisfilter != NULL) {
368 thisfilter->get_filteroptions (response, err, logout);
369 } else {
370 response.clear ();
371 err = protocolError;
372 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
373 << "filter \"" << request.filterName << "\".\n\n";
374 }
375}
376
377void collectserver::filter (FilterRequest_t &request,
378 FilterResponse_t &response,
379 comerror_t &err, ostream &logout) {
380 outconvertclass text_t2ascii;
381
382 // translate any ".fc", ".pr" etc. stuff in the docSet
383 text_t translatedOID;
384 text_tarray translatedOIDs;
385 text_tarray::iterator doc_here = request.docSet.begin();
386 text_tarray::iterator doc_end = request.docSet.end();
387 while (doc_here != doc_end) {
388 if (needs_translating (*doc_here)) {
389 sourcelistclass::iterator source_here = sources.begin();
390 sourcelistclass::iterator source_end = sources.end();
391 while (source_here != source_end) {
392 assert ((*source_here).s != NULL);
393 if (((*source_here).s != NULL) &&
394 ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
395 if (err != noError) return;
396 break;
397 }
398 ++source_here;
399 }
400 translatedOIDs.push_back (translatedOID);
401 } else {
402 translatedOIDs.push_back (*doc_here);
403 }
404 ++doc_here;
405 }
406 request.docSet = translatedOIDs;
407
408 response.clear();
409
410 filterclass *thisfilter = filters.getfilter(request.filterName);
411 if (thisfilter != NULL) {
412 // filter the data
413 thisfilter->filter (request, response, err, logout);
414 if (err != noError) return;
415 // fill in the metadata for each of the OIDs (if it is requested)
416 if (request.filterResultOptions & FRmetadata) {
417 bool processed = false;
418 ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
419 ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
420 while (resultdoc_here != resultdoc_end) {
421 // try each of the sources in turn
422 sourcelistclass::iterator source_here = sources.begin();
423 sourcelistclass::iterator source_end = sources.end();
424 while (source_here != source_end) {
425 assert ((*source_here).s != NULL);
426 if (((*source_here).s != NULL) &&
427 ((*source_here).s->get_metadata(request.requestParams, request.refParams,
428 request.getParents, request.fields,
429 (*resultdoc_here).OID, (*resultdoc_here).metadata,
430 err, logout))) {
431 if (err != noError) return;
432 processed = true;
433 break;
434 }
435 ++source_here;
436 }
437 if (!processed) {
438
439 logout << text_t2ascii << "Protocol Error: nothing processed for "
440 << "filter \"" << request.filterName << "\".\n\n";
441
442 err = protocolError;
443 return;
444 }
445 ++resultdoc_here;
446 }
447 }
448
449 } else {
450 response.clear ();
451 err = protocolError;
452 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
453 << "filter \"" << request.filterName << "\".\n\n";
454 }
455
456 err = noError;
457}
458
459void collectserver::get_document (const DocumentRequest_t &request,
460 DocumentResponse_t &response,
461 comerror_t &err, ostream &logout) {
462
463 sourcelistclass::iterator source_here = sources.begin();
464 sourcelistclass::iterator source_end = sources.end();
465 while (source_here != source_end) {
466 assert ((*source_here).s != NULL);
467 if (((*source_here).s != NULL) &&
468 ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
469 if (err != noError) return;
470 break;
471 }
472 ++source_here;
473 }
474}
475
476void collectserver::is_searchable (bool &issearchable, comerror_t &err,
477 ostream &logout) {
478
479 sourcelistclass::iterator source_here = sources.begin();
480 sourcelistclass::iterator source_end = sources.end();
481 while (source_here != source_end) {
482 assert ((*source_here).s != NULL);
483 if (((*source_here).s != NULL) &&
484 ((*source_here).s->is_searchable (issearchable, err, logout))) {
485 if (err != noError) return;
486 break;
487 }
488 ++source_here;
489 }
490}
491
492
493bool operator==(const collectserverptr &x, const collectserverptr &y) {
494 return (x.c == y.c);
495}
496
497bool operator<(const collectserverptr &x, const collectserverptr &y) {
498 return (x.c < y.c);
499}
500
501
502// thecollectserver remains the property of the calling code but
503// should not be deleted until it is removed from this list.
504void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
505 // can't add a null collection server
506 assert (thecollectserver != NULL);
507 if (thecollectserver == NULL) return;
508
509 // can't add an collection server with no collection name
510 assert (!(thecollectserver->get_collection_name()).empty());
511 if ((thecollectserver->get_collection_name()).empty()) return;
512
513 collectserverptr cptr;
514 cptr.c = thecollectserver;
515 collectserverptrs[thecollectserver->get_collection_name()] = cptr;
516}
517
518// getcollectserver will return NULL if the collectserver could not be found
519collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
520 // can't find a collection with no name
521 if (collection.empty()) return NULL;
522
523 iterator here = collectserverptrs.find (collection);
524 if (here == collectserverptrs.end()) return NULL;
525
526 return (*here).second.c;
527}
Note: See TracBrowser for help on using the repository browser.