source: gsdl/trunk/src/colservr/collectserver.cpp@ 15002

Last change on this file since 15002 was 15002, checked in by davidb, 16 years ago

Collections can now be put into "groups", i.e. they can be located inside another collection that is marked as "collectgroup" true. The top-level collection appears on the home page and, when clicked, displays a new "home page" showing only the collections that appear within its folder. To implement this, the main change is in collectset.cpp, which now runs "add_all_collection_groups" after it has added all the top-level collections. comtypes.h has been updated to include the new field, "isCollectGroup". Finally, pageaction.cpp has been updated so that these changes are reflected in the generated home page.

  • Property svn:keywords set to Author Date Id Revision
File size: 17.4 KB
Line 
1
2/**********************************************************************
3 *
4 * collectserver.cpp --
5 * Copyright (C) 1999 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "collectserver.h"
28#include "infodbclass.h"
29#include "OIDtools.h"
30#include <assert.h>
31#include "display.h"
32
33
34collectserver::collectserver () {
35 configinfo.collection = "null";
36}
37
38collectserver::~collectserver () {
39
40 // clean up the sources
41 sourcelistclass::iterator source_here = sources.begin();
42 sourcelistclass::iterator source_end = sources.end();
43 while (source_here != source_end) {
44 if ((*source_here).s != NULL)
45 delete (*source_here).s;
46 ++source_here;
47 }
48 sources.clear();
49
50 // clean up the filters
51 filtermapclass::iterator filter_here = filters.begin();
52 filtermapclass::iterator filter_end = filters.end();
53 while (filter_here != filter_end) {
54 if ((*filter_here).second.f != NULL)
55 delete (*filter_here).second.f;
56 ++filter_here;
57 }
58 filters.clear();
59}
60
61// configure should be called for each line in the
62// configuration files to configure the collection server and everything
63// it contains. The configuration should take place just before initialisationhttps://www.google.com/accounts/ServiceLogin?service=mail&passive=true&rm=false&continue=http%3A%2F%2Fmail.google.com%2Fmail%3Fui%3Dhtml%26zy%3Dl&ltmpl=ca_tlsosm_video&ltmplcache=2&hl=en.
64void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
65 if (cfgline.size() >= 1) {
66 const text_t &value = cfgline[0];
67 if (key == "plugin")
68 {
69 //get the plugin name
70 const text_t &name = cfgline[0];
71
72 if (name == "HTMLPlug")
73 {
74 for (int hI = 1; hI < cfgline.size(); hI++)
75 {
76 const text_t &plugOption = cfgline[hI];
77
78 if (plugOption == "-tidy_html")
79 {
80 collectinfo.useBook = true;
81 break;
82 }
83 }
84 }
85 }
86 else if (key == "gsdlhome") configinfo.gsdlhome = value;
87 else if (key == "gdbmhome") configinfo.gdbmhome = value;
88 else if (key == "collection") {
89 configinfo.collection = value;
90 collectinfo.shortInfo.name = value;
91 } else if (key == "collectdir") configinfo.collectdir = value;
92 else if (key == "host") collectinfo.shortInfo.host = value;
93 else if (key == "port") collectinfo.shortInfo.port = value.getint();
94 else if (key == "public") {
95 if (value == "true") collectinfo.isPublic = true;
96 else collectinfo.isPublic = false;
97 } else if (key == "beta") {
98 if (value == "true") collectinfo.isBeta = true;
99 else collectinfo.isBeta = false;
100 } else if (key == "collectgroup") {
101 if (value == "true") collectinfo.isCollectGroup = true;
102 else collectinfo.isCollectGroup = false;
103 } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
104 else if (key == "supercollectionoptions") {
105 text_tarray::const_iterator begin = cfgline.begin();
106 text_tarray::const_iterator end = cfgline.end();
107 while(begin != end) {
108
109 if (*begin == "uniform_search_results_formatting") {
110 collectinfo.ccsOptions |= CCSUniformSearchResultsFormatting;
111 }
112 begin++;
113 }
114 }
115 else if (key == "builddate") collectinfo.buildDate = value.getint();
116 else if (key == "languages") collectinfo.languages = cfgline;
117 else if (key == "numdocs") collectinfo.numDocs = value.getint();
118 else if (key == "numsections") collectinfo.numSections = value.getint();
119 else if (key == "numwords") collectinfo.numWords = value.getint();
120 else if (key == "numbytes") collectinfo.numBytes = value.getint();
121 else if (key == "stemindexes") collectinfo.stemIndexes = value.getint();
122 else if (key == "collectionmeta") {
123 // genuine collmeta get added as collectionmeta and collection_macros
124 // .collmeta just get added as collection_macros
125 text_t params;
126 if (cfgline.size() == 3) {
127 // get the params for later
128 text_t::const_iterator first=cfgline[1].begin()+1;
129 text_t::const_iterator last=cfgline[1].end()-1;
130 params=substr(first, last);
131 }
132
133 text_t meta_name = cfgline[0];
134 if (*(meta_name.begin())=='.') {
135 // a .xxx collectionmeta. strip off the . and
136 // look it up in the indexmap to get the actual value
137
138 text_t name = substr(cfgline[0].begin()+1,cfgline[0].end());
139 text_t new_name;
140 if (indexmap.from2to(name, new_name)) {
141 meta_name = new_name;
142 }
143 } else {
144 // add them to collectionmeta
145 text_tmap lang_map = collectinfo.collectionmeta[cfgline[0]];
146 if (cfgline.size() == 2) {
147 lang_map[g_EmptyText] = cfgline[1];
148 } else if (cfgline.size() == 3 ) {
149 // get the lang out of params
150 paramhashtype params_hash;
151 splitparams(params, params_hash);
152
153 text_t lang = params_hash["l"];
154 lang_map[lang] = cfgline[2];
155 if (lang_map[g_EmptyText].empty()) {
156 // want the first one as the default if no default specified
157 lang_map[g_EmptyText] = cfgline[2];
158 }
159 }
160 collectinfo.collectionmeta[cfgline[0]] = lang_map;
161
162 }
163
164 // add all collectionmeta to macro list
165 text_tmap params_map = collectinfo.collection_macros[meta_name];
166
167 if (cfgline.size() == 2) {// no params for this macrohttp://uk.sports.yahoo.com/13032007/13/news-brief.html
168 params_map[g_EmptyText] = cfgline[1];
169 }
170 else if (cfgline.size() == 3) {// has params
171 params_map[params] = cfgline[2];
172 if (params_map[g_EmptyText].empty()) {
173 params_map[g_EmptyText] = cfgline[2];
174 }
175 }
176 collectinfo.collection_macros[meta_name] = params_map;
177 }
178 else if (key == "collectionmacro") {
179 text_t nobrackets;
180 text_tmap params_map = collectinfo.collection_macros[cfgline[0]];
181 // add all to macro list
182 if (cfgline.size() == 2) { // no params for this macro
183 params_map[g_EmptyText] = cfgline[1];
184 }
185 else if (cfgline.size() == 3) {// has params
186 // strip [ ] brackets from params
187 text_t::const_iterator first=cfgline[1].begin()+1;
188 text_t::const_iterator last=cfgline[1].end()-1;
189 nobrackets=substr(first, last);
190 params_map[nobrackets] = cfgline[2];
191 }
192 collectinfo.collection_macros[cfgline[0]] = params_map;
193
194 } else if (key == "format" && cfgline.size() == 2)
195 collectinfo.format[cfgline[0]] = cfgline[1];
196 else if (key == "building" && cfgline.size() == 2)
197 collectinfo.building[cfgline[0]] = cfgline[1];
198 else if (key == "httpdomain") collectinfo.httpdomain = value;
199 else if (key == "httpprefix") collectinfo.httpprefix = value;
200 else if (key == "receptionist") collectinfo.receptionist = value;
201 else if (key == "buildtype") collectinfo.buildType = value;
202 // backwards compatibility - searchytpes is now a format statement
203 else if (key == "searchtype") { // means buildtype is mgpp
204 if (collectinfo.buildType.empty()) {
205 collectinfo.buildType = "mgpp";
206 }
207 joinchar(cfgline, ',', collectinfo.format["SearchTypes"]);
208 //collectinfo.searchTypes = cfgline;
209 }
210 else if (key == "separate_cjk") {
211 if (value == "true") collectinfo.isSegmented = true;
212 else collectinfo.isSegmented = false;
213 }
214 // What have we set in our collect.cfg file : document or collection ?
215 else if (key == "authenticate") collectinfo.authenticate = value;
216
217 // What have we set for our group list
218 else if (key == "auth_group") joinchar(cfgline,',',collectinfo.auth_group);
219
220 // store all the mappings for use when collection meta is read later
221 // (build.cfg read before collect.cfg)
222 else if (key == "indexmap" || key == "indexfieldmap" || key == "subcollectionmap" || key == "languagemap" || key == "levelmap") {
223 indexmap.importmap (cfgline, true);
224
225 }
226 // In the map the key-value pair contain the same
227 // data i.e key == data, if key is 2 then data is 2
228
229 // What have we set for our public_documents ACL
230 else if (key == "public_documents")
231 {
232 text_tarray::const_iterator begin = cfgline.begin();
233 text_tarray::const_iterator end = cfgline.end();
234 while(begin != end)
235 {
236 // key = data i.e if key is 2 then data is 2
237 // collectinfo.public_documents[*begin] is the key
238 // *begin is the data value
239
240 collectinfo.public_documents[*begin] = *begin;
241 ++begin;
242 }
243 }
244
245 // What have we set for our private_documents ACL
246 else if (key == "private_documents")
247 {
248 text_tarray::const_iterator begin = cfgline.begin();
249 text_tarray::const_iterator end = cfgline.end();
250 while(begin != end)
251 {
252 // key = data i.e if key is 2 then data is 2
253 // collectinfo.public_documents[*begin] is the key
254 // *begin is the data value
255
256 collectinfo.private_documents[*begin] = *begin;
257 ++begin;
258 }
259 }
260 }
261
262 // configure the filters
263 filtermapclass::iterator filter_here = filters.begin();
264 filtermapclass::iterator filter_end = filters.end();
265 while (filter_here != filter_end) {
266 assert ((*filter_here).second.f != NULL);
267 if ((*filter_here).second.f != NULL)
268 (*filter_here).second.f->configure(key, cfgline);
269
270 ++filter_here;
271 }
272
273 // configure the sources
274 sourcelistclass::iterator source_here = sources.begin();
275 sourcelistclass::iterator source_end = sources.end();
276 while (source_here != source_end) {
277 assert ((*source_here).s != NULL);
278 if ((*source_here).s != NULL)
279 (*source_here).s->configure(key, cfgline);
280
281 ++source_here;
282 }
283}
284
285
286void collectserver::configure (const text_t &key, const text_t &value) {
287 text_tarray cfgline;
288 cfgline.push_back (value);
289 configure(key, cfgline);
290}
291
292void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
293 // if we've not been properly configured, then it is a foregone
294 // conclusion that we cannot be active
295 if (this->configinfo.collection == "null")
296 {
297 wasSuccess = false;
298 }
299 // if no build date exists, then the collection was probably not built;
300 // ditto if the number of documents is zero, then something is pretty
301 // wrong
302 else if (this->collectinfo.buildDate == 0 ||
303 this->collectinfo.numDocs == 0)
304 {
305 wasSuccess = false;
306 }
307 // it is probably okay
308 else
309 wasSuccess = true;
310}
311
312
313bool collectserver::init (ostream &logout) {
314 // delete the indexmap
315 indexmap.clear();
316
317 // init the filters
318 filtermapclass::iterator filter_here = filters.begin();
319 filtermapclass::iterator filter_end = filters.end();
320 while (filter_here != filter_end) {
321 assert ((*filter_here).second.f != NULL);
322 if (((*filter_here).second.f != NULL) &&
323 !(*filter_here).second.f->init(logout)) return false;
324
325 ++filter_here;
326 }
327
328 // init the sources
329 sourcelistclass::iterator source_here = sources.begin();
330 sourcelistclass::iterator source_end = sources.end();
331 while (source_here != source_end) {
332 assert ((*source_here).s != NULL);
333 if (((*source_here).s != NULL) &&
334 !(*source_here).s->init(logout)) return false;
335
336 ++source_here;
337 }
338
339 return true;
340}
341
342
343void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
344 comerror_t &err, ostream &/*logout*/) {
345 reponse = collectinfo;
346 err = noError;
347}
348
349void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
350 comerror_t &err, ostream &/*logout*/) {
351 response.clear ();
352
353 // get a list of filter names
354 filtermapclass::iterator filter_here = filters.begin();
355 filtermapclass::iterator filter_end = filters.end();
356 while (filter_here != filter_end) {
357 response.filterNames.insert ((*filter_here).first);
358 ++filter_here;
359 }
360
361 err = noError;
362}
363
364void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
365 InfoFilterOptionsResponse_t &response,
366 comerror_t &err, ostream &logout) {
367 outconvertclass text_t2ascii;
368
369 filterclass *thisfilter = filters.getfilter(request.filterName);
370 if (thisfilter != NULL) {
371 thisfilter->get_filteroptions (response, err, logout);
372 } else {
373 response.clear ();
374 err = protocolError;
375 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
376 << "filter \"" << request.filterName << "\".\n\n";
377 }
378}
379
380void collectserver::filter (FilterRequest_t &request,
381 FilterResponse_t &response,
382 comerror_t &err, ostream &logout) {
383 outconvertclass text_t2ascii;
384
385 // translate any ".fc", ".pr" etc. stuff in the docSet
386 text_t translatedOID;
387 text_tarray translatedOIDs;
388 text_tarray::iterator doc_here = request.docSet.begin();
389 text_tarray::iterator doc_end = request.docSet.end();
390 while (doc_here != doc_end) {
391 if (needs_translating (*doc_here)) {
392 sourcelistclass::iterator source_here = sources.begin();
393 sourcelistclass::iterator source_end = sources.end();
394 while (source_here != source_end) {
395 assert ((*source_here).s != NULL);
396 if (((*source_here).s != NULL) &&
397 ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
398 if (err != noError) return;
399 break;
400 }
401 ++source_here;
402 }
403 translatedOIDs.push_back (translatedOID);
404 } else {
405 translatedOIDs.push_back (*doc_here);
406 }
407 ++doc_here;
408 }
409 request.docSet = translatedOIDs;
410
411 response.clear();
412
413 filterclass *thisfilter = filters.getfilter(request.filterName);
414 if (thisfilter != NULL) {
415 // filter the data
416 thisfilter->filter (request, response, err, logout);
417 if (err != noError) return;
418 // fill in the metadata for each of the OIDs (if it is requested)
419 if (request.filterResultOptions & FRmetadata) {
420 bool processed = false;
421 ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
422 ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
423 while (resultdoc_here != resultdoc_end) {
424 // try each of the sources in turn
425 sourcelistclass::iterator source_here = sources.begin();
426 sourcelistclass::iterator source_end = sources.end();
427 while (source_here != source_end) {
428 assert ((*source_here).s != NULL);
429 if (((*source_here).s != NULL) &&
430 ((*source_here).s->get_metadata(request.requestParams, request.refParams,
431 request.getParents, request.fields,
432 (*resultdoc_here).OID, (*resultdoc_here).metadata,
433 err, logout))) {
434 if (err != noError) return;
435 processed = true;
436 break;
437 }
438 ++source_here;
439 }
440 if (!processed) {
441
442 logout << text_t2ascii << "Protocol Error: nothing processed for "
443 << "filter \"" << request.filterName << "\".\n\n";
444
445 err = protocolError;
446 return;
447 }
448 ++resultdoc_here;
449 }
450 }
451
452 } else {
453 response.clear ();
454 err = protocolError;
455 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
456 << "filter \"" << request.filterName << "\".\n\n";
457 }
458
459 err = noError;
460}
461
462void collectserver::get_document (const DocumentRequest_t &request,
463 DocumentResponse_t &response,
464 comerror_t &err, ostream &logout) {
465
466 sourcelistclass::iterator source_here = sources.begin();
467 sourcelistclass::iterator source_end = sources.end();
468 while (source_here != source_end) {
469 assert ((*source_here).s != NULL);
470 if (((*source_here).s != NULL) &&
471 ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
472 if (err != noError) return;
473 break;
474 }
475 ++source_here;
476 }
477}
478
479void collectserver::is_searchable (bool &issearchable, comerror_t &err,
480 ostream &logout) {
481
482 sourcelistclass::iterator source_here = sources.begin();
483 sourcelistclass::iterator source_end = sources.end();
484 while (source_here != source_end) {
485 assert ((*source_here).s != NULL);
486 if (((*source_here).s != NULL) &&
487 ((*source_here).s->is_searchable (issearchable, err, logout))) {
488 if (err != noError) return;
489 break;
490 }
491 ++source_here;
492 }
493}
494
495
496bool operator==(const collectserverptr &x, const collectserverptr &y) {
497 return (x.c == y.c);
498}
499
500bool operator<(const collectserverptr &x, const collectserverptr &y) {
501 return (x.c < y.c);
502}
503
504
505// thecollectserver remains the property of the calling code but
506// should not be deleted until it is removed from this list.
507void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
508 // can't add a null collection server
509 assert (thecollectserver != NULL);
510 if (thecollectserver == NULL) return;
511
512 // can't add an collection server with no collection name
513 assert (!(thecollectserver->get_collection_name()).empty());
514 if ((thecollectserver->get_collection_name()).empty()) return;
515
516 collectserverptr cptr;
517 cptr.c = thecollectserver;
518 collectserverptrs[thecollectserver->get_collection_name()] = cptr;
519}
520
521// getcollectserver will return NULL if the collectserver could not be found
522collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
523 // can't find a collection with no name
524 if (collection.empty()) return NULL;
525
526 iterator here = collectserverptrs.find (collection);
527 if (here == collectserverptrs.end()) return NULL;
528
529 return (*here).second.c;
530}
Note: See TracBrowser for help on using the repository browser.