source: trunk/gsdl/src/colservr/collectserver.cpp@ 8186

Last change on this file since 8186 was 6584, checked in by kjdon, 20 years ago

Fiddled around with segmenting for Chinese text. Haven't changed how the
segmentation is done, or what character ranges are used.
But when it's done is now controlled by the collect.cfg. There is a new
option, separate_cjk, values true or false, default false. Segmentation
is only done if this is set to true. This is passed as a global option to
all plugins by the import.pl script, so the user just needs to add it
once to the config file, not as an option to all plugins.
The queryaction uses this option too, to determine whether or not to segment
the query.

  • Property svn:keywords set to Author Date Id Revision
File size: 13.9 KB
Line 
1
2/**********************************************************************
3 *
4 * collectserver.cpp --
5 * Copyright (C) 1999 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include "collectserver.h"
28#include "infodbclass.h"
29#include "OIDtools.h"
30#include <assert.h>
31
32
33collectserver::collectserver () {
34 configinfo.collection = "null";
35}
36
37collectserver::~collectserver () {
38
39 // clean up the sources
40 sourcelistclass::iterator source_here = sources.begin();
41 sourcelistclass::iterator source_end = sources.end();
42 while (source_here != source_end) {
43 if ((*source_here).s != NULL)
44 delete (*source_here).s;
45 source_here++;
46 }
47 sources.clear();
48
49 // clean up the filters
50 filtermapclass::iterator filter_here = filters.begin();
51 filtermapclass::iterator filter_end = filters.end();
52 while (filter_here != filter_end) {
53 if ((*filter_here).second.f != NULL)
54 delete (*filter_here).second.f;
55 filter_here++;
56 }
57 filters.clear();
58}
59
60// configure should be called for each line in the
61// configuration files to configure the collection server and everything
62// it contains. The configuration should take place just before initialisation.
63void collectserver::configure (const text_t &key, const text_tarray &cfgline) {
64 if (cfgline.size() >= 1) {
65 const text_t &value = cfgline[0];
66 if (key == "gsdlhome") configinfo.gsdlhome = value;
67 else if (key == "gdbmhome") configinfo.gdbmhome = value;
68 else if (key == "collection") {
69 configinfo.collection = value;
70 collectinfo.shortInfo.name = value;
71 } else if (key == "collectdir") configinfo.collectdir = value;
72 else if (key == "host") collectinfo.shortInfo.host = value;
73 else if (key == "port") collectinfo.shortInfo.port = value.getint();
74 else if (key == "public") {
75 if (value == "true") collectinfo.isPublic = true;
76 else collectinfo.isPublic = false;
77 } else if (key == "beta") {
78 if (value == "true") collectinfo.isBeta = true;
79 else collectinfo.isBeta = false;
80 } else if ((key == "ccscols") || (key == "supercollection")) collectinfo.ccsCols = cfgline;
81 else if (key == "builddate") collectinfo.buildDate = value.getint();
82 else if (key == "languages") collectinfo.languages = cfgline;
83 else if (key == "numdocs") collectinfo.numDocs = value.getint();
84 else if (key == "numsections") collectinfo.numSections = value.getint();
85 else if (key == "numwords") collectinfo.numWords = value.getint();
86 else if (key == "numbytes") collectinfo.numBytes = value.getint();
87 else if (key == "collectionmeta" && cfgline.size() == 2)
88 collectinfo.collectionmeta[cfgline[0]] = cfgline[1];
89 else if (key == "collectionmeta" && cfgline.size() == 3 && collectinfo.collectionmeta[cfgline[0]].empty() )
90 collectinfo.collectionmeta[cfgline[0]] = cfgline[2];
91 else if (key == "format" && cfgline.size() == 2)
92 collectinfo.format[cfgline[0]] = cfgline[1];
93 else if (key == "building" && cfgline.size() == 2)
94 collectinfo.building[cfgline[0]] = cfgline[1];
95 else if (key == "httpdomain") collectinfo.httpdomain = value;
96 else if (key == "httpprefix") collectinfo.httpprefix = value;
97 else if (key == "receptionist") collectinfo.receptionist = value;
98 else if (key == "buildtype") collectinfo.buildType = value;
99 else if (key == "searchtype") { // means buildtype is mgpp
100 collectinfo.buildType = "mgpp";
101 collectinfo.searchTypes = cfgline;
102 }
103 else if (key == "separate_cjk") {
104 if (value == "true") collectinfo.isSegmented = true;
105 else collectinfo.isSegmented = false;
106 }
107 // What have we set in our collect.cfg file : document or collection ?
108 else if (key == "authenticate") collectinfo.authenticate = value;
109
110 // What have we set for our group list
111 else if (key == "auth_group")
112 {
113 // use the joinchar helper function from
114 // text_t.h, it takes in the whole cfgline
115 // array and a separator aka a comma in our
116 // case and returns a sting separated by a
117 // comma like this:
118 //
119 // Rene,Kolla,Crystal,Stefan,Aly,Ian
120
121 joinchar(cfgline,',',collectinfo.auth_group);
122
123 //outconvertclass t;
124 //cerr << t << collectinfo.auth_group << "\n";
125 }
126
127 // In the map the key-value pair contain the same
128 // data i.e key == data, if key is 2 then data is 2
129
130 // What have we set for our public_documents ACL
131 else if (key == "public_documents")
132 {
133 text_tarray::const_iterator begin = cfgline.begin();
134 text_tarray::const_iterator end = cfgline.end();
135 while(begin != end)
136 {
137 // key = data i.e if key is 2 then data is 2
138 // collectinfo.public_documents[*begin] is the key
139 // *begin is the data value
140
141 collectinfo.public_documents[*begin] = *begin;
142 begin++;
143 }
144 }
145
146 // What have we set for our private_documents ACL
147 else if (key == "private_documents")
148 {
149 text_tarray::const_iterator begin = cfgline.begin();
150 text_tarray::const_iterator end = cfgline.end();
151 while(begin != end)
152 {
153 // key = data i.e if key is 2 then data is 2
154 // collectinfo.public_documents[*begin] is the key
155 // *begin is the data value
156
157 collectinfo.private_documents[*begin] = *begin;
158 begin++;
159 }
160 }
161 }
162
163 // configure the filters
164 filtermapclass::iterator filter_here = filters.begin();
165 filtermapclass::iterator filter_end = filters.end();
166 while (filter_here != filter_end) {
167 assert ((*filter_here).second.f != NULL);
168 if ((*filter_here).second.f != NULL)
169 (*filter_here).second.f->configure(key, cfgline);
170
171 filter_here++;
172 }
173
174 // configure the sources
175 sourcelistclass::iterator source_here = sources.begin();
176 sourcelistclass::iterator source_end = sources.end();
177 while (source_here != source_end) {
178 assert ((*source_here).s != NULL);
179 if ((*source_here).s != NULL)
180 (*source_here).s->configure(key, cfgline);
181
182 source_here++;
183 }
184}
185
186void collectserver::configure (const text_t &key, const text_t &value) {
187 text_tarray cfgline;
188 cfgline.push_back (value);
189 configure(key, cfgline);
190}
191
192void collectserver::ping (bool &wasSuccess, comerror_t &error, ostream &logout) {
193 // if we've not been properly configured, then it is a foregone
194 // conclusion that we cannot be active
195 if (this->configinfo.collection == "null")
196 {
197 wasSuccess = false;
198 }
199 // if no build date exists, then the collection was probably not built;
200 // ditto if the number of documents is zero, then something is pretty
201 // wrong
202 else if (this->collectinfo.buildDate == 0 ||
203 this->collectinfo.numDocs == 0)
204 {
205 wasSuccess = false;
206 }
207 // it is probably okay
208 else
209 wasSuccess = true;
210}
211
212
213bool collectserver::init (ostream &logout) {
214 // init the filters
215 filtermapclass::iterator filter_here = filters.begin();
216 filtermapclass::iterator filter_end = filters.end();
217 while (filter_here != filter_end) {
218 assert ((*filter_here).second.f != NULL);
219 if (((*filter_here).second.f != NULL) &&
220 !(*filter_here).second.f->init(logout)) return false;
221
222 filter_here++;
223 }
224
225 // init the sources
226 sourcelistclass::iterator source_here = sources.begin();
227 sourcelistclass::iterator source_end = sources.end();
228 while (source_here != source_end) {
229 assert ((*source_here).s != NULL);
230 if (((*source_here).s != NULL) &&
231 !(*source_here).s->init(logout)) return false;
232
233 source_here++;
234 }
235
236 return true;
237}
238
239
240void collectserver::get_collectinfo (ColInfoResponse_t &reponse,
241 comerror_t &err, ostream &/*logout*/) {
242 reponse = collectinfo;
243 err = noError;
244}
245
246void collectserver::get_filterinfo (InfoFiltersResponse_t &response,
247 comerror_t &err, ostream &/*logout*/) {
248 response.clear ();
249
250 // get a list of filter names
251 filtermapclass::iterator filter_here = filters.begin();
252 filtermapclass::iterator filter_end = filters.end();
253 while (filter_here != filter_end) {
254 response.filterNames.insert ((*filter_here).first);
255 filter_here++;
256 }
257
258 err = noError;
259}
260
261void collectserver::get_filteroptions (const InfoFilterOptionsRequest_t &request,
262 InfoFilterOptionsResponse_t &response,
263 comerror_t &err, ostream &logout) {
264 outconvertclass text_t2ascii;
265
266 filterclass *thisfilter = filters.getfilter(request.filterName);
267 if (thisfilter != NULL) {
268 thisfilter->get_filteroptions (response, err, logout);
269 } else {
270 response.clear ();
271 err = protocolError;
272 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
273 << "filter \"" << request.filterName << "\".\n\n";
274 }
275}
276
277void collectserver::filter (FilterRequest_t &request,
278 FilterResponse_t &response,
279 comerror_t &err, ostream &logout) {
280 outconvertclass text_t2ascii;
281
282 // translate any ".fc", ".pr" etc. stuff in the docSet
283 text_t translatedOID;
284 text_tarray translatedOIDs;
285 text_tarray::iterator doc_here = request.docSet.begin();
286 text_tarray::iterator doc_end = request.docSet.end();
287 while (doc_here != doc_end) {
288 if (needs_translating (*doc_here)) {
289 sourcelistclass::iterator source_here = sources.begin();
290 sourcelistclass::iterator source_end = sources.end();
291 while (source_here != source_end) {
292 assert ((*source_here).s != NULL);
293 if (((*source_here).s != NULL) &&
294 ((*source_here).s->translate_OID (*doc_here, translatedOID, err, logout))) {
295 if (err != noError) return;
296 break;
297 }
298 source_here++;
299 }
300 translatedOIDs.push_back (translatedOID);
301 } else {
302 translatedOIDs.push_back (*doc_here);
303 }
304 doc_here ++;
305 }
306 request.docSet = translatedOIDs;
307
308 response.clear();
309
310 filterclass *thisfilter = filters.getfilter(request.filterName);
311 if (thisfilter != NULL) {
312 // filter the data
313 thisfilter->filter (request, response, err, logout);
314 if (err != noError) return;
315 // fill in the metadata for each of the OIDs (if it is requested)
316 if (request.filterResultOptions & FRmetadata) {
317 bool processed = false;
318 ResultDocInfo_tarray::iterator resultdoc_here = response.docInfo.begin();
319 ResultDocInfo_tarray::iterator resultdoc_end = response.docInfo.end();
320 while (resultdoc_here != resultdoc_end) {
321 // try each of the sources in turn
322 sourcelistclass::iterator source_here = sources.begin();
323 sourcelistclass::iterator source_end = sources.end();
324 while (source_here != source_end) {
325 assert ((*source_here).s != NULL);
326 if (((*source_here).s != NULL) &&
327 ((*source_here).s->get_metadata(request.requestParams, request.refParams,
328 request.getParents, request.fields,
329 (*resultdoc_here).OID, (*resultdoc_here).metadata,
330 err, logout))) {
331 if (err != noError) return;
332 processed = true;
333 break;
334 }
335 source_here++;
336 }
337 if (!processed) {
338 err = protocolError;
339 return;
340 }
341 resultdoc_here++;
342 }
343 }
344
345 } else {
346 response.clear ();
347 err = protocolError;
348 logout << text_t2ascii << "Protocol Error: filter options requested for non-existent\n"
349 << "filter \"" << request.filterName << "\".\n\n";
350 }
351
352 err = noError;
353}
354
355void collectserver::get_document (const DocumentRequest_t &request,
356 DocumentResponse_t &response,
357 comerror_t &err, ostream &logout) {
358
359 sourcelistclass::iterator source_here = sources.begin();
360 sourcelistclass::iterator source_end = sources.end();
361 while (source_here != source_end) {
362 assert ((*source_here).s != NULL);
363 if (((*source_here).s != NULL) &&
364 ((*source_here).s->get_document (request.OID, response.doc, err, logout))) {
365 if (err != noError) return;
366 break;
367 }
368 source_here++;
369 }
370}
371
372void collectserver::is_searchable (bool &issearchable, comerror_t &err,
373 ostream &logout) {
374
375 sourcelistclass::iterator source_here = sources.begin();
376 sourcelistclass::iterator source_end = sources.end();
377 while (source_here != source_end) {
378 assert ((*source_here).s != NULL);
379 if (((*source_here).s != NULL) &&
380 ((*source_here).s->is_searchable (issearchable, err, logout))) {
381 if (err != noError) return;
382 break;
383 }
384 source_here++;
385 }
386}
387
388
389bool operator==(const collectserverptr &x, const collectserverptr &y) {
390 return (x.c == y.c);
391}
392
393bool operator<(const collectserverptr &x, const collectserverptr &y) {
394 return (x.c < y.c);
395}
396
397
398// thecollectserver remains the property of the calling code but
399// should not be deleted until it is removed from this list.
400void collectservermapclass::addcollectserver (collectserver *thecollectserver) {
401 // can't add a null collection server
402 assert (thecollectserver != NULL);
403 if (thecollectserver == NULL) return;
404
405 // can't add an collection server with no collection name
406 assert (!(thecollectserver->get_collection_name()).empty());
407 if ((thecollectserver->get_collection_name()).empty()) return;
408
409 collectserverptr cptr;
410 cptr.c = thecollectserver;
411 collectserverptrs[thecollectserver->get_collection_name()] = cptr;
412}
413
414// getcollectserver will return NULL if the collectserver could not be found
415collectserver *collectservermapclass::getcollectserver (const text_t &collection) {
416 // can't find a collection with no name
417 if (collection.empty()) return NULL;
418
419 iterator here = collectserverptrs.find (collection);
420 if (here == collectserverptrs.end()) return NULL;
421
422 return (*here).second.c;
423}
Note: See TracBrowser for help on using the repository browser.