source: main/trunk/greenstone2/runtime-src/src/recpt/receptionist.cpp@ 30374

Last change on this file since 30374 was 30374, checked in by kjdon, 8 years ago

just testing committing

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 59.4 KB
Line 
1/**********************************************************************
2 *
3 * receptionist.cpp -- a web interface for the gsdl
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26// following line required to get fstream.filedesc() on darwin (Mac OS X)
27// gcc 2.91 automatically defines this in stream.h
28#define _STREAM_COMPAT 1
29
30#include "receptionist.h"
31#include "recptprototools.h"
32#include "fileutil.h"
33#include "cgiutils.h"
34#include "htmlutils.h"
35#include "gsdltools.h"
36#include "gsdltimes.h"
37#include "OIDtools.h"
38#include "securitytools.h"
39#include <assert.h>
40#include <time.h>
41#include <stdio.h> // for open()
42#include <fcntl.h> // for open() flags
43// following 2 are for printing Last-Modified http header.
44#include <sys/stat.h>
45#include <time.h>
46
47#if defined (GSDL_USE_IOS_H)
48#include <fstream.h>
49#else
50#include <fstream>
51#endif
52
53void recptconf::clear () {
54 gsdlhome.clear();
55 collecthome.clear();
56 dbhome.clear();
57 collectinfo.erase(collectinfo.begin(), collectinfo.end());
58 collection.clear();
59 collectdir.clear();
60 httpprefix.clear();
61 httpweb.clear();
62 gwcgi.clear();
63 macrofiles.erase(macrofiles.begin(), macrofiles.end());
64 saveconf.clear();
65 usecookies = false;
66 logcgiargs = false;
67 LogDateFormat = LocalTime;
68
69 maintainer.clear();
70 MailServer.clear();
71 LogEvents = Disabled;
72 EmailEvents = Disabled;
73 EmailUserEvents = false;
74
75 languages.erase(languages.begin(), languages.end());
76 encodings.erase(encodings.begin(), encodings.end());
77
78 site_auth = false;
79 HomePageType = "images";
80 HomePageCols = 3;
81
82 // these default page parameters can always be overriden
83 // in the configuration file
84 pageparams.erase(pageparams.begin(), pageparams.end());
85 pageparams["c"] = "";
86 pageparams["l"] = "en";
87
88#ifdef MACROPRECEDENCE
89 macroprecedence = MACROPRECEDENCE;
90#else
91 macroprecedence.clear();
92#endif
93}
94
95
96void collectioninfo_t::clear () {
97 gsdl_gsdlhome.clear();
98 gsdl_dbhome.clear();
99
100 info_loaded = false;
101 info.clear();
102}
103
104void languageinfo_t::clear () {
105 longname.clear();
106 defaultencoding.clear();
107}
108
109receptionist::receptionist () {
110 // create a list of cgi arguments
111 // this must be done before the configuration
112
113 cgiarginfo ainfo;
114
115 ainfo.shortname = "e";
116 ainfo.longname = "compressed arguments";
117 ainfo.multiplechar = true;
118 ainfo.defaultstatus = cgiarginfo::good;
119 ainfo.argdefault = g_EmptyText;
120 ainfo.savedarginfo = cgiarginfo::mustnot;
121 argsinfo.addarginfo (NULL, ainfo);
122
123 ainfo.shortname = "a";
124 ainfo.longname = "action";
125 ainfo.multiplechar = true;
126 ainfo.defaultstatus = cgiarginfo::none;
127 ainfo.argdefault = g_EmptyText;
128 ainfo.savedarginfo = cgiarginfo::must;
129 argsinfo.addarginfo (NULL, ainfo);
130
131 // w=western
132 ainfo.shortname = "w";
133 ainfo.longname = "encoding";
134 ainfo.multiplechar = true;
135 ainfo.defaultstatus = cgiarginfo::none;
136 ainfo.argdefault = g_EmptyText;
137 ainfo.savedarginfo = cgiarginfo::must;
138 argsinfo.addarginfo (NULL, ainfo);
139
140 ainfo.shortname = "nw";
141 ainfo.longname = "new encoding";
142 ainfo.multiplechar = true;
143 ainfo.defaultstatus = cgiarginfo::none;
144 ainfo.argdefault = g_EmptyText;
145 ainfo.savedarginfo = cgiarginfo::mustnot;
146 argsinfo.addarginfo (NULL, ainfo);
147
148 ainfo.shortname = "c";
149 ainfo.longname = "collection";
150 ainfo.multiplechar = true;
151 ainfo.defaultstatus = cgiarginfo::none;
152 ainfo.argdefault = g_EmptyText;
153 ainfo.savedarginfo = cgiarginfo::must;
154 argsinfo.addarginfo (NULL, ainfo);
155
156 // the interface language name should use the ISO 639
157 // standard
158 ainfo.shortname = "l";
159 ainfo.longname = "interface language";
160 ainfo.multiplechar = true;
161 ainfo.defaultstatus = cgiarginfo::weak;
162 ainfo.argdefault = "en";
163 ainfo.savedarginfo = cgiarginfo::must;
164 argsinfo.addarginfo (NULL, ainfo);
165
166 ainfo.shortname = "nl";
167 ainfo.longname = "new language";
168 ainfo.multiplechar = false;
169 ainfo.defaultstatus = cgiarginfo::none;
170 ainfo.argdefault = "0";
171 ainfo.savedarginfo = cgiarginfo::mustnot;
172 argsinfo.addarginfo (NULL, ainfo);
173
174 // the GSDL_UID (cookie)
175 ainfo.shortname = "z";
176 ainfo.longname = "gsdl uid";
177 ainfo.multiplechar = true;
178 ainfo.defaultstatus = cgiarginfo::none;
179 ainfo.argdefault = g_EmptyText;
180 ainfo.savedarginfo = cgiarginfo::mustnot;
181 argsinfo.addarginfo (NULL, ainfo);
182}
183
184
185void receptionist::add_action (action *theaction) {
186 // make sure we have an action to add
187 if (theaction == NULL) return;
188
189 // add this action to the list of actions
190 actions.addaction(theaction);
191
192 // add the cgi arguments from this action
193 argsinfo.addarginfo (NULL, *(theaction->getargsinfo()));
194}
195
196
197void receptionist::add_browser (browserclass *thebrowser) {
198 // make sure we have a browser to add
199 if (thebrowser == NULL) return;
200
201 // add this browser to the list of browsers
202 browsers.addbrowser(thebrowser);
203}
204
205
206void receptionist::setdefaultbrowser (const text_t &browsername) {
207 browsers.setdefaultbrowser (browsername);
208}
209
210
211// configure should be called for each line in the
212// configuration files to configure the receptionist and everything
213// it contains. The configuration should take place after everything
214// has been added but before the initialisation.
215
216void receptionist::configure (const text_t &key, const text_tarray &cfgline) {
217 // configure the receptionist
218
219
220
221 if (cfgline.size() >= 1) {
222 cgiarginfo *info = NULL;
223 if (key == "gsdlhome") {
224 configinfo.gsdlhome = cfgline[0];
225 if (configinfo.dbhome.empty()) configinfo.dbhome = cfgline[0];
226 }
227 else if (key == "collecthome") configinfo.collecthome = cfgline[0];
228 else if (key == "gdbmhome") configinfo.dbhome = cfgline[0];
229 else if (key == "collection") {
230 configinfo.collection = cfgline[0];
231 // also need to set the default arg to this collection
232 if ((info = argsinfo.getarginfo("c")) != NULL) {
233 info->defaultstatus = cgiarginfo::good;
234 info->argdefault = cfgline[0];
235 }
236
237 }
238 else if (key == "collectdir") configinfo.collectdir = cfgline[0];
239 else if (key == "httpprefix") configinfo.httpprefix = cfgline[0];
240 else if (key == "httpweb") configinfo.httpweb = cfgline[0];
241 else if (key == "gwcgi") configinfo.gwcgi = cfgline[0];
242 else if (key == "macrofiles") {
243 // want to append to macrofiles (i.e. may be several config files
244 // contributing, maybe from several collections).
245 text_tarray::const_iterator here = cfgline.begin();
246 text_tarray::const_iterator end = cfgline.end();
247 while (here != end) {
248 configinfo.macrofiles.insert (*here);
249 ++here;
250 }
251 }
252 else if (key == "saveconf") configinfo.saveconf = cfgline[0];
253 else if (key == "usecookies") configinfo.usecookies = (cfgline[0] == "true");
254 else if (key == "logcgiargs") configinfo.logcgiargs = (cfgline[0] == "true");
255 else if (key == "maintainer") configinfo.maintainer = cfgline[0];
256 else if (key == "MailServer") configinfo.MailServer = cfgline[0];
257 else if (key == "LogDateFormat") {
258 if (cfgline[0] == "UTCTime") configinfo.LogDateFormat = UTCTime;
259 else if (cfgline[0] == "Absolute") configinfo.LogDateFormat = Absolute;
260 }
261 else if (key == "LogEvents") {
262 if (cfgline[0] == "CollectorEvents") configinfo.LogEvents = CollectorEvents;
263 else if (cfgline[0] == "AllEvents") configinfo.LogEvents = AllEvents;
264 }
265 else if (key == "EmailEvents") {
266 if (cfgline[0] == "CollectorEvents") configinfo.EmailEvents = CollectorEvents;
267 else if (cfgline[0] == "AllEvents") configinfo.EmailEvents = AllEvents;
268 }
269 else if (key == "EmailUserEvents") configinfo.EmailUserEvents = (cfgline[0] == "true");
270 else if (key == "pageparam") {
271 if (cfgline.size() >= 2) configinfo.pageparams[cfgline[0]] = cfgline[1];
272 else configinfo.pageparams[cfgline[0]] = "";
273 }
274 else if (key == "macroprecedence") configinfo.macroprecedence = cfgline[0];
275 else if (key == "collectinfo") {
276 if (cfgline.size() == 3) {
277 // for backwards compatability with older collections that only use
278 // gsdlhome and dbhome
279 collectioninfo_t cinfo;
280 cinfo.gsdl_gsdlhome = cfgline[1];
281 cinfo.gsdl_collecthome = filename_cat(cfgline[1],"collect");
282 cinfo.gsdl_dbhome = cfgline[2];
283 configinfo.collectinfo[cfgline[0]] = cinfo;
284 }
285 else if (cfgline.size() >= 4) {
286 collectioninfo_t cinfo;
287 cinfo.gsdl_gsdlhome = cfgline[1];
288 cinfo.gsdl_collecthome = cfgline[2];
289 cinfo.gsdl_dbhome = cfgline[3];
290 configinfo.collectinfo[cfgline[0]] = cinfo;
291 }
292 }
293
294 // Read in the value for the site_auth directive either true or false
295 else if (key == "site_auth") configinfo.site_auth = (cfgline[0] == "true");
296
297 else if (key == "site_group")
298 joinchar(cfgline,',',configinfo.site_group);
299
300 else if (key == "SiteFormat") {
301 if (cfgline[0] == "HomePageType") {
302 configinfo.HomePageType = cfgline[1];
303 } else if (cfgline[0] == "HomePageCols") {
304 configinfo.HomePageCols = cfgline[1].getint();
305 }
306 }
307
308 else if (key == "cgiarg") {
309 // get shortname
310 bool seen_defaultstatus = false;
311 text_t subkey, subvalue;
312 text_t shortname;
313 text_t::const_iterator cfglinesub_here;
314 text_tarray::const_iterator cfgline_here = cfgline.begin();
315 text_tarray::const_iterator cfgline_end = cfgline.end();
316 while (cfgline_here != cfgline_end) {
317 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
318 (*cfgline_here).end(), '=', subkey);
319 if (subkey == "shortname") {
320 shortname = substr (cfglinesub_here, (*cfgline_here).end());
321 }
322 ++cfgline_here;
323 }
324
325 // if we found the shortname process the line again filling in values
326 if (!shortname.empty()) {
327 cgiarginfo &chinfo = argsinfo[shortname];
328 chinfo.shortname = shortname; // in case this is a new argument
329
330 cfgline_here = cfgline.begin();
331 while (cfgline_here != cfgline_end) {
332 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
333 (*cfgline_here).end(), '=', subkey);
334 subvalue = substr (cfglinesub_here, (*cfgline_here).end());
335
336 if (subkey == "longname") chinfo.longname = subvalue;
337 else if (subkey == "multiplechar") chinfo.multiplechar = (subvalue == "true");
338 else if (subkey == "multiplevalue") chinfo.multiplevalue = (subvalue == "true");
339 else if (subkey == "defaultstatus") {
340 seen_defaultstatus = true;
341 if (subvalue == "none") chinfo.defaultstatus = cgiarginfo::none;
342 else if (subvalue == "weak") chinfo.defaultstatus = cgiarginfo::weak;
343 else if (subvalue == "good") chinfo.defaultstatus = cgiarginfo::good;
344 else if (subvalue == "config") chinfo.defaultstatus = cgiarginfo::config;
345 else if (subvalue == "imperative") chinfo.defaultstatus = cgiarginfo::imperative;
346 }
347 else if (subkey == "argdefault") {
348 chinfo.argdefault = subvalue;
349 if (!seen_defaultstatus) chinfo.defaultstatus = cgiarginfo::config;
350 }
351 else if (subkey == "savedarginfo") {
352 if (subvalue == "mustnot") chinfo.savedarginfo = cgiarginfo::mustnot;
353 else if (subvalue == "can") chinfo.savedarginfo = cgiarginfo::can;
354 else if (subvalue == "must") chinfo.savedarginfo = cgiarginfo::must;
355 }
356
357 ++cfgline_here;
358 }
359 }
360
361 } else if (key == "Encoding") {
362
363 configure_encoding (cfgline);
364
365 } else if (key == "Language") {
366 text_t subkey, subvalue, shortname;
367 languageinfo_t lang;
368 text_t::const_iterator cfglinesub_here;
369 text_tarray::const_iterator cfgline_here = cfgline.begin();
370 text_tarray::const_iterator cfgline_end = cfgline.end();
371 while (cfgline_here != cfgline_end) {
372 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
373 (*cfgline_here).end(), '=', subkey);
374 if (subkey == "shortname") {
375 shortname = substr (cfglinesub_here, (*cfgline_here).end());
376 } else if (subkey == "longname") {
377 lang.longname = substr (cfglinesub_here, (*cfgline_here).end());
378 } else if (subkey == "default_encoding") {
379 lang.defaultencoding = substr (cfglinesub_here, (*cfgline_here).end());
380 }
381 ++cfgline_here;
382 }
383 if (!shortname.empty()) {
384 if (lang.longname.empty()) lang.longname = shortname;
385 configinfo.languages[shortname] = lang;
386 }
387 }
388 }
389
390 // configure the actions
391 actionptrmap::iterator actionhere = actions.begin ();
392 actionptrmap::iterator actionend = actions.end ();
393
394 while (actionhere != actionend) {
395 assert ((*actionhere).second.a != NULL);
396 if ((*actionhere).second.a != NULL)
397 (*actionhere).second.a->configure(key, cfgline);
398
399 ++actionhere;
400 }
401
402 // configure the protocols
403 recptprotolistclass::iterator protohere = protocols.begin ();
404 recptprotolistclass::iterator protoend = protocols.end ();
405
406 while (protohere != protoend) {
407 assert ((*protohere).p != NULL);
408 comerror_t err;
409 if ((*protohere).p != NULL)
410 (*protohere).p->configure(key, cfgline, err);
411
412 ++protohere;
413 }
414
415 // configure the browsers
416 browserptrmap::iterator browserhere = browsers.begin ();
417 browserptrmap::iterator browserend = browsers.end ();
418
419 while (browserhere != browserend) {
420 assert ((*browserhere).second.b != NULL);
421 if ((*browserhere).second.b != NULL)
422 (*browserhere).second.b->configure(key, cfgline);
423
424 ++browserhere;
425 }
426}
427
428
429void receptionist::configure (const text_t &key, const text_t &value) {
430 text_tarray cfgline;
431 cfgline.push_back (value);
432 configure(key, cfgline);
433}
434
435
436// init should be called after all the actions and protocols have been
437// added to the receptionist and after everything has been configured but
438// before any pages are created. It returns true on success and false on
439// failure. If false is returned getpage should not be called (without
440// producing meaningless output), instead an error page should be produced
441// by the calling code.
442bool receptionist::init (ostream &logout) {
443
444 // first configure collectdir
445 if (!configinfo.collection.empty()) {
446
447 // collection specific mode
448
449 text_t collectdir = configinfo.gsdlhome;
450
451 if (!configinfo.collectdir.empty()) {
452 // has already been configured
453 collectdir = configinfo.collectdir;
454 } else {
455
456 // decide where collectdir is by searching for collect.cfg
457 // look in $GSDLHOME/collect/collection-name/etc/collect.cfg and
458 // then $GSDLHOME/etc/collect.cfg
459 collectdir = filename_cat (configinfo.gsdlhome, "collect");
460 collectdir = filename_cat (collectdir, configinfo.collection);
461 text_t filename = filename_cat (collectdir, "etc");
462 filename = filename_cat (filename, "collect.cfg");
463
464 if (!file_exists(filename)) collectdir = configinfo.gsdlhome;
465 }
466
467 configure("collectdir", collectdir);
468
469 }
470 else {
471
472 text_t collecthome;
473 if (configinfo.collecthome.empty()) {
474 collecthome = filename_cat(configinfo.gsdlhome,"collect");
475 }
476 else {
477 collecthome = configinfo.collecthome;
478 }
479
480 configure("collecthome", collecthome);
481
482 // for backwards compatability collectdir set to gsdlhome
483 // (possible it could now be removed)
484 configure("collectdir", configinfo.gsdlhome);
485 }
486
487
488 // read in the macro files
489 if (!read_macrofiles (logout)) return false;
490
491 // there must be at least one action defined
492 if (actions.empty()) {
493 logout << "Error: no actions have been added to the receptionist\n";
494 return false;
495 }
496
497 // there must be at least one browser defined
498 if (browsers.empty()) {
499 logout << "Error: no browsers have been added to the receptionist\n";
500 return false;
501 }
502
503 // create a saveconf string if there isn't one already
504 if (configinfo.saveconf.empty())
505 configinfo.saveconf = create_save_conf_str (argsinfo, logout);
506
507 // check the saveconf string
508 if (!check_save_conf_str (configinfo.saveconf, argsinfo, logout))
509 return false;
510
511 // set a random seed
512 srand (time(NULL));
513
514 // if maintainer email address is something dodgy (for now I'll define
515 // dodgy as being anything that doesn't contain '@') disable EmailEvents
516 // and EmailUserEvents (we don't strictly need to disable EmailUserEvents
517 // in this case but we will as it seems likely that MailServer will also
518 // be screwed up if maintainer is).
519 text_t::const_iterator maintainer_end = configinfo.maintainer.end ();
520 text_t::const_iterator maintainer_here = findchar ((text_t::const_iterator)configinfo.maintainer.begin(),
521 maintainer_end, '@');
522 if (maintainer_here == maintainer_end) {
523 configinfo.EmailEvents = Disabled;
524 configinfo.EmailUserEvents = Disabled;
525 } else {
526 // if MailServer isn't set it should default to mail.maintainer-domain
527 if (configinfo.MailServer.empty()) {
528 configinfo.MailServer = "mail." + substr (maintainer_here+1, maintainer_end);
529 }
530 }
531
532 // init the actions
533 actionptrmap::iterator actionhere = actions.begin ();
534 actionptrmap::iterator actionend = actions.end ();
535 while (actionhere != actionend) {
536 if (((*actionhere).second.a == NULL) ||
537 !(*actionhere).second.a->init(logout)) return false;
538 ++actionhere;
539 }
540
541 // init the protocols
542 recptprotolistclass::iterator protohere = protocols.begin ();
543 recptprotolistclass::iterator protoend = protocols.end ();
544 while (protohere != protoend) {
545 comerror_t err;
546 if (((*protohere).p == NULL) ||
547 !(*protohere).p->init(err, logout)) return false;
548 ++protohere;
549 }
550
551 // init the browsers
552 browserptrmap::iterator browserhere = browsers.begin ();
553 browserptrmap::iterator browserend = browsers.end ();
554 while (browserhere != browserend) {
555 if (((*browserhere).second.b == NULL) ||
556 !(*browserhere).second.b->init(logout)) return false;
557 ++browserhere;
558 }
559
560 return true;
561}
562
563// get the default encoding for the given language - if it fails for any
564// reason return ""
565text_t receptionist::get_default_encoding (const text_t &language) {
566
567 // make sure language is valid
568 if (configinfo.languages.find(language) == configinfo.languages.end()) return "";
569
570 text_t default_encoding = configinfo.languages[language].defaultencoding;
571
572 // make sure the encoding is valid
573 if (converters.find(default_encoding) == converters.end()) {
574 // we don't support the encoding specified as default for this language
575 if (configinfo.encodings.size()==1) {
576 // only 1 encoding specified in main.cfg, so use it
577 return configinfo.encodings.begin()->second;
578 }
579 return "";
580 }
581
582 return default_encoding;
583}
584
585// parse_cgi_args parses cgi arguments into an argument class.
586// This function should be called for each page request. It returns false
587// if there was a major problem with the cgi arguments.
588bool receptionist::parse_cgi_args (const text_t &argstr,
589 fileupload_tmap &fileuploads,
590 cgiargsclass &args,
591 ostream &logout, text_tmap &fcgienv) {
592
593 // get an initial list of cgi arguments
594 args.clear();
595 split_cgi_args (argsinfo, argstr, args);
596
597 // expand the compressed argument (if there was one)
598 if (!expand_save_args (argsinfo, configinfo.saveconf, args, logout)) return false;
599
600 // add the defaults
601 add_default_args (argsinfo, args, logout);
602
603 // add any file upload arguments
604 add_fileupload_args(argsinfo, args, fileuploads, logout);
605
606 // get the cookie
607 if (configinfo.usecookies) get_cookie(args["z"], fcgienv);
608
609 // if we're changing languages, set the encoding to the default for the new language
610 if (args["nl"] == "1") {
611 args["nw"] = get_default_encoding(args["l"]);
612 }
613
614 // get the input encoding
615 // if encoding isn't set, set it to the default for the current language
616 if ((args.getarg("w") == NULL) || args["w"].empty()) {
617 args["w"] = get_default_encoding(args["l"]);
618 }
619
620 text_t &arg_w = args["w"];
621
622 inconvertclass defaultinconvert;
623 inconvertclass *inconvert = converters.get_inconverter (arg_w);
624 if (inconvert == NULL) inconvert = &defaultinconvert;
625
626 // see if the next page will have a different encoding
627 if (args.getarg("nw") != NULL) arg_w = args["nw"];
628
629 // convert arguments which aren't in unicode to unicode
630 args_tounicode (args, *inconvert);
631
632 // decide on the output conversion class (needed for checking the external
633 // cgi arguments)
634 rzwsoutconvertclass defaultoutconverter;
635 rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
636 if (outconverter == NULL) outconverter = &defaultoutconverter;
637 outconverter->reset();
638
639 // check the main cgi arguments
640 if (!check_mainargs (args, logout)) return false;
641
642 // check the arguments for the action
643 action *a = actions.getaction (args["a"]);
644 if (a != NULL) {
645 if (!a->check_cgiargs (argsinfo, args, &protocols, logout)) return false;
646 } else {
647 // the action was not found!!
648 outconvertclass text_t2ascii;
649 logout << text_t2ascii << "Error: the action \"" << args["a"]
650 << "\" could not be found.\n";
651 return false;
652 }
653
654 // check external cgi arguments for each action
655 actionptrmap::iterator actionhere = actions.begin ();
656 actionptrmap::iterator actionend = actions.end ();
657 while (actionhere != actionend) {
658 assert ((*actionhere).second.a != NULL);
659 if ((*actionhere).second.a != NULL) {
660 if (!(*actionhere).second.a->check_external_cgiargs (argsinfo, args, *outconverter,
661 configinfo.saveconf, logout))
662 return false;
663 }
664 ++actionhere;
665 }
666
667 // the action might have changed but we will assume that
668 // the cgiargs were checked properly when the change was made
669
670 return true;
671}
672
673
674// Returns true if cookie already existed, false if it was generated
675bool receptionist::get_cookie (text_t &cookie, text_tmap &fcgienv)
676{
677 // See if we can get the GSDL_UID cookie
678 text_t cookiestring = gsdl_getenv ("HTTP_COOKIE", fcgienv);
679 if (!cookiestring.empty()) // This should really be handled by the findword function...
680 {
681 // Check if the cookie contains GSDL_UID
682 text_t gsdl_uid = "GSDL_UID=";
683 text_t::iterator gsdl_uid_start = findword(cookiestring.begin(), cookiestring.end(), gsdl_uid);
684 if (gsdl_uid_start != cookiestring.end())
685 {
686 // Yes, so extract its value
687 cookie = substr(gsdl_uid_start + gsdl_uid.size(), findchar(gsdl_uid_start + gsdl_uid.size(), cookiestring.end(), ';'));
688 return true;
689 }
690 }
691
692 // Generate a new key "[host]-[epoch time]", e.g. test.com-1256764496
693 cookie.clear();
694 text_t host = gsdl_getenv("REMOTE_ADDR", fcgienv);
695 time_t ttime = time(NULL);
696 if (!host.empty())
697 {
698 cookie += host;
699 cookie.push_back ('-');
700 }
701 cookie += text_t(ttime);
702
703 return false;
704}
705
706
707// Same as above but just tests if cookie exists
708bool receptionist::get_cookie (text_tmap &fcgienv)
709{
710 text_t cookie_jar = "";
711 return get_cookie(cookie_jar, fcgienv);
712}
713
714
715bool receptionist::log_cgi_args (cgiargsclass &args, ostream &logout, text_tmap &fcgienv) {
716
717 // see if we want to log the cgi arguments
718 if (!configinfo.logcgiargs) return true;
719
720 text_t host = gsdl_getenv ("REMOTE_HOST", fcgienv);
721 text_t script_name = gsdl_getenv ("SCRIPT_NAME", fcgienv);
722 if (host.empty()) host = gsdl_getenv ("REMOTE_ADDR", fcgienv);
723 text_t browser = gsdl_getenv ("HTTP_USER_AGENT", fcgienv);
724
725 cgiargsclass::const_iterator args_here = args.begin();
726 cgiargsclass::const_iterator args_end = args.end();
727
728 text_t argstr;
729 bool first = true;
730 while (args_here != args_end) {
731 if (!first) argstr += ", ";
732 argstr += (*args_here).first + "=" + (*args_here).second.value;
733 first = false;
734 ++args_here;
735 }
736
737 text_t logfile = filename_cat (configinfo.dbhome, "etc", "usage.txt");
738
739 text_t logstr = script_name;
740 logstr += " " + host;
741 logstr += " [";
742 if (configinfo.LogDateFormat == UTCTime) {
743 logstr += get_date (false);
744 } else if (configinfo.LogDateFormat == Absolute) {
745 time_t ttime = time(NULL);
746 logstr += ttime;
747 } else {
748 // LocalTime
749 logstr += get_date (true);
750 }
751 logstr += "] (" + argstr + ") \"";
752 logstr += browser;
753 logstr += "\"\n";
754
755 return append_logstr (logfile, logstr, logout);
756}
757
758bool receptionist::append_logstr (const text_t &filename, const text_t &logstr,
759 ostream &logout) {
760
761 char *lfile = filename.getcstr();
762
763 int fd = open(lfile, O_CREAT | O_WRONLY | O_APPEND, 0777);
764 //int fd = open(lfile, O_CREAT | O_RDWR | O_APPEND, 0777);
765
766 if (fd == -1) {
767 logout << "Error: Couldn't open file " << lfile << "\n";
768 delete []lfile;
769 return false;
770 }
771
772 // lock_val is set to 0 if file is locked successfully
773 int lock_val = 1;
774 GSDL_LOCK_FILE (fd);
775 if (lock_val == 0) {
776 // Write the string out in UTF-8
777 text_t tmp_log_str_utf8 = to_utf8(logstr);
778 char *buffer = tmp_log_str_utf8.getcstr();
779 size_t num_chars = tmp_log_str_utf8.size();
780 write(fd, buffer, num_chars);
781 GSDL_UNLOCK_FILE (fd);
782 delete []buffer;
783 } else {
784 logout << "Error: Couldn't lock file " << lfile << "\n";
785 close(fd);
786 delete []lfile;
787 return false;
788 }
789
790 close(fd);
791
792 delete []lfile;
793 return true;
794}
795
796text_t receptionist::expandmacros (const text_t &astring, cgiargsclass &args,
797 ostream &logout) {
798 text_t outstring;
799 outconvertclass text_t2ascii;
800
801 action *a = actions.getaction (args["a"]);
802 if (a != NULL)
803 {
804 prepare_page (a, args, text_t2ascii, logout);
805 }
806 disp.expandstring (displayclass::defaultpackage, astring, outstring);
807 return outstring;
808}
809
810// produce_cgi_page will call get_cgihead_info and
811// produce_content in the appropriate way to output a cgi header and
812// the page content (if needed). If a page could not be created it
813// will return false
814bool receptionist::produce_cgi_page (cgiargsclass &args, ostream &contentout,
815 ostream &logout, text_tmap &fcgienv) {
816 outconvertclass text_t2ascii;
817
818 response_t response;
819 text_t response_data;
820
821 // produce cgi header
822 get_cgihead_info (args, response, response_data, logout, fcgienv);
823 if (response == location) {
824 // location response (url may contain macros!!)
825 response_data = expandmacros (response_data, args, logout);
826
827 contentout << text_t2ascii << "Location: " << response_data << "\n\n";
828 contentout << flush;
829
830 return true;
831 } else if (response == content) {
832 // content response
833
834#ifdef GSDL_NOCACHE
835 contentout << "Expires: Mon, 26 Jul 1997 05:00:00 GMT\n"; // date in the past
836 tm *tm_ptr = NULL;
837 time_t t = time(NULL);
838 tm_ptr = gmtime (&t);
839 if (tm_ptr != NULL) {
840 char *timestr = new char[128];
841 strftime (timestr, 128, "%a, %d %b %Y %H:%M:%S", tm_ptr);
842 contentout << "Last-Modified: " << timestr << " GMT\n"; // always modified
843 delete []timestr;
844 }
845 contentout << "Cache-Control: no-cache, must-revalidate\n"; // HTTP/1.1
846 contentout << "Pragma: no-cache\n"; // HTTP/1.0
847
848#else
849
850 // use the later of build.cfg and collect.cfg modification times
851 // as the Last-Modified: header, for caching values
852 struct stat file_info;
853 time_t latest=0;
854
855 text_t collectname="";
856 collectname=args["c"];
857 if (collectname != "") {
858
859 text_t collecthome;
860 if (!configinfo.collecthome.empty()) {
861 collecthome = configinfo.collecthome;
862 }
863 else {
864 collecthome=filename_cat(configinfo.gsdlhome,"collect");
865 }
866 text_t collectdir=filename_cat(collecthome,collectname);
867
868 text_t buildcfg=filename_cat(collectdir,"index");
869 buildcfg=filename_cat(buildcfg,"build.cfg");
870 char *buildcfg_ptr=buildcfg.getcstr();
871 text_t collectcfg=filename_cat(collectdir,"etc");
872 collectcfg=filename_cat(collectcfg,"collect.cfg");
873 char *collectcfg_ptr=collectcfg.getcstr();
874
875 if (stat(buildcfg_ptr, &file_info)) {
876 // we got an error. Currently don't handle error :(
877 // logout <<
878 } else {
879 latest=file_info.st_mtime;
880 }
881
882 if (stat(collectcfg_ptr, &file_info)) {
883 // error - unhandled for now
884 } else {
885 if (latest<file_info.st_mtime) latest=file_info.st_mtime;
886 }
887 delete []buildcfg_ptr;
888 delete []collectcfg_ptr;
889
890 if (latest>0) {
891 // print out modified time, "DDD, dd MMM YYYY hh:mm:ss" format
892 // c library takes care of mem for this string... (has \n at end!!!!)
893 // latest is currently local time, convert to UTC.
894 struct tm* utc_latest;
895 utc_latest=gmtime(&latest);
896 contentout << "Last-Modified: " << asctime(utc_latest);
897 }
898 } // end of collection != ""
899
900#endif
901
902 contentout << text_t2ascii << "Content-type: " << response_data << "\n\n";
903 }
904 else if (response == undecided_location) {
905 // Wait until later to output the target location
906 // Used for the "I'm feeling lucky" functionality
907 }
908 else {
909 // unknown response
910 logout << "Error: get_cgihead_info returned an unknown response type.\n";
911 return false;
912 }
913
914 // produce cgi page
915 if (!produce_content (args, contentout, logout)) return false;
916
917 // flush contentout
918 contentout << flush;
919 return true;
920}
921
922
923// get_cgihead_info determines the cgi header information for
924// a set of cgi arguments. If response contains location then
925// response_data contains the redirect address. If response
926// contains content then response_data contains the content-type.
927// Note that images can now be produced by the receptionist.
928// Note also, alternative for get_cgihead_info below which
929// stores the information in a text_tmap so it is more easily digested
930
931void receptionist::get_cgihead_info (cgiargsclass &args, response_t &response,
932 text_t &response_data, ostream &logout,
933 text_tmap &fcgienv) {
934 outconvertclass text_t2ascii;
935
936 // get the action
937 action *a = actions.getaction (args["a"]);
938 if (a != NULL) {
939 a->get_cgihead_info (args, &protocols, response, response_data, logout);
940
941 } else {
942 // the action was not found!!
943 logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \""
944 << args["a"] << "\" could not be found.\n";
945 response = content;
946 response_data = "text/html";
947 }
948
949 // add the encoding information
950 if (response == content) {
951 if (converters.find(args["w"]) != converters.end()) {
952 response_data += "; charset=" + args["w"];
953 } else {
954 // default to latin 1
955 response_data += "; charset=ISO-8859-1";
956 }
957
958 // add cookie if required
959 if (configinfo.usecookies && !get_cookie(fcgienv))
960 response_data += "\nSet-Cookie: GSDL_UID=" + args["z"]
961 + "; expires=Fri, 25-Dec-2037 00:00:00 GMT";
962 }
963}
964
965
966// Alternative version of get_cgihead_info, stores fielded infomation
967// in text_tmap rather than concatenated string
968void receptionist::get_cgihead_info (cgiargsclass &args, text_tmap &headers,
969 ostream &logout, text_tmap &fcgienv) {
970
971 response_t response;
972 text_t response_data;
973
974 // get the action
975 action *a = actions.getaction (args["a"]);
976 if (a != NULL) {
977 a->get_cgihead_info (args, &protocols, response, response_data, logout);
978
979 } else {
980 // the action was not found!!
981 outconvertclass text_t2ascii;
982 logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \""
983 << args["a"] << "\" could not be found.\n";
984 response = content;
985 response_data = "text/html";
986 }
987
988 if (response == location) {
989 response_data = expandmacros(response_data, args, logout);
990 headers["Location"] = response_data;
991 return;
992 }
993
994 // add the encoding information
995 if (response == content) {
996
997 if (converters.find(args["w"]) != converters.end()) {
998 headers["content-encoding"] = args["w"];
999 response_data += "; charset=" + args["w"];
1000 } else {
1001 // default to utf-8
1002 headers["content-encoding"] = "utf-8";
1003 response_data += "; charset=utf-8";
1004 }
1005
1006 headers["content-type"] = response_data;
1007
1008 }
1009
1010}
1011
1012
1013
1014// produce the page content
1015bool receptionist::produce_content (cgiargsclass &args, ostream &contentout,
1016 ostream &logout) {
1017
1018 // decide on the output conversion class
1019 text_t &arg_w = args["w"];
1020 rzwsoutconvertclass defaultoutconverter;
1021 rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
1022 if (outconverter == NULL) outconverter = &defaultoutconverter;
1023 outconverter->reset();
1024
1025 // needed for 16-bit unicode only - big endian marker 0xfeff (RFC 2781)
1026 if (arg_w=="utf-16be") {
1027 contentout << '\xfe' << '\xff' ;
1028 }
1029
1030 recptproto *collectproto = protocols.getrecptproto (args["c"], logout);
1031 if (collectproto != NULL) {
1032 // get browsers to process OID
1033 text_t OID = args["d"];
1034 if (OID.empty()) OID = args["cl"];
1035 if (!OID.empty()) {
1036 text_tset metadata;
1037 text_tarray OIDs;
1038 OIDs.push_back (OID);
1039 if (!is_top(OID)) OIDs.push_back (OID + ".pr");
1040 FilterResponse_t response;
1041 metadata.insert ("childtype");
1042 if (get_info (OIDs, args["c"], args["l"], metadata, false, collectproto, response, logout)) {
1043 text_t classifytype;
1044 if (!response.docInfo[0].metadata["childtype"].values[0].empty())
1045 classifytype = response.docInfo[0].metadata["childtype"].values[0];
1046 else if (!is_top (OID)) {
1047 // not sure why this is occasionally not set, but it will
1048 // cause a segfault... possibly if built with no_text? jrm21
1049 if (response.docInfo[1].metadata.find("childtype")
1050 == response.docInfo[1].metadata.end()) {
1051 cerr << "receptionist: no childtype element in metadata map!"
1052 << endl;
1053 } else {
1054 if (!response.docInfo[1].metadata["childtype"].values[0].empty())
1055 classifytype = response.docInfo[1].metadata["childtype"].values[0];
1056 }
1057 }
1058 browserclass *b = browsers.getbrowser (classifytype);
1059 b->processOID (args, collectproto, logout);
1060 }
1061 }
1062
1063 // translate "d" and "cl" arguments if required
1064 translate_OIDs (args, collectproto, logout);
1065 }
1066
1067 // produce the page using the desired action
1068 action *a = actions.getaction (args["a"]);
1069 if (a != NULL) {
1070 if (a->uses_display(args)) prepare_page (a, args, (*outconverter), logout);
1071 if (!a->do_action (args, &protocols, &browsers, disp, (*outconverter), contentout, logout))
1072 return false;
1073 } else {
1074 // the action was not found!!
1075 outconvertclass text_t2ascii;
1076
1077 logout << text_t2ascii << "Error receptionist::produce_content: the action \""
1078 << args["a"] << "\" could not be found.\n";
1079
1080 contentout << (*outconverter)
1081 << "<html>\n"
1082 << "<head>\n"
1083 << "<title>Error</title>\n"
1084 << "</head>\n"
1085 << "<body>\n"
1086 << "<h2>Oops!</h2>\n"
1087 << "Undefined Page. The action \""
1088 << args["a"] << "\" could not be found.\n"
1089 << "</body>\n"
1090 << "</html>\n";
1091 }
1092 return true;
1093}
1094
1095
1096// returns the compressed argument ("e") corresponding to the argument
1097// list. This can be used to save preferences between sessions.
1098text_t receptionist::get_compressed_arg (cgiargsclass &args, ostream &logout) {
1099 // decide on the output conversion class
1100 text_t &arg_w = args["w"];
1101 rzwsoutconvertclass defaultoutconverter;
1102 rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
1103 if (outconverter == NULL) outconverter = &defaultoutconverter;
1104 outconverter->reset();
1105
1106 text_t compressed_args;
1107 if (compress_save_args (argsinfo, configinfo.saveconf, args,
1108 compressed_args, *outconverter, logout))
1109 return compressed_args;
1110
1111 return g_EmptyText;
1112}
1113
1114
1115// will read in all the macro files. If one is not found an
1116// error message will be written to logout and the method will
1117// return false.
1118bool receptionist::read_macrofiles (ostream &logout) {
1119 outconvertclass text_t2ascii;
1120
1121 // redirect the error output to logout
1122 ostream *savedlogout = disp.setlogout (&logout);
1123
1124 // unload any macros that were previously loaded - this allows us to call
1125 // this function a second time to reload all the macro files (useful for
1126 // reading in changed macro files in server versions of greenstone)
1127 disp.unloaddefaultmacros();
1128
1129 // load up the default macro files, the collection directory
1130 // is searched first for the file (if this is being used in
1131 // collection specific mode) and then the main directory(s)
1132 text_t colmacrodir = filename_cat (configinfo.collectdir, "macros");
1133
1134 text_tset maindirs;
1135 text_t gsdlmacrodir = filename_cat (configinfo.gsdlhome, "macros");
1136 maindirs.insert (gsdlmacrodir);
1137 colinfo_tmap::iterator colhere = configinfo.collectinfo.begin();
1138 colinfo_tmap::iterator colend = configinfo.collectinfo.end();
1139 while (colhere != colend) {
1140 if (!((*colhere).second.gsdl_gsdlhome).empty()) {
1141 gsdlmacrodir = filename_cat ((*colhere).second.gsdl_gsdlhome, "macros");
1142 maindirs.insert (gsdlmacrodir);
1143 }
1144 ++colhere;
1145 }
1146
1147 text_tset::iterator arrhere = configinfo.macrofiles.begin();
1148 text_tset::iterator arrend = configinfo.macrofiles.end();
1149 text_t filename;
1150 while (arrhere != arrend) {
1151 bool foundfile = false;
1152
1153 // try in the collection directory if this is being
1154 // run in collection specific mode
1155 if (!configinfo.collection.empty()) {
1156 filename = filename_cat (colmacrodir, *arrhere);
1157 if (file_exists (filename)) {
1158 disp.loaddefaultmacros(filename);
1159 foundfile = true;
1160 }
1161 }
1162
1163 // if we haven't found the macro file yet try in
1164 // the main macro directory(s)
1165 // if file is found in more than one main directory
1166 // we'll load all copies
1167 if (!foundfile) {
1168 text_tset::const_iterator dirhere = maindirs.begin();
1169 text_tset::const_iterator dirend = maindirs.end();
1170 while (dirhere != dirend) {
1171 filename = filename_cat (*dirhere, *arrhere);
1172 if (file_exists (filename)) {
1173 disp.loaddefaultmacros(filename);
1174 foundfile = true;
1175 }
1176 ++dirhere;
1177 }
1178 }
1179
1180 // see if we found the file or not
1181 if (!foundfile) {
1182 logout << text_t2ascii
1183 << "Error: the macro file \"" << *arrhere << "\" could not be found.\n";
1184 if (configinfo.collection.empty()) {
1185 text_t dirs;
1186 joinchar (maindirs, ", ", dirs);
1187 logout << text_t2ascii
1188 << "It should be in either of the following directories ("
1189 << dirs << ").\n\n";
1190
1191 } else {
1192 logout << text_t2ascii
1193 << "It should be in either " << colmacrodir << " or in "
1194 << gsdlmacrodir << ".\n\n";
1195 }
1196 // don't crap out if a macro file is missing
1197 //disp.setlogout (savedlogout);
1198 //return false;
1199 }
1200 ++arrhere;
1201 }
1202
1203 // success
1204
1205 // reset logout to what it was
1206 disp.setlogout (savedlogout);
1207 return true;
1208}
1209
1210
1211
1212
1213// Go through the list of macro files looking to see
1214// if any exist in the collectoin specific area. If they
1215// do then read them in and add them to the set of existing
1216// current macros
1217
1218void receptionist::read_collection_macrofiles (const text_t& collection, ostream &logout)
1219{
1220 outconvertclass text_t2ascii;
1221
1222 // disp.unloadcollectionmacros();
1223
1224 // redirect the error output to logout
1225 ostream *savedlogout = disp.setlogout (&logout);
1226
1227 text_t colmacrodir
1228 = filename_cat (configinfo.collecthome,collection, "macros");
1229
1230 if (directory_exists (colmacrodir)) {
1231
1232 text_tset::iterator arrhere = configinfo.macrofiles.begin();
1233 text_tset::iterator arrend = configinfo.macrofiles.end();
1234 text_t filename;
1235 while (arrhere != arrend) {
1236
1237 filename = filename_cat (colmacrodir, *arrhere);
1238 if (file_exists (filename)) {
1239 disp.loadcollectionmacros(filename);
1240 }
1241
1242 ++arrhere;
1243 }
1244 }
1245
1246 // reset logout to what it was
1247 disp.setlogout (savedlogout);
1248}
1249
1250
1251
1252
1253// check_mainargs will check all the main arguments. If a major
1254// error is found it will return false and no cgi page should
1255// be created using the arguments.
1256
1257bool receptionist::check_mainargs (cgiargsclass &args, ostream &logout) {
1258
1259 if(configinfo.site_auth)
1260 {
1261 args["uan"] = "1";
1262 args["ug"] = configinfo.site_group;
1263 }
1264
1265
1266 // if this receptionist is running in collection dependant mode
1267 // then it should always set the collection argument to the
1268 // collection
1269 if (!configinfo.collection.empty()) args["c"] = configinfo.collection;
1270
1271 // if current collection uses ccscols make sure
1272 // "ccs" argument is set and make "cc" default to
1273 // all collections in "ccs"
1274 if (args["a"] != "config" && !args["c"].empty()) {
1275
1276 text_t &arg_c = args["c"];
1277 recptproto *collectproto = protocols.getrecptproto (arg_c, logout);
1278 if (collectproto == NULL) {
1279 // oops, this collection isn't valid
1280 outconvertclass text_t2ascii;
1281 logout << text_t2ascii << "ERROR: Invalid collection: " << arg_c << "\n";
1282 // args["c"].clear();
1283
1284 } else {
1285
1286 ColInfoResponse_t *cinfo = get_collectinfo_ptr (collectproto, arg_c, logout);
1287
1288 if(cinfo->authenticate == "collection")
1289 {
1290 args["uan"] = "1";
1291 args["ug"] = cinfo->auth_group;
1292 }
1293
1294
1295 if (cinfo != NULL) {
1296 if (!cinfo->ccsCols.empty()) {
1297 args["ccs"] = 1;
1298 if (args["cc"].empty()) {
1299 text_tarray::const_iterator col_here = cinfo->ccsCols.begin();
1300 text_tarray::const_iterator col_end = cinfo->ccsCols.end();
1301 bool first = true;
1302 while (col_here != col_end) {
1303 // make sure it's a valid collection
1304 if (protocols.getrecptproto (*col_here, logout) != NULL) {
1305 if (!first) args["cc"].push_back (',');
1306 args["cc"] += *col_here;
1307 first = false;
1308 }
1309 ++col_here;
1310 }
1311 }
1312 }
1313 } else {
1314 logout << "ERROR (receptionist::check_mainargs): get_collectinfo_ptr returned NULL\n";
1315 }
1316 }
1317 }
1318
1319 // argument "v" can only be 0 or 1. Use the default value
1320 // if it is out of range
1321 int arg_v = args.getintarg ("v");
1322 if (arg_v != 0 && arg_v != 1) {
1323 cgiarginfo *vinfo = argsinfo.getarginfo ("v");
1324 if (vinfo != NULL) args["v"] = vinfo->argdefault;
1325 }
1326
1327 // argument "f" can only be 0 or 1. Use the default value
1328 // if it is out of range
1329 int arg_f = args.getintarg ("f");
1330 if (arg_f != 0 && arg_f != 1) {
1331 cgiarginfo *finfo = argsinfo.getarginfo ("f");
1332 if (finfo != NULL) args["f"] = finfo->argdefault;
1333 }
1334
1335 return true;
1336}
1337
1338// translate_OIDs translates the "d" and "cl" arguments to their correct values
1339// if they use the tricky ".fc", ".lc" type syntax.
1340void receptionist::translate_OIDs (cgiargsclass &args, recptproto *collectproto,
1341 ostream &logout) {
1342
1343 FilterResponse_t response;
1344 FilterRequest_t request;
1345 comerror_t err;
1346 text_t &arg_d = args["d"];
1347 text_t &arg_cl = args["cl"];
1348 text_t &collection = args["c"];
1349
1350 // do a call to translate OIDs if required
1351 request.filterName = "NullFilter";
1352 request.filterResultOptions = FROID;
1353 if (!arg_d.empty() && needs_translating (arg_d)) {
1354 request.docSet.push_back (arg_d);
1355 collectproto->filter (collection, request, response, err, logout);
1356 arg_d = response.docInfo[0].OID;
1357 request.clear();
1358 }
1359 // we'll also check here that the "cl" argument has a "classify" doctype
1360 // (in case ".fc" or ".lc" have screwed up)
1361 if (needs_translating (arg_cl)) {
1362 request.fields.insert ("doctype");
1363 request.docSet.push_back (arg_cl);
1364 request.filterResultOptions = FRmetadata;
1365 collectproto->filter (collection, request, response, err, logout);
1366 // set to original value (without .xx stuff) if doctype isn't "classify" or if no doctype
1367 if (response.docInfo[0].metadata["doctype"].values.size() == 0 || response.docInfo[0].metadata["doctype"].values[0] != "classify")
1368 strip_suffix (arg_cl);
1369 else
1370 arg_cl = response.docInfo[0].OID;
1371 }
1372}
1373
1374// prepare_page sets up page parameters, sets display macros
1375// and opens the page ready for output
1376void receptionist::prepare_page (action *a, cgiargsclass &args,
1377 outconvertclass &outconvert,
1378 ostream &logout) {
1379 // set up page parameters
1380 text_t pageparams;
1381 bool first = true;
1382
1383 text_tmap::iterator params_here = configinfo.pageparams.begin();
1384 text_tmap::iterator params_end = configinfo.pageparams.end();
1385 while (params_here != params_end) {
1386 // page params are those from main.cfg (eg pageparam v 0) plus
1387 // two defaults set in recptconf.clear() (c="" and l=en)
1388 // This used to check if the current value of the page param
1389 // == the default value, then don't add in it the list
1390 // but if l=en, and there is a macro with [l=en], then it doesn't
1391 // find it.
1392 // so now all page params will go into the list. I assume this will
1393 // mean more attempts to find each macro, but nothing worsee than
1394 // that. --kjdon
1395 //if (args[(*params_here).first] != (*params_here).second) {
1396 if (first)
1397 first = false;
1398 else
1399 pageparams += ",";
1400
1401 pageparams += (*params_here).first;
1402 pageparams += "=";
1403 pageparams += args[(*params_here).first];
1404 // }
1405
1406 ++params_here;
1407 }
1408
1409
1410 // open the page
1411 disp.openpage(pageparams, configinfo.macroprecedence);
1412
1413 disp.unloadcollectionmacros();
1414
1415 text_t collection = args["c"];
1416 if (!collection.empty()) {
1417 read_collection_macrofiles(collection,logout);
1418 }
1419
1420 // define external macros for each action
1421 actionptrmap::iterator actionhere = actions.begin ();
1422 actionptrmap::iterator actionend = actions.end ();
1423
1424 while (actionhere != actionend) {
1425 assert ((*actionhere).second.a != NULL);
1426 if ((*actionhere).second.a != NULL) {
1427 (*actionhere).second.a->define_external_macros (disp, args, &protocols, logout);
1428 }
1429 ++actionhere;
1430 }
1431
1432
1433 // define internal macros for the current action
1434 a->define_internal_macros (disp, args, &protocols, logout);
1435
1436 // define general macros. the defining of general macros is done here so that
1437 // the last possible version of the cgi arguments are used
1438 define_general_macros (args, outconvert, logout);
1439}
1440
1441
1442void receptionist::define_general_macros (cgiargsclass &args, outconvertclass &/*outconvert*/,
1443 ostream &logout) {
1444
1445 text_t &collection = args["c"];
1446
1447 disp.setmacro ("gsdlhome", displayclass::defaultpackage, dm_safe(configinfo.gsdlhome));
1448 disp.setmacro ("gwcgi", displayclass::defaultpackage, configinfo.gwcgi);
1449 disp.setmacro ("httpweb", displayclass::defaultpackage, configinfo.httpweb);
1450 disp.setmacro ("httpprefix", displayclass::defaultpackage, configinfo.httpprefix);
1451
1452 // This perhaps should be done with gsdl_getenv() which takes the
1453 // 'fcgienv' parameter (for fast-cgi), however if changed to this, this
1454 // additional parameter would need to be passed into here (not sure how
1455 // that would effect any virtual inheritence going on), or else moved
1456 // higher up the calling to chain to, e.g., produce_cgi_page()
1457
1458 char* remote_addr = getenv("REMOTE_ADDR");
1459
1460 if (remote_addr != NULL) {
1461 text_t remote_addr_t(remote_addr);
1462 disp.setmacro ("remoteAddr", displayclass::defaultpackage, remote_addr_t);
1463 }
1464
1465 char* remote_host = getenv("REMOTE_HOST");
1466 if (remote_host != NULL) {
1467 text_t remote_host_t(remote_host);
1468 disp.setmacro ("remoteHost", displayclass::defaultpackage, remote_host_t);
1469 }
1470 else {
1471 // setting this to "unknown" is easier to deal with in format/macro
1472 // statements, rather than testing for _remoteHost_
1473 disp.setmacro ("remoteHost", displayclass::defaultpackage, "unknown");
1474 }
1475
1476
1477 text_t compressedoptions = get_compressed_arg(args, logout);
1478 disp.setmacro ("compressedoptions", displayclass::defaultpackage, dm_safe(compressedoptions));
1479 //disp.setmacro ("compressedoptionsUrlsafe", displayclass::defaultpackage, encodeForURL(dm_safe(compressedoptions))); // seems to be unnecessary after testing e=hack or e=hack<collect>... or e=...<collect>hack in a live server
1480
1481 // need a decoded version of compressedoptions for use within forms
1482 // as browsers encode values from forms before sending to server
1483 // (e.g. %25 becomes %2525)
1484 decode_cgi_arg (compressedoptions);
1485 if (args["w"] == "utf-8") { // if the encoding was utf-8, then compressed options was utf-8, and we need unicode.
1486 // if encoding wasn't utf-8, then compressed opotions may be screwed up, but seems to work for 8 bit encodings?
1487 compressedoptions = to_uni(compressedoptions);
1488 }
1489
1490 text_t dmacrovalue = dm_safe(compressedoptions);
1491 disp.setmacro ("decodedcompressedoptions", displayclass::defaultpackage, dmacrovalue);
1492 disp.setmacro ("decodedcompressedoptionsAttrsafe", displayclass::defaultpackage, encodeForHTMLAttr(dmacrovalue));
1493
1494#if defined (__WIN32__)
1495 disp.setmacro ("win32", displayclass::defaultpackage, "1");
1496#endif
1497
1498 // set _cgiargX_ macros for each cgi argument
1499 cgiargsclass::const_iterator argshere = args.begin();
1500 cgiargsclass::const_iterator argsend = args.end();
1501 while (argshere != argsend) {
1502
1503 text_t macrovalue = (*argshere).second.value; // and stays like that if ((*argshere).first == "hp")
1504
1505 if (((*argshere).first == "q") ||
1506 ((*argshere).first == "qa") ||
1507 ((*argshere).first == "qtt") ||
1508 ((*argshere).first == "qty") ||
1509 ((*argshere).first == "qp") ||
1510 ((*argshere).first == "qpl") ||
1511 ((*argshere).first == "qr") ||
1512 ((*argshere).first == "q2")) {
1513
1514 // need to escape special characters from query string
1515 macrovalue = html_safe(macrovalue);
1516
1517 } else if ((*argshere).first == "hp") {
1518 if(!isValidURLProtocol(macrovalue)) {
1519 macrovalue = encodeForURL(macrovalue); // URL has invalid protocol like javascript:, so URL encode it
1520 }
1521 }
1522 else {
1523 macrovalue = dm_safe(macrovalue);
1524 }
1525
1526 // set the default value for the macro
1527 disp.setmacro ("cgiarg" + (*argshere).first, displayclass::defaultpackage, macrovalue);
1528
1529 // set macros for the encoded versions of the same value. Uses the functions in securitytools.h
1530 // https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet
1531
1532 text_t htmlsafe = encodeForHTML(macrovalue);
1533 text_t attrsafe = encodeForHTMLAttr(macrovalue);
1534 text_t urlsafe = encodeForURL(macrovalue);
1535 text_t jssafe = encodeForJavascript(macrovalue); // with default setting will return \\x and \\u for macro files
1536 text_t csssafe = encodeForCSS(macrovalue); // not yet used anywhere, but is available for use in macros
1537 text_t sqlsafe = encodeForSQL(macrovalue);
1538
1539 disp.setmacro ("cgiarg" + (*argshere).first + "Htmlsafe", displayclass::defaultpackage, htmlsafe);
1540 disp.setmacro ("cgiarg" + (*argshere).first + "Attrsafe", displayclass::defaultpackage, attrsafe);
1541 disp.setmacro ("cgiarg" + (*argshere).first + "Urlsafe", displayclass::defaultpackage, urlsafe);
1542 disp.setmacro ("cgiarg" + (*argshere).first + "Jssafe", displayclass::defaultpackage, jssafe);
1543 disp.setmacro ("cgiarg" + (*argshere).first + "Csssafe", displayclass::defaultpackage, csssafe);
1544 disp.setmacro ("cgiarg" + (*argshere).first + "Sqlsafe", displayclass::defaultpackage, sqlsafe);
1545
1546
1547 ++argshere;
1548 }
1549
1550 // set collection specific macros
1551 if (!collection.empty()) {
1552 recptproto *collectproto = protocols.getrecptproto (collection, logout);
1553 if (collectproto != NULL) {
1554 FilterResponse_t response;
1555 text_tset metadata;
1556 get_info ("collection", collection, args["l"], metadata, false,
1557 collectproto, response, logout);
1558
1559 if (!response.docInfo[0].metadata.empty()) {
1560 MetadataInfo_tmap::const_iterator here = response.docInfo[0].metadata.begin();
1561 MetadataInfo_tmap::const_iterator end = response.docInfo[0].metadata.end();
1562 while (here != end) {
1563 if (((*here).first != "haschildren") && ((*here).first != "hasnext") &&
1564 ((*here).first != "hasprevious")) {
1565 // check for args in form name:lang
1566 text_t name = g_EmptyText;
1567 text_t lang = g_EmptyText;
1568 bool colonfound=false;
1569 text_t::const_iterator a = (*here).first.begin();
1570 text_t::const_iterator b = (*here).first.end();
1571 while (a !=b) {
1572 if (*a==':') {
1573 colonfound=true;
1574 }
1575 else {
1576 if (colonfound)
1577 lang.push_back(*a);
1578 else name.push_back(*a);
1579 }
1580 ++a;
1581 }
1582 if (!lang.empty()) {
1583 if (args["l"]==lang) {
1584 disp.setcollectionmacro(displayclass::defaultpackage, name, "", (*here).second.values[0]);
1585 }
1586 }
1587 else { // the default one
1588 disp.setcollectionmacro(displayclass::defaultpackage, (*here).first, "", (*here).second.values[0]);
1589 }
1590 }
1591 ++here;
1592 }
1593 }
1594
1595 text_t iconcollection;
1596 disp.expandstring (displayclass::defaultpackage, "_iconcollection_", iconcollection);
1597 if (!iconcollection.empty())
1598 {
1599 ColInfoResponse_t cinfo;
1600 comerror_t err;
1601 collectproto->get_collectinfo (collection, cinfo, err, logout);
1602 if (iconcollection[0]=='/' && !cinfo.httpdomain.empty())
1603 {
1604 // local but with full path
1605 iconcollection = "http://" + cinfo.httpdomain + iconcollection;
1606 disp.setmacro("iconcollection", displayclass::defaultpackage, iconcollection);
1607 }
1608 }
1609 }
1610 }
1611
1612 if (!collection.empty()) {
1613 ColInfoResponse_t cinfo;
1614 comerror_t err;
1615 recptproto *collectproto = protocols.getrecptproto (collection, logout);
1616 if (collectproto != NULL) {
1617 collectproto->get_collectinfo (collection, cinfo, err, logout);
1618
1619
1620 // This part of the code used to use "cinfo.httpprefix" regardless
1621 // of the value it contained. Since
1622 // this can come back with an empty (in the case of gsdl_mod), the
1623 // URL produced was invalid.
1624 //
1625 // Changed to test for empty first, and use configinfo.httpprefix as
1626 // a "backup"
1627 //
1628 // Point to consider: since configinfo.httpprefix has been offically
1629 // set as "httpprefix" in macros, it seems to make more sense to use
1630 // always use that version and not the cinfo version at all.
1631
1632 text_t httpprefix
1633 = (!cinfo.httpprefix.empty()) ? cinfo.httpprefix : configinfo.httpprefix;
1634
1635 text_t httpcollection;
1636 if (!cinfo.httpdomain.empty()) httpcollection = "http://";
1637 httpcollection += cinfo.httpdomain + httpprefix + "/collect/"
1638 + encodeForURL(collection);
1639 disp.setmacro ("httpcollection", displayclass::defaultpackage,
1640 httpcollection);
1641
1642 // as of gsdl 2.53, collect.cfg can specify macros
1643 if (cinfo.collection_macros.size() > 0) {
1644 collectionmeta_map::const_iterator this_macro=cinfo.collection_macros.begin();
1645 collectionmeta_map::const_iterator done_macro=cinfo.collection_macros.end();
1646 while (this_macro != done_macro) {
1647 text_t package = "Global";
1648 text_t macroname = this_macro->first;
1649 // if this macro name is AAA:bbb then extract the package name
1650 text_t::const_iterator thischar, donechar;
1651 thischar = macroname.begin();
1652 donechar = macroname.end();
1653 while (thischar < donechar) {
1654 if (*thischar == ':') {
1655 package = substr(macroname.begin(),thischar);
1656 macroname = substr(thischar+1,donechar);
1657 break;
1658 }
1659 ++thischar;
1660 }
1661
1662 text_tmap params_map = this_macro->second;
1663 text_tmap::const_iterator this_param = params_map.begin();
1664 text_tmap::const_iterator done_param = params_map.end();
1665 while (this_param != done_param) {
1666 disp.setcollectionmacro(package,
1667 macroname,
1668 this_param->first,
1669 this_param->second);
1670 ++this_param;
1671 }
1672
1673 ++this_macro;
1674 }
1675 } // col macros
1676 } // collectproto != NULL
1677 }
1678
1679}
1680
1681// gets collection info from cache if found or
1682// calls collection server (and updates cache)
1683// returns NULL if there's an error
1684ColInfoResponse_t *receptionist::get_collectinfo_ptr (recptproto *collectproto,
1685 const text_t &collection,
1686 ostream &logout) {
1687
1688 // check the cache
1689 colinfo_tmap::iterator it = configinfo.collectinfo.find (collection);
1690 if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) {
1691 // found it
1692 return &((*it).second.info);
1693 }
1694
1695 // not cached, get info from collection server
1696 if (collectproto == NULL) {
1697 logout << "ERROR: receptionist::get_collectinfo_ptr passed null collectproto\n";
1698 return NULL;
1699 }
1700
1701 comerror_t err;
1702 if (it == configinfo.collectinfo.end()) {
1703 collectioninfo_t cinfo;
1704 collectproto->get_collectinfo (collection, cinfo.info, err, logout);
1705 if (err != noError) {
1706 outconvertclass text_t2ascii;
1707 logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \""
1708 << get_comerror_string (err) << "\"while getting collectinfo\n";
1709 return NULL;
1710 }
1711 cinfo.info_loaded = true;
1712 configinfo.collectinfo[collection] = cinfo;
1713 return &(configinfo.collectinfo[collection].info);
1714 } else {
1715 collectproto->get_collectinfo (collection, (*it).second.info, err, logout);
1716 if (err != noError) {
1717 outconvertclass text_t2ascii;
1718 logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \""
1719 << get_comerror_string (err) << "\"while getting collectinfo\n";
1720 return NULL;
1721 }
1722 (*it).second.info_loaded = true;
1723 return &((*it).second.info);
1724 }
1725}
1726
1727// removes a collection from the cache so that the next
1728// call to get_collectinfo_ptr() for that collection will
1729// retrieve the collection info from the collection server
1730void receptionist::uncache_collection (const text_t &collection) {
1731
1732 colinfo_tmap::iterator it = configinfo.collectinfo.find (collection);
1733 if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) {
1734
1735 (*it).second.info_loaded = false;
1736
1737 }
1738}
1739
1740// Handles an "Encoding" line from a configuration file - note that the
1741// configinfo.encodings map is a bit of a hack (to be fixed when the
1742// configuration files are tidied up).
1743void receptionist::configure_encoding (const text_tarray &cfgline) {
1744
1745 text_t subkey, subvalue, shortname, longname, mapfile;
1746 int multibyte = 0;
1747 text_t::const_iterator cfglinesub_here;
1748 text_tarray::const_iterator cfgline_here = cfgline.begin();
1749 text_tarray::const_iterator cfgline_end = cfgline.end();
1750 while (cfgline_here != cfgline_end) {
1751 if (*cfgline_here == "multibyte") {
1752 multibyte = 1;
1753 } else {
1754 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
1755 (*cfgline_here).end(), '=', subkey);
1756 if (subkey == "shortname") {
1757 shortname = substr (cfglinesub_here, (*cfgline_here).end());
1758 } else if (subkey == "longname") {
1759 longname = substr (cfglinesub_here, (*cfgline_here).end());
1760 } else if (subkey == "map") {
1761 mapfile = substr (cfglinesub_here, (*cfgline_here).end());
1762 }
1763 }
1764 ++cfgline_here;
1765 }
1766 if (!shortname.empty()) {
1767 if (longname.empty()) longname = shortname;
1768
1769 // add the converter
1770 if (shortname == "utf-8") {
1771 utf8inconvertclass *utf8inconvert = new utf8inconvertclass();
1772 utf8outconvertclass *utf8outconvert = new utf8outconvertclass();
1773 utf8outconvert->set_rzws(1);
1774 add_converter (shortname, utf8inconvert, utf8outconvert);
1775 configinfo.encodings[longname] = shortname;
1776
1777 } else if (shortname == "utf-16be") {
1778 // we use the default input converter as this shouldn't ever be used
1779 // for converting from unicode...
1780 inconvertclass *inconverter = new inconvertclass();
1781 utf16outconvertclass *outconverter = new utf16outconvertclass();
1782 add_converter (shortname, inconverter, outconverter);
1783 configinfo.encodings[longname] = shortname;
1784
1785 } else if (!mapfile.empty()) {
1786
1787 if (mapfile == "8859_1.ump") {
1788 // iso-8859-1 is a special case as it'll always be supported by the
1789 // standard converter class and therefore doesn't need to use its
1790 // mapping file
1791 inconvertclass *inconvert = new inconvertclass();
1792 rzwsoutconvertclass *outconvert = new rzwsoutconvertclass();
1793 outconvert->set_rzws(1);
1794 add_converter (shortname, inconvert, outconvert);
1795 configinfo.encodings[longname] = shortname;
1796
1797 } else {
1798 text_t to_uc_map = filename_cat(configinfo.gsdlhome, "mappings", "to_uc", mapfile);
1799 text_t from_uc_map = filename_cat(configinfo.gsdlhome, "mappings", "from_uc", mapfile);
1800 if (file_exists(to_uc_map) && file_exists(from_uc_map)) {
1801
1802 mapinconvertclass *mapinconvert = new mapinconvertclass();
1803 mapinconvert->setmapfile (to_uc_map, 0x003F);
1804 mapinconvert->set_multibyte (multibyte);
1805 mapoutconvertclass *mapoutconvert = new mapoutconvertclass();
1806 mapoutconvert->setmapfile (from_uc_map, 0x3F);
1807 mapoutconvert->set_multibyte (multibyte);
1808 mapoutconvert->set_rzws(1);
1809 add_converter (shortname, mapinconvert, mapoutconvert);
1810 configinfo.encodings[longname] = shortname;
1811 }
1812 }
1813 }
1814 }
1815}
Note: See TracBrowser for help on using the repository browser.