source: main/trunk/greenstone2/runtime-src/packages/security/gs-code-changes/receptionist.cpp@ 28886

Last change on this file since 28886 was 28886, checked in by ak19, 10 years ago

Additions to the OWASP-for-C++ security package, even though we are no longer using this now. Adding Dr Bainbridge's new files esapitools.cpp and .h, in case we use this in future, along with its Makefile.in. Adding configure files to enable/disable the security package. (The Makefile in runtime-src/packages is not yet complete.)

  • Property svn:executable set to *
File size: 61.1 KB
Line 
1/**********************************************************************
2 *
3 * receptionist.cpp -- a web interface for the gsdl
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26// following line required to get fstream.filedesc() on darwin (Mac OS X)
27// gcc 2.91 automatically defines this in stream.h
28#define _STREAM_COMPAT 1
29
30#include "receptionist.h"
31#include "recptprototools.h"
32#include "fileutil.h"
33#include "cgiutils.h"
34#include "htmlutils.h"
35#include "gsdltools.h"
36#include "gsdltimes.h"
37#include "OIDtools.h"
38#include "securitytools.h"
39#include <assert.h>
40#include <time.h>
41#include <stdio.h> // for open()
42#include <fcntl.h> // for open() flags
43// following 2 are for printing Last-Modified http header.
44#include <sys/stat.h>
45#include <time.h>
46
47#include "EsapiCommon.h"
48//using esapi::String;
49#include "Encoder.h"
50using esapi::Encoder;
51#include "EsapiTypes.h"
52#include "reference/DefaultEncoder.h"
53using esapi::DefaultEncoder;
54#include "esapitools.h"
55//#include <string>
56
57#if defined (GSDL_USE_IOS_H)
58#include <fstream.h>
59#else
60#include <fstream>
61#endif
62
63void recptconf::clear () {
64 gsdlhome.clear();
65 collecthome.clear();
66 dbhome.clear();
67 collectinfo.erase(collectinfo.begin(), collectinfo.end());
68 collection.clear();
69 collectdir.clear();
70 httpprefix.clear();
71 httpweb.clear();
72 gwcgi.clear();
73 macrofiles.erase(macrofiles.begin(), macrofiles.end());
74 saveconf.clear();
75 usecookies = false;
76 logcgiargs = false;
77 LogDateFormat = LocalTime;
78
79 maintainer.clear();
80 MailServer.clear();
81 LogEvents = Disabled;
82 EmailEvents = Disabled;
83 EmailUserEvents = false;
84
85 languages.erase(languages.begin(), languages.end());
86 encodings.erase(encodings.begin(), encodings.end());
87
88 site_auth = false;
89 HomePageType = "images";
90 HomePageCols = 3;
91
92 // these default page parameters can always be overriden
93 // in the configuration file
94 pageparams.erase(pageparams.begin(), pageparams.end());
95 pageparams["c"] = "";
96 pageparams["l"] = "en";
97
98#ifdef MACROPRECEDENCE
99 macroprecedence = MACROPRECEDENCE;
100#else
101 macroprecedence.clear();
102#endif
103}
104
105
106void collectioninfo_t::clear () {
107 gsdl_gsdlhome.clear();
108 gsdl_dbhome.clear();
109
110 info_loaded = false;
111 info.clear();
112}
113
114void languageinfo_t::clear () {
115 longname.clear();
116 defaultencoding.clear();
117}
118
119receptionist::receptionist () {
120 // create a list of cgi arguments
121 // this must be done before the configuration
122
123 cgiarginfo ainfo;
124
125 ainfo.shortname = "e";
126 ainfo.longname = "compressed arguments";
127 ainfo.multiplechar = true;
128 ainfo.defaultstatus = cgiarginfo::good;
129 ainfo.argdefault = g_EmptyText;
130 ainfo.savedarginfo = cgiarginfo::mustnot;
131 argsinfo.addarginfo (NULL, ainfo);
132
133 ainfo.shortname = "a";
134 ainfo.longname = "action";
135 ainfo.multiplechar = true;
136 ainfo.defaultstatus = cgiarginfo::none;
137 ainfo.argdefault = g_EmptyText;
138 ainfo.savedarginfo = cgiarginfo::must;
139 argsinfo.addarginfo (NULL, ainfo);
140
141 // w=western
142 ainfo.shortname = "w";
143 ainfo.longname = "encoding";
144 ainfo.multiplechar = true;
145 ainfo.defaultstatus = cgiarginfo::none;
146 ainfo.argdefault = g_EmptyText;
147 ainfo.savedarginfo = cgiarginfo::must;
148 argsinfo.addarginfo (NULL, ainfo);
149
150 ainfo.shortname = "nw";
151 ainfo.longname = "new encoding";
152 ainfo.multiplechar = true;
153 ainfo.defaultstatus = cgiarginfo::none;
154 ainfo.argdefault = g_EmptyText;
155 ainfo.savedarginfo = cgiarginfo::mustnot;
156 argsinfo.addarginfo (NULL, ainfo);
157
158 ainfo.shortname = "c";
159 ainfo.longname = "collection";
160 ainfo.multiplechar = true;
161 ainfo.defaultstatus = cgiarginfo::none;
162 ainfo.argdefault = g_EmptyText;
163 ainfo.savedarginfo = cgiarginfo::must;
164 argsinfo.addarginfo (NULL, ainfo);
165
166 // the interface language name should use the ISO 639
167 // standard
168 ainfo.shortname = "l";
169 ainfo.longname = "interface language";
170 ainfo.multiplechar = true;
171 ainfo.defaultstatus = cgiarginfo::weak;
172 ainfo.argdefault = "en";
173 ainfo.savedarginfo = cgiarginfo::must;
174 argsinfo.addarginfo (NULL, ainfo);
175
176 ainfo.shortname = "nl";
177 ainfo.longname = "new language";
178 ainfo.multiplechar = false;
179 ainfo.defaultstatus = cgiarginfo::none;
180 ainfo.argdefault = "0";
181 ainfo.savedarginfo = cgiarginfo::mustnot;
182 argsinfo.addarginfo (NULL, ainfo);
183
184 // the GSDL_UID (cookie)
185 ainfo.shortname = "z";
186 ainfo.longname = "gsdl uid";
187 ainfo.multiplechar = true;
188 ainfo.defaultstatus = cgiarginfo::none;
189 ainfo.argdefault = g_EmptyText;
190 ainfo.savedarginfo = cgiarginfo::mustnot;
191 argsinfo.addarginfo (NULL, ainfo);
192}
193
194
195void receptionist::add_action (action *theaction) {
196 // make sure we have an action to add
197 if (theaction == NULL) return;
198
199 // add this action to the list of actions
200 actions.addaction(theaction);
201
202 // add the cgi arguments from this action
203 argsinfo.addarginfo (NULL, *(theaction->getargsinfo()));
204}
205
206
207void receptionist::add_browser (browserclass *thebrowser) {
208 // make sure we have a browser to add
209 if (thebrowser == NULL) return;
210
211 // add this browser to the list of browsers
212 browsers.addbrowser(thebrowser);
213}
214
215
216void receptionist::setdefaultbrowser (const text_t &browsername) {
217 browsers.setdefaultbrowser (browsername);
218}
219
220
221// configure should be called for each line in the
222// configuration files to configure the receptionist and everything
223// it contains. The configuration should take place after everything
224// has been added but before the initialisation.
225
226void receptionist::configure (const text_t &key, const text_tarray &cfgline) {
227 // configure the receptionist
228
229
230
231 if (cfgline.size() >= 1) {
232 cgiarginfo *info = NULL;
233 if (key == "gsdlhome") {
234 configinfo.gsdlhome = cfgline[0];
235 if (configinfo.dbhome.empty()) configinfo.dbhome = cfgline[0];
236 }
237 else if (key == "collecthome") configinfo.collecthome = cfgline[0];
238 else if (key == "gdbmhome") configinfo.dbhome = cfgline[0];
239 else if (key == "collection") {
240 configinfo.collection = cfgline[0];
241 // also need to set the default arg to this collection
242 if ((info = argsinfo.getarginfo("c")) != NULL) {
243 info->defaultstatus = cgiarginfo::good;
244 info->argdefault = cfgline[0];
245 }
246
247 }
248 else if (key == "collectdir") configinfo.collectdir = cfgline[0];
249 else if (key == "httpprefix") configinfo.httpprefix = cfgline[0];
250 else if (key == "httpweb") configinfo.httpweb = cfgline[0];
251 else if (key == "gwcgi") configinfo.gwcgi = cfgline[0];
252 else if (key == "macrofiles") {
253 // want to append to macrofiles (i.e. may be several config files
254 // contributing, maybe from several collections).
255 text_tarray::const_iterator here = cfgline.begin();
256 text_tarray::const_iterator end = cfgline.end();
257 while (here != end) {
258 configinfo.macrofiles.insert (*here);
259 ++here;
260 }
261 }
262 else if (key == "saveconf") configinfo.saveconf = cfgline[0];
263 else if (key == "usecookies") configinfo.usecookies = (cfgline[0] == "true");
264 else if (key == "logcgiargs") configinfo.logcgiargs = (cfgline[0] == "true");
265 else if (key == "maintainer") configinfo.maintainer = cfgline[0];
266 else if (key == "MailServer") configinfo.MailServer = cfgline[0];
267 else if (key == "LogDateFormat") {
268 if (cfgline[0] == "UTCTime") configinfo.LogDateFormat = UTCTime;
269 else if (cfgline[0] == "Absolute") configinfo.LogDateFormat = Absolute;
270 }
271 else if (key == "LogEvents") {
272 if (cfgline[0] == "CollectorEvents") configinfo.LogEvents = CollectorEvents;
273 else if (cfgline[0] == "AllEvents") configinfo.LogEvents = AllEvents;
274 }
275 else if (key == "EmailEvents") {
276 if (cfgline[0] == "CollectorEvents") configinfo.EmailEvents = CollectorEvents;
277 else if (cfgline[0] == "AllEvents") configinfo.EmailEvents = AllEvents;
278 }
279 else if (key == "EmailUserEvents") configinfo.EmailUserEvents = (cfgline[0] == "true");
280 else if (key == "pageparam") {
281 if (cfgline.size() >= 2) configinfo.pageparams[cfgline[0]] = cfgline[1];
282 else configinfo.pageparams[cfgline[0]] = "";
283 }
284 else if (key == "macroprecedence") configinfo.macroprecedence = cfgline[0];
285 else if (key == "collectinfo") {
286 if (cfgline.size() == 3) {
287 // for backwards compatability with older collections that only use
288 // gsdlhome and dbhome
289 collectioninfo_t cinfo;
290 cinfo.gsdl_gsdlhome = cfgline[1];
291 cinfo.gsdl_collecthome = filename_cat(cfgline[1],"collect");
292 cinfo.gsdl_dbhome = cfgline[2];
293 configinfo.collectinfo[cfgline[0]] = cinfo;
294 }
295 else if (cfgline.size() >= 4) {
296 collectioninfo_t cinfo;
297 cinfo.gsdl_gsdlhome = cfgline[1];
298 cinfo.gsdl_collecthome = cfgline[2];
299 cinfo.gsdl_dbhome = cfgline[3];
300 configinfo.collectinfo[cfgline[0]] = cinfo;
301 }
302 }
303
304 // Read in the value for the site_auth directive either true or false
305 else if (key == "site_auth") configinfo.site_auth = (cfgline[0] == "true");
306
307 else if (key == "site_group")
308 joinchar(cfgline,',',configinfo.site_group);
309
310 else if (key == "SiteFormat") {
311 if (cfgline[0] == "HomePageType") {
312 configinfo.HomePageType = cfgline[1];
313 } else if (cfgline[0] == "HomePageCols") {
314 configinfo.HomePageCols = cfgline[1].getint();
315 }
316 }
317
318 else if (key == "cgiarg") {
319 // get shortname
320 bool seen_defaultstatus = false;
321 text_t subkey, subvalue;
322 text_t shortname;
323 text_t::const_iterator cfglinesub_here;
324 text_tarray::const_iterator cfgline_here = cfgline.begin();
325 text_tarray::const_iterator cfgline_end = cfgline.end();
326 while (cfgline_here != cfgline_end) {
327 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
328 (*cfgline_here).end(), '=', subkey);
329 if (subkey == "shortname") {
330 shortname = substr (cfglinesub_here, (*cfgline_here).end());
331 }
332 ++cfgline_here;
333 }
334
335 // if we found the shortname process the line again filling in values
336 if (!shortname.empty()) {
337 cgiarginfo &chinfo = argsinfo[shortname];
338 chinfo.shortname = shortname; // in case this is a new argument
339
340 cfgline_here = cfgline.begin();
341 while (cfgline_here != cfgline_end) {
342 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
343 (*cfgline_here).end(), '=', subkey);
344 subvalue = substr (cfglinesub_here, (*cfgline_here).end());
345
346 if (subkey == "longname") chinfo.longname = subvalue;
347 else if (subkey == "multiplechar") chinfo.multiplechar = (subvalue == "true");
348 else if (subkey == "multiplevalue") chinfo.multiplevalue = (subvalue == "true");
349 else if (subkey == "defaultstatus") {
350 seen_defaultstatus = true;
351 if (subvalue == "none") chinfo.defaultstatus = cgiarginfo::none;
352 else if (subvalue == "weak") chinfo.defaultstatus = cgiarginfo::weak;
353 else if (subvalue == "good") chinfo.defaultstatus = cgiarginfo::good;
354 else if (subvalue == "config") chinfo.defaultstatus = cgiarginfo::config;
355 else if (subvalue == "imperative") chinfo.defaultstatus = cgiarginfo::imperative;
356 }
357 else if (subkey == "argdefault") {
358 chinfo.argdefault = subvalue;
359 if (!seen_defaultstatus) chinfo.defaultstatus = cgiarginfo::config;
360 }
361 else if (subkey == "savedarginfo") {
362 if (subvalue == "mustnot") chinfo.savedarginfo = cgiarginfo::mustnot;
363 else if (subvalue == "can") chinfo.savedarginfo = cgiarginfo::can;
364 else if (subvalue == "must") chinfo.savedarginfo = cgiarginfo::must;
365 }
366
367 ++cfgline_here;
368 }
369 }
370
371 } else if (key == "Encoding") {
372
373 configure_encoding (cfgline);
374
375 } else if (key == "Language") {
376 text_t subkey, subvalue, shortname;
377 languageinfo_t lang;
378 text_t::const_iterator cfglinesub_here;
379 text_tarray::const_iterator cfgline_here = cfgline.begin();
380 text_tarray::const_iterator cfgline_end = cfgline.end();
381 while (cfgline_here != cfgline_end) {
382 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
383 (*cfgline_here).end(), '=', subkey);
384 if (subkey == "shortname") {
385 shortname = substr (cfglinesub_here, (*cfgline_here).end());
386 } else if (subkey == "longname") {
387 lang.longname = substr (cfglinesub_here, (*cfgline_here).end());
388 } else if (subkey == "default_encoding") {
389 lang.defaultencoding = substr (cfglinesub_here, (*cfgline_here).end());
390 }
391 ++cfgline_here;
392 }
393 if (!shortname.empty()) {
394 if (lang.longname.empty()) lang.longname = shortname;
395 configinfo.languages[shortname] = lang;
396 }
397 }
398 }
399
400 // configure the actions
401 actionptrmap::iterator actionhere = actions.begin ();
402 actionptrmap::iterator actionend = actions.end ();
403
404 while (actionhere != actionend) {
405 assert ((*actionhere).second.a != NULL);
406 if ((*actionhere).second.a != NULL)
407 (*actionhere).second.a->configure(key, cfgline);
408
409 ++actionhere;
410 }
411
412 // configure the protocols
413 recptprotolistclass::iterator protohere = protocols.begin ();
414 recptprotolistclass::iterator protoend = protocols.end ();
415
416 while (protohere != protoend) {
417 assert ((*protohere).p != NULL);
418 comerror_t err;
419 if ((*protohere).p != NULL)
420 (*protohere).p->configure(key, cfgline, err);
421
422 ++protohere;
423 }
424
425 // configure the browsers
426 browserptrmap::iterator browserhere = browsers.begin ();
427 browserptrmap::iterator browserend = browsers.end ();
428
429 while (browserhere != browserend) {
430 assert ((*browserhere).second.b != NULL);
431 if ((*browserhere).second.b != NULL)
432 (*browserhere).second.b->configure(key, cfgline);
433
434 ++browserhere;
435 }
436}
437
438
439void receptionist::configure (const text_t &key, const text_t &value) {
440 text_tarray cfgline;
441 cfgline.push_back (value);
442 configure(key, cfgline);
443}
444
445
446// init should be called after all the actions and protocols have been
447// added to the receptionist and after everything has been configured but
448// before any pages are created. It returns true on success and false on
449// failure. If false is returned getpage should not be called (without
450// producing meaningless output), instead an error page should be produced
451// by the calling code.
452bool receptionist::init (ostream &logout) {
453
454 // first configure collectdir
455 if (!configinfo.collection.empty()) {
456
457 // collection specific mode
458
459 text_t collectdir = configinfo.gsdlhome;
460
461 if (!configinfo.collectdir.empty()) {
462 // has already been configured
463 collectdir = configinfo.collectdir;
464 } else {
465
466 // decide where collectdir is by searching for collect.cfg
467 // look in $GSDLHOME/collect/collection-name/etc/collect.cfg and
468 // then $GSDLHOME/etc/collect.cfg
469 collectdir = filename_cat (configinfo.gsdlhome, "collect");
470 collectdir = filename_cat (collectdir, configinfo.collection);
471 text_t filename = filename_cat (collectdir, "etc");
472 filename = filename_cat (filename, "collect.cfg");
473
474 if (!file_exists(filename)) collectdir = configinfo.gsdlhome;
475 }
476
477 configure("collectdir", collectdir);
478
479 }
480 else {
481
482 text_t collecthome;
483 if (configinfo.collecthome.empty()) {
484 collecthome = filename_cat(configinfo.gsdlhome,"collect");
485 }
486 else {
487 collecthome = configinfo.collecthome;
488 }
489
490 configure("collecthome", collecthome);
491
492 // for backwards compatability collectdir set to gsdlhome
493 // (possible it could now be removed)
494 configure("collectdir", configinfo.gsdlhome);
495 }
496
497
498 // read in the macro files
499 if (!read_macrofiles (logout)) return false;
500
501 // there must be at least one action defined
502 if (actions.empty()) {
503 logout << "Error: no actions have been added to the receptionist\n";
504 return false;
505 }
506
507 // there must be at least one browser defined
508 if (browsers.empty()) {
509 logout << "Error: no browsers have been added to the receptionist\n";
510 return false;
511 }
512
513 // create a saveconf string if there isn't one already
514 if (configinfo.saveconf.empty())
515 configinfo.saveconf = create_save_conf_str (argsinfo, logout);
516
517 // check the saveconf string
518 if (!check_save_conf_str (configinfo.saveconf, argsinfo, logout))
519 return false;
520
521 // set a random seed
522 srand (time(NULL));
523
524 // if maintainer email address is something dodgy (for now I'll define
525 // dodgy as being anything that doesn't contain '@') disable EmailEvents
526 // and EmailUserEvents (we don't strictly need to disable EmailUserEvents
527 // in this case but we will as it seems likely that MailServer will also
528 // be screwed up if maintainer is).
529 text_t::const_iterator maintainer_end = configinfo.maintainer.end ();
530 text_t::const_iterator maintainer_here = findchar ((text_t::const_iterator)configinfo.maintainer.begin(),
531 maintainer_end, '@');
532 if (maintainer_here == maintainer_end) {
533 configinfo.EmailEvents = Disabled;
534 configinfo.EmailUserEvents = Disabled;
535 } else {
536 // if MailServer isn't set it should default to mail.maintainer-domain
537 if (configinfo.MailServer.empty()) {
538 configinfo.MailServer = "mail." + substr (maintainer_here+1, maintainer_end);
539 }
540 }
541
542 // init the actions
543 actionptrmap::iterator actionhere = actions.begin ();
544 actionptrmap::iterator actionend = actions.end ();
545 while (actionhere != actionend) {
546 if (((*actionhere).second.a == NULL) ||
547 !(*actionhere).second.a->init(logout)) return false;
548 ++actionhere;
549 }
550
551 // init the protocols
552 recptprotolistclass::iterator protohere = protocols.begin ();
553 recptprotolistclass::iterator protoend = protocols.end ();
554 while (protohere != protoend) {
555 comerror_t err;
556 if (((*protohere).p == NULL) ||
557 !(*protohere).p->init(err, logout)) return false;
558 ++protohere;
559 }
560
561 // init the browsers
562 browserptrmap::iterator browserhere = browsers.begin ();
563 browserptrmap::iterator browserend = browsers.end ();
564 while (browserhere != browserend) {
565 if (((*browserhere).second.b == NULL) ||
566 !(*browserhere).second.b->init(logout)) return false;
567 ++browserhere;
568 }
569
570 return true;
571}
572
573// get the default encoding for the given language - if it fails for any
574// reason return ""
575text_t receptionist::get_default_encoding (const text_t &language) {
576
577 // make sure language is valid
578 if (configinfo.languages.find(language) == configinfo.languages.end()) return "";
579
580 text_t default_encoding = configinfo.languages[language].defaultencoding;
581
582 // make sure the encoding is valid
583 if (converters.find(default_encoding) == converters.end()) {
584 // we don't support the encoding specified as default for this language
585 if (configinfo.encodings.size()==1) {
586 // only 1 encoding specified in main.cfg, so use it
587 return configinfo.encodings.begin()->second;
588 }
589 return "";
590 }
591
592 return default_encoding;
593}
594
595// parse_cgi_args parses cgi arguments into an argument class.
596// This function should be called for each page request. It returns false
597// if there was a major problem with the cgi arguments.
598bool receptionist::parse_cgi_args (const text_t &argstr,
599 fileupload_tmap &fileuploads,
600 cgiargsclass &args,
601 ostream &logout, text_tmap &fcgienv) {
602
603 // get an initial list of cgi arguments
604 args.clear();
605 split_cgi_args (argsinfo, argstr, args);
606
607 // expand the compressed argument (if there was one)
608 if (!expand_save_args (argsinfo, configinfo.saveconf, args, logout)) return false;
609
610 // add the defaults
611 add_default_args (argsinfo, args, logout);
612
613 // add any file upload arguments
614 add_fileupload_args(argsinfo, args, fileuploads, logout);
615
616 // get the cookie
617 if (configinfo.usecookies) get_cookie(args["z"], fcgienv);
618
619 // if we're changing languages, set the encoding to the default for the new language
620 if (args["nl"] == "1") {
621 args["nw"] = get_default_encoding(args["l"]);
622 }
623
624 // get the input encoding
625 // if encoding isn't set, set it to the default for the current language
626 if ((args.getarg("w") == NULL) || args["w"].empty()) {
627 args["w"] = get_default_encoding(args["l"]);
628 }
629
630 text_t &arg_w = args["w"];
631
632 inconvertclass defaultinconvert;
633 inconvertclass *inconvert = converters.get_inconverter (arg_w);
634 if (inconvert == NULL) inconvert = &defaultinconvert;
635
636 // see if the next page will have a different encoding
637 if (args.getarg("nw") != NULL) arg_w = args["nw"];
638
639 // convert arguments which aren't in unicode to unicode
640 args_tounicode (args, *inconvert);
641
642
643 // decide on the output conversion class (needed for checking the external
644 // cgi arguments)
645 rzwsoutconvertclass defaultoutconverter;
646 rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
647 if (outconverter == NULL) outconverter = &defaultoutconverter;
648 outconverter->reset();
649
650 // check the main cgi arguments
651 if (!check_mainargs (args, logout)) return false;
652
653 // check the arguments for the action
654 action *a = actions.getaction (args["a"]);
655 if (a != NULL) {
656 if (!a->check_cgiargs (argsinfo, args, &protocols, logout)) return false;
657 } else {
658 // the action was not found!!
659 outconvertclass text_t2ascii;
660 logout << text_t2ascii << "Error: the action \"" << args["a"]
661 << "\" could not be found.\n";
662 return false;
663 }
664
665 // check external cgi arguments for each action
666 actionptrmap::iterator actionhere = actions.begin ();
667 actionptrmap::iterator actionend = actions.end ();
668 while (actionhere != actionend) {
669 assert ((*actionhere).second.a != NULL);
670 if ((*actionhere).second.a != NULL) {
671 if (!(*actionhere).second.a->check_external_cgiargs (argsinfo, args, *outconverter,
672 configinfo.saveconf, logout))
673 return false;
674 }
675 ++actionhere;
676 }
677
678 // the action might have changed but we will assume that
679 // the cgiargs were checked properly when the change was made
680
681 return true;
682}
683
684
685// Returns true if cookie already existed, false if it was generated
686bool receptionist::get_cookie (text_t &cookie, text_tmap &fcgienv)
687{
688 // See if we can get the GSDL_UID cookie
689 text_t cookiestring = gsdl_getenv ("HTTP_COOKIE", fcgienv);
690 if (!cookiestring.empty()) // This should really be handled by the findword function...
691 {
692 // Check if the cookie contains GSDL_UID
693 text_t gsdl_uid = "GSDL_UID=";
694 text_t::iterator gsdl_uid_start = findword(cookiestring.begin(), cookiestring.end(), gsdl_uid);
695 if (gsdl_uid_start != cookiestring.end())
696 {
697 // Yes, so extract its value
698 cookie = substr(gsdl_uid_start + gsdl_uid.size(), findchar(gsdl_uid_start + gsdl_uid.size(), cookiestring.end(), ';'));
699 return true;
700 }
701 }
702
703 // Generate a new key "[host]-[epoch time]", e.g. test.com-1256764496
704 cookie.clear();
705 text_t host = gsdl_getenv("REMOTE_ADDR", fcgienv);
706 time_t ttime = time(NULL);
707 if (!host.empty())
708 {
709 cookie += host;
710 cookie.push_back ('-');
711 }
712 cookie += text_t(ttime);
713
714 return false;
715}
716
717
718// Same as above but just tests if cookie exists
719bool receptionist::get_cookie (text_tmap &fcgienv)
720{
721 text_t cookie_jar = "";
722 return get_cookie(cookie_jar, fcgienv);
723}
724
725
726bool receptionist::log_cgi_args (cgiargsclass &args, ostream &logout, text_tmap &fcgienv) {
727
728 // see if we want to log the cgi arguments
729 if (!configinfo.logcgiargs) return true;
730
731 text_t host = gsdl_getenv ("REMOTE_HOST", fcgienv);
732 text_t script_name = gsdl_getenv ("SCRIPT_NAME", fcgienv);
733 if (host.empty()) host = gsdl_getenv ("REMOTE_ADDR", fcgienv);
734 text_t browser = gsdl_getenv ("HTTP_USER_AGENT", fcgienv);
735
736 cgiargsclass::const_iterator args_here = args.begin();
737 cgiargsclass::const_iterator args_end = args.end();
738
739 text_t argstr;
740 bool first = true;
741 while (args_here != args_end) {
742 if (!first) argstr += ", ";
743 argstr += (*args_here).first + "=" + (*args_here).second.value;
744 first = false;
745 ++args_here;
746 }
747
748 text_t logfile = filename_cat (configinfo.dbhome, "etc", "usage.txt");
749
750 text_t logstr = script_name;
751 logstr += " " + host;
752 logstr += " [";
753 if (configinfo.LogDateFormat == UTCTime) {
754 logstr += get_date (false);
755 } else if (configinfo.LogDateFormat == Absolute) {
756 time_t ttime = time(NULL);
757 logstr += ttime;
758 } else {
759 // LocalTime
760 logstr += get_date (true);
761 }
762 logstr += "] (" + argstr + ") \"";
763 logstr += browser;
764 logstr += "\"\n";
765
766 return append_logstr (logfile, logstr, logout);
767}
768
769bool receptionist::append_logstr (const text_t &filename, const text_t &logstr,
770 ostream &logout) {
771
772 char *lfile = filename.getcstr();
773
774 int fd = open(lfile, O_CREAT | O_WRONLY | O_APPEND, 0777);
775 //int fd = open(lfile, O_CREAT | O_RDWR | O_APPEND, 0777);
776
777 if (fd == -1) {
778 logout << "Error: Couldn't open file " << lfile << "\n";
779 delete []lfile;
780 return false;
781 }
782
783 // lock_val is set to 0 if file is locked successfully
784 int lock_val = 1;
785 GSDL_LOCK_FILE (fd);
786 if (lock_val == 0) {
787 // Write the string out in UTF-8
788 text_t tmp_log_str_utf8 = to_utf8(logstr);
789 char *buffer = tmp_log_str_utf8.getcstr();
790 size_t num_chars = tmp_log_str_utf8.size();
791 write(fd, buffer, num_chars);
792 GSDL_UNLOCK_FILE (fd);
793 delete []buffer;
794 } else {
795 logout << "Error: Couldn't lock file " << lfile << "\n";
796 close(fd);
797 delete []lfile;
798 return false;
799 }
800
801 close(fd);
802
803 delete []lfile;
804 return true;
805}
806
807text_t receptionist::expandmacros (const text_t &astring, cgiargsclass &args,
808 ostream &logout) {
809 text_t outstring;
810 outconvertclass text_t2ascii;
811
812 action *a = actions.getaction (args["a"]);
813 if (a != NULL)
814 {
815 prepare_page (a, args, text_t2ascii, logout);
816 }
817 disp.expandstring (displayclass::defaultpackage, astring, outstring);
818 return outstring;
819}
820
821// produce_cgi_page will call get_cgihead_info and
822// produce_content in the appropriate way to output a cgi header and
823// the page content (if needed). If a page could not be created it
824// will return false
825bool receptionist::produce_cgi_page (cgiargsclass &args, ostream &contentout,
826 ostream &logout, text_tmap &fcgienv) {
827 outconvertclass text_t2ascii;
828
829 response_t response;
830 text_t response_data;
831
832 // produce cgi header
833 get_cgihead_info (args, response, response_data, logout, fcgienv);
834 if (response == location) {
835 // location response (url may contain macros!!)
836 response_data = expandmacros (response_data, args, logout);
837
838 contentout << text_t2ascii << "Location: " << response_data << "\n\n";
839 contentout << flush;
840
841 return true;
842 } else if (response == content) {
843 // content response
844
845#ifdef GSDL_NOCACHE
846 contentout << "Expires: Mon, 26 Jul 1997 05:00:00 GMT\n"; // date in the past
847 tm *tm_ptr = NULL;
848 time_t t = time(NULL);
849 tm_ptr = gmtime (&t);
850 if (tm_ptr != NULL) {
851 char *timestr = new char[128];
852 strftime (timestr, 128, "%a, %d %b %Y %H:%M:%S", tm_ptr);
853 contentout << "Last-Modified: " << timestr << " GMT\n"; // always modified
854 delete []timestr;
855 }
856 contentout << "Cache-Control: no-cache, must-revalidate\n"; // HTTP/1.1
857 contentout << "Pragma: no-cache\n"; // HTTP/1.0
858
859#else
860
861 // use the later of build.cfg and collect.cfg modification times
862 // as the Last-Modified: header, for caching values
863 struct stat file_info;
864 time_t latest=0;
865
866 text_t collectname="";
867 collectname=args["c"];
868 if (collectname != "") {
869
870 text_t collecthome;
871 if (!configinfo.collecthome.empty()) {
872 collecthome = configinfo.collecthome;
873 }
874 else {
875 collecthome=filename_cat(configinfo.gsdlhome,"collect");
876 }
877 text_t collectdir=filename_cat(collecthome,collectname);
878
879 text_t buildcfg=filename_cat(collectdir,"index");
880 buildcfg=filename_cat(buildcfg,"build.cfg");
881 char *buildcfg_ptr=buildcfg.getcstr();
882 text_t collectcfg=filename_cat(collectdir,"etc");
883 collectcfg=filename_cat(collectcfg,"collect.cfg");
884 char *collectcfg_ptr=collectcfg.getcstr();
885
886 if (stat(buildcfg_ptr, &file_info)) {
887 // we got an error. Currently don't handle error :(
888 // logout <<
889 } else {
890 latest=file_info.st_mtime;
891 }
892
893 if (stat(collectcfg_ptr, &file_info)) {
894 // error - unhandled for now
895 } else {
896 if (latest<file_info.st_mtime) latest=file_info.st_mtime;
897 }
898 delete []buildcfg_ptr;
899 delete []collectcfg_ptr;
900
901 if (latest>0) {
902 // print out modified time, "DDD, dd MMM YYYY hh:mm:ss" format
903 // c library takes care of mem for this string... (has \n at end!!!!)
904 // latest is currently local time, convert to UTC.
905 struct tm* utc_latest;
906 utc_latest=gmtime(&latest);
907 contentout << "Last-Modified: " << asctime(utc_latest);
908 }
909 } // end of collection != ""
910
911#endif
912
913 contentout << text_t2ascii << "Content-type: " << response_data << "\n\n";
914 }
915 else if (response == undecided_location) {
916 // Wait until later to output the target location
917 // Used for the "I'm feeling lucky" functionality
918 }
919 else {
920 // unknown response
921 logout << "Error: get_cgihead_info returned an unknown response type.\n";
922 return false;
923 }
924
925 // produce cgi page
926 if (!produce_content (args, contentout, logout)) return false;
927
928 // flush contentout
929 contentout << flush;
930 return true;
931}
932
933
934// get_cgihead_info determines the cgi header information for
935// a set of cgi arguments. If response contains location then
936// response_data contains the redirect address. If reponse
937// contains content then reponse_data contains the content-type.
938// Note that images can now be produced by the receptionist.
939// Note also, alternative for get_cgihead_info below which
940// stores the information in a text_tmap so it is more easily digested
941
942void receptionist::get_cgihead_info (cgiargsclass &args, response_t &response,
943 text_t &response_data, ostream &logout,
944 text_tmap &fcgienv) {
945 outconvertclass text_t2ascii;
946
947 // get the action
948 action *a = actions.getaction (args["a"]);
949 if (a != NULL) {
950 a->get_cgihead_info (args, &protocols, response, response_data, logout);
951
952 } else {
953 // the action was not found!!
954 logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \""
955 << args["a"] << "\" could not be found.\n";
956 response = content;
957 response_data = "text/html";
958 }
959
960 // add the encoding information
961 if (response == content) {
962 if (converters.find(args["w"]) != converters.end()) {
963 response_data += "; charset=" + args["w"];
964 } else {
965 // default to latin 1
966 response_data += "; charset=ISO-8859-1";
967 }
968
969 // add cookie if required
970 if (configinfo.usecookies && !get_cookie(fcgienv))
971 response_data += "\nSet-Cookie: GSDL_UID=" + args["z"]
972 + "; expires=Fri, 25-Dec-2037 00:00:00 GMT";
973 }
974}
975
976
977// Alternative version of get_cgihead_info, stores fielded infomation
978// in text_tmap rather than concatenated string
979void receptionist::get_cgihead_info (cgiargsclass &args, text_tmap &headers,
980 ostream &logout, text_tmap &fcgienv) {
981
982 response_t response;
983 text_t response_data;
984
985 // get the action
986 action *a = actions.getaction (args["a"]);
987 if (a != NULL) {
988 a->get_cgihead_info (args, &protocols, response, response_data, logout);
989
990 } else {
991 // the action was not found!!
992 outconvertclass text_t2ascii;
993 logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \""
994 << args["a"] << "\" could not be found.\n";
995 response = content;
996 response_data = "text/html";
997 }
998
999 if (response == location) {
1000 response_data = expandmacros(response_data, args, logout);
1001 headers["Location"] = response_data;
1002 return;
1003 }
1004
1005 // add the encoding information
1006 if (response == content) {
1007
1008 if (converters.find(args["w"]) != converters.end()) {
1009 headers["content-encoding"] = args["w"];
1010 response_data += "; charset=" + args["w"];
1011 } else {
1012 // default to utf-8
1013 headers["content-encoding"] = "utf-8";
1014 response_data += "; charset=utf-8";
1015 }
1016
1017 headers["content-type"] = response_data;
1018
1019 }
1020
1021}
1022
1023
1024
1025// produce the page content
1026bool receptionist::produce_content (cgiargsclass &args, ostream &contentout,
1027 ostream &logout) {
1028
1029 // decide on the output conversion class
1030 text_t &arg_w = args["w"];
1031 rzwsoutconvertclass defaultoutconverter;
1032 rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
1033 if (outconverter == NULL) outconverter = &defaultoutconverter;
1034 outconverter->reset();
1035
1036 // needed for 16-bit unicode only - big endian marker 0xfeff (RFC 2781)
1037 if (arg_w=="utf-16be") {
1038 contentout << '\xfe' << '\xff' ;
1039 }
1040
1041 recptproto *collectproto = protocols.getrecptproto (args["c"], logout);
1042 if (collectproto != NULL) {
1043 // get browsers to process OID
1044 text_t OID = args["d"];
1045 if (OID.empty()) OID = args["cl"];
1046 if (!OID.empty()) {
1047 text_tset metadata;
1048 text_tarray OIDs;
1049 OIDs.push_back (OID);
1050 if (!is_top(OID)) OIDs.push_back (OID + ".pr");
1051 FilterResponse_t response;
1052 metadata.insert ("childtype");
1053 if (get_info (OIDs, args["c"], args["l"], metadata, false, collectproto, response, logout)) {
1054 text_t classifytype;
1055 if (!response.docInfo[0].metadata["childtype"].values[0].empty())
1056 classifytype = response.docInfo[0].metadata["childtype"].values[0];
1057 else if (!is_top (OID)) {
1058 // not sure why this is occasionally not set, but it will
1059 // cause a segfault... possibly if built with no_text? jrm21
1060 if (response.docInfo[1].metadata.find("childtype")
1061 == response.docInfo[1].metadata.end()) {
1062 cerr << "receptionist: no childtype element in metadata map!"
1063 << endl;
1064 } else {
1065 if (!response.docInfo[1].metadata["childtype"].values[0].empty())
1066 classifytype = response.docInfo[1].metadata["childtype"].values[0];
1067 }
1068 }
1069 browserclass *b = browsers.getbrowser (classifytype);
1070 b->processOID (args, collectproto, logout);
1071 }
1072 }
1073
1074 // translate "d" and "cl" arguments if required
1075 translate_OIDs (args, collectproto, logout);
1076 }
1077
1078 // produce the page using the desired action
1079 action *a = actions.getaction (args["a"]);
1080 if (a != NULL) {
1081 if (a->uses_display(args)) prepare_page (a, args, (*outconverter), logout);
1082 if (!a->do_action (args, &protocols, &browsers, disp, (*outconverter), contentout, logout))
1083 return false;
1084 } else {
1085 // the action was not found!!
1086 outconvertclass text_t2ascii;
1087
1088 logout << text_t2ascii << "Error receptionist::produce_content: the action \""
1089 << args["a"] << "\" could not be found.\n";
1090
1091 contentout << (*outconverter)
1092 << "<html>\n"
1093 << "<head>\n"
1094 << "<title>Error</title>\n"
1095 << "</head>\n"
1096 << "<body>\n"
1097 << "<h2>Oops!</h2>\n"
1098 << "Undefined Page. The action \""
1099 << args["a"] << "\" could not be found.\n"
1100 << "</body>\n"
1101 << "</html>\n";
1102 }
1103 return true;
1104}
1105
1106
1107// returns the compressed argument ("e") corresponding to the argument
1108// list. This can be used to save preferences between sessions.
1109text_t receptionist::get_compressed_arg (cgiargsclass &args, ostream &logout) {
1110 // decide on the output conversion class
1111 text_t &arg_w = args["w"];
1112 rzwsoutconvertclass defaultoutconverter;
1113 rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
1114 if (outconverter == NULL) outconverter = &defaultoutconverter;
1115 outconverter->reset();
1116
1117 text_t compressed_args;
1118 if (compress_save_args (argsinfo, configinfo.saveconf, args,
1119 compressed_args, *outconverter, logout))
1120 return compressed_args;
1121
1122 return g_EmptyText;
1123}
1124
1125
1126// will read in all the macro files. If one is not found an
1127// error message will be written to logout and the method will
1128// return false.
1129bool receptionist::read_macrofiles (ostream &logout) {
1130 outconvertclass text_t2ascii;
1131
1132 // redirect the error output to logout
1133 ostream *savedlogout = disp.setlogout (&logout);
1134
1135 // unload any macros that were previously loaded - this allows us to call
1136 // this function a second time to reload all the macro files (useful for
1137 // reading in changed macro files in server versions of greenstone)
1138 disp.unloaddefaultmacros();
1139
1140 // load up the default macro files, the collection directory
1141 // is searched first for the file (if this is being used in
1142 // collection specific mode) and then the main directory(s)
1143 text_t colmacrodir = filename_cat (configinfo.collectdir, "macros");
1144
1145 text_tset maindirs;
1146 text_t gsdlmacrodir = filename_cat (configinfo.gsdlhome, "macros");
1147 maindirs.insert (gsdlmacrodir);
1148 colinfo_tmap::iterator colhere = configinfo.collectinfo.begin();
1149 colinfo_tmap::iterator colend = configinfo.collectinfo.end();
1150 while (colhere != colend) {
1151 if (!((*colhere).second.gsdl_gsdlhome).empty()) {
1152 gsdlmacrodir = filename_cat ((*colhere).second.gsdl_gsdlhome, "macros");
1153 maindirs.insert (gsdlmacrodir);
1154 }
1155 ++colhere;
1156 }
1157
1158 text_tset::iterator arrhere = configinfo.macrofiles.begin();
1159 text_tset::iterator arrend = configinfo.macrofiles.end();
1160 text_t filename;
1161 while (arrhere != arrend) {
1162 bool foundfile = false;
1163
1164 // try in the collection directory if this is being
1165 // run in collection specific mode
1166 if (!configinfo.collection.empty()) {
1167 filename = filename_cat (colmacrodir, *arrhere);
1168 if (file_exists (filename)) {
1169 disp.loaddefaultmacros(filename);
1170 foundfile = true;
1171 }
1172 }
1173
1174 // if we haven't found the macro file yet try in
1175 // the main macro directory(s)
1176 // if file is found in more than one main directory
1177 // we'll load all copies
1178 if (!foundfile) {
1179 text_tset::const_iterator dirhere = maindirs.begin();
1180 text_tset::const_iterator dirend = maindirs.end();
1181 while (dirhere != dirend) {
1182 filename = filename_cat (*dirhere, *arrhere);
1183 if (file_exists (filename)) {
1184 disp.loaddefaultmacros(filename);
1185 foundfile = true;
1186 }
1187 ++dirhere;
1188 }
1189 }
1190
1191 // see if we found the file or not
1192 if (!foundfile) {
1193 logout << text_t2ascii
1194 << "Error: the macro file \"" << *arrhere << "\" could not be found.\n";
1195 if (configinfo.collection.empty()) {
1196 text_t dirs;
1197 joinchar (maindirs, ", ", dirs);
1198 logout << text_t2ascii
1199 << "It should be in either of the following directories ("
1200 << dirs << ").\n\n";
1201
1202 } else {
1203 logout << text_t2ascii
1204 << "It should be in either " << colmacrodir << " or in "
1205 << gsdlmacrodir << ".\n\n";
1206 }
1207 // don't crap out if a macro file is missing
1208 //disp.setlogout (savedlogout);
1209 //return false;
1210 }
1211 ++arrhere;
1212 }
1213
1214 // success
1215
1216 // reset logout to what it was
1217 disp.setlogout (savedlogout);
1218 return true;
1219}
1220
1221
1222
1223
1224// Go through the list of macro files looking to see
1225// if any exist in the collectoin specific area. If they
1226// do then read them in and add them to the set of existing
1227// current macros
1228
1229void receptionist::read_collection_macrofiles (const text_t& collection, ostream &logout)
1230{
1231 outconvertclass text_t2ascii;
1232
1233 // disp.unloadcollectionmacros();
1234
1235 // redirect the error output to logout
1236 ostream *savedlogout = disp.setlogout (&logout);
1237
1238 text_t colmacrodir
1239 = filename_cat (configinfo.collecthome,collection, "macros");
1240
1241 if (directory_exists (colmacrodir)) {
1242
1243 text_tset::iterator arrhere = configinfo.macrofiles.begin();
1244 text_tset::iterator arrend = configinfo.macrofiles.end();
1245 text_t filename;
1246 while (arrhere != arrend) {
1247
1248 filename = filename_cat (colmacrodir, *arrhere);
1249 if (file_exists (filename)) {
1250 disp.loadcollectionmacros(filename);
1251 }
1252
1253 ++arrhere;
1254 }
1255 }
1256
1257 // reset logout to what it was
1258 disp.setlogout (savedlogout);
1259}
1260
1261
1262
1263
1264// check_mainargs will check all the main arguments. If a major
1265// error is found it will return false and no cgi page should
1266// be created using the arguments.
1267
1268bool receptionist::check_mainargs (cgiargsclass &args, ostream &logout) {
1269
1270 if(configinfo.site_auth)
1271 {
1272 args["uan"] = "1";
1273 args["ug"] = configinfo.site_group;
1274 }
1275
1276
1277 // if this receptionist is running in collection dependant mode
1278 // then it should always set the collection argument to the
1279 // collection
1280 if (!configinfo.collection.empty()) args["c"] = configinfo.collection;
1281
1282 // if current collection uses ccscols make sure
1283 // "ccs" argument is set and make "cc" default to
1284 // all collections in "ccs"
1285 if (args["a"] != "config" && !args["c"].empty()) {
1286
1287 text_t &arg_c = args["c"];
1288 recptproto *collectproto = protocols.getrecptproto (arg_c, logout);
1289 if (collectproto == NULL) {
1290 // oops, this collection isn't valid
1291 outconvertclass text_t2ascii;
1292 logout << text_t2ascii << "ERROR: Invalid collection: " << arg_c << "\n";
1293 // args["c"].clear();
1294
1295 } else {
1296
1297 ColInfoResponse_t *cinfo = get_collectinfo_ptr (collectproto, arg_c, logout);
1298
1299 if(cinfo->authenticate == "collection")
1300 {
1301 args["uan"] = "1";
1302 args["ug"] = cinfo->auth_group;
1303 }
1304
1305
1306 if (cinfo != NULL) {
1307 if (!cinfo->ccsCols.empty()) {
1308 args["ccs"] = 1;
1309 if (args["cc"].empty()) {
1310 text_tarray::const_iterator col_here = cinfo->ccsCols.begin();
1311 text_tarray::const_iterator col_end = cinfo->ccsCols.end();
1312 bool first = true;
1313 while (col_here != col_end) {
1314 // make sure it's a valid collection
1315 if (protocols.getrecptproto (*col_here, logout) != NULL) {
1316 if (!first) args["cc"].push_back (',');
1317 args["cc"] += *col_here;
1318 first = false;
1319 }
1320 ++col_here;
1321 }
1322 }
1323 }
1324 } else {
1325 logout << "ERROR (receptionist::check_mainargs): get_collectinfo_ptr returned NULL\n";
1326 }
1327 }
1328 }
1329
1330 // argument "v" can only be 0 or 1. Use the default value
1331 // if it is out of range
1332 int arg_v = args.getintarg ("v");
1333 if (arg_v != 0 && arg_v != 1) {
1334 cgiarginfo *vinfo = argsinfo.getarginfo ("v");
1335 if (vinfo != NULL) args["v"] = vinfo->argdefault;
1336 }
1337
1338 // argument "f" can only be 0 or 1. Use the default value
1339 // if it is out of range
1340 int arg_f = args.getintarg ("f");
1341 if (arg_f != 0 && arg_f != 1) {
1342 cgiarginfo *finfo = argsinfo.getarginfo ("f");
1343 if (finfo != NULL) args["f"] = finfo->argdefault;
1344 }
1345
1346 return true;
1347}
1348
1349// translate_OIDs translates the "d" and "cl" arguments to their correct values
1350// if they use the tricky ".fc", ".lc" type syntax.
1351void receptionist::translate_OIDs (cgiargsclass &args, recptproto *collectproto,
1352 ostream &logout) {
1353
1354 FilterResponse_t response;
1355 FilterRequest_t request;
1356 comerror_t err;
1357 text_t &arg_d = args["d"];
1358 text_t &arg_cl = args["cl"];
1359 text_t &collection = args["c"];
1360
1361 // do a call to translate OIDs if required
1362 request.filterName = "NullFilter";
1363 request.filterResultOptions = FROID;
1364 if (!arg_d.empty() && needs_translating (arg_d)) {
1365 request.docSet.push_back (arg_d);
1366 collectproto->filter (collection, request, response, err, logout);
1367 arg_d = response.docInfo[0].OID;
1368 request.clear();
1369 }
1370 // we'll also check here that the "cl" argument has a "classify" doctype
1371 // (in case ".fc" or ".lc" have screwed up)
1372 if (needs_translating (arg_cl)) {
1373 request.fields.insert ("doctype");
1374 request.docSet.push_back (arg_cl);
1375 request.filterResultOptions = FRmetadata;
1376 collectproto->filter (collection, request, response, err, logout);
1377 // set to original value (without .xx stuff) if doctype isn't "classify"
1378 if (response.docInfo[0].metadata["doctype"].values[0] != "classify")
1379 strip_suffix (arg_cl);
1380 else
1381 arg_cl = response.docInfo[0].OID;
1382 }
1383}
1384
1385// prepare_page sets up page parameters, sets display macros
1386// and opens the page ready for output
1387void receptionist::prepare_page (action *a, cgiargsclass &args,
1388 outconvertclass &outconvert,
1389 ostream &logout) {
1390 // set up page parameters
1391 text_t pageparams;
1392 bool first = true;
1393
1394 text_tmap::iterator params_here = configinfo.pageparams.begin();
1395 text_tmap::iterator params_end = configinfo.pageparams.end();
1396 while (params_here != params_end) {
1397 // page params are those from main.cfg (eg pageparam v 0) plus
1398 // two defaults set in recptconf.clear() (c="" and l=en)
1399 // This used to check if the current value of the page param
1400 // == the default value, then don't add in it the list
1401 // but if l=en, and there is a macro with [l=en], then it doesn't
1402 // find it.
1403 // so now all page params will go into the list. I assume this will
1404 // mean more attempts to find each macro, but nothing worsee than
1405 // that. --kjdon
1406 //if (args[(*params_here).first] != (*params_here).second) {
1407 if (first)
1408 first = false;
1409 else
1410 pageparams += ",";
1411
1412 pageparams += (*params_here).first;
1413 pageparams += "=";
1414 pageparams += args[(*params_here).first];
1415 // }
1416
1417 ++params_here;
1418 }
1419
1420
1421 // open the page
1422 disp.openpage(pageparams, configinfo.macroprecedence);
1423
1424 disp.unloadcollectionmacros();
1425
1426 text_t collection = args["c"];
1427 if (!collection.empty()) {
1428 read_collection_macrofiles(collection,logout);
1429 }
1430
1431 // define external macros for each action
1432 actionptrmap::iterator actionhere = actions.begin ();
1433 actionptrmap::iterator actionend = actions.end ();
1434
1435 while (actionhere != actionend) {
1436 assert ((*actionhere).second.a != NULL);
1437 if ((*actionhere).second.a != NULL) {
1438 (*actionhere).second.a->define_external_macros (disp, args, &protocols, logout);
1439 }
1440 ++actionhere;
1441 }
1442
1443
1444 // define internal macros for the current action
1445 a->define_internal_macros (disp, args, &protocols, logout);
1446
1447 // define general macros. the defining of general macros is done here so that
1448 // the last possible version of the cgi arguments are used
1449 define_general_macros (args, outconvert, logout);
1450}
1451
1452
1453void receptionist::define_general_macros (cgiargsclass &args, outconvertclass &/*outconvert*/,
1454 ostream &logout) {
1455
1456 text_t &collection = args["c"];
1457
1458 disp.setmacro ("gsdlhome", displayclass::defaultpackage, dm_safe(configinfo.gsdlhome));
1459 disp.setmacro ("gwcgi", displayclass::defaultpackage, configinfo.gwcgi);
1460 disp.setmacro ("httpweb", displayclass::defaultpackage, configinfo.httpweb);
1461 disp.setmacro ("httpprefix", displayclass::defaultpackage, configinfo.httpprefix);
1462
1463 // This perhaps should be done with gsdl_getenv() which takes the
1464 // 'fcgienv' parameter (for fast-cgi), however if changed to this, this
1465 // additional parameter would need to be passed into here (not sure how
1466 // that would effect any virtual inheritence going on), or else moved
1467 // higher up the calling to chain to, e.g., produce_cgi_page()
1468
1469 char* remote_addr = getenv("REMOTE_ADDR");
1470
1471 if (remote_addr != NULL) {
1472 text_t remote_addr_t(remote_addr);
1473 disp.setmacro ("remoteAddr", displayclass::defaultpackage, remote_addr_t);
1474 }
1475
1476 char* remote_host = getenv("REMOTE_HOST");
1477 if (remote_host != NULL) {
1478 text_t remote_host_t(remote_host);
1479 disp.setmacro ("remoteHost", displayclass::defaultpackage, remote_host_t);
1480 }
1481 else {
1482 // setting this to "unknown" is easier to deal with in format/macro
1483 // statements, rather than testing for _remoteHost_
1484 disp.setmacro ("remoteHost", displayclass::defaultpackage, "unknown");
1485 }
1486
1487
1488 text_t compressedoptions = get_compressed_arg(args, logout);
1489 disp.setmacro ("compressedoptions", displayclass::defaultpackage, dm_safe(compressedoptions));
1490 // need a decoded version of compressedoptions for use within forms
1491 // as browsers encode values from forms before sending to server
1492 // (e.g. %25 becomes %2525)
1493 decode_cgi_arg (compressedoptions);
1494 if (args["w"] == "utf-8") { // if the encoding was utf-8, then compressed options was utf-8, and we need unicode.
1495 // if encoding wasn't utf-8, then compressed opotions may be screwed up, but seems to work for 8 bit encodings?
1496 compressedoptions = to_uni(compressedoptions);
1497 }
1498 disp.setmacro ("decodedcompressedoptions", displayclass::defaultpackage, dm_safe(compressedoptions));
1499
1500#if defined (__WIN32__)
1501 disp.setmacro ("win32", displayclass::defaultpackage, "1");
1502#endif
1503
1504 // set _cgiargX_ macros for each cgi argument
1505 cgiargsclass::const_iterator argshere = args.begin();
1506 cgiargsclass::const_iterator argsend = args.end();
1507 while (argshere != argsend) {
1508
1509 text_t macrovalue = (*argshere).second.value; // and stays like that if ((*argshere).first == "hp")
1510
1511 if (((*argshere).first == "q") ||
1512 ((*argshere).first == "qa") ||
1513 ((*argshere).first == "qtt") ||
1514 ((*argshere).first == "qty") ||
1515 ((*argshere).first == "qp") ||
1516 ((*argshere).first == "qpl") ||
1517 ((*argshere).first == "qr") ||
1518 ((*argshere).first == "q2"))
1519 // need to escape special characters from query string
1520 //disp.setmacro ("cgiarg" + (*argshere).first,
1521 // displayclass::defaultpackage, html_safe((*argshere).second.value));
1522 macrovalue = html_safe(macrovalue);
1523 else if ((*argshere).first != "hp") {
1524 macrovalue = dm_safe(macrovalue);
1525 }
1526
1527 // set the default value for the macro
1528 disp.setmacro ("cgiarg" + (*argshere).first, displayclass::defaultpackage, macrovalue);
1529
1530 // set the encoded values for the same macro
1531 // https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet
1532 // http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_encoder.html
1533 // http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/classesapi_1_1_default_encoder.html
1534 // http://owasp-esapi-cplusplus.googlecode.com/svn/trunk/doc/html/_default_encoder_8cpp_source.html
1535
1536 // IMPORTANT: the API online is outdated/different to the actual method definitions in runtime-src/packages/security/installed/include
1537
1538 // Need to cast to Encoder& since getInstance() returns a const Encoder& which can't be used in the encode... functions
1539 Encoder& encoder = (Encoder&)DefaultEncoder::getInstance();
1540
1541 // Contrary to the online API, where the param and return type of encode are WideString typedef to std::wstring defined in <string>
1542 // actual encode() functions in security/installed/include return NarrowString which is typedef-ed to std::string defined in <string>
1543 esapi::NarrowString macrovalue_nstring = text_t_to_esapi_narrow_string(macrovalue);
1544
1545
1546 //esapi::NarrowString htmlsafe_nstring = encoder.encodeForHTML(macrovalue_nstring);
1547 esapi::NarrowString htmlsafe_nstring = encodeForHTML(macrovalue_nstring);
1548 text_t esapi_htmlsafe = esapi_narrow_string_to_text_t(htmlsafe_nstring);
1549
1550 text_t htmlsafe = encodeForHTML(macrovalue);
1551 text_t attrsafe = encodeForHTMLAttr(macrovalue);
1552 text_t urlsafe = encodeForURL(macrovalue);
1553 text_t jssafe = encodeForJavascript(macrovalue);
1554 text_t csssafe = encodeForCSS(macrovalue);
1555
1556 /*
1557 if((*argshere).first == "o" ) {
1558
1559 cout << "esapi::htmlsafe: " << esapi_htmlsafe << endl;
1560 cout << "myhtmlsafe: " << htmlsafe << endl;
1561 cout << "myattrsafe: " << attrsafe << endl;
1562 cout << "myurlsafe: " << urlsafe << endl;
1563 cout << "myjssafe: " << jssafe << endl;
1564 cout << "mycsssafe: " << csssafe << endl;
1565 }
1566 */
1567 /*
1568 esapi::NarrowString attrsafe_nstring = encoder.encodeForHTMLAttribute(macrovalue_nstring);
1569 text_t attrsafe = esapi_narrow_string_to_text_t(attrsafe_nstring);
1570
1571 esapi::NarrowString jssafe_nstring = encoder.encodeForJavaScript(macrovalue_nstring);
1572 text_t jssafe = esapi_narrow_string_to_text_t(jssafe_nstring);
1573
1574 //text_t jsonsafe = esapi_narrow_string_to_text_t(encoder.encodeFor...(macrovalue_nstring));
1575
1576 esapi::NarrowString csssafe_nstring = encoder.encodeForCSS(macrovalue_nstring);
1577 text_t csssafe = esapi_narrow_string_to_text_t(csssafe_nstring);
1578
1579 esapi::NarrowString urlsafe_nstring = encoder.encodeForURL(macrovalue_nstring);
1580 text_t urlsafe = esapi_narrow_string_to_text_t(urlsafe_nstring);
1581
1582 */
1583 disp.setmacro ("cgiarg" + (*argshere).first + "Htmlsafe", displayclass::defaultpackage, htmlsafe);
1584 disp.setmacro ("cgiarg" + (*argshere).first + "Attrsafe", displayclass::defaultpackage, attrsafe);
1585 disp.setmacro ("cgiarg" + (*argshere).first + "Jssafe", displayclass::defaultpackage, jssafe);
1586 disp.setmacro ("cgiarg" + (*argshere).first + "Csssafe", displayclass::defaultpackage, csssafe);
1587 disp.setmacro ("cgiarg" + (*argshere).first + "Urlsafe", displayclass::defaultpackage, urlsafe);
1588
1589
1590 ++argshere;
1591 }
1592
1593 // set collection specific macros
1594 if (!collection.empty()) {
1595 recptproto *collectproto = protocols.getrecptproto (collection, logout);
1596 if (collectproto != NULL) {
1597 FilterResponse_t response;
1598 text_tset metadata;
1599 get_info ("collection", collection, args["l"], metadata, false,
1600 collectproto, response, logout);
1601
1602 if (!response.docInfo[0].metadata.empty()) {
1603 MetadataInfo_tmap::const_iterator here = response.docInfo[0].metadata.begin();
1604 MetadataInfo_tmap::const_iterator end = response.docInfo[0].metadata.end();
1605 while (here != end) {
1606 if (((*here).first != "haschildren") && ((*here).first != "hasnext") &&
1607 ((*here).first != "hasprevious")) {
1608 // check for args in form name:lang
1609 text_t name = g_EmptyText;
1610 text_t lang = g_EmptyText;
1611 bool colonfound=false;
1612 text_t::const_iterator a = (*here).first.begin();
1613 text_t::const_iterator b = (*here).first.end();
1614 while (a !=b) {
1615 if (*a==':') {
1616 colonfound=true;
1617 }
1618 else {
1619 if (colonfound)
1620 lang.push_back(*a);
1621 else name.push_back(*a);
1622 }
1623 ++a;
1624 }
1625 if (!lang.empty()) {
1626 if (args["l"]==lang) {
1627 disp.setcollectionmacro(displayclass::defaultpackage, name, "", (*here).second.values[0]);
1628 }
1629 }
1630 else { // the default one
1631 disp.setcollectionmacro(displayclass::defaultpackage, (*here).first, "", (*here).second.values[0]);
1632 }
1633 }
1634 ++here;
1635 }
1636 }
1637
1638 text_t iconcollection;
1639 disp.expandstring (displayclass::defaultpackage, "_iconcollection_", iconcollection);
1640 if (!iconcollection.empty())
1641 {
1642 ColInfoResponse_t cinfo;
1643 comerror_t err;
1644 collectproto->get_collectinfo (collection, cinfo, err, logout);
1645 if (iconcollection[0]=='/' && !cinfo.httpdomain.empty())
1646 {
1647 // local but with full path
1648 iconcollection = "http://" + cinfo.httpdomain + iconcollection;
1649 disp.setmacro("iconcollection", displayclass::defaultpackage, iconcollection);
1650 }
1651 }
1652 }
1653 }
1654
1655 if (!collection.empty()) {
1656 ColInfoResponse_t cinfo;
1657 comerror_t err;
1658 recptproto *collectproto = protocols.getrecptproto (collection, logout);
1659 if (collectproto != NULL) {
1660 collectproto->get_collectinfo (collection, cinfo, err, logout);
1661
1662
1663 // This part of the code used to use "cinfo.httpprefix" regardless
1664 // of the value it contained. Since
1665 // this can come back with an empty (in the case of gsdl_mod), the
1666 // URL produced was invalid.
1667 //
1668 // Changed to test for empty first, and use configinfo.httpprefix as
1669 // a "backup"
1670 //
1671 // Point to consider: since configinfo.httpprefix has been offically
1672 // set as "httpprefix" in macros, it seems to make more sense to use
1673 // always use that version and not the cinfo version at all.
1674
1675 text_t httpprefix
1676 = (!cinfo.httpprefix.empty()) ? cinfo.httpprefix : configinfo.httpprefix;
1677
1678 text_t httpcollection;
1679 if (!cinfo.httpdomain.empty()) httpcollection = "http://";
1680 httpcollection += cinfo.httpdomain + httpprefix + "/collect/"
1681 + collection;
1682 disp.setmacro ("httpcollection", displayclass::defaultpackage,
1683 httpcollection);
1684
1685 // as of gsdl 2.53, collect.cfg can specify macros
1686 if (cinfo.collection_macros.size() > 0) {
1687 collectionmeta_map::const_iterator this_macro=cinfo.collection_macros.begin();
1688 collectionmeta_map::const_iterator done_macro=cinfo.collection_macros.end();
1689 while (this_macro != done_macro) {
1690 text_t package = "Global";
1691 text_t macroname = this_macro->first;
1692 // if this macro name is AAA:bbb then extract the package name
1693 text_t::const_iterator thischar, donechar;
1694 thischar = macroname.begin();
1695 donechar = macroname.end();
1696 while (thischar < donechar) {
1697 if (*thischar == ':') {
1698 package = substr(macroname.begin(),thischar);
1699 macroname = substr(thischar+1,donechar);
1700 break;
1701 }
1702 ++thischar;
1703 }
1704
1705 text_tmap params_map = this_macro->second;
1706 text_tmap::const_iterator this_param = params_map.begin();
1707 text_tmap::const_iterator done_param = params_map.end();
1708 while (this_param != done_param) {
1709 disp.setcollectionmacro(package,
1710 macroname,
1711 this_param->first,
1712 this_param->second);
1713 ++this_param;
1714 }
1715
1716 ++this_macro;
1717 }
1718 } // col macros
1719 } // collectproto != NULL
1720 }
1721
1722}
1723
1724// gets collection info from cache if found or
1725// calls collection server (and updates cache)
1726// returns NULL if there's an error
1727ColInfoResponse_t *receptionist::get_collectinfo_ptr (recptproto *collectproto,
1728 const text_t &collection,
1729 ostream &logout) {
1730
1731 // check the cache
1732 colinfo_tmap::iterator it = configinfo.collectinfo.find (collection);
1733 if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) {
1734 // found it
1735 return &((*it).second.info);
1736 }
1737
1738 // not cached, get info from collection server
1739 if (collectproto == NULL) {
1740 logout << "ERROR: receptionist::get_collectinfo_ptr passed null collectproto\n";
1741 return NULL;
1742 }
1743
1744 comerror_t err;
1745 if (it == configinfo.collectinfo.end()) {
1746 collectioninfo_t cinfo;
1747 collectproto->get_collectinfo (collection, cinfo.info, err, logout);
1748 if (err != noError) {
1749 outconvertclass text_t2ascii;
1750 logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \""
1751 << get_comerror_string (err) << "\"while getting collectinfo\n";
1752 return NULL;
1753 }
1754 cinfo.info_loaded = true;
1755 configinfo.collectinfo[collection] = cinfo;
1756 return &(configinfo.collectinfo[collection].info);
1757 } else {
1758 collectproto->get_collectinfo (collection, (*it).second.info, err, logout);
1759 if (err != noError) {
1760 outconvertclass text_t2ascii;
1761 logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \""
1762 << get_comerror_string (err) << "\"while getting collectinfo\n";
1763 return NULL;
1764 }
1765 (*it).second.info_loaded = true;
1766 return &((*it).second.info);
1767 }
1768}
1769
1770// removes a collection from the cache so that the next
1771// call to get_collectinfo_ptr() for that collection will
1772// retrieve the collection info from the collection server
1773void receptionist::uncache_collection (const text_t &collection) {
1774
1775 colinfo_tmap::iterator it = configinfo.collectinfo.find (collection);
1776 if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) {
1777
1778 (*it).second.info_loaded = false;
1779
1780 }
1781}
1782
1783// Handles an "Encoding" line from a configuration file - note that the
1784// configinfo.encodings map is a bit of a hack (to be fixed when the
1785// configuration files are tidied up).
1786void receptionist::configure_encoding (const text_tarray &cfgline) {
1787
1788 text_t subkey, subvalue, shortname, longname, mapfile;
1789 int multibyte = 0;
1790 text_t::const_iterator cfglinesub_here;
1791 text_tarray::const_iterator cfgline_here = cfgline.begin();
1792 text_tarray::const_iterator cfgline_end = cfgline.end();
1793 while (cfgline_here != cfgline_end) {
1794 if (*cfgline_here == "multibyte") {
1795 multibyte = 1;
1796 } else {
1797 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
1798 (*cfgline_here).end(), '=', subkey);
1799 if (subkey == "shortname") {
1800 shortname = substr (cfglinesub_here, (*cfgline_here).end());
1801 } else if (subkey == "longname") {
1802 longname = substr (cfglinesub_here, (*cfgline_here).end());
1803 } else if (subkey == "map") {
1804 mapfile = substr (cfglinesub_here, (*cfgline_here).end());
1805 }
1806 }
1807 ++cfgline_here;
1808 }
1809 if (!shortname.empty()) {
1810 if (longname.empty()) longname = shortname;
1811
1812 // add the converter
1813 if (shortname == "utf-8") {
1814 utf8inconvertclass *utf8inconvert = new utf8inconvertclass();
1815 utf8outconvertclass *utf8outconvert = new utf8outconvertclass();
1816 utf8outconvert->set_rzws(1);
1817 add_converter (shortname, utf8inconvert, utf8outconvert);
1818 configinfo.encodings[longname] = shortname;
1819
1820 } else if (shortname == "utf-16be") {
1821 // we use the default input converter as this shouldn't ever be used
1822 // for converting from unicode...
1823 inconvertclass *inconverter = new inconvertclass();
1824 utf16outconvertclass *outconverter = new utf16outconvertclass();
1825 add_converter (shortname, inconverter, outconverter);
1826 configinfo.encodings[longname] = shortname;
1827
1828 } else if (!mapfile.empty()) {
1829
1830 if (mapfile == "8859_1.ump") {
1831 // iso-8859-1 is a special case as it'll always be supported by the
1832 // standard converter class and therefore doesn't need to use its
1833 // mapping file
1834 inconvertclass *inconvert = new inconvertclass();
1835 rzwsoutconvertclass *outconvert = new rzwsoutconvertclass();
1836 outconvert->set_rzws(1);
1837 add_converter (shortname, inconvert, outconvert);
1838 configinfo.encodings[longname] = shortname;
1839
1840 } else {
1841 text_t to_uc_map = filename_cat(configinfo.gsdlhome, "mappings", "to_uc", mapfile);
1842 text_t from_uc_map = filename_cat(configinfo.gsdlhome, "mappings", "from_uc", mapfile);
1843 if (file_exists(to_uc_map) && file_exists(from_uc_map)) {
1844
1845 mapinconvertclass *mapinconvert = new mapinconvertclass();
1846 mapinconvert->setmapfile (to_uc_map, 0x003F);
1847 mapinconvert->set_multibyte (multibyte);
1848 mapoutconvertclass *mapoutconvert = new mapoutconvertclass();
1849 mapoutconvert->setmapfile (from_uc_map, 0x3F);
1850 mapoutconvert->set_multibyte (multibyte);
1851 mapoutconvert->set_rzws(1);
1852 add_converter (shortname, mapinconvert, mapoutconvert);
1853 configinfo.encodings[longname] = shortname;
1854 }
1855 }
1856 }
1857 }
1858}
Note: See TracBrowser for help on using the repository browser.