source: main/trunk/greenstone2/runtime-src/src/recpt/receptionist.cpp@ 28888

Last change on this file since 28888 was 28888, checked in by ak19, 10 years ago

First security commit. 1. Introducing the new securitytools.h and .cpp files, which port the functions necessary to implement security in Greenstone from OWASP-ESAPI for Java, since OWASP's C++ version is largely not yet implemented, even though their code compiles. The newly added runtime-src/packages/security which contains OWASP ESAPI for C++ will therefore be removed again shortly. 2. receptionist.cpp now sets various web-encoded variants for each cgiarg macro, such as HTML entity encoded, attr encoded, javascript encoded (and css encoded variants). These are now used in the macro files based on which variant is suited to the context. 3. This commit further contains the minimum changes to protect the c, d, and p cgi variables.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 58.8 KB
Line 
1/**********************************************************************
2 *
3 * receptionist.cpp -- a web interface for the gsdl
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26// following line required to get fstream.filedesc() on darwin (Mac OS X)
27// gcc 2.91 automatically defines this in stream.h
28#define _STREAM_COMPAT 1
29
30#include "receptionist.h"
31#include "recptprototools.h"
32#include "fileutil.h"
33#include "cgiutils.h"
34#include "htmlutils.h"
35#include "gsdltools.h"
36#include "gsdltimes.h"
37#include "OIDtools.h"
38#include "securitytools.h"
39#include <assert.h>
40#include <time.h>
41#include <stdio.h> // for open()
42#include <fcntl.h> // for open() flags
43// following 2 are for printing Last-Modified http header.
44#include <sys/stat.h>
45#include <time.h>
46
47#if defined (GSDL_USE_IOS_H)
48#include <fstream.h>
49#else
50#include <fstream>
51#endif
52
53void recptconf::clear () {
54 gsdlhome.clear();
55 collecthome.clear();
56 dbhome.clear();
57 collectinfo.erase(collectinfo.begin(), collectinfo.end());
58 collection.clear();
59 collectdir.clear();
60 httpprefix.clear();
61 httpweb.clear();
62 gwcgi.clear();
63 macrofiles.erase(macrofiles.begin(), macrofiles.end());
64 saveconf.clear();
65 usecookies = false;
66 logcgiargs = false;
67 LogDateFormat = LocalTime;
68
69 maintainer.clear();
70 MailServer.clear();
71 LogEvents = Disabled;
72 EmailEvents = Disabled;
73 EmailUserEvents = false;
74
75 languages.erase(languages.begin(), languages.end());
76 encodings.erase(encodings.begin(), encodings.end());
77
78 site_auth = false;
79 HomePageType = "images";
80 HomePageCols = 3;
81
82 // these default page parameters can always be overriden
83 // in the configuration file
84 pageparams.erase(pageparams.begin(), pageparams.end());
85 pageparams["c"] = "";
86 pageparams["l"] = "en";
87
88#ifdef MACROPRECEDENCE
89 macroprecedence = MACROPRECEDENCE;
90#else
91 macroprecedence.clear();
92#endif
93}
94
95
96void collectioninfo_t::clear () {
97 gsdl_gsdlhome.clear();
98 gsdl_dbhome.clear();
99
100 info_loaded = false;
101 info.clear();
102}
103
104void languageinfo_t::clear () {
105 longname.clear();
106 defaultencoding.clear();
107}
108
109receptionist::receptionist () {
110 // create a list of cgi arguments
111 // this must be done before the configuration
112
113 cgiarginfo ainfo;
114
115 ainfo.shortname = "e";
116 ainfo.longname = "compressed arguments";
117 ainfo.multiplechar = true;
118 ainfo.defaultstatus = cgiarginfo::good;
119 ainfo.argdefault = g_EmptyText;
120 ainfo.savedarginfo = cgiarginfo::mustnot;
121 argsinfo.addarginfo (NULL, ainfo);
122
123 ainfo.shortname = "a";
124 ainfo.longname = "action";
125 ainfo.multiplechar = true;
126 ainfo.defaultstatus = cgiarginfo::none;
127 ainfo.argdefault = g_EmptyText;
128 ainfo.savedarginfo = cgiarginfo::must;
129 argsinfo.addarginfo (NULL, ainfo);
130
131 // w=western
132 ainfo.shortname = "w";
133 ainfo.longname = "encoding";
134 ainfo.multiplechar = true;
135 ainfo.defaultstatus = cgiarginfo::none;
136 ainfo.argdefault = g_EmptyText;
137 ainfo.savedarginfo = cgiarginfo::must;
138 argsinfo.addarginfo (NULL, ainfo);
139
140 ainfo.shortname = "nw";
141 ainfo.longname = "new encoding";
142 ainfo.multiplechar = true;
143 ainfo.defaultstatus = cgiarginfo::none;
144 ainfo.argdefault = g_EmptyText;
145 ainfo.savedarginfo = cgiarginfo::mustnot;
146 argsinfo.addarginfo (NULL, ainfo);
147
148 ainfo.shortname = "c";
149 ainfo.longname = "collection";
150 ainfo.multiplechar = true;
151 ainfo.defaultstatus = cgiarginfo::none;
152 ainfo.argdefault = g_EmptyText;
153 ainfo.savedarginfo = cgiarginfo::must;
154 argsinfo.addarginfo (NULL, ainfo);
155
156 // the interface language name should use the ISO 639
157 // standard
158 ainfo.shortname = "l";
159 ainfo.longname = "interface language";
160 ainfo.multiplechar = true;
161 ainfo.defaultstatus = cgiarginfo::weak;
162 ainfo.argdefault = "en";
163 ainfo.savedarginfo = cgiarginfo::must;
164 argsinfo.addarginfo (NULL, ainfo);
165
166 ainfo.shortname = "nl";
167 ainfo.longname = "new language";
168 ainfo.multiplechar = false;
169 ainfo.defaultstatus = cgiarginfo::none;
170 ainfo.argdefault = "0";
171 ainfo.savedarginfo = cgiarginfo::mustnot;
172 argsinfo.addarginfo (NULL, ainfo);
173
174 // the GSDL_UID (cookie)
175 ainfo.shortname = "z";
176 ainfo.longname = "gsdl uid";
177 ainfo.multiplechar = true;
178 ainfo.defaultstatus = cgiarginfo::none;
179 ainfo.argdefault = g_EmptyText;
180 ainfo.savedarginfo = cgiarginfo::mustnot;
181 argsinfo.addarginfo (NULL, ainfo);
182}
183
184
185void receptionist::add_action (action *theaction) {
186 // make sure we have an action to add
187 if (theaction == NULL) return;
188
189 // add this action to the list of actions
190 actions.addaction(theaction);
191
192 // add the cgi arguments from this action
193 argsinfo.addarginfo (NULL, *(theaction->getargsinfo()));
194}
195
196
197void receptionist::add_browser (browserclass *thebrowser) {
198 // make sure we have a browser to add
199 if (thebrowser == NULL) return;
200
201 // add this browser to the list of browsers
202 browsers.addbrowser(thebrowser);
203}
204
205
206void receptionist::setdefaultbrowser (const text_t &browsername) {
207 browsers.setdefaultbrowser (browsername);
208}
209
210
211// configure should be called for each line in the
212// configuration files to configure the receptionist and everything
213// it contains. The configuration should take place after everything
214// has been added but before the initialisation.
215
216void receptionist::configure (const text_t &key, const text_tarray &cfgline) {
217 // configure the receptionist
218
219
220
221 if (cfgline.size() >= 1) {
222 cgiarginfo *info = NULL;
223 if (key == "gsdlhome") {
224 configinfo.gsdlhome = cfgline[0];
225 if (configinfo.dbhome.empty()) configinfo.dbhome = cfgline[0];
226 }
227 else if (key == "collecthome") configinfo.collecthome = cfgline[0];
228 else if (key == "gdbmhome") configinfo.dbhome = cfgline[0];
229 else if (key == "collection") {
230 configinfo.collection = cfgline[0];
231 // also need to set the default arg to this collection
232 if ((info = argsinfo.getarginfo("c")) != NULL) {
233 info->defaultstatus = cgiarginfo::good;
234 info->argdefault = cfgline[0];
235 }
236
237 }
238 else if (key == "collectdir") configinfo.collectdir = cfgline[0];
239 else if (key == "httpprefix") configinfo.httpprefix = cfgline[0];
240 else if (key == "httpweb") configinfo.httpweb = cfgline[0];
241 else if (key == "gwcgi") configinfo.gwcgi = cfgline[0];
242 else if (key == "macrofiles") {
243 // want to append to macrofiles (i.e. may be several config files
244 // contributing, maybe from several collections).
245 text_tarray::const_iterator here = cfgline.begin();
246 text_tarray::const_iterator end = cfgline.end();
247 while (here != end) {
248 configinfo.macrofiles.insert (*here);
249 ++here;
250 }
251 }
252 else if (key == "saveconf") configinfo.saveconf = cfgline[0];
253 else if (key == "usecookies") configinfo.usecookies = (cfgline[0] == "true");
254 else if (key == "logcgiargs") configinfo.logcgiargs = (cfgline[0] == "true");
255 else if (key == "maintainer") configinfo.maintainer = cfgline[0];
256 else if (key == "MailServer") configinfo.MailServer = cfgline[0];
257 else if (key == "LogDateFormat") {
258 if (cfgline[0] == "UTCTime") configinfo.LogDateFormat = UTCTime;
259 else if (cfgline[0] == "Absolute") configinfo.LogDateFormat = Absolute;
260 }
261 else if (key == "LogEvents") {
262 if (cfgline[0] == "CollectorEvents") configinfo.LogEvents = CollectorEvents;
263 else if (cfgline[0] == "AllEvents") configinfo.LogEvents = AllEvents;
264 }
265 else if (key == "EmailEvents") {
266 if (cfgline[0] == "CollectorEvents") configinfo.EmailEvents = CollectorEvents;
267 else if (cfgline[0] == "AllEvents") configinfo.EmailEvents = AllEvents;
268 }
269 else if (key == "EmailUserEvents") configinfo.EmailUserEvents = (cfgline[0] == "true");
270 else if (key == "pageparam") {
271 if (cfgline.size() >= 2) configinfo.pageparams[cfgline[0]] = cfgline[1];
272 else configinfo.pageparams[cfgline[0]] = "";
273 }
274 else if (key == "macroprecedence") configinfo.macroprecedence = cfgline[0];
275 else if (key == "collectinfo") {
276 if (cfgline.size() == 3) {
277 // for backwards compatability with older collections that only use
278 // gsdlhome and dbhome
279 collectioninfo_t cinfo;
280 cinfo.gsdl_gsdlhome = cfgline[1];
281 cinfo.gsdl_collecthome = filename_cat(cfgline[1],"collect");
282 cinfo.gsdl_dbhome = cfgline[2];
283 configinfo.collectinfo[cfgline[0]] = cinfo;
284 }
285 else if (cfgline.size() >= 4) {
286 collectioninfo_t cinfo;
287 cinfo.gsdl_gsdlhome = cfgline[1];
288 cinfo.gsdl_collecthome = cfgline[2];
289 cinfo.gsdl_dbhome = cfgline[3];
290 configinfo.collectinfo[cfgline[0]] = cinfo;
291 }
292 }
293
294 // Read in the value for the site_auth directive either true or false
295 else if (key == "site_auth") configinfo.site_auth = (cfgline[0] == "true");
296
297 else if (key == "site_group")
298 joinchar(cfgline,',',configinfo.site_group);
299
300 else if (key == "SiteFormat") {
301 if (cfgline[0] == "HomePageType") {
302 configinfo.HomePageType = cfgline[1];
303 } else if (cfgline[0] == "HomePageCols") {
304 configinfo.HomePageCols = cfgline[1].getint();
305 }
306 }
307
308 else if (key == "cgiarg") {
309 // get shortname
310 bool seen_defaultstatus = false;
311 text_t subkey, subvalue;
312 text_t shortname;
313 text_t::const_iterator cfglinesub_here;
314 text_tarray::const_iterator cfgline_here = cfgline.begin();
315 text_tarray::const_iterator cfgline_end = cfgline.end();
316 while (cfgline_here != cfgline_end) {
317 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
318 (*cfgline_here).end(), '=', subkey);
319 if (subkey == "shortname") {
320 shortname = substr (cfglinesub_here, (*cfgline_here).end());
321 }
322 ++cfgline_here;
323 }
324
325 // if we found the shortname process the line again filling in values
326 if (!shortname.empty()) {
327 cgiarginfo &chinfo = argsinfo[shortname];
328 chinfo.shortname = shortname; // in case this is a new argument
329
330 cfgline_here = cfgline.begin();
331 while (cfgline_here != cfgline_end) {
332 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
333 (*cfgline_here).end(), '=', subkey);
334 subvalue = substr (cfglinesub_here, (*cfgline_here).end());
335
336 if (subkey == "longname") chinfo.longname = subvalue;
337 else if (subkey == "multiplechar") chinfo.multiplechar = (subvalue == "true");
338 else if (subkey == "multiplevalue") chinfo.multiplevalue = (subvalue == "true");
339 else if (subkey == "defaultstatus") {
340 seen_defaultstatus = true;
341 if (subvalue == "none") chinfo.defaultstatus = cgiarginfo::none;
342 else if (subvalue == "weak") chinfo.defaultstatus = cgiarginfo::weak;
343 else if (subvalue == "good") chinfo.defaultstatus = cgiarginfo::good;
344 else if (subvalue == "config") chinfo.defaultstatus = cgiarginfo::config;
345 else if (subvalue == "imperative") chinfo.defaultstatus = cgiarginfo::imperative;
346 }
347 else if (subkey == "argdefault") {
348 chinfo.argdefault = subvalue;
349 if (!seen_defaultstatus) chinfo.defaultstatus = cgiarginfo::config;
350 }
351 else if (subkey == "savedarginfo") {
352 if (subvalue == "mustnot") chinfo.savedarginfo = cgiarginfo::mustnot;
353 else if (subvalue == "can") chinfo.savedarginfo = cgiarginfo::can;
354 else if (subvalue == "must") chinfo.savedarginfo = cgiarginfo::must;
355 }
356
357 ++cfgline_here;
358 }
359 }
360
361 } else if (key == "Encoding") {
362
363 configure_encoding (cfgline);
364
365 } else if (key == "Language") {
366 text_t subkey, subvalue, shortname;
367 languageinfo_t lang;
368 text_t::const_iterator cfglinesub_here;
369 text_tarray::const_iterator cfgline_here = cfgline.begin();
370 text_tarray::const_iterator cfgline_end = cfgline.end();
371 while (cfgline_here != cfgline_end) {
372 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
373 (*cfgline_here).end(), '=', subkey);
374 if (subkey == "shortname") {
375 shortname = substr (cfglinesub_here, (*cfgline_here).end());
376 } else if (subkey == "longname") {
377 lang.longname = substr (cfglinesub_here, (*cfgline_here).end());
378 } else if (subkey == "default_encoding") {
379 lang.defaultencoding = substr (cfglinesub_here, (*cfgline_here).end());
380 }
381 ++cfgline_here;
382 }
383 if (!shortname.empty()) {
384 if (lang.longname.empty()) lang.longname = shortname;
385 configinfo.languages[shortname] = lang;
386 }
387 }
388 }
389
390 // configure the actions
391 actionptrmap::iterator actionhere = actions.begin ();
392 actionptrmap::iterator actionend = actions.end ();
393
394 while (actionhere != actionend) {
395 assert ((*actionhere).second.a != NULL);
396 if ((*actionhere).second.a != NULL)
397 (*actionhere).second.a->configure(key, cfgline);
398
399 ++actionhere;
400 }
401
402 // configure the protocols
403 recptprotolistclass::iterator protohere = protocols.begin ();
404 recptprotolistclass::iterator protoend = protocols.end ();
405
406 while (protohere != protoend) {
407 assert ((*protohere).p != NULL);
408 comerror_t err;
409 if ((*protohere).p != NULL)
410 (*protohere).p->configure(key, cfgline, err);
411
412 ++protohere;
413 }
414
415 // configure the browsers
416 browserptrmap::iterator browserhere = browsers.begin ();
417 browserptrmap::iterator browserend = browsers.end ();
418
419 while (browserhere != browserend) {
420 assert ((*browserhere).second.b != NULL);
421 if ((*browserhere).second.b != NULL)
422 (*browserhere).second.b->configure(key, cfgline);
423
424 ++browserhere;
425 }
426}
427
428
429void receptionist::configure (const text_t &key, const text_t &value) {
430 text_tarray cfgline;
431 cfgline.push_back (value);
432 configure(key, cfgline);
433}
434
435
436// init should be called after all the actions and protocols have been
437// added to the receptionist and after everything has been configured but
438// before any pages are created. It returns true on success and false on
439// failure. If false is returned getpage should not be called (without
440// producing meaningless output), instead an error page should be produced
441// by the calling code.
442bool receptionist::init (ostream &logout) {
443
444 // first configure collectdir
445 if (!configinfo.collection.empty()) {
446
447 // collection specific mode
448
449 text_t collectdir = configinfo.gsdlhome;
450
451 if (!configinfo.collectdir.empty()) {
452 // has already been configured
453 collectdir = configinfo.collectdir;
454 } else {
455
456 // decide where collectdir is by searching for collect.cfg
457 // look in $GSDLHOME/collect/collection-name/etc/collect.cfg and
458 // then $GSDLHOME/etc/collect.cfg
459 collectdir = filename_cat (configinfo.gsdlhome, "collect");
460 collectdir = filename_cat (collectdir, configinfo.collection);
461 text_t filename = filename_cat (collectdir, "etc");
462 filename = filename_cat (filename, "collect.cfg");
463
464 if (!file_exists(filename)) collectdir = configinfo.gsdlhome;
465 }
466
467 configure("collectdir", collectdir);
468
469 }
470 else {
471
472 text_t collecthome;
473 if (configinfo.collecthome.empty()) {
474 collecthome = filename_cat(configinfo.gsdlhome,"collect");
475 }
476 else {
477 collecthome = configinfo.collecthome;
478 }
479
480 configure("collecthome", collecthome);
481
482 // for backwards compatability collectdir set to gsdlhome
483 // (possible it could now be removed)
484 configure("collectdir", configinfo.gsdlhome);
485 }
486
487
488 // read in the macro files
489 if (!read_macrofiles (logout)) return false;
490
491 // there must be at least one action defined
492 if (actions.empty()) {
493 logout << "Error: no actions have been added to the receptionist\n";
494 return false;
495 }
496
497 // there must be at least one browser defined
498 if (browsers.empty()) {
499 logout << "Error: no browsers have been added to the receptionist\n";
500 return false;
501 }
502
503 // create a saveconf string if there isn't one already
504 if (configinfo.saveconf.empty())
505 configinfo.saveconf = create_save_conf_str (argsinfo, logout);
506
507 // check the saveconf string
508 if (!check_save_conf_str (configinfo.saveconf, argsinfo, logout))
509 return false;
510
511 // set a random seed
512 srand (time(NULL));
513
514 // if maintainer email address is something dodgy (for now I'll define
515 // dodgy as being anything that doesn't contain '@') disable EmailEvents
516 // and EmailUserEvents (we don't strictly need to disable EmailUserEvents
517 // in this case but we will as it seems likely that MailServer will also
518 // be screwed up if maintainer is).
519 text_t::const_iterator maintainer_end = configinfo.maintainer.end ();
520 text_t::const_iterator maintainer_here = findchar ((text_t::const_iterator)configinfo.maintainer.begin(),
521 maintainer_end, '@');
522 if (maintainer_here == maintainer_end) {
523 configinfo.EmailEvents = Disabled;
524 configinfo.EmailUserEvents = Disabled;
525 } else {
526 // if MailServer isn't set it should default to mail.maintainer-domain
527 if (configinfo.MailServer.empty()) {
528 configinfo.MailServer = "mail." + substr (maintainer_here+1, maintainer_end);
529 }
530 }
531
532 // init the actions
533 actionptrmap::iterator actionhere = actions.begin ();
534 actionptrmap::iterator actionend = actions.end ();
535 while (actionhere != actionend) {
536 if (((*actionhere).second.a == NULL) ||
537 !(*actionhere).second.a->init(logout)) return false;
538 ++actionhere;
539 }
540
541 // init the protocols
542 recptprotolistclass::iterator protohere = protocols.begin ();
543 recptprotolistclass::iterator protoend = protocols.end ();
544 while (protohere != protoend) {
545 comerror_t err;
546 if (((*protohere).p == NULL) ||
547 !(*protohere).p->init(err, logout)) return false;
548 ++protohere;
549 }
550
551 // init the browsers
552 browserptrmap::iterator browserhere = browsers.begin ();
553 browserptrmap::iterator browserend = browsers.end ();
554 while (browserhere != browserend) {
555 if (((*browserhere).second.b == NULL) ||
556 !(*browserhere).second.b->init(logout)) return false;
557 ++browserhere;
558 }
559
560 return true;
561}
562
563// get the default encoding for the given language - if it fails for any
564// reason return ""
565text_t receptionist::get_default_encoding (const text_t &language) {
566
567 // make sure language is valid
568 if (configinfo.languages.find(language) == configinfo.languages.end()) return "";
569
570 text_t default_encoding = configinfo.languages[language].defaultencoding;
571
572 // make sure the encoding is valid
573 if (converters.find(default_encoding) == converters.end()) {
574 // we don't support the encoding specified as default for this language
575 if (configinfo.encodings.size()==1) {
576 // only 1 encoding specified in main.cfg, so use it
577 return configinfo.encodings.begin()->second;
578 }
579 return "";
580 }
581
582 return default_encoding;
583}
584
585// parse_cgi_args parses cgi arguments into an argument class.
586// This function should be called for each page request. It returns false
587// if there was a major problem with the cgi arguments.
588bool receptionist::parse_cgi_args (const text_t &argstr,
589 fileupload_tmap &fileuploads,
590 cgiargsclass &args,
591 ostream &logout, text_tmap &fcgienv) {
592
593 // get an initial list of cgi arguments
594 args.clear();
595 split_cgi_args (argsinfo, argstr, args);
596
597 // expand the compressed argument (if there was one)
598 if (!expand_save_args (argsinfo, configinfo.saveconf, args, logout)) return false;
599
600 // add the defaults
601 add_default_args (argsinfo, args, logout);
602
603 // add any file upload arguments
604 add_fileupload_args(argsinfo, args, fileuploads, logout);
605
606 // get the cookie
607 if (configinfo.usecookies) get_cookie(args["z"], fcgienv);
608
609 // if we're changing languages, set the encoding to the default for the new language
610 if (args["nl"] == "1") {
611 args["nw"] = get_default_encoding(args["l"]);
612 }
613
614 // get the input encoding
615 // if encoding isn't set, set it to the default for the current language
616 if ((args.getarg("w") == NULL) || args["w"].empty()) {
617 args["w"] = get_default_encoding(args["l"]);
618 }
619
620 text_t &arg_w = args["w"];
621
622 inconvertclass defaultinconvert;
623 inconvertclass *inconvert = converters.get_inconverter (arg_w);
624 if (inconvert == NULL) inconvert = &defaultinconvert;
625
626 // see if the next page will have a different encoding
627 if (args.getarg("nw") != NULL) arg_w = args["nw"];
628
629 // convert arguments which aren't in unicode to unicode
630 args_tounicode (args, *inconvert);
631
632
633 // decide on the output conversion class (needed for checking the external
634 // cgi arguments)
635 rzwsoutconvertclass defaultoutconverter;
636 rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
637 if (outconverter == NULL) outconverter = &defaultoutconverter;
638 outconverter->reset();
639
640 // check the main cgi arguments
641 if (!check_mainargs (args, logout)) return false;
642
643 // check the arguments for the action
644 action *a = actions.getaction (args["a"]);
645 if (a != NULL) {
646 if (!a->check_cgiargs (argsinfo, args, &protocols, logout)) return false;
647 } else {
648 // the action was not found!!
649 outconvertclass text_t2ascii;
650 logout << text_t2ascii << "Error: the action \"" << args["a"]
651 << "\" could not be found.\n";
652 return false;
653 }
654
655 // check external cgi arguments for each action
656 actionptrmap::iterator actionhere = actions.begin ();
657 actionptrmap::iterator actionend = actions.end ();
658 while (actionhere != actionend) {
659 assert ((*actionhere).second.a != NULL);
660 if ((*actionhere).second.a != NULL) {
661 if (!(*actionhere).second.a->check_external_cgiargs (argsinfo, args, *outconverter,
662 configinfo.saveconf, logout))
663 return false;
664 }
665 ++actionhere;
666 }
667
668 // the action might have changed but we will assume that
669 // the cgiargs were checked properly when the change was made
670
671 return true;
672}
673
674
675// Returns true if cookie already existed, false if it was generated
676bool receptionist::get_cookie (text_t &cookie, text_tmap &fcgienv)
677{
678 // See if we can get the GSDL_UID cookie
679 text_t cookiestring = gsdl_getenv ("HTTP_COOKIE", fcgienv);
680 if (!cookiestring.empty()) // This should really be handled by the findword function...
681 {
682 // Check if the cookie contains GSDL_UID
683 text_t gsdl_uid = "GSDL_UID=";
684 text_t::iterator gsdl_uid_start = findword(cookiestring.begin(), cookiestring.end(), gsdl_uid);
685 if (gsdl_uid_start != cookiestring.end())
686 {
687 // Yes, so extract its value
688 cookie = substr(gsdl_uid_start + gsdl_uid.size(), findchar(gsdl_uid_start + gsdl_uid.size(), cookiestring.end(), ';'));
689 return true;
690 }
691 }
692
693 // Generate a new key "[host]-[epoch time]", e.g. test.com-1256764496
694 cookie.clear();
695 text_t host = gsdl_getenv("REMOTE_ADDR", fcgienv);
696 time_t ttime = time(NULL);
697 if (!host.empty())
698 {
699 cookie += host;
700 cookie.push_back ('-');
701 }
702 cookie += text_t(ttime);
703
704 return false;
705}
706
707
708// Same as above but just tests if cookie exists
709bool receptionist::get_cookie (text_tmap &fcgienv)
710{
711 text_t cookie_jar = "";
712 return get_cookie(cookie_jar, fcgienv);
713}
714
715
716bool receptionist::log_cgi_args (cgiargsclass &args, ostream &logout, text_tmap &fcgienv) {
717
718 // see if we want to log the cgi arguments
719 if (!configinfo.logcgiargs) return true;
720
721 text_t host = gsdl_getenv ("REMOTE_HOST", fcgienv);
722 text_t script_name = gsdl_getenv ("SCRIPT_NAME", fcgienv);
723 if (host.empty()) host = gsdl_getenv ("REMOTE_ADDR", fcgienv);
724 text_t browser = gsdl_getenv ("HTTP_USER_AGENT", fcgienv);
725
726 cgiargsclass::const_iterator args_here = args.begin();
727 cgiargsclass::const_iterator args_end = args.end();
728
729 text_t argstr;
730 bool first = true;
731 while (args_here != args_end) {
732 if (!first) argstr += ", ";
733 argstr += (*args_here).first + "=" + (*args_here).second.value;
734 first = false;
735 ++args_here;
736 }
737
738 text_t logfile = filename_cat (configinfo.dbhome, "etc", "usage.txt");
739
740 text_t logstr = script_name;
741 logstr += " " + host;
742 logstr += " [";
743 if (configinfo.LogDateFormat == UTCTime) {
744 logstr += get_date (false);
745 } else if (configinfo.LogDateFormat == Absolute) {
746 time_t ttime = time(NULL);
747 logstr += ttime;
748 } else {
749 // LocalTime
750 logstr += get_date (true);
751 }
752 logstr += "] (" + argstr + ") \"";
753 logstr += browser;
754 logstr += "\"\n";
755
756 return append_logstr (logfile, logstr, logout);
757}
758
759bool receptionist::append_logstr (const text_t &filename, const text_t &logstr,
760 ostream &logout) {
761
762 char *lfile = filename.getcstr();
763
764 int fd = open(lfile, O_CREAT | O_WRONLY | O_APPEND, 0777);
765 //int fd = open(lfile, O_CREAT | O_RDWR | O_APPEND, 0777);
766
767 if (fd == -1) {
768 logout << "Error: Couldn't open file " << lfile << "\n";
769 delete []lfile;
770 return false;
771 }
772
773 // lock_val is set to 0 if file is locked successfully
774 int lock_val = 1;
775 GSDL_LOCK_FILE (fd);
776 if (lock_val == 0) {
777 // Write the string out in UTF-8
778 text_t tmp_log_str_utf8 = to_utf8(logstr);
779 char *buffer = tmp_log_str_utf8.getcstr();
780 size_t num_chars = tmp_log_str_utf8.size();
781 write(fd, buffer, num_chars);
782 GSDL_UNLOCK_FILE (fd);
783 delete []buffer;
784 } else {
785 logout << "Error: Couldn't lock file " << lfile << "\n";
786 close(fd);
787 delete []lfile;
788 return false;
789 }
790
791 close(fd);
792
793 delete []lfile;
794 return true;
795}
796
797text_t receptionist::expandmacros (const text_t &astring, cgiargsclass &args,
798 ostream &logout) {
799 text_t outstring;
800 outconvertclass text_t2ascii;
801
802 action *a = actions.getaction (args["a"]);
803 if (a != NULL)
804 {
805 prepare_page (a, args, text_t2ascii, logout);
806 }
807 disp.expandstring (displayclass::defaultpackage, astring, outstring);
808 return outstring;
809}
810
811// produce_cgi_page will call get_cgihead_info and
812// produce_content in the appropriate way to output a cgi header and
813// the page content (if needed). If a page could not be created it
814// will return false
815bool receptionist::produce_cgi_page (cgiargsclass &args, ostream &contentout,
816 ostream &logout, text_tmap &fcgienv) {
817 outconvertclass text_t2ascii;
818
819 response_t response;
820 text_t response_data;
821
822 // produce cgi header
823 get_cgihead_info (args, response, response_data, logout, fcgienv);
824 if (response == location) {
825 // location response (url may contain macros!!)
826 response_data = expandmacros (response_data, args, logout);
827
828 contentout << text_t2ascii << "Location: " << response_data << "\n\n";
829 contentout << flush;
830
831 return true;
832 } else if (response == content) {
833 // content response
834
835#ifdef GSDL_NOCACHE
836 contentout << "Expires: Mon, 26 Jul 1997 05:00:00 GMT\n"; // date in the past
837 tm *tm_ptr = NULL;
838 time_t t = time(NULL);
839 tm_ptr = gmtime (&t);
840 if (tm_ptr != NULL) {
841 char *timestr = new char[128];
842 strftime (timestr, 128, "%a, %d %b %Y %H:%M:%S", tm_ptr);
843 contentout << "Last-Modified: " << timestr << " GMT\n"; // always modified
844 delete []timestr;
845 }
846 contentout << "Cache-Control: no-cache, must-revalidate\n"; // HTTP/1.1
847 contentout << "Pragma: no-cache\n"; // HTTP/1.0
848
849#else
850
851 // use the later of build.cfg and collect.cfg modification times
852 // as the Last-Modified: header, for caching values
853 struct stat file_info;
854 time_t latest=0;
855
856 text_t collectname="";
857 collectname=args["c"];
858 if (collectname != "") {
859
860 text_t collecthome;
861 if (!configinfo.collecthome.empty()) {
862 collecthome = configinfo.collecthome;
863 }
864 else {
865 collecthome=filename_cat(configinfo.gsdlhome,"collect");
866 }
867 text_t collectdir=filename_cat(collecthome,collectname);
868
869 text_t buildcfg=filename_cat(collectdir,"index");
870 buildcfg=filename_cat(buildcfg,"build.cfg");
871 char *buildcfg_ptr=buildcfg.getcstr();
872 text_t collectcfg=filename_cat(collectdir,"etc");
873 collectcfg=filename_cat(collectcfg,"collect.cfg");
874 char *collectcfg_ptr=collectcfg.getcstr();
875
876 if (stat(buildcfg_ptr, &file_info)) {
877 // we got an error. Currently don't handle error :(
878 // logout <<
879 } else {
880 latest=file_info.st_mtime;
881 }
882
883 if (stat(collectcfg_ptr, &file_info)) {
884 // error - unhandled for now
885 } else {
886 if (latest<file_info.st_mtime) latest=file_info.st_mtime;
887 }
888 delete []buildcfg_ptr;
889 delete []collectcfg_ptr;
890
891 if (latest>0) {
892 // print out modified time, "DDD, dd MMM YYYY hh:mm:ss" format
893 // c library takes care of mem for this string... (has \n at end!!!!)
894 // latest is currently local time, convert to UTC.
895 struct tm* utc_latest;
896 utc_latest=gmtime(&latest);
897 contentout << "Last-Modified: " << asctime(utc_latest);
898 }
899 } // end of collection != ""
900
901#endif
902
903 contentout << text_t2ascii << "Content-type: " << response_data << "\n\n";
904 }
905 else if (response == undecided_location) {
906 // Wait until later to output the target location
907 // Used for the "I'm feeling lucky" functionality
908 }
909 else {
910 // unknown response
911 logout << "Error: get_cgihead_info returned an unknown response type.\n";
912 return false;
913 }
914
915 // produce cgi page
916 if (!produce_content (args, contentout, logout)) return false;
917
918 // flush contentout
919 contentout << flush;
920 return true;
921}
922
923
924// get_cgihead_info determines the cgi header information for
925// a set of cgi arguments. If response contains location then
926// response_data contains the redirect address. If reponse
927// contains content then reponse_data contains the content-type.
928// Note that images can now be produced by the receptionist.
929// Note also, alternative for get_cgihead_info below which
930// stores the information in a text_tmap so it is more easily digested
931
932void receptionist::get_cgihead_info (cgiargsclass &args, response_t &response,
933 text_t &response_data, ostream &logout,
934 text_tmap &fcgienv) {
935 outconvertclass text_t2ascii;
936
937 // get the action
938 action *a = actions.getaction (args["a"]);
939 if (a != NULL) {
940 a->get_cgihead_info (args, &protocols, response, response_data, logout);
941
942 } else {
943 // the action was not found!!
944 logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \""
945 << args["a"] << "\" could not be found.\n";
946 response = content;
947 response_data = "text/html";
948 }
949
950 // add the encoding information
951 if (response == content) {
952 if (converters.find(args["w"]) != converters.end()) {
953 response_data += "; charset=" + args["w"];
954 } else {
955 // default to latin 1
956 response_data += "; charset=ISO-8859-1";
957 }
958
959 // add cookie if required
960 if (configinfo.usecookies && !get_cookie(fcgienv))
961 response_data += "\nSet-Cookie: GSDL_UID=" + args["z"]
962 + "; expires=Fri, 25-Dec-2037 00:00:00 GMT";
963 }
964}
965
966
967// Alternative version of get_cgihead_info, stores fielded infomation
968// in text_tmap rather than concatenated string
969void receptionist::get_cgihead_info (cgiargsclass &args, text_tmap &headers,
970 ostream &logout, text_tmap &fcgienv) {
971
972 response_t response;
973 text_t response_data;
974
975 // get the action
976 action *a = actions.getaction (args["a"]);
977 if (a != NULL) {
978 a->get_cgihead_info (args, &protocols, response, response_data, logout);
979
980 } else {
981 // the action was not found!!
982 outconvertclass text_t2ascii;
983 logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \""
984 << args["a"] << "\" could not be found.\n";
985 response = content;
986 response_data = "text/html";
987 }
988
989 if (response == location) {
990 response_data = expandmacros(response_data, args, logout);
991 headers["Location"] = response_data;
992 return;
993 }
994
995 // add the encoding information
996 if (response == content) {
997
998 if (converters.find(args["w"]) != converters.end()) {
999 headers["content-encoding"] = args["w"];
1000 response_data += "; charset=" + args["w"];
1001 } else {
1002 // default to utf-8
1003 headers["content-encoding"] = "utf-8";
1004 response_data += "; charset=utf-8";
1005 }
1006
1007 headers["content-type"] = response_data;
1008
1009 }
1010
1011}
1012
1013
1014
1015// produce the page content
1016bool receptionist::produce_content (cgiargsclass &args, ostream &contentout,
1017 ostream &logout) {
1018
1019 // decide on the output conversion class
1020 text_t &arg_w = args["w"];
1021 rzwsoutconvertclass defaultoutconverter;
1022 rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
1023 if (outconverter == NULL) outconverter = &defaultoutconverter;
1024 outconverter->reset();
1025
1026 // needed for 16-bit unicode only - big endian marker 0xfeff (RFC 2781)
1027 if (arg_w=="utf-16be") {
1028 contentout << '\xfe' << '\xff' ;
1029 }
1030
1031 recptproto *collectproto = protocols.getrecptproto (args["c"], logout);
1032 if (collectproto != NULL) {
1033 // get browsers to process OID
1034 text_t OID = args["d"];
1035 if (OID.empty()) OID = args["cl"];
1036 if (!OID.empty()) {
1037 text_tset metadata;
1038 text_tarray OIDs;
1039 OIDs.push_back (OID);
1040 if (!is_top(OID)) OIDs.push_back (OID + ".pr");
1041 FilterResponse_t response;
1042 metadata.insert ("childtype");
1043 if (get_info (OIDs, args["c"], args["l"], metadata, false, collectproto, response, logout)) {
1044 text_t classifytype;
1045 if (!response.docInfo[0].metadata["childtype"].values[0].empty())
1046 classifytype = response.docInfo[0].metadata["childtype"].values[0];
1047 else if (!is_top (OID)) {
1048 // not sure why this is occasionally not set, but it will
1049 // cause a segfault... possibly if built with no_text? jrm21
1050 if (response.docInfo[1].metadata.find("childtype")
1051 == response.docInfo[1].metadata.end()) {
1052 cerr << "receptionist: no childtype element in metadata map!"
1053 << endl;
1054 } else {
1055 if (!response.docInfo[1].metadata["childtype"].values[0].empty())
1056 classifytype = response.docInfo[1].metadata["childtype"].values[0];
1057 }
1058 }
1059 browserclass *b = browsers.getbrowser (classifytype);
1060 b->processOID (args, collectproto, logout);
1061 }
1062 }
1063
1064 // translate "d" and "cl" arguments if required
1065 translate_OIDs (args, collectproto, logout);
1066 }
1067
1068 // produce the page using the desired action
1069 action *a = actions.getaction (args["a"]);
1070 if (a != NULL) {
1071 if (a->uses_display(args)) prepare_page (a, args, (*outconverter), logout);
1072 if (!a->do_action (args, &protocols, &browsers, disp, (*outconverter), contentout, logout))
1073 return false;
1074 } else {
1075 // the action was not found!!
1076 outconvertclass text_t2ascii;
1077
1078 logout << text_t2ascii << "Error receptionist::produce_content: the action \""
1079 << args["a"] << "\" could not be found.\n";
1080
1081 contentout << (*outconverter)
1082 << "<html>\n"
1083 << "<head>\n"
1084 << "<title>Error</title>\n"
1085 << "</head>\n"
1086 << "<body>\n"
1087 << "<h2>Oops!</h2>\n"
1088 << "Undefined Page. The action \""
1089 << args["a"] << "\" could not be found.\n"
1090 << "</body>\n"
1091 << "</html>\n";
1092 }
1093 return true;
1094}
1095
1096
1097// returns the compressed argument ("e") corresponding to the argument
1098// list. This can be used to save preferences between sessions.
1099text_t receptionist::get_compressed_arg (cgiargsclass &args, ostream &logout) {
1100 // decide on the output conversion class
1101 text_t &arg_w = args["w"];
1102 rzwsoutconvertclass defaultoutconverter;
1103 rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
1104 if (outconverter == NULL) outconverter = &defaultoutconverter;
1105 outconverter->reset();
1106
1107 text_t compressed_args;
1108 if (compress_save_args (argsinfo, configinfo.saveconf, args,
1109 compressed_args, *outconverter, logout))
1110 return compressed_args;
1111
1112 return g_EmptyText;
1113}
1114
1115
1116// will read in all the macro files. If one is not found an
1117// error message will be written to logout and the method will
1118// return false.
1119bool receptionist::read_macrofiles (ostream &logout) {
1120 outconvertclass text_t2ascii;
1121
1122 // redirect the error output to logout
1123 ostream *savedlogout = disp.setlogout (&logout);
1124
1125 // unload any macros that were previously loaded - this allows us to call
1126 // this function a second time to reload all the macro files (useful for
1127 // reading in changed macro files in server versions of greenstone)
1128 disp.unloaddefaultmacros();
1129
1130 // load up the default macro files, the collection directory
1131 // is searched first for the file (if this is being used in
1132 // collection specific mode) and then the main directory(s)
1133 text_t colmacrodir = filename_cat (configinfo.collectdir, "macros");
1134
1135 text_tset maindirs;
1136 text_t gsdlmacrodir = filename_cat (configinfo.gsdlhome, "macros");
1137 maindirs.insert (gsdlmacrodir);
1138 colinfo_tmap::iterator colhere = configinfo.collectinfo.begin();
1139 colinfo_tmap::iterator colend = configinfo.collectinfo.end();
1140 while (colhere != colend) {
1141 if (!((*colhere).second.gsdl_gsdlhome).empty()) {
1142 gsdlmacrodir = filename_cat ((*colhere).second.gsdl_gsdlhome, "macros");
1143 maindirs.insert (gsdlmacrodir);
1144 }
1145 ++colhere;
1146 }
1147
1148 text_tset::iterator arrhere = configinfo.macrofiles.begin();
1149 text_tset::iterator arrend = configinfo.macrofiles.end();
1150 text_t filename;
1151 while (arrhere != arrend) {
1152 bool foundfile = false;
1153
1154 // try in the collection directory if this is being
1155 // run in collection specific mode
1156 if (!configinfo.collection.empty()) {
1157 filename = filename_cat (colmacrodir, *arrhere);
1158 if (file_exists (filename)) {
1159 disp.loaddefaultmacros(filename);
1160 foundfile = true;
1161 }
1162 }
1163
1164 // if we haven't found the macro file yet try in
1165 // the main macro directory(s)
1166 // if file is found in more than one main directory
1167 // we'll load all copies
1168 if (!foundfile) {
1169 text_tset::const_iterator dirhere = maindirs.begin();
1170 text_tset::const_iterator dirend = maindirs.end();
1171 while (dirhere != dirend) {
1172 filename = filename_cat (*dirhere, *arrhere);
1173 if (file_exists (filename)) {
1174 disp.loaddefaultmacros(filename);
1175 foundfile = true;
1176 }
1177 ++dirhere;
1178 }
1179 }
1180
1181 // see if we found the file or not
1182 if (!foundfile) {
1183 logout << text_t2ascii
1184 << "Error: the macro file \"" << *arrhere << "\" could not be found.\n";
1185 if (configinfo.collection.empty()) {
1186 text_t dirs;
1187 joinchar (maindirs, ", ", dirs);
1188 logout << text_t2ascii
1189 << "It should be in either of the following directories ("
1190 << dirs << ").\n\n";
1191
1192 } else {
1193 logout << text_t2ascii
1194 << "It should be in either " << colmacrodir << " or in "
1195 << gsdlmacrodir << ".\n\n";
1196 }
1197 // don't crap out if a macro file is missing
1198 //disp.setlogout (savedlogout);
1199 //return false;
1200 }
1201 ++arrhere;
1202 }
1203
1204 // success
1205
1206 // reset logout to what it was
1207 disp.setlogout (savedlogout);
1208 return true;
1209}
1210
1211
1212
1213
1214// Go through the list of macro files looking to see
1215// if any exist in the collectoin specific area. If they
1216// do then read them in and add them to the set of existing
1217// current macros
1218
1219void receptionist::read_collection_macrofiles (const text_t& collection, ostream &logout)
1220{
1221 outconvertclass text_t2ascii;
1222
1223 // disp.unloadcollectionmacros();
1224
1225 // redirect the error output to logout
1226 ostream *savedlogout = disp.setlogout (&logout);
1227
1228 text_t colmacrodir
1229 = filename_cat (configinfo.collecthome,collection, "macros");
1230
1231 if (directory_exists (colmacrodir)) {
1232
1233 text_tset::iterator arrhere = configinfo.macrofiles.begin();
1234 text_tset::iterator arrend = configinfo.macrofiles.end();
1235 text_t filename;
1236 while (arrhere != arrend) {
1237
1238 filename = filename_cat (colmacrodir, *arrhere);
1239 if (file_exists (filename)) {
1240 disp.loadcollectionmacros(filename);
1241 }
1242
1243 ++arrhere;
1244 }
1245 }
1246
1247 // reset logout to what it was
1248 disp.setlogout (savedlogout);
1249}
1250
1251
1252
1253
1254// check_mainargs will check all the main arguments. If a major
1255// error is found it will return false and no cgi page should
1256// be created using the arguments.
1257
1258bool receptionist::check_mainargs (cgiargsclass &args, ostream &logout) {
1259
1260 if(configinfo.site_auth)
1261 {
1262 args["uan"] = "1";
1263 args["ug"] = configinfo.site_group;
1264 }
1265
1266
1267 // if this receptionist is running in collection dependant mode
1268 // then it should always set the collection argument to the
1269 // collection
1270 if (!configinfo.collection.empty()) args["c"] = configinfo.collection;
1271
1272 // if current collection uses ccscols make sure
1273 // "ccs" argument is set and make "cc" default to
1274 // all collections in "ccs"
1275 if (args["a"] != "config" && !args["c"].empty()) {
1276
1277 text_t &arg_c = args["c"];
1278 recptproto *collectproto = protocols.getrecptproto (arg_c, logout);
1279 if (collectproto == NULL) {
1280 // oops, this collection isn't valid
1281 outconvertclass text_t2ascii;
1282 logout << text_t2ascii << "ERROR: Invalid collection: " << arg_c << "\n";
1283 // args["c"].clear();
1284
1285 } else {
1286
1287 ColInfoResponse_t *cinfo = get_collectinfo_ptr (collectproto, arg_c, logout);
1288
1289 if(cinfo->authenticate == "collection")
1290 {
1291 args["uan"] = "1";
1292 args["ug"] = cinfo->auth_group;
1293 }
1294
1295
1296 if (cinfo != NULL) {
1297 if (!cinfo->ccsCols.empty()) {
1298 args["ccs"] = 1;
1299 if (args["cc"].empty()) {
1300 text_tarray::const_iterator col_here = cinfo->ccsCols.begin();
1301 text_tarray::const_iterator col_end = cinfo->ccsCols.end();
1302 bool first = true;
1303 while (col_here != col_end) {
1304 // make sure it's a valid collection
1305 if (protocols.getrecptproto (*col_here, logout) != NULL) {
1306 if (!first) args["cc"].push_back (',');
1307 args["cc"] += *col_here;
1308 first = false;
1309 }
1310 ++col_here;
1311 }
1312 }
1313 }
1314 } else {
1315 logout << "ERROR (receptionist::check_mainargs): get_collectinfo_ptr returned NULL\n";
1316 }
1317 }
1318 }
1319
1320 // argument "v" can only be 0 or 1. Use the default value
1321 // if it is out of range
1322 int arg_v = args.getintarg ("v");
1323 if (arg_v != 0 && arg_v != 1) {
1324 cgiarginfo *vinfo = argsinfo.getarginfo ("v");
1325 if (vinfo != NULL) args["v"] = vinfo->argdefault;
1326 }
1327
1328 // argument "f" can only be 0 or 1. Use the default value
1329 // if it is out of range
1330 int arg_f = args.getintarg ("f");
1331 if (arg_f != 0 && arg_f != 1) {
1332 cgiarginfo *finfo = argsinfo.getarginfo ("f");
1333 if (finfo != NULL) args["f"] = finfo->argdefault;
1334 }
1335
1336 return true;
1337}
1338
1339// translate_OIDs translates the "d" and "cl" arguments to their correct values
1340// if they use the tricky ".fc", ".lc" type syntax.
1341void receptionist::translate_OIDs (cgiargsclass &args, recptproto *collectproto,
1342 ostream &logout) {
1343
1344 FilterResponse_t response;
1345 FilterRequest_t request;
1346 comerror_t err;
1347 text_t &arg_d = args["d"];
1348 text_t &arg_cl = args["cl"];
1349 text_t &collection = args["c"];
1350
1351 // do a call to translate OIDs if required
1352 request.filterName = "NullFilter";
1353 request.filterResultOptions = FROID;
1354 if (!arg_d.empty() && needs_translating (arg_d)) {
1355 request.docSet.push_back (arg_d);
1356 collectproto->filter (collection, request, response, err, logout);
1357 arg_d = response.docInfo[0].OID;
1358 request.clear();
1359 }
1360 // we'll also check here that the "cl" argument has a "classify" doctype
1361 // (in case ".fc" or ".lc" have screwed up)
1362 if (needs_translating (arg_cl)) {
1363 request.fields.insert ("doctype");
1364 request.docSet.push_back (arg_cl);
1365 request.filterResultOptions = FRmetadata;
1366 collectproto->filter (collection, request, response, err, logout);
1367 // set to original value (without .xx stuff) if doctype isn't "classify"
1368 if (response.docInfo[0].metadata["doctype"].values[0] != "classify")
1369 strip_suffix (arg_cl);
1370 else
1371 arg_cl = response.docInfo[0].OID;
1372 }
1373}
1374
1375// prepare_page sets up page parameters, sets display macros
1376// and opens the page ready for output
1377void receptionist::prepare_page (action *a, cgiargsclass &args,
1378 outconvertclass &outconvert,
1379 ostream &logout) {
1380 // set up page parameters
1381 text_t pageparams;
1382 bool first = true;
1383
1384 text_tmap::iterator params_here = configinfo.pageparams.begin();
1385 text_tmap::iterator params_end = configinfo.pageparams.end();
1386 while (params_here != params_end) {
1387 // page params are those from main.cfg (eg pageparam v 0) plus
1388 // two defaults set in recptconf.clear() (c="" and l=en)
1389 // This used to check if the current value of the page param
1390 // == the default value, then don't add in it the list
1391 // but if l=en, and there is a macro with [l=en], then it doesn't
1392 // find it.
1393 // so now all page params will go into the list. I assume this will
1394 // mean more attempts to find each macro, but nothing worsee than
1395 // that. --kjdon
1396 //if (args[(*params_here).first] != (*params_here).second) {
1397 if (first)
1398 first = false;
1399 else
1400 pageparams += ",";
1401
1402 pageparams += (*params_here).first;
1403 pageparams += "=";
1404 pageparams += args[(*params_here).first];
1405 // }
1406
1407 ++params_here;
1408 }
1409
1410
1411 // open the page
1412 disp.openpage(pageparams, configinfo.macroprecedence);
1413
1414 disp.unloadcollectionmacros();
1415
1416 text_t collection = args["c"];
1417 if (!collection.empty()) {
1418 read_collection_macrofiles(collection,logout);
1419 }
1420
1421 // define external macros for each action
1422 actionptrmap::iterator actionhere = actions.begin ();
1423 actionptrmap::iterator actionend = actions.end ();
1424
1425 while (actionhere != actionend) {
1426 assert ((*actionhere).second.a != NULL);
1427 if ((*actionhere).second.a != NULL) {
1428 (*actionhere).second.a->define_external_macros (disp, args, &protocols, logout);
1429 }
1430 ++actionhere;
1431 }
1432
1433
1434 // define internal macros for the current action
1435 a->define_internal_macros (disp, args, &protocols, logout);
1436
1437 // define general macros. the defining of general macros is done here so that
1438 // the last possible version of the cgi arguments are used
1439 define_general_macros (args, outconvert, logout);
1440}
1441
1442
1443void receptionist::define_general_macros (cgiargsclass &args, outconvertclass &/*outconvert*/,
1444 ostream &logout) {
1445
1446 text_t &collection = args["c"];
1447
1448 disp.setmacro ("gsdlhome", displayclass::defaultpackage, dm_safe(configinfo.gsdlhome));
1449 disp.setmacro ("gwcgi", displayclass::defaultpackage, configinfo.gwcgi);
1450 disp.setmacro ("httpweb", displayclass::defaultpackage, configinfo.httpweb);
1451 disp.setmacro ("httpprefix", displayclass::defaultpackage, configinfo.httpprefix);
1452
1453 // This perhaps should be done with gsdl_getenv() which takes the
1454 // 'fcgienv' parameter (for fast-cgi), however if changed to this, this
1455 // additional parameter would need to be passed into here (not sure how
1456 // that would effect any virtual inheritence going on), or else moved
1457 // higher up the calling to chain to, e.g., produce_cgi_page()
1458
1459 char* remote_addr = getenv("REMOTE_ADDR");
1460
1461 if (remote_addr != NULL) {
1462 text_t remote_addr_t(remote_addr);
1463 disp.setmacro ("remoteAddr", displayclass::defaultpackage, remote_addr_t);
1464 }
1465
1466 char* remote_host = getenv("REMOTE_HOST");
1467 if (remote_host != NULL) {
1468 text_t remote_host_t(remote_host);
1469 disp.setmacro ("remoteHost", displayclass::defaultpackage, remote_host_t);
1470 }
1471 else {
1472 // setting this to "unknown" is easier to deal with in format/macro
1473 // statements, rather than testing for _remoteHost_
1474 disp.setmacro ("remoteHost", displayclass::defaultpackage, "unknown");
1475 }
1476
1477
1478 text_t compressedoptions = get_compressed_arg(args, logout);
1479 disp.setmacro ("compressedoptions", displayclass::defaultpackage, dm_safe(compressedoptions));
1480 // need a decoded version of compressedoptions for use within forms
1481 // as browsers encode values from forms before sending to server
1482 // (e.g. %25 becomes %2525)
1483 decode_cgi_arg (compressedoptions);
1484 if (args["w"] == "utf-8") { // if the encoding was utf-8, then compressed options was utf-8, and we need unicode.
1485 // if encoding wasn't utf-8, then compressed opotions may be screwed up, but seems to work for 8 bit encodings?
1486 compressedoptions = to_uni(compressedoptions);
1487 }
1488
1489 text_t dmacrovalue = dm_safe(compressedoptions);
1490 disp.setmacro ("decodedcompressedoptions", displayclass::defaultpackage, dmacrovalue);
1491 disp.setmacro ("decodedcompressedoptionsAttrsafe", displayclass::defaultpackage, encodeForHTMLAttr(dmacrovalue));
1492
1493#if defined (__WIN32__)
1494 disp.setmacro ("win32", displayclass::defaultpackage, "1");
1495#endif
1496
1497 // set _cgiargX_ macros for each cgi argument
1498 cgiargsclass::const_iterator argshere = args.begin();
1499 cgiargsclass::const_iterator argsend = args.end();
1500 while (argshere != argsend) {
1501
1502 text_t macrovalue = (*argshere).second.value; // and stays like that if ((*argshere).first == "hp")
1503
1504 if (((*argshere).first == "q") ||
1505 ((*argshere).first == "qa") ||
1506 ((*argshere).first == "qtt") ||
1507 ((*argshere).first == "qty") ||
1508 ((*argshere).first == "qp") ||
1509 ((*argshere).first == "qpl") ||
1510 ((*argshere).first == "qr") ||
1511 ((*argshere).first == "q2")) {
1512
1513 // need to escape special characters from query string
1514 macrovalue = html_safe(macrovalue);
1515
1516 } else if ((*argshere).first == "hp") {
1517 if(!isValidURLProtocol(macrovalue)) {
1518 macrovalue = encodeForURL(macrovalue); // URL has invalid protocol like javascript:, so URL encode it
1519 }
1520 }
1521 else {
1522 macrovalue = dm_safe(macrovalue);
1523 }
1524
1525 // set the default value for the macro
1526 disp.setmacro ("cgiarg" + (*argshere).first, displayclass::defaultpackage, macrovalue);
1527
1528 // set macros for the encoded versions of the same value. Uses the functions in securitytools.h
1529 // https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet
1530
1531 text_t htmlsafe = encodeForHTML(macrovalue);
1532 text_t attrsafe = encodeForHTMLAttr(macrovalue);
1533 text_t urlsafe = encodeForURL(macrovalue);
1534 text_t jssafe = encodeForJavascript(macrovalue);
1535 text_t csssafe = encodeForCSS(macrovalue);
1536
1537 disp.setmacro ("cgiarg" + (*argshere).first + "Htmlsafe", displayclass::defaultpackage, htmlsafe);
1538 disp.setmacro ("cgiarg" + (*argshere).first + "Attrsafe", displayclass::defaultpackage, attrsafe);
1539 disp.setmacro ("cgiarg" + (*argshere).first + "Jssafe", displayclass::defaultpackage, jssafe);
1540 disp.setmacro ("cgiarg" + (*argshere).first + "Csssafe", displayclass::defaultpackage, csssafe);
1541 disp.setmacro ("cgiarg" + (*argshere).first + "Urlsafe", displayclass::defaultpackage, urlsafe);
1542
1543
1544 ++argshere;
1545 }
1546
1547 // set collection specific macros
1548 if (!collection.empty()) {
1549 recptproto *collectproto = protocols.getrecptproto (collection, logout);
1550 if (collectproto != NULL) {
1551 FilterResponse_t response;
1552 text_tset metadata;
1553 get_info ("collection", collection, args["l"], metadata, false,
1554 collectproto, response, logout);
1555
1556 if (!response.docInfo[0].metadata.empty()) {
1557 MetadataInfo_tmap::const_iterator here = response.docInfo[0].metadata.begin();
1558 MetadataInfo_tmap::const_iterator end = response.docInfo[0].metadata.end();
1559 while (here != end) {
1560 if (((*here).first != "haschildren") && ((*here).first != "hasnext") &&
1561 ((*here).first != "hasprevious")) {
1562 // check for args in form name:lang
1563 text_t name = g_EmptyText;
1564 text_t lang = g_EmptyText;
1565 bool colonfound=false;
1566 text_t::const_iterator a = (*here).first.begin();
1567 text_t::const_iterator b = (*here).first.end();
1568 while (a !=b) {
1569 if (*a==':') {
1570 colonfound=true;
1571 }
1572 else {
1573 if (colonfound)
1574 lang.push_back(*a);
1575 else name.push_back(*a);
1576 }
1577 ++a;
1578 }
1579 if (!lang.empty()) {
1580 if (args["l"]==lang) {
1581 disp.setcollectionmacro(displayclass::defaultpackage, name, "", (*here).second.values[0]);
1582 }
1583 }
1584 else { // the default one
1585 disp.setcollectionmacro(displayclass::defaultpackage, (*here).first, "", (*here).second.values[0]);
1586 }
1587 }
1588 ++here;
1589 }
1590 }
1591
1592 text_t iconcollection;
1593 disp.expandstring (displayclass::defaultpackage, "_iconcollection_", iconcollection);
1594 if (!iconcollection.empty())
1595 {
1596 ColInfoResponse_t cinfo;
1597 comerror_t err;
1598 collectproto->get_collectinfo (collection, cinfo, err, logout);
1599 if (iconcollection[0]=='/' && !cinfo.httpdomain.empty())
1600 {
1601 // local but with full path
1602 iconcollection = "http://" + cinfo.httpdomain + iconcollection;
1603 disp.setmacro("iconcollection", displayclass::defaultpackage, iconcollection);
1604 }
1605 }
1606 }
1607 }
1608
1609 if (!collection.empty()) {
1610 ColInfoResponse_t cinfo;
1611 comerror_t err;
1612 recptproto *collectproto = protocols.getrecptproto (collection, logout);
1613 if (collectproto != NULL) {
1614 collectproto->get_collectinfo (collection, cinfo, err, logout);
1615
1616
1617 // This part of the code used to use "cinfo.httpprefix" regardless
1618 // of the value it contained. Since
1619 // this can come back with an empty (in the case of gsdl_mod), the
1620 // URL produced was invalid.
1621 //
1622 // Changed to test for empty first, and use configinfo.httpprefix as
1623 // a "backup"
1624 //
1625 // Point to consider: since configinfo.httpprefix has been offically
1626 // set as "httpprefix" in macros, it seems to make more sense to use
1627 // always use that version and not the cinfo version at all.
1628
1629 text_t httpprefix
1630 = (!cinfo.httpprefix.empty()) ? cinfo.httpprefix : configinfo.httpprefix;
1631
1632 text_t httpcollection;
1633 if (!cinfo.httpdomain.empty()) httpcollection = "http://";
1634 httpcollection += cinfo.httpdomain + httpprefix + "/collect/"
1635 + collection;
1636 disp.setmacro ("httpcollection", displayclass::defaultpackage,
1637 httpcollection);
1638
1639 // as of gsdl 2.53, collect.cfg can specify macros
1640 if (cinfo.collection_macros.size() > 0) {
1641 collectionmeta_map::const_iterator this_macro=cinfo.collection_macros.begin();
1642 collectionmeta_map::const_iterator done_macro=cinfo.collection_macros.end();
1643 while (this_macro != done_macro) {
1644 text_t package = "Global";
1645 text_t macroname = this_macro->first;
1646 // if this macro name is AAA:bbb then extract the package name
1647 text_t::const_iterator thischar, donechar;
1648 thischar = macroname.begin();
1649 donechar = macroname.end();
1650 while (thischar < donechar) {
1651 if (*thischar == ':') {
1652 package = substr(macroname.begin(),thischar);
1653 macroname = substr(thischar+1,donechar);
1654 break;
1655 }
1656 ++thischar;
1657 }
1658
1659 text_tmap params_map = this_macro->second;
1660 text_tmap::const_iterator this_param = params_map.begin();
1661 text_tmap::const_iterator done_param = params_map.end();
1662 while (this_param != done_param) {
1663 disp.setcollectionmacro(package,
1664 macroname,
1665 this_param->first,
1666 this_param->second);
1667 ++this_param;
1668 }
1669
1670 ++this_macro;
1671 }
1672 } // col macros
1673 } // collectproto != NULL
1674 }
1675
1676}
1677
1678// gets collection info from cache if found or
1679// calls collection server (and updates cache)
1680// returns NULL if there's an error
1681ColInfoResponse_t *receptionist::get_collectinfo_ptr (recptproto *collectproto,
1682 const text_t &collection,
1683 ostream &logout) {
1684
1685 // check the cache
1686 colinfo_tmap::iterator it = configinfo.collectinfo.find (collection);
1687 if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) {
1688 // found it
1689 return &((*it).second.info);
1690 }
1691
1692 // not cached, get info from collection server
1693 if (collectproto == NULL) {
1694 logout << "ERROR: receptionist::get_collectinfo_ptr passed null collectproto\n";
1695 return NULL;
1696 }
1697
1698 comerror_t err;
1699 if (it == configinfo.collectinfo.end()) {
1700 collectioninfo_t cinfo;
1701 collectproto->get_collectinfo (collection, cinfo.info, err, logout);
1702 if (err != noError) {
1703 outconvertclass text_t2ascii;
1704 logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \""
1705 << get_comerror_string (err) << "\"while getting collectinfo\n";
1706 return NULL;
1707 }
1708 cinfo.info_loaded = true;
1709 configinfo.collectinfo[collection] = cinfo;
1710 return &(configinfo.collectinfo[collection].info);
1711 } else {
1712 collectproto->get_collectinfo (collection, (*it).second.info, err, logout);
1713 if (err != noError) {
1714 outconvertclass text_t2ascii;
1715 logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \""
1716 << get_comerror_string (err) << "\"while getting collectinfo\n";
1717 return NULL;
1718 }
1719 (*it).second.info_loaded = true;
1720 return &((*it).second.info);
1721 }
1722}
1723
1724// removes a collection from the cache so that the next
1725// call to get_collectinfo_ptr() for that collection will
1726// retrieve the collection info from the collection server
1727void receptionist::uncache_collection (const text_t &collection) {
1728
1729 colinfo_tmap::iterator it = configinfo.collectinfo.find (collection);
1730 if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) {
1731
1732 (*it).second.info_loaded = false;
1733
1734 }
1735}
1736
1737// Handles an "Encoding" line from a configuration file - note that the
1738// configinfo.encodings map is a bit of a hack (to be fixed when the
1739// configuration files are tidied up).
1740void receptionist::configure_encoding (const text_tarray &cfgline) {
1741
1742 text_t subkey, subvalue, shortname, longname, mapfile;
1743 int multibyte = 0;
1744 text_t::const_iterator cfglinesub_here;
1745 text_tarray::const_iterator cfgline_here = cfgline.begin();
1746 text_tarray::const_iterator cfgline_end = cfgline.end();
1747 while (cfgline_here != cfgline_end) {
1748 if (*cfgline_here == "multibyte") {
1749 multibyte = 1;
1750 } else {
1751 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
1752 (*cfgline_here).end(), '=', subkey);
1753 if (subkey == "shortname") {
1754 shortname = substr (cfglinesub_here, (*cfgline_here).end());
1755 } else if (subkey == "longname") {
1756 longname = substr (cfglinesub_here, (*cfgline_here).end());
1757 } else if (subkey == "map") {
1758 mapfile = substr (cfglinesub_here, (*cfgline_here).end());
1759 }
1760 }
1761 ++cfgline_here;
1762 }
1763 if (!shortname.empty()) {
1764 if (longname.empty()) longname = shortname;
1765
1766 // add the converter
1767 if (shortname == "utf-8") {
1768 utf8inconvertclass *utf8inconvert = new utf8inconvertclass();
1769 utf8outconvertclass *utf8outconvert = new utf8outconvertclass();
1770 utf8outconvert->set_rzws(1);
1771 add_converter (shortname, utf8inconvert, utf8outconvert);
1772 configinfo.encodings[longname] = shortname;
1773
1774 } else if (shortname == "utf-16be") {
1775 // we use the default input converter as this shouldn't ever be used
1776 // for converting from unicode...
1777 inconvertclass *inconverter = new inconvertclass();
1778 utf16outconvertclass *outconverter = new utf16outconvertclass();
1779 add_converter (shortname, inconverter, outconverter);
1780 configinfo.encodings[longname] = shortname;
1781
1782 } else if (!mapfile.empty()) {
1783
1784 if (mapfile == "8859_1.ump") {
1785 // iso-8859-1 is a special case as it'll always be supported by the
1786 // standard converter class and therefore doesn't need to use its
1787 // mapping file
1788 inconvertclass *inconvert = new inconvertclass();
1789 rzwsoutconvertclass *outconvert = new rzwsoutconvertclass();
1790 outconvert->set_rzws(1);
1791 add_converter (shortname, inconvert, outconvert);
1792 configinfo.encodings[longname] = shortname;
1793
1794 } else {
1795 text_t to_uc_map = filename_cat(configinfo.gsdlhome, "mappings", "to_uc", mapfile);
1796 text_t from_uc_map = filename_cat(configinfo.gsdlhome, "mappings", "from_uc", mapfile);
1797 if (file_exists(to_uc_map) && file_exists(from_uc_map)) {
1798
1799 mapinconvertclass *mapinconvert = new mapinconvertclass();
1800 mapinconvert->setmapfile (to_uc_map, 0x003F);
1801 mapinconvert->set_multibyte (multibyte);
1802 mapoutconvertclass *mapoutconvert = new mapoutconvertclass();
1803 mapoutconvert->setmapfile (from_uc_map, 0x3F);
1804 mapoutconvert->set_multibyte (multibyte);
1805 mapoutconvert->set_rzws(1);
1806 add_converter (shortname, mapinconvert, mapoutconvert);
1807 configinfo.encodings[longname] = shortname;
1808 }
1809 }
1810 }
1811 }
1812}
Note: See TracBrowser for help on using the repository browser.