source: main/trunk/greenstone2/runtime-src/src/recpt/receptionist.cpp@ 39000

Last change on this file since 39000 was 39000, checked in by kjdon, 6 weeks ago

The usecookies option has been renamed to usecookiesForUID to better reflect what it means. A new option, usecookiesForE, has been added. If this is set to true (the default), then the e arg will be saved as a cookie instead of being set into the various compressedoptions macros (which will now be empty). nzdl.org is getting hammered by bots, and one theory is that the e arg changes every time, so a page looks like a new page when it's not.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 60.6 KB
Line 
1/**********************************************************************
2 *
3 * receptionist.cpp -- a web interface for the gsdl
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26// following line required to get fstream.filedesc() on darwin (Mac OS X)
27// gcc 2.91 automatically defines this in stream.h
28#define _STREAM_COMPAT 1
29
30#include "receptionist.h"
31#include "recptprototools.h"
32#include "fileutil.h"
33#include "cgiutils.h"
34#include "htmlutils.h"
35#include "gsdltools.h"
36#include "gsdltimes.h"
37#include "OIDtools.h"
38#include "securitytools.h"
39#include <assert.h>
40#include <time.h>
41#include <stdio.h> // for open()
42#include <fcntl.h> // for open() flags
43// following 2 are for printing Last-Modified http header.
44#include <sys/stat.h>
45#include <time.h>
46
47#if defined (GSDL_USE_IOS_H)
48#include <fstream.h>
49#else
50#include <fstream>
51#endif
52
53void recptconf::clear () {
54 gsdlhome.clear();
55 collecthome.clear();
56 dbhome.clear();
57 collectinfo.erase(collectinfo.begin(), collectinfo.end());
58 collection.clear();
59 collectdir.clear();
60 httpprefix.clear();
61 httpweb.clear();
62 gwcgi.clear();
63 macrofiles.erase(macrofiles.begin(), macrofiles.end());
64 saveconf.clear();
65 usecookiesForUID = false;
66 usecookiesForE = true;
67 logcgiargs = false;
68 LogDateFormat = LocalTime;
69
70 maintainer.clear();
71 MailServer.clear();
72 LogEvents = Disabled;
73 EmailEvents = Disabled;
74 EmailUserEvents = false;
75
76 languages.erase(languages.begin(), languages.end());
77 encodings.erase(encodings.begin(), encodings.end());
78
79 site_auth = false;
80 HomePageType = "images";
81 HomePageCols = 3;
82
83 // these default page parameters can always be overriden
84 // in the configuration file
85 pageparams.erase(pageparams.begin(), pageparams.end());
86 pageparams["c"] = "";
87 pageparams["l"] = "en";
88
89#ifdef MACROPRECEDENCE
90 macroprecedence = MACROPRECEDENCE;
91#else
92 macroprecedence.clear();
93#endif
94}
95
96
97void collectioninfo_t::clear () {
98 gsdl_gsdlhome.clear();
99 gsdl_dbhome.clear();
100
101 info_loaded = false;
102 info.clear();
103}
104
105void languageinfo_t::clear () {
106 longname.clear();
107 defaultencoding.clear();
108}
109
110receptionist::receptionist () {
111 // create a list of cgi arguments
112 // this must be done before the configuration
113
114 cgiarginfo ainfo;
115
116 ainfo.shortname = "e";
117 ainfo.longname = "compressed arguments";
118 ainfo.multiplechar = true;
119 ainfo.defaultstatus = cgiarginfo::good;
120 ainfo.argdefault = g_EmptyText;
121 ainfo.savedarginfo = cgiarginfo::mustnot;
122 argsinfo.addarginfo (NULL, ainfo);
123
124 ainfo.shortname = "a";
125 ainfo.longname = "action";
126 ainfo.multiplechar = true;
127 ainfo.defaultstatus = cgiarginfo::none;
128 ainfo.argdefault = g_EmptyText;
129 ainfo.savedarginfo = cgiarginfo::must;
130 argsinfo.addarginfo (NULL, ainfo);
131
132 // w=western
133 ainfo.shortname = "w";
134 ainfo.longname = "encoding";
135 ainfo.multiplechar = true;
136 ainfo.defaultstatus = cgiarginfo::none;
137 ainfo.argdefault = g_EmptyText;
138 ainfo.savedarginfo = cgiarginfo::must;
139 argsinfo.addarginfo (NULL, ainfo);
140
141 ainfo.shortname = "nw";
142 ainfo.longname = "new encoding";
143 ainfo.multiplechar = true;
144 ainfo.defaultstatus = cgiarginfo::none;
145 ainfo.argdefault = g_EmptyText;
146 ainfo.savedarginfo = cgiarginfo::mustnot;
147 argsinfo.addarginfo (NULL, ainfo);
148
149 ainfo.shortname = "c";
150 ainfo.longname = "collection";
151 ainfo.multiplechar = true;
152 ainfo.defaultstatus = cgiarginfo::none;
153 ainfo.argdefault = g_EmptyText;
154 ainfo.savedarginfo = cgiarginfo::must;
155 argsinfo.addarginfo (NULL, ainfo);
156
157 // the interface language name should use the ISO 639
158 // standard
159 ainfo.shortname = "l";
160 ainfo.longname = "interface language";
161 ainfo.multiplechar = true;
162 ainfo.defaultstatus = cgiarginfo::weak;
163 ainfo.argdefault = "en";
164 ainfo.savedarginfo = cgiarginfo::must;
165 argsinfo.addarginfo (NULL, ainfo);
166
167 ainfo.shortname = "nl";
168 ainfo.longname = "new language";
169 ainfo.multiplechar = false;
170 ainfo.defaultstatus = cgiarginfo::none;
171 ainfo.argdefault = "0";
172 ainfo.savedarginfo = cgiarginfo::mustnot;
173 argsinfo.addarginfo (NULL, ainfo);
174
175 // the GSDL_UID (cookie)
176 ainfo.shortname = "z";
177 ainfo.longname = "gsdl uid";
178 ainfo.multiplechar = true;
179 ainfo.defaultstatus = cgiarginfo::none;
180 ainfo.argdefault = g_EmptyText;
181 ainfo.savedarginfo = cgiarginfo::mustnot;
182 argsinfo.addarginfo (NULL, ainfo);
183}
184
185
186void receptionist::add_action (action *theaction) {
187 // make sure we have an action to add
188 if (theaction == NULL) return;
189
190 // add this action to the list of actions
191 actions.addaction(theaction);
192
193 // add the cgi arguments from this action
194 argsinfo.addarginfo (NULL, *(theaction->getargsinfo()));
195}
196
197
198void receptionist::add_browser (browserclass *thebrowser) {
199 // make sure we have a browser to add
200 if (thebrowser == NULL) return;
201
202 // add this browser to the list of browsers
203 browsers.addbrowser(thebrowser);
204}
205
206
207void receptionist::setdefaultbrowser (const text_t &browsername) {
208 browsers.setdefaultbrowser (browsername);
209}
210
211
212// configure should be called for each line in the
213// configuration files to configure the receptionist and everything
214// it contains. The configuration should take place after everything
215// has been added but before the initialisation.
216
217void receptionist::configure (const text_t &key, const text_tarray &cfgline) {
218 // configure the receptionist
219
220
221
222 if (cfgline.size() >= 1) {
223 cgiarginfo *info = NULL;
224 if (key == "gsdlhome") {
225 configinfo.gsdlhome = cfgline[0];
226 if (configinfo.dbhome.empty()) configinfo.dbhome = cfgline[0];
227 }
228 else if (key == "collecthome") configinfo.collecthome = cfgline[0];
229 else if (key == "gdbmhome") configinfo.dbhome = cfgline[0];
230 else if (key == "collection") {
231 configinfo.collection = cfgline[0];
232 // also need to set the default arg to this collection
233 if ((info = argsinfo.getarginfo("c")) != NULL) {
234 info->defaultstatus = cgiarginfo::good;
235 info->argdefault = cfgline[0];
236 }
237
238 }
239 else if (key == "collectdir") configinfo.collectdir = cfgline[0];
240 else if (key == "httpprefix") configinfo.httpprefix = cfgline[0];
241 else if (key == "httpweb") configinfo.httpweb = cfgline[0];
242 else if (key == "gwcgi") configinfo.gwcgi = cfgline[0];
243 else if (key == "macrofiles") {
244 // want to append to macrofiles (i.e. may be several config files
245 // contributing, maybe from several collections).
246 text_tarray::const_iterator here = cfgline.begin();
247 text_tarray::const_iterator end = cfgline.end();
248 while (here != end) {
249 configinfo.macrofiles.insert (*here);
250 ++here;
251 }
252 }
253 else if (key == "saveconf") configinfo.saveconf = cfgline[0];
254 // keep usecookies here for backwards compatibility
255 else if (key == "usecookies" || key == "usecookiesForUID") configinfo.usecookiesForUID = (cfgline[0] == "true");
256 else if (key == "usecookiesForE") configinfo.usecookiesForE = (cfgline[0] == "true");
257 else if (key == "logcgiargs") configinfo.logcgiargs = (cfgline[0] == "true");
258 else if (key == "maintainer") configinfo.maintainer = cfgline[0];
259 else if (key == "MailServer") configinfo.MailServer = cfgline[0];
260 else if (key == "LogDateFormat") {
261 if (cfgline[0] == "UTCTime") configinfo.LogDateFormat = UTCTime;
262 else if (cfgline[0] == "Absolute") configinfo.LogDateFormat = Absolute;
263 }
264 else if (key == "LogEvents") {
265 if (cfgline[0] == "CollectorEvents") configinfo.LogEvents = CollectorEvents;
266 else if (cfgline[0] == "AllEvents") configinfo.LogEvents = AllEvents;
267 }
268 else if (key == "EmailEvents") {
269 if (cfgline[0] == "CollectorEvents") configinfo.EmailEvents = CollectorEvents;
270 else if (cfgline[0] == "AllEvents") configinfo.EmailEvents = AllEvents;
271 }
272 else if (key == "EmailUserEvents") configinfo.EmailUserEvents = (cfgline[0] == "true");
273 else if (key == "pageparam") {
274 if (cfgline.size() >= 2) configinfo.pageparams[cfgline[0]] = cfgline[1];
275 else configinfo.pageparams[cfgline[0]] = "";
276 }
277 else if (key == "macroprecedence") configinfo.macroprecedence = cfgline[0];
278 else if (key == "collectinfo") {
279 if (cfgline.size() == 3) {
280 // for backwards compatability with older collections that only use
281 // gsdlhome and dbhome
282 collectioninfo_t cinfo;
283 cinfo.gsdl_gsdlhome = cfgline[1];
284 cinfo.gsdl_collecthome = filename_cat(cfgline[1],"collect");
285 cinfo.gsdl_dbhome = cfgline[2];
286 configinfo.collectinfo[cfgline[0]] = cinfo;
287 }
288 else if (cfgline.size() >= 4) {
289 collectioninfo_t cinfo;
290 cinfo.gsdl_gsdlhome = cfgline[1];
291 cinfo.gsdl_collecthome = cfgline[2];
292 cinfo.gsdl_dbhome = cfgline[3];
293 configinfo.collectinfo[cfgline[0]] = cinfo;
294 }
295 }
296
297 // Read in the value for the site_auth directive either true or false
298 else if (key == "site_auth") configinfo.site_auth = (cfgline[0] == "true");
299
300 else if (key == "site_group")
301 joinchar(cfgline,',',configinfo.site_group);
302
303 else if (key == "SiteFormat") {
304 if (cfgline[0] == "HomePageType") {
305 configinfo.HomePageType = cfgline[1];
306 } else if (cfgline[0] == "HomePageCols") {
307 configinfo.HomePageCols = cfgline[1].getint();
308 }
309 }
310
311 else if (key == "cgiarg") {
312 // get shortname
313 bool seen_defaultstatus = false;
314 text_t subkey, subvalue;
315 text_t shortname;
316 text_t::const_iterator cfglinesub_here;
317 text_tarray::const_iterator cfgline_here = cfgline.begin();
318 text_tarray::const_iterator cfgline_end = cfgline.end();
319 while (cfgline_here != cfgline_end) {
320 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
321 (*cfgline_here).end(), '=', subkey);
322 if (subkey == "shortname") {
323 shortname = substr (cfglinesub_here, (*cfgline_here).end());
324 }
325 ++cfgline_here;
326 }
327
328 // if we found the shortname process the line again filling in values
329 if (!shortname.empty()) {
330 cgiarginfo &chinfo = argsinfo[shortname];
331 chinfo.shortname = shortname; // in case this is a new argument
332
333 cfgline_here = cfgline.begin();
334 while (cfgline_here != cfgline_end) {
335 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
336 (*cfgline_here).end(), '=', subkey);
337 subvalue = substr (cfglinesub_here, (*cfgline_here).end());
338
339 if (subkey == "longname") chinfo.longname = subvalue;
340 else if (subkey == "multiplechar") chinfo.multiplechar = (subvalue == "true");
341 else if (subkey == "multiplevalue") chinfo.multiplevalue = (subvalue == "true");
342 else if (subkey == "defaultstatus") {
343 seen_defaultstatus = true;
344 if (subvalue == "none") chinfo.defaultstatus = cgiarginfo::none;
345 else if (subvalue == "weak") chinfo.defaultstatus = cgiarginfo::weak;
346 else if (subvalue == "good") chinfo.defaultstatus = cgiarginfo::good;
347 else if (subvalue == "config") chinfo.defaultstatus = cgiarginfo::config;
348 else if (subvalue == "imperative") chinfo.defaultstatus = cgiarginfo::imperative;
349 }
350 else if (subkey == "argdefault") {
351 chinfo.argdefault = subvalue;
352 if (!seen_defaultstatus) chinfo.defaultstatus = cgiarginfo::config;
353 }
354 else if (subkey == "savedarginfo") {
355 if (subvalue == "mustnot") chinfo.savedarginfo = cgiarginfo::mustnot;
356 else if (subvalue == "can") chinfo.savedarginfo = cgiarginfo::can;
357 else if (subvalue == "must") chinfo.savedarginfo = cgiarginfo::must;
358 }
359
360 ++cfgline_here;
361 }
362 }
363
364 } else if (key == "Encoding") {
365
366 configure_encoding (cfgline);
367
368 } else if (key == "Language") {
369 text_t subkey, subvalue, shortname;
370 languageinfo_t lang;
371 text_t::const_iterator cfglinesub_here;
372 text_tarray::const_iterator cfgline_here = cfgline.begin();
373 text_tarray::const_iterator cfgline_end = cfgline.end();
374 while (cfgline_here != cfgline_end) {
375 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
376 (*cfgline_here).end(), '=', subkey);
377 if (subkey == "shortname") {
378 shortname = substr (cfglinesub_here, (*cfgline_here).end());
379 } else if (subkey == "longname") {
380 lang.longname = substr (cfglinesub_here, (*cfgline_here).end());
381 } else if (subkey == "default_encoding") {
382 lang.defaultencoding = substr (cfglinesub_here, (*cfgline_here).end());
383 }
384 ++cfgline_here;
385 }
386 if (!shortname.empty()) {
387 if (lang.longname.empty()) lang.longname = shortname;
388 configinfo.languages[shortname] = lang;
389 }
390 }
391 }
392
393 // configure the actions
394 actionptrmap::iterator actionhere = actions.begin ();
395 actionptrmap::iterator actionend = actions.end ();
396
397 while (actionhere != actionend) {
398 assert ((*actionhere).second.a != NULL);
399 if ((*actionhere).second.a != NULL)
400 (*actionhere).second.a->configure(key, cfgline);
401
402 ++actionhere;
403 }
404
405 // configure the protocols
406 recptprotolistclass::iterator protohere = protocols.begin ();
407 recptprotolistclass::iterator protoend = protocols.end ();
408
409 while (protohere != protoend) {
410 assert ((*protohere).p != NULL);
411 comerror_t err;
412 if ((*protohere).p != NULL)
413 (*protohere).p->configure(key, cfgline, err);
414
415 ++protohere;
416 }
417
418 // configure the browsers
419 browserptrmap::iterator browserhere = browsers.begin ();
420 browserptrmap::iterator browserend = browsers.end ();
421
422 while (browserhere != browserend) {
423 assert ((*browserhere).second.b != NULL);
424 if ((*browserhere).second.b != NULL)
425 (*browserhere).second.b->configure(key, cfgline);
426
427 ++browserhere;
428 }
429}
430
431
432void receptionist::configure (const text_t &key, const text_t &value) {
433 text_tarray cfgline;
434 cfgline.push_back (value);
435 configure(key, cfgline);
436}
437
438
439// init should be called after all the actions and protocols have been
440// added to the receptionist and after everything has been configured but
441// before any pages are created. It returns true on success and false on
442// failure. If false is returned getpage should not be called (without
443// producing meaningless output), instead an error page should be produced
444// by the calling code.
445bool receptionist::init (ostream &logout) {
446
447 // first configure collectdir
448 if (!configinfo.collection.empty()) {
449
450 // collection specific mode
451
452 text_t collectdir = configinfo.gsdlhome;
453
454 if (!configinfo.collectdir.empty()) {
455 // has already been configured
456 collectdir = configinfo.collectdir;
457 } else {
458
459 // decide where collectdir is by searching for collect.cfg
460 // look in $GSDLHOME/collect/collection-name/etc/collect.cfg and
461 // then $GSDLHOME/etc/collect.cfg
462 collectdir = filename_cat (configinfo.gsdlhome, "collect");
463 collectdir = filename_cat (collectdir, configinfo.collection);
464 text_t filename = filename_cat (collectdir, "etc");
465 filename = filename_cat (filename, "collect.cfg");
466
467 if (!file_exists(filename)) collectdir = configinfo.gsdlhome;
468 }
469
470 configure("collectdir", collectdir);
471
472 }
473 else {
474
475 text_t collecthome;
476 if (configinfo.collecthome.empty()) {
477 collecthome = filename_cat(configinfo.gsdlhome,"collect");
478 }
479 else {
480 collecthome = configinfo.collecthome;
481 }
482
483 configure("collecthome", collecthome);
484
485 // for backwards compatability collectdir set to gsdlhome
486 // (possible it could now be removed)
487 configure("collectdir", configinfo.gsdlhome);
488 }
489
490
491 // read in the macro files
492 if (!read_macrofiles (logout)) return false;
493
494 // there must be at least one action defined
495 if (actions.empty()) {
496 logout << "Error: no actions have been added to the receptionist\n";
497 return false;
498 }
499
500 // there must be at least one browser defined
501 if (browsers.empty()) {
502 logout << "Error: no browsers have been added to the receptionist\n";
503 return false;
504 }
505
506 // create a saveconf string if there isn't one already
507 if (configinfo.saveconf.empty())
508 configinfo.saveconf = create_save_conf_str (argsinfo, logout);
509
510 // check the saveconf string
511 if (!check_save_conf_str (configinfo.saveconf, argsinfo, logout))
512 return false;
513
514 // set a random seed
515 srand (time(NULL));
516
517 // if maintainer email address is something dodgy (for now I'll define
518 // dodgy as being anything that doesn't contain '@') disable EmailEvents
519 // and EmailUserEvents (we don't strictly need to disable EmailUserEvents
520 // in this case but we will as it seems likely that MailServer will also
521 // be screwed up if maintainer is).
522 text_t::const_iterator maintainer_end = configinfo.maintainer.end ();
523 text_t::const_iterator maintainer_here = findchar ((text_t::const_iterator)configinfo.maintainer.begin(),
524 maintainer_end, '@');
525 if (maintainer_here == maintainer_end) {
526 configinfo.EmailEvents = Disabled;
527 configinfo.EmailUserEvents = Disabled;
528 } else {
529 // if MailServer isn't set it should default to mail.maintainer-domain
530 if (configinfo.MailServer.empty()) {
531 configinfo.MailServer = "mail." + substr (maintainer_here+1, maintainer_end);
532 }
533 }
534
535 // init the actions
536 actionptrmap::iterator actionhere = actions.begin ();
537 actionptrmap::iterator actionend = actions.end ();
538 while (actionhere != actionend) {
539 if (((*actionhere).second.a == NULL) ||
540 !(*actionhere).second.a->init(logout)) return false;
541 ++actionhere;
542 }
543
544 // init the protocols
545 recptprotolistclass::iterator protohere = protocols.begin ();
546 recptprotolistclass::iterator protoend = protocols.end ();
547 while (protohere != protoend) {
548 comerror_t err;
549 if (((*protohere).p == NULL) ||
550 !(*protohere).p->init(err, logout)) return false;
551 ++protohere;
552 }
553
554 // init the browsers
555 browserptrmap::iterator browserhere = browsers.begin ();
556 browserptrmap::iterator browserend = browsers.end ();
557 while (browserhere != browserend) {
558 if (((*browserhere).second.b == NULL) ||
559 !(*browserhere).second.b->init(logout)) return false;
560 ++browserhere;
561 }
562
563 return true;
564}
565
566// get the default encoding for the given language - if it fails for any
567// reason return ""
568text_t receptionist::get_default_encoding (const text_t &language) {
569
570 // make sure language is valid
571 if (configinfo.languages.find(language) == configinfo.languages.end()) return "";
572
573 text_t default_encoding = configinfo.languages[language].defaultencoding;
574
575 // make sure the encoding is valid
576 if (converters.find(default_encoding) == converters.end()) {
577 // we don't support the encoding specified as default for this language
578 if (configinfo.encodings.size()==1) {
579 // only 1 encoding specified in main.cfg, so use it
580 return configinfo.encodings.begin()->second;
581 }
582 return "";
583 }
584
585 return default_encoding;
586}
587
588// parse_cgi_args parses cgi arguments into an argument class.
589// This function should be called for each page request. It returns false
590// if there was a major problem with the cgi arguments.
591bool receptionist::parse_cgi_args (const text_t &argstr,
592 fileupload_tmap &fileuploads,
593 cgiargsclass &args,
594 ostream &logout, text_tmap &fcgienv) {
595
596 // get an initial list of cgi arguments
597 args.clear();
598 split_cgi_args (argsinfo, argstr, args);
599
600 // get e cookie
601 if (configinfo.usecookiesForE) get_named_cookie(args["e"], "GSDL_E", fcgienv);
602 // expand the compressed argument (if there was one)
603 if (!expand_save_args (argsinfo, configinfo.saveconf, args, logout)) return false;
604
605 // add the defaults
606 add_default_args (argsinfo, args, logout);
607
608 // add any file upload arguments
609 add_fileupload_args(argsinfo, args, fileuploads, logout);
610
611 // get the cookie
612 if (configinfo.usecookiesForUID) get_uid_cookie(args["z"], fcgienv);
613
614 // if we're changing languages, set the encoding to the default for the new language
615 if (args["nl"] == "1") {
616 args["nw"] = get_default_encoding(args["l"]);
617 }
618
619 // get the input encoding
620 // if encoding isn't set, set it to the default for the current language
621 if ((args.getarg("w") == NULL) || args["w"].empty()) {
622 args["w"] = get_default_encoding(args["l"]);
623 }
624
625 text_t &arg_w = args["w"];
626
627 inconvertclass defaultinconvert;
628 inconvertclass *inconvert = converters.get_inconverter (arg_w);
629 if (inconvert == NULL) inconvert = &defaultinconvert;
630
631 // see if the next page will have a different encoding
632 if (args.getarg("nw") != NULL) arg_w = args["nw"];
633
634 // convert arguments which aren't in unicode to unicode
635 args_tounicode (args, *inconvert);
636
637 // decide on the output conversion class (needed for checking the external
638 // cgi arguments)
639 rzwsoutconvertclass defaultoutconverter;
640 rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
641 if (outconverter == NULL) outconverter = &defaultoutconverter;
642 outconverter->reset();
643
644 // check the main cgi arguments
645 if (!check_mainargs (args, logout)) return false;
646
647 // check the arguments for the action
648 action *a = actions.getaction (args["a"]);
649 if (a != NULL) {
650 if (!a->check_cgiargs (argsinfo, args, &protocols, logout)) return false;
651 } else {
652 // the action was not found!!
653 outconvertclass text_t2ascii;
654 logout << text_t2ascii << "Error: the action \"" << args["a"]
655 << "\" could not be found.\n";
656 return false;
657 }
658
659 // check external cgi arguments for each action
660 actionptrmap::iterator actionhere = actions.begin ();
661 actionptrmap::iterator actionend = actions.end ();
662 while (actionhere != actionend) {
663 assert ((*actionhere).second.a != NULL);
664 if ((*actionhere).second.a != NULL) {
665 if (!(*actionhere).second.a->check_external_cgiargs (argsinfo, args, *outconverter,
666 configinfo.saveconf, logout))
667 return false;
668 }
669 ++actionhere;
670 }
671
672 // the action might have changed but we will assume that
673 // the cgiargs were checked properly when the change was made
674
675 return true;
676}
677
678
679// Returns true if cookie exists, false if not
680bool receptionist::get_named_cookie (text_t &cookie, text_t cookie_name, text_tmap &fcgienv)
681{
682
683 // See if we can get the cookies
684 text_t cookiestring = gsdl_getenv ("HTTP_COOKIE", fcgienv);
685 if (cookiestring.empty()) return false;
686 // This should really be handled by the findword function...
687
688 // Check if the cookie contains 'cookie_name'
689 text_t cid = cookie_name+"=";
690 text_t::iterator cid_start = findword(cookiestring.begin(), cookiestring.end(), cid);
691 if (cid_start != cookiestring.end())
692 {
693 // Yes, so extract its value
694 cookie = substr(cid_start + cid.size(), findchar(cid_start + cid.size(), cookiestring.end(), ';'));
695 return true;
696 }
697 return false;
698}
699
700// Returns true if cookie already existed, false if it was generated
701bool receptionist::get_uid_cookie (text_t &cookie, text_tmap &fcgienv)
702{
703 // See if we can get the GSDL_UID cookie
704 bool found = get_named_cookie(cookie, "GSDL_UID", fcgienv);
705 if (found) return true;
706 // Generate a new key "[host]-[epoch time]", e.g. test.com-1256764496
707 cookie.clear();
708 text_t host = gsdl_getenv("REMOTE_ADDR", fcgienv);
709 time_t ttime = time(NULL);
710 if (!host.empty())
711 {
712 cookie += host;
713 cookie.push_back ('-');
714 }
715 cookie += text_t(ttime);
716
717 return false;
718}
719
720
721// Same as above but just tests if cookie exists
722bool receptionist::has_uid_cookie (text_tmap &fcgienv)
723{
724 text_t cookie_jar = "";
725 return get_named_cookie(cookie_jar, "GSDL_UID", fcgienv);
726
727}
728
729
730bool receptionist::log_cgi_args (cgiargsclass &args, ostream &logout, text_tmap &fcgienv) {
731
732 // see if we want to log the cgi arguments
733 if (!configinfo.logcgiargs) return true;
734
735 text_t host = gsdl_getenv ("REMOTE_HOST", fcgienv);
736 text_t script_name = gsdl_getenv ("SCRIPT_NAME", fcgienv);
737 if (host.empty()) host = gsdl_getenv ("REMOTE_ADDR", fcgienv);
738 text_t browser = gsdl_getenv ("HTTP_USER_AGENT", fcgienv);
739
740 cgiargsclass::const_iterator args_here = args.begin();
741 cgiargsclass::const_iterator args_end = args.end();
742
743 text_t argstr;
744 bool first = true;
745 while (args_here != args_end) {
746 if (!first) argstr += ", ";
747 argstr += (*args_here).first + "=" + (*args_here).second.value;
748 first = false;
749 ++args_here;
750 }
751
752 text_t logfile = filename_cat (configinfo.dbhome, "etc", "usage.txt");
753
754 text_t logstr = script_name;
755 logstr += " " + host;
756 logstr += " [";
757 if (configinfo.LogDateFormat == UTCTime) {
758 logstr += get_date (false);
759 } else if (configinfo.LogDateFormat == Absolute) {
760 time_t ttime = time(NULL);
761 logstr += ttime;
762 } else {
763 // LocalTime
764 logstr += get_date (true);
765 }
766 logstr += "] (" + argstr + ") \"";
767 logstr += browser;
768 logstr += "\"\n";
769
770 return append_logstr (logfile, logstr, logout);
771}
772
773bool receptionist::append_logstr (const text_t &filename, const text_t &logstr,
774 ostream &logout) {
775
776 char *lfile = filename.getcstr();
777
778 int fd = open(lfile, O_CREAT | O_WRONLY | O_APPEND, 0777);
779 //int fd = open(lfile, O_CREAT | O_RDWR | O_APPEND, 0777);
780
781 if (fd == -1) {
782 logout << "Error: Couldn't open file " << lfile << "\n";
783 delete []lfile;
784 return false;
785 }
786
787 // lock_val is set to 0 if file is locked successfully
788 int lock_val = 1;
789 GSDL_LOCK_FILE (fd);
790 if (lock_val == 0) {
791 // Write the string out in UTF-8
792 text_t tmp_log_str_utf8 = to_utf8(logstr);
793 char *buffer = tmp_log_str_utf8.getcstr();
794 size_t num_chars = tmp_log_str_utf8.size();
795 write(fd, buffer, num_chars);
796 GSDL_UNLOCK_FILE (fd);
797 delete []buffer;
798 } else {
799 logout << "Error: Couldn't lock file " << lfile << "\n";
800 close(fd);
801 delete []lfile;
802 return false;
803 }
804
805 close(fd);
806
807 delete []lfile;
808 return true;
809}
810
811text_t receptionist::expandmacros (const text_t &astring, cgiargsclass &args,
812 ostream &logout) {
813 text_t outstring;
814 outconvertclass text_t2ascii;
815
816 action *a = actions.getaction (args["a"]);
817 if (a != NULL)
818 {
819 prepare_page (a, args, text_t2ascii, logout);
820 }
821 disp.expandstring (displayclass::defaultpackage, astring, outstring);
822 return outstring;
823}
824
825// produce_cgi_page will call get_cgihead_info and
826// produce_content in the appropriate way to output a cgi header and
827// the page content (if needed). If a page could not be created it
828// will return false
829bool receptionist::produce_cgi_page (cgiargsclass &args, ostream &contentout,
830 ostream &logout, text_tmap &fcgienv) {
831 outconvertclass text_t2ascii;
832
833 response_t response;
834 text_t response_data;
835
836 // produce cgi header
837 get_cgihead_info (args, response, response_data, logout, fcgienv);
838 if (response == location) {
839 // location response (url may contain macros!!)
840 response_data = expandmacros (response_data, args, logout);
841
842 contentout << text_t2ascii << "Location: " << response_data << "\n\n";
843 contentout << flush;
844
845 return true;
846 } else if (response == content) {
847 // content response
848
849#ifdef GSDL_NOCACHE
850 contentout << "Expires: Mon, 26 Jul 1997 05:00:00 GMT\n"; // date in the past
851 tm *tm_ptr = NULL;
852 time_t t = time(NULL);
853 tm_ptr = gmtime (&t);
854 if (tm_ptr != NULL) {
855 char *timestr = new char[128];
856 strftime (timestr, 128, "%a, %d %b %Y %H:%M:%S", tm_ptr);
857 contentout << "Last-Modified: " << timestr << " GMT\n"; // always modified
858 delete []timestr;
859 }
860 contentout << "Cache-Control: no-cache, must-revalidate\n"; // HTTP/1.1
861 contentout << "Pragma: no-cache\n"; // HTTP/1.0
862
863#else
864
865 // use the later of build.cfg and collect.cfg modification times
866 // as the Last-Modified: header, for caching values
867 struct stat file_info;
868 time_t latest=0;
869
870 text_t collectname="";
871 collectname=args["c"];
872 if (collectname != "") {
873
874 text_t collecthome;
875 if (!configinfo.collecthome.empty()) {
876 collecthome = configinfo.collecthome;
877 }
878 else {
879 collecthome=filename_cat(configinfo.gsdlhome,"collect");
880 }
881 text_t collectdir=filename_cat(collecthome,collectname);
882
883 text_t buildcfg=filename_cat(collectdir,"index");
884 buildcfg=filename_cat(buildcfg,"build.cfg");
885 char *buildcfg_ptr=buildcfg.getcstr();
886 text_t collectcfg=filename_cat(collectdir,"etc");
887 collectcfg=filename_cat(collectcfg,"collect.cfg");
888 char *collectcfg_ptr=collectcfg.getcstr();
889
890 if (stat(buildcfg_ptr, &file_info)) {
891 // we got an error. Currently don't handle error :(
892 // logout <<
893 } else {
894 latest=file_info.st_mtime;
895 }
896
897 if (stat(collectcfg_ptr, &file_info)) {
898 // error - unhandled for now
899 } else {
900 if (latest<file_info.st_mtime) latest=file_info.st_mtime;
901 }
902 delete []buildcfg_ptr;
903 delete []collectcfg_ptr;
904
905 if (latest>0) {
906 // print out modified time, "DDD, dd MMM YYYY hh:mm:ss" format
907 // c library takes care of mem for this string... (has \n at end!!!!)
908 // latest is currently local time, convert to UTC.
909 struct tm* utc_latest;
910 utc_latest=gmtime(&latest);
911 contentout << "Last-Modified: " << asctime(utc_latest);
912 }
913 } // end of collection != ""
914
915#endif
916
917 contentout << text_t2ascii << "Content-type: " << response_data << "\n\n";
918 }
919 else if (response == undecided_location) {
920 // Wait until later to output the target location
921 // Used for the "I'm feeling lucky" functionality
922 }
923 else {
924 // unknown response
925 logout << "Error: get_cgihead_info returned an unknown response type.\n";
926 return false;
927 }
928
929 // produce cgi page
930 if (!produce_content (args, contentout, logout)) return false;
931
932 // flush contentout
933 contentout << flush;
934 return true;
935}
936
937
938// get_cgihead_info determines the cgi header information for
939// a set of cgi arguments. If response contains location then
940// response_data contains the redirect address. If reponse
941// contains content then reponse_data contains the content-type.
942// Note that images can now be produced by the receptionist.
943// Note also, alternative for get_cgihead_info below which
944// stores the information in a text_tmap so it is more easily digested
945
946void receptionist::get_cgihead_info (cgiargsclass &args, response_t &response,
947 text_t &response_data, ostream &logout,
948 text_tmap &fcgienv) {
949 outconvertclass text_t2ascii;
950
951 // get the action
952 action *a = actions.getaction (args["a"]);
953 if (a != NULL) {
954 a->get_cgihead_info (args, &protocols, response, response_data, logout);
955
956 } else {
957 // the action was not found!!
958 logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \""
959 << args["a"] << "\" could not be found.\n";
960 response = content;
961 response_data = "text/html";
962 }
963
964 // add the encoding information
965 if (response == content) {
966 if (converters.find(args["w"]) != converters.end()) {
967 response_data += "; charset=" + args["w"];
968 } else {
969 // default to latin 1
970 response_data += "; charset=ISO-8859-1";
971 }
972
973 // add cookie if required
974 if (configinfo.usecookiesForUID && !has_uid_cookie(fcgienv))
975 response_data += "\nSet-Cookie: GSDL_UID=" + args["z"]
976 + "; expires=Fri, 25-Dec-2037 00:00:00 GMT";
977 if (configinfo.usecookiesForE) {
978 // set the GSDL_E cookie
979 response_data += "\nSet-Cookie: GSDL_E=" + get_compressed_arg (args, logout)
980 + "; expires=Fri, 25-Dec-2037 00:00:00 GMT";
981 }
982
983 }
984}
985
986
987// Alternative version of get_cgihead_info, stores fielded infomation
988// in text_tmap rather than concatenated string
989void receptionist::get_cgihead_info (cgiargsclass &args, text_tmap &headers,
990 ostream &logout, text_tmap &fcgienv) {
991
992 response_t response;
993 text_t response_data;
994
995 // get the action
996 action *a = actions.getaction (args["a"]);
997 if (a != NULL) {
998 a->get_cgihead_info (args, &protocols, response, response_data, logout);
999
1000 } else {
1001 // the action was not found!!
1002 outconvertclass text_t2ascii;
1003 logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \""
1004 << args["a"] << "\" could not be found.\n";
1005 response = content;
1006 response_data = "text/html";
1007 }
1008
1009 if (response == location) {
1010 response_data = expandmacros(response_data, args, logout);
1011 headers["Location"] = response_data;
1012 return;
1013 }
1014
1015 // add the encoding information
1016 if (response == content) {
1017
1018 if (converters.find(args["w"]) != converters.end()) {
1019 //headers["content-encoding"] = args["w"];
1020 response_data += "; charset=" + args["w"];
1021 } else {
1022 // default to utf-8
1023 //headers["content-encoding"] = "utf-8";
1024 response_data += "; charset=utf-8";
1025 }
1026
1027 headers["content-type"] = response_data;
1028
1029 }
1030
1031}
1032
1033
1034
1035// produce the page content
1036bool receptionist::produce_content (cgiargsclass &args, ostream &contentout,
1037 ostream &logout) {
1038
1039 // decide on the output conversion class
1040 text_t &arg_w = args["w"];
1041 rzwsoutconvertclass defaultoutconverter;
1042 rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
1043 if (outconverter == NULL) outconverter = &defaultoutconverter;
1044 outconverter->reset();
1045
1046 // needed for 16-bit unicode only - big endian marker 0xfeff (RFC 2781)
1047 if (arg_w=="utf-16be") {
1048 contentout << '\xfe' << '\xff' ;
1049 }
1050
1051 recptproto *collectproto = protocols.getrecptproto (args["c"], logout);
1052 if (collectproto != NULL) {
1053 // get browsers to process OID
1054 text_t OID = args["d"];
1055 if (OID.empty()) OID = args["cl"];
1056 if (!OID.empty()) {
1057 text_tset metadata;
1058 text_tarray OIDs;
1059 OIDs.push_back (OID);
1060 if (!is_top(OID)) OIDs.push_back (OID + ".pr");
1061 FilterResponse_t response;
1062 metadata.insert ("childtype");
1063 if (get_info (OIDs, args["c"], args["l"], metadata, false, collectproto, response, logout)) {
1064 text_t classifytype;
1065 if (!response.docInfo[0].metadata["childtype"].values[0].empty())
1066 classifytype = response.docInfo[0].metadata["childtype"].values[0];
1067 else if (!is_top (OID)) {
1068 // not sure why this is occasionally not set, but it will
1069 // cause a segfault... possibly if built with no_text? jrm21
1070 if (response.docInfo[1].metadata.find("childtype")
1071 == response.docInfo[1].metadata.end()) {
1072 cerr << "receptionist: no childtype element in metadata map!"
1073 << endl;
1074 } else {
1075 if (!response.docInfo[1].metadata["childtype"].values[0].empty())
1076 classifytype = response.docInfo[1].metadata["childtype"].values[0];
1077 }
1078 }
1079 browserclass *b = browsers.getbrowser (classifytype);
1080 b->processOID (args, collectproto, logout);
1081 }
1082 }
1083
1084 // translate "d" and "cl" arguments if required
1085 translate_OIDs (args, collectproto, logout);
1086 }
1087
1088 // produce the page using the desired action
1089 action *a = actions.getaction (args["a"]);
1090 if (a != NULL) {
1091 if (a->uses_display(args)) prepare_page (a, args, (*outconverter), logout);
1092 if (!a->do_action (args, &protocols, &browsers, disp, (*outconverter), contentout, logout))
1093 return false;
1094 } else {
1095 // the action was not found!!
1096 outconvertclass text_t2ascii;
1097
1098 logout << text_t2ascii << "Error receptionist::produce_content: the action \""
1099 << args["a"] << "\" could not be found.\n";
1100
1101 contentout << (*outconverter)
1102 << "<html>\n"
1103 << "<head>\n"
1104 << "<title>Error</title>\n"
1105 << "</head>\n"
1106 << "<body>\n"
1107 << "<h2>Oops!</h2>\n"
1108 << "Undefined Page. The action \""
1109 << args["a"] << "\" could not be found.\n"
1110 << "</body>\n"
1111 << "</html>\n";
1112 }
1113 return true;
1114}
1115
1116
1117// returns the compressed argument ("e") corresponding to the argument
1118// list. This can be used to save preferences between sessions.
1119text_t receptionist::get_compressed_arg (cgiargsclass &args, ostream &logout) {
1120 // decide on the output conversion class
1121 text_t &arg_w = args["w"];
1122 rzwsoutconvertclass defaultoutconverter;
1123 rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
1124 if (outconverter == NULL) outconverter = &defaultoutconverter;
1125 outconverter->reset();
1126
1127 text_t compressed_args;
1128
1129 if (compress_save_args (argsinfo, configinfo.saveconf, args,
1130 compressed_args, *outconverter, logout))
1131 return compressed_args;
1132
1133 return g_EmptyText;
1134}
1135
1136
1137// will read in all the macro files. If one is not found an
1138// error message will be written to logout and the method will
1139// return false.
1140bool receptionist::read_macrofiles (ostream &logout) {
1141 outconvertclass text_t2ascii;
1142
1143 // redirect the error output to logout
1144 ostream *savedlogout = disp.setlogout (&logout);
1145
1146 // unload any macros that were previously loaded - this allows us to call
1147 // this function a second time to reload all the macro files (useful for
1148 // reading in changed macro files in server versions of greenstone)
1149 disp.unloaddefaultmacros();
1150
1151 // load up the default macro files, the collection directory
1152 // is searched first for the file (if this is being used in
1153 // collection specific mode) and then the main directory(s)
1154 text_t colmacrodir = filename_cat (configinfo.collectdir, "macros");
1155
1156 text_tset maindirs;
1157 text_t gsdlmacrodir = filename_cat (configinfo.gsdlhome, "macros");
1158 maindirs.insert (gsdlmacrodir);
1159 colinfo_tmap::iterator colhere = configinfo.collectinfo.begin();
1160 colinfo_tmap::iterator colend = configinfo.collectinfo.end();
1161 while (colhere != colend) {
1162 if (!((*colhere).second.gsdl_gsdlhome).empty()) {
1163 gsdlmacrodir = filename_cat ((*colhere).second.gsdl_gsdlhome, "macros");
1164 maindirs.insert (gsdlmacrodir);
1165 }
1166 ++colhere;
1167 }
1168
1169 text_tset::iterator arrhere = configinfo.macrofiles.begin();
1170 text_tset::iterator arrend = configinfo.macrofiles.end();
1171 text_t filename;
1172 while (arrhere != arrend) {
1173 bool foundfile = false;
1174
1175 // try in the collection directory if this is being
1176 // run in collection specific mode
1177 if (!configinfo.collection.empty()) {
1178 filename = filename_cat (colmacrodir, *arrhere);
1179 if (file_exists (filename)) {
1180 disp.loaddefaultmacros(filename);
1181 foundfile = true;
1182 }
1183 }
1184
1185 // if we haven't found the macro file yet try in
1186 // the main macro directory(s)
1187 // if file is found in more than one main directory
1188 // we'll load all copies
1189 if (!foundfile) {
1190 text_tset::const_iterator dirhere = maindirs.begin();
1191 text_tset::const_iterator dirend = maindirs.end();
1192 while (dirhere != dirend) {
1193 filename = filename_cat (*dirhere, *arrhere);
1194 if (file_exists (filename)) {
1195 disp.loaddefaultmacros(filename);
1196 foundfile = true;
1197 }
1198 ++dirhere;
1199 }
1200 }
1201
1202 // see if we found the file or not
1203 if (!foundfile) {
1204 logout << text_t2ascii
1205 << "Error: the macro file \"" << *arrhere << "\" could not be found.\n";
1206 if (configinfo.collection.empty()) {
1207 text_t dirs;
1208 joinchar (maindirs, ", ", dirs);
1209 logout << text_t2ascii
1210 << "It should be in either of the following directories ("
1211 << dirs << ").\n\n";
1212
1213 } else {
1214 logout << text_t2ascii
1215 << "It should be in either " << colmacrodir << " or in "
1216 << gsdlmacrodir << ".\n\n";
1217 }
1218 // don't crap out if a macro file is missing
1219 //disp.setlogout (savedlogout);
1220 //return false;
1221 }
1222 ++arrhere;
1223 }
1224
1225 // success
1226
1227 // reset logout to what it was
1228 disp.setlogout (savedlogout);
1229 return true;
1230}
1231
1232
1233
1234
1235// Go through the list of macro files looking to see
1236// if any exist in the collectoin specific area. If they
1237// do then read them in and add them to the set of existing
1238// current macros
1239
1240void receptionist::read_collection_macrofiles (const text_t& collection, ostream &logout)
1241{
1242 outconvertclass text_t2ascii;
1243
1244 // disp.unloadcollectionmacros();
1245
1246 // redirect the error output to logout
1247 ostream *savedlogout = disp.setlogout (&logout);
1248
1249 text_t colmacrodir
1250 = filename_cat (configinfo.collecthome,collection, "macros");
1251
1252 if (directory_exists (colmacrodir)) {
1253
1254 text_tset::iterator arrhere = configinfo.macrofiles.begin();
1255 text_tset::iterator arrend = configinfo.macrofiles.end();
1256 text_t filename;
1257 while (arrhere != arrend) {
1258
1259 filename = filename_cat (colmacrodir, *arrhere);
1260 if (file_exists (filename)) {
1261 disp.loadcollectionmacros(filename);
1262 }
1263
1264 ++arrhere;
1265 }
1266 }
1267
1268 // reset logout to what it was
1269 disp.setlogout (savedlogout);
1270}
1271
1272
1273
1274
1275// check_mainargs will check all the main arguments. If a major
1276// error is found it will return false and no cgi page should
1277// be created using the arguments.
1278
1279bool receptionist::check_mainargs (cgiargsclass &args, ostream &logout) {
1280
1281 if(configinfo.site_auth)
1282 {
1283 args["uan"] = "1";
1284 args["ug"] = configinfo.site_group;
1285 }
1286
1287
1288 // if this receptionist is running in collection dependant mode
1289 // then it should always set the collection argument to the
1290 // collection
1291 if (!configinfo.collection.empty()) args["c"] = configinfo.collection;
1292
1293 // if current collection uses ccscols make sure
1294 // "ccs" argument is set and make "cc" default to
1295 // all collections in "ccs"
1296 if (args["a"] != "config" && !args["c"].empty()) {
1297
1298 text_t &arg_c = args["c"];
1299 recptproto *collectproto = protocols.getrecptproto (arg_c, logout);
1300 if (collectproto == NULL) {
1301 // oops, this collection isn't valid
1302 outconvertclass text_t2ascii;
1303 logout << text_t2ascii << "ERROR: Invalid collection: " << arg_c << "\n";
1304 // args["c"].clear();
1305
1306 } else {
1307
1308 ColInfoResponse_t *cinfo = get_collectinfo_ptr (collectproto, arg_c, logout);
1309
1310 if(cinfo->authenticate == "collection")
1311 {
1312 args["uan"] = "1";
1313 args["ug"] = cinfo->auth_group;
1314 }
1315
1316
1317 if (cinfo != NULL) {
1318 if (!cinfo->ccsCols.empty()) {
1319 args["ccs"] = 1;
1320 if (args["cc"].empty()) {
1321 text_tarray::const_iterator col_here = cinfo->ccsCols.begin();
1322 text_tarray::const_iterator col_end = cinfo->ccsCols.end();
1323 bool first = true;
1324 while (col_here != col_end) {
1325 // make sure it's a valid collection
1326 if (protocols.getrecptproto (*col_here, logout) != NULL) {
1327 if (!first) args["cc"].push_back (',');
1328 args["cc"] += *col_here;
1329 first = false;
1330 }
1331 ++col_here;
1332 }
1333 }
1334 }
1335 } else {
1336 logout << "ERROR (receptionist::check_mainargs): get_collectinfo_ptr returned NULL\n";
1337 }
1338 }
1339 }
1340
1341 // argument "v" can only be 0 or 1. Use the default value
1342 // if it is out of range
1343 int arg_v = args.getintarg ("v");
1344 if (arg_v != 0 && arg_v != 1) {
1345 cgiarginfo *vinfo = argsinfo.getarginfo ("v");
1346 if (vinfo != NULL) args["v"] = vinfo->argdefault;
1347 }
1348
1349 // argument "f" can only be 0 or 1. Use the default value
1350 // if it is out of range
1351 int arg_f = args.getintarg ("f");
1352 if (arg_f != 0 && arg_f != 1) {
1353 cgiarginfo *finfo = argsinfo.getarginfo ("f");
1354 if (finfo != NULL) args["f"] = finfo->argdefault;
1355 }
1356
1357 return true;
1358}
1359
1360// translate_OIDs translates the "d" and "cl" arguments to their correct values
1361// if they use the tricky ".fc", ".lc" type syntax.
1362void receptionist::translate_OIDs (cgiargsclass &args, recptproto *collectproto,
1363 ostream &logout) {
1364
1365 FilterResponse_t response;
1366 FilterRequest_t request;
1367 comerror_t err;
1368 text_t &arg_d = args["d"];
1369 text_t &arg_cl = args["cl"];
1370 text_t &collection = args["c"];
1371
1372 // do a call to translate OIDs if required
1373 request.filterName = "NullFilter";
1374 request.filterResultOptions = FROID;
1375 if (!arg_d.empty() && needs_translating (arg_d)) {
1376 request.docSet.push_back (arg_d);
1377 collectproto->filter (collection, request, response, err, logout);
1378 arg_d = response.docInfo[0].OID;
1379 request.clear();
1380 }
1381 // we'll also check here that the "cl" argument has a "classify" doctype
1382 // (in case ".fc" or ".lc" have screwed up)
1383 if (needs_translating (arg_cl)) {
1384 request.fields.insert ("doctype");
1385 request.docSet.push_back (arg_cl);
1386 request.filterResultOptions = FRmetadata;
1387 collectproto->filter (collection, request, response, err, logout);
1388 // set to original value (without .xx stuff) if doctype isn't "classify" or if no doctype
1389 if (response.docInfo[0].metadata["doctype"].values.size() == 0 || response.docInfo[0].metadata["doctype"].values[0] != "classify")
1390 strip_suffix (arg_cl);
1391 else
1392 arg_cl = response.docInfo[0].OID;
1393 }
1394}
1395
1396// prepare_page sets up page parameters, sets display macros
1397// and opens the page ready for output
1398void receptionist::prepare_page (action *a, cgiargsclass &args,
1399 outconvertclass &outconvert,
1400 ostream &logout) {
1401 // set up page parameters
1402 text_t pageparams;
1403 bool first = true;
1404
1405 text_tmap::iterator params_here = configinfo.pageparams.begin();
1406 text_tmap::iterator params_end = configinfo.pageparams.end();
1407 while (params_here != params_end) {
1408 // page params are those from main.cfg (eg pageparam v 0) plus
1409 // two defaults set in recptconf.clear() (c="" and l=en)
1410 // This used to check if the current value of the page param
1411 // == the default value, then don't add in it the list
1412 // but if l=en, and there is a macro with [l=en], then it doesn't
1413 // find it.
1414 // so now all page params will go into the list. I assume this will
1415 // mean more attempts to find each macro, but nothing worsee than
1416 // that. --kjdon
1417 //if (args[(*params_here).first] != (*params_here).second) {
1418 if (first)
1419 first = false;
1420 else
1421 pageparams += ",";
1422
1423 pageparams += (*params_here).first;
1424 pageparams += "=";
1425 pageparams += args[(*params_here).first];
1426 // }
1427
1428 ++params_here;
1429 }
1430
1431
1432 // open the page
1433 disp.openpage(pageparams, configinfo.macroprecedence);
1434
1435 disp.unloadcollectionmacros();
1436
1437 text_t collection = args["c"];
1438 if (!collection.empty()) {
1439 read_collection_macrofiles(collection,logout);
1440 }
1441
1442 // define external macros for each action
1443 actionptrmap::iterator actionhere = actions.begin ();
1444 actionptrmap::iterator actionend = actions.end ();
1445
1446 while (actionhere != actionend) {
1447 assert ((*actionhere).second.a != NULL);
1448 if ((*actionhere).second.a != NULL) {
1449 (*actionhere).second.a->define_external_macros (disp, args, &protocols, logout);
1450 }
1451 ++actionhere;
1452 }
1453
1454
1455 // define internal macros for the current action
1456 a->define_internal_macros (disp, args, &protocols, logout);
1457
1458 // define general macros. the defining of general macros is done here so that
1459 // the last possible version of the cgi arguments are used
1460 define_general_macros (args, outconvert, logout);
1461}
1462
1463
1464void receptionist::define_general_macros (cgiargsclass &args, outconvertclass &/*outconvert*/,
1465 ostream &logout) {
1466
1467 text_t &collection = args["c"];
1468
1469 disp.setmacro ("gsdlhome", displayclass::defaultpackage, dm_safe(configinfo.gsdlhome));
1470 disp.setmacro ("gwcgi", displayclass::defaultpackage, configinfo.gwcgi);
1471 disp.setmacro ("httpweb", displayclass::defaultpackage, configinfo.httpweb);
1472 disp.setmacro ("httpprefix", displayclass::defaultpackage, configinfo.httpprefix);
1473
1474 // This perhaps should be done with gsdl_getenv() which takes the
1475 // 'fcgienv' parameter (for fast-cgi), however if changed to this, this
1476 // additional parameter would need to be passed into here (not sure how
1477 // that would effect any virtual inheritence going on), or else moved
1478 // higher up the calling to chain to, e.g., produce_cgi_page()
1479
1480 char* remote_addr = getenv("REMOTE_ADDR");
1481
1482 if (remote_addr != NULL) {
1483 text_t remote_addr_t(remote_addr);
1484 disp.setmacro ("remoteAddr", displayclass::defaultpackage, remote_addr_t);
1485 }
1486
1487 char* remote_host = getenv("REMOTE_HOST");
1488 if (remote_host != NULL) {
1489 text_t remote_host_t(remote_host);
1490 disp.setmacro ("remoteHost", displayclass::defaultpackage, remote_host_t);
1491 }
1492 else {
1493 // setting this to "unknown" is easier to deal with in format/macro
1494 // statements, rather than testing for _remoteHost_
1495 disp.setmacro ("remoteHost", displayclass::defaultpackage, "unknown");
1496 }
1497
1498
1499 text_t compressedoptions = "";
1500 if (!configinfo.usecookiesForE) {
1501 compressedoptions = get_compressed_arg(args, logout);
1502 }
1503 disp.setmacro ("compressedoptions", displayclass::defaultpackage, dm_safe(compressedoptions));
1504 //disp.setmacro ("compressedoptionsUrlsafe", displayclass::defaultpackage, encodeForURL(dm_safe(compressedoptions))); // seems to be unnecessary after testing e=hack or e=hack<collect>... or e=...<collect>hack in a live server
1505
1506 // need a decoded version of compressedoptions for use within forms
1507 // as browsers encode values from forms before sending to server
1508 // (e.g. %25 becomes %2525)
1509 decode_cgi_arg (compressedoptions);
1510
1511 if (args["w"] == "utf-8") { // if the encoding was utf-8, then compressed options was utf-8, and we need unicode.
1512 // if encoding wasn't utf-8, then compressed opotions may be screwed up, but seems to work for 8 bit encodings?
1513 compressedoptions = to_uni(compressedoptions);
1514 }
1515
1516 // does it need to be dm_safed? this breaks depositor when there is _ in filename
1517 //text_t dmacrovalue = dm_safe(compressedoptions);
1518 text_t dmacrovalue = compressedoptions;
1519
1520 disp.setmacro ("decodedcompressedoptions", displayclass::defaultpackage, dmacrovalue);
1521 disp.setmacro ("decodedcompressedoptionsAttrsafe", displayclass::defaultpackage, encodeForHTMLAttr(dmacrovalue));
1522 // the old version was dm_safed, so make a dm safe version in case we need it
1523 disp.setmacro ("decodedcompressedoptionsAttrDmsafe", displayclass::defaultpackage, encodeForHTMLAttr(dm_safe(dmacrovalue)));
1524
1525
1526#if defined (__WIN32__)
1527 disp.setmacro ("win32", displayclass::defaultpackage, "1");
1528#endif
1529
1530 // set _cgiargX_ macros for each cgi argument
1531 cgiargsclass::const_iterator argshere = args.begin();
1532 cgiargsclass::const_iterator argsend = args.end();
1533 while (argshere != argsend) {
1534
1535 text_t macrovalue = (*argshere).second.value; // and stays like that if ((*argshere).first == "hp")
1536
1537 if (((*argshere).first == "q") ||
1538 ((*argshere).first == "qa") ||
1539 ((*argshere).first == "qtt") ||
1540 ((*argshere).first == "qty") ||
1541 ((*argshere).first == "qp") ||
1542 ((*argshere).first == "qpl") ||
1543 ((*argshere).first == "qr") ||
1544 ((*argshere).first == "q2")) {
1545
1546 // need to escape special characters from query string
1547 macrovalue = html_safe(macrovalue);
1548
1549 } else if ((*argshere).first == "hp") {
1550 if(!isValidURLProtocol(macrovalue)) {
1551 macrovalue = encodeForURL(macrovalue); // URL has invalid protocol like javascript:, so URL encode it
1552 }
1553 }
1554 else {
1555 macrovalue = dm_safe(macrovalue);
1556 }
1557
1558 // set the default value for the macro
1559 disp.setmacro ("cgiarg" + (*argshere).first, displayclass::defaultpackage, macrovalue);
1560
1561 // set macros for the encoded versions of the same value. Uses the functions in securitytools.h
1562 // https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet
1563
1564 text_t htmlsafe = encodeForHTML(macrovalue);
1565 text_t attrsafe = encodeForHTMLAttr(macrovalue);
1566 text_t urlsafe = encodeForURL(macrovalue);
1567 text_t jssafe = encodeForJavascript(macrovalue); // with default setting will return \\x and \\u for macro files
1568 text_t csssafe = encodeForCSS(macrovalue); // not yet used anywhere, but is available for use in macros
1569 text_t sqlsafe = encodeForSQL(macrovalue);
1570
1571 disp.setmacro ("cgiarg" + (*argshere).first + "Htmlsafe", displayclass::defaultpackage, htmlsafe);
1572 disp.setmacro ("cgiarg" + (*argshere).first + "Attrsafe", displayclass::defaultpackage, attrsafe);
1573 disp.setmacro ("cgiarg" + (*argshere).first + "Urlsafe", displayclass::defaultpackage, urlsafe);
1574 disp.setmacro ("cgiarg" + (*argshere).first + "Jssafe", displayclass::defaultpackage, jssafe);
1575 disp.setmacro ("cgiarg" + (*argshere).first + "Csssafe", displayclass::defaultpackage, csssafe);
1576 disp.setmacro ("cgiarg" + (*argshere).first + "Sqlsafe", displayclass::defaultpackage, sqlsafe);
1577
1578
1579 ++argshere;
1580 }
1581
1582 // set collection specific macros
1583 if (!collection.empty()) {
1584 recptproto *collectproto = protocols.getrecptproto (collection, logout);
1585 if (collectproto != NULL) {
1586 FilterResponse_t response;
1587 text_tset metadata;
1588 get_info ("collection", collection, args["l"], metadata, false,
1589 collectproto, response, logout);
1590
1591 if (!response.docInfo[0].metadata.empty()) {
1592 MetadataInfo_tmap::const_iterator here = response.docInfo[0].metadata.begin();
1593 MetadataInfo_tmap::const_iterator end = response.docInfo[0].metadata.end();
1594 while (here != end) {
1595 if (((*here).first != "haschildren") && ((*here).first != "hasnext") &&
1596 ((*here).first != "hasprevious")) {
1597 // check for args in form name:lang
1598 text_t name = g_EmptyText;
1599 text_t lang = g_EmptyText;
1600 bool colonfound=false;
1601 text_t::const_iterator a = (*here).first.begin();
1602 text_t::const_iterator b = (*here).first.end();
1603 while (a !=b) {
1604 if (*a==':') {
1605 colonfound=true;
1606 }
1607 else {
1608 if (colonfound)
1609 lang.push_back(*a);
1610 else name.push_back(*a);
1611 }
1612 ++a;
1613 }
1614 if (!lang.empty()) {
1615 if (args["l"]==lang) {
1616 disp.setcollectionmacro(displayclass::defaultpackage, name, "", (*here).second.values[0]);
1617 }
1618 }
1619 else { // the default one
1620 disp.setcollectionmacro(displayclass::defaultpackage, (*here).first, "", (*here).second.values[0]);
1621 }
1622 }
1623 ++here;
1624 }
1625 }
1626
1627 text_t iconcollection;
1628 disp.expandstring (displayclass::defaultpackage, "_iconcollection_", iconcollection);
1629 if (!iconcollection.empty())
1630 {
1631 ColInfoResponse_t cinfo;
1632 comerror_t err;
1633 collectproto->get_collectinfo (collection, cinfo, err, logout);
1634 if (iconcollection[0]=='/' && !cinfo.httpdomain.empty())
1635 {
1636 // local but with full path
1637 iconcollection = "http://" + cinfo.httpdomain + iconcollection;
1638 disp.setmacro("iconcollection", displayclass::defaultpackage, iconcollection);
1639 }
1640 }
1641 }
1642 }
1643
1644 if (!collection.empty()) {
1645 ColInfoResponse_t cinfo;
1646 comerror_t err;
1647 recptproto *collectproto = protocols.getrecptproto (collection, logout);
1648 if (collectproto != NULL) {
1649 collectproto->get_collectinfo (collection, cinfo, err, logout);
1650
1651
1652 // This part of the code used to use "cinfo.httpprefix" regardless
1653 // of the value it contained. Since
1654 // this can come back with an empty (in the case of gsdl_mod), the
1655 // URL produced was invalid.
1656 //
1657 // Changed to test for empty first, and use configinfo.httpprefix as
1658 // a "backup"
1659 //
1660 // Point to consider: since configinfo.httpprefix has been offically
1661 // set as "httpprefix" in macros, it seems to make more sense to use
1662 // always use that version and not the cinfo version at all.
1663
1664 text_t httpprefix
1665 = (!cinfo.httpprefix.empty()) ? cinfo.httpprefix : configinfo.httpprefix;
1666
1667 text_t httpcollection;
1668 if (!cinfo.httpdomain.empty()) httpcollection = "http://";
1669 httpcollection += cinfo.httpdomain + httpprefix + "/collect/"
1670 + encodeForURL(collection);
1671 disp.setmacro ("httpcollection", displayclass::defaultpackage,
1672 httpcollection);
1673
1674 // as of gsdl 2.53, collect.cfg can specify macros
1675 if (cinfo.collection_macros.size() > 0) {
1676 collectionmeta_map::const_iterator this_macro=cinfo.collection_macros.begin();
1677 collectionmeta_map::const_iterator done_macro=cinfo.collection_macros.end();
1678 while (this_macro != done_macro) {
1679 text_t package = "Global";
1680 text_t macroname = this_macro->first;
1681 // if this macro name is AAA:bbb then extract the package name
1682 text_t::const_iterator thischar, donechar;
1683 thischar = macroname.begin();
1684 donechar = macroname.end();
1685 while (thischar < donechar) {
1686 if (*thischar == ':') {
1687 package = substr(macroname.begin(),thischar);
1688 macroname = substr(thischar+1,donechar);
1689 break;
1690 }
1691 ++thischar;
1692 }
1693
1694 text_tmap params_map = this_macro->second;
1695 text_tmap::const_iterator this_param = params_map.begin();
1696 text_tmap::const_iterator done_param = params_map.end();
1697 while (this_param != done_param) {
1698 disp.setcollectionmacro(package,
1699 macroname,
1700 this_param->first,
1701 this_param->second);
1702 ++this_param;
1703 }
1704
1705 ++this_macro;
1706 }
1707 } // col macros
1708 } // collectproto != NULL
1709 }
1710
1711}
1712
1713// gets collection info from cache if found or
1714// calls collection server (and updates cache)
1715// returns NULL if there's an error
1716ColInfoResponse_t *receptionist::get_collectinfo_ptr (recptproto *collectproto,
1717 const text_t &collection,
1718 ostream &logout) {
1719
1720 // check the cache
1721 colinfo_tmap::iterator it = configinfo.collectinfo.find (collection);
1722 if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) {
1723 // found it
1724 return &((*it).second.info);
1725 }
1726
1727 // not cached, get info from collection server
1728 if (collectproto == NULL) {
1729 logout << "ERROR: receptionist::get_collectinfo_ptr passed null collectproto\n";
1730 return NULL;
1731 }
1732
1733 comerror_t err;
1734 if (it == configinfo.collectinfo.end()) {
1735 collectioninfo_t cinfo;
1736 collectproto->get_collectinfo (collection, cinfo.info, err, logout);
1737 if (err != noError) {
1738 outconvertclass text_t2ascii;
1739 logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \""
1740 << get_comerror_string (err) << "\"while getting collectinfo\n";
1741 return NULL;
1742 }
1743 cinfo.info_loaded = true;
1744 configinfo.collectinfo[collection] = cinfo;
1745 return &(configinfo.collectinfo[collection].info);
1746 } else {
1747 collectproto->get_collectinfo (collection, (*it).second.info, err, logout);
1748 if (err != noError) {
1749 outconvertclass text_t2ascii;
1750 logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \""
1751 << get_comerror_string (err) << "\"while getting collectinfo\n";
1752 return NULL;
1753 }
1754 (*it).second.info_loaded = true;
1755 return &((*it).second.info);
1756 }
1757}
1758
1759// removes a collection from the cache so that the next
1760// call to get_collectinfo_ptr() for that collection will
1761// retrieve the collection info from the collection server
1762void receptionist::uncache_collection (const text_t &collection) {
1763
1764 colinfo_tmap::iterator it = configinfo.collectinfo.find (collection);
1765 if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) {
1766
1767 (*it).second.info_loaded = false;
1768
1769 }
1770}
1771
1772// Handles an "Encoding" line from a configuration file - note that the
1773// configinfo.encodings map is a bit of a hack (to be fixed when the
1774// configuration files are tidied up).
1775void receptionist::configure_encoding (const text_tarray &cfgline) {
1776
1777 text_t subkey, subvalue, shortname, longname, mapfile;
1778 int multibyte = 0;
1779 text_t::const_iterator cfglinesub_here;
1780 text_tarray::const_iterator cfgline_here = cfgline.begin();
1781 text_tarray::const_iterator cfgline_end = cfgline.end();
1782 while (cfgline_here != cfgline_end) {
1783 if (*cfgline_here == "multibyte") {
1784 multibyte = 1;
1785 } else {
1786 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
1787 (*cfgline_here).end(), '=', subkey);
1788 if (subkey == "shortname") {
1789 shortname = substr (cfglinesub_here, (*cfgline_here).end());
1790 } else if (subkey == "longname") {
1791 longname = substr (cfglinesub_here, (*cfgline_here).end());
1792 } else if (subkey == "map") {
1793 mapfile = substr (cfglinesub_here, (*cfgline_here).end());
1794 }
1795 }
1796 ++cfgline_here;
1797 }
1798 if (!shortname.empty()) {
1799 if (longname.empty()) longname = shortname;
1800
1801 // add the converter
1802 if (shortname == "utf-8") {
1803 utf8inconvertclass *utf8inconvert = new utf8inconvertclass();
1804 utf8outconvertclass *utf8outconvert = new utf8outconvertclass();
1805 utf8outconvert->set_rzws(1);
1806 add_converter (shortname, utf8inconvert, utf8outconvert);
1807 configinfo.encodings[longname] = shortname;
1808
1809 } else if (shortname == "utf-16be") {
1810 // we use the default input converter as this shouldn't ever be used
1811 // for converting from unicode...
1812 inconvertclass *inconverter = new inconvertclass();
1813 utf16outconvertclass *outconverter = new utf16outconvertclass();
1814 add_converter (shortname, inconverter, outconverter);
1815 configinfo.encodings[longname] = shortname;
1816
1817 } else if (!mapfile.empty()) {
1818
1819 if (mapfile == "8859_1.ump") {
1820 // iso-8859-1 is a special case as it'll always be supported by the
1821 // standard converter class and therefore doesn't need to use its
1822 // mapping file
1823 inconvertclass *inconvert = new inconvertclass();
1824 rzwsoutconvertclass *outconvert = new rzwsoutconvertclass();
1825 outconvert->set_rzws(1);
1826 add_converter (shortname, inconvert, outconvert);
1827 configinfo.encodings[longname] = shortname;
1828
1829 } else {
1830 text_t to_uc_map = filename_cat(configinfo.gsdlhome, "mappings", "to_uc", mapfile);
1831 text_t from_uc_map = filename_cat(configinfo.gsdlhome, "mappings", "from_uc", mapfile);
1832 if (file_exists(to_uc_map) && file_exists(from_uc_map)) {
1833
1834 mapinconvertclass *mapinconvert = new mapinconvertclass();
1835 mapinconvert->setmapfile (to_uc_map, 0x003F);
1836 mapinconvert->set_multibyte (multibyte);
1837 mapoutconvertclass *mapoutconvert = new mapoutconvertclass();
1838 mapoutconvert->setmapfile (from_uc_map, 0x3F);
1839 mapoutconvert->set_multibyte (multibyte);
1840 mapoutconvert->set_rzws(1);
1841 add_converter (shortname, mapinconvert, mapoutconvert);
1842 configinfo.encodings[longname] = shortname;
1843 }
1844 }
1845 }
1846 }
1847}
Note: See TracBrowser for help on using the repository browser.