root/main/trunk/greenstone2/runtime-src/src/recpt/receptionist.cpp @ 28912

Revision 28912, 59.3 KB (checked in by ak19, 7 years ago)

Commit 5 for security. Handles setmacro() occurrences in non action.cpp files. cl is safe from hacks now.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * receptionist.cpp -- a web interface for the gsdl
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26// following line required to get fstream.filedesc() on darwin (Mac OS X)
27// gcc 2.91 automatically defines this in stream.h
28#define _STREAM_COMPAT  1
29
30#include "receptionist.h"
31#include "recptprototools.h"
32#include "fileutil.h"
33#include "cgiutils.h"
34#include "htmlutils.h"
35#include "gsdltools.h"
36#include "gsdltimes.h"
37#include "OIDtools.h"
38#include "securitytools.h"
39#include <assert.h>
40#include <time.h>
41#include <stdio.h> // for open()
42#include <fcntl.h> // for open() flags
43// following 2 are for printing Last-Modified http header.
44#include <sys/stat.h>
45#include <time.h>
46
47#if defined (GSDL_USE_IOS_H)
48#include <fstream.h>
49#else
50#include <fstream>
51#endif
52
53void recptconf::clear () {
54  gsdlhome.clear();
55  collecthome.clear();
56  dbhome.clear();
57  collectinfo.erase(collectinfo.begin(), collectinfo.end());
58  collection.clear();
59  collectdir.clear();
60  httpprefix.clear();
61  httpweb.clear();
62  gwcgi.clear();
63  macrofiles.erase(macrofiles.begin(), macrofiles.end());
64  saveconf.clear();
65  usecookies = false;
66  logcgiargs = false;
67  LogDateFormat = LocalTime;
68
69  maintainer.clear();
70  MailServer.clear();
71  LogEvents = Disabled;
72  EmailEvents = Disabled;
73  EmailUserEvents = false;
74
75  languages.erase(languages.begin(), languages.end());
76  encodings.erase(encodings.begin(), encodings.end());
77
78  site_auth = false;
79  HomePageType = "images";
80  HomePageCols = 3;
81 
82  // these default page parameters can always be overriden
83  // in the configuration file
84  pageparams.erase(pageparams.begin(), pageparams.end());
85  pageparams["c"] = "";
86  pageparams["l"] = "en";
87
88#ifdef MACROPRECEDENCE
89  macroprecedence = MACROPRECEDENCE;
90#else
91  macroprecedence.clear();
92#endif
93}
94
95
96void collectioninfo_t::clear () {
97  gsdl_gsdlhome.clear();
98  gsdl_dbhome.clear();
99
100  info_loaded = false;
101  info.clear();
102}
103
104void languageinfo_t::clear () {
105  longname.clear();
106  defaultencoding.clear();
107}
108
109receptionist::receptionist () {
110  // create a list of cgi arguments
111  // this must be done before the configuration
112
113  cgiarginfo ainfo;
114
115  ainfo.shortname = "e";
116  ainfo.longname = "compressed arguments";
117  ainfo.multiplechar = true;
118  ainfo.defaultstatus = cgiarginfo::good;
119  ainfo.argdefault = g_EmptyText;
120  ainfo.savedarginfo = cgiarginfo::mustnot;
121  argsinfo.addarginfo (NULL, ainfo);
122
123  ainfo.shortname = "a";
124  ainfo.longname = "action";
125  ainfo.multiplechar = true;
126  ainfo.defaultstatus = cgiarginfo::none;
127  ainfo.argdefault = g_EmptyText;
128  ainfo.savedarginfo = cgiarginfo::must;
129  argsinfo.addarginfo (NULL, ainfo);
130
131  // w=western
132  ainfo.shortname = "w";
133  ainfo.longname = "encoding";
134  ainfo.multiplechar = true;
135  ainfo.defaultstatus = cgiarginfo::none;
136  ainfo.argdefault = g_EmptyText;
137  ainfo.savedarginfo = cgiarginfo::must;
138  argsinfo.addarginfo (NULL, ainfo);
139 
140  ainfo.shortname = "nw";
141  ainfo.longname = "new encoding";
142  ainfo.multiplechar = true;
143  ainfo.defaultstatus = cgiarginfo::none;
144  ainfo.argdefault = g_EmptyText;
145  ainfo.savedarginfo = cgiarginfo::mustnot;
146  argsinfo.addarginfo (NULL, ainfo);
147 
148  ainfo.shortname = "c";
149  ainfo.longname = "collection";
150  ainfo.multiplechar = true;
151  ainfo.defaultstatus = cgiarginfo::none;
152  ainfo.argdefault = g_EmptyText;
153  ainfo.savedarginfo = cgiarginfo::must;
154  argsinfo.addarginfo (NULL, ainfo);
155 
156  // the interface language name should use the ISO 639
157  // standard
158  ainfo.shortname = "l";
159  ainfo.longname = "interface language";
160  ainfo.multiplechar = true;
161  ainfo.defaultstatus = cgiarginfo::weak;
162  ainfo.argdefault = "en";
163  ainfo.savedarginfo = cgiarginfo::must;
164  argsinfo.addarginfo (NULL, ainfo);
165 
166  ainfo.shortname = "nl";
167  ainfo.longname = "new language";
168  ainfo.multiplechar = false;
169  ainfo.defaultstatus = cgiarginfo::none;
170  ainfo.argdefault = "0";
171  ainfo.savedarginfo = cgiarginfo::mustnot;
172  argsinfo.addarginfo (NULL, ainfo);
173 
174  // the GSDL_UID (cookie)
175  ainfo.shortname = "z";
176  ainfo.longname = "gsdl uid";
177  ainfo.multiplechar = true;
178  ainfo.defaultstatus = cgiarginfo::none;
179  ainfo.argdefault = g_EmptyText;
180  ainfo.savedarginfo = cgiarginfo::mustnot;
181  argsinfo.addarginfo (NULL, ainfo);
182}
183
184
185void receptionist::add_action (action *theaction) {
186  // make sure we have an action to add
187  if (theaction == NULL) return;
188
189  // add this action to the list of actions
190  actions.addaction(theaction);
191 
192  // add the cgi arguments from this action
193  argsinfo.addarginfo (NULL, *(theaction->getargsinfo()));
194}
195
196
197void receptionist::add_browser (browserclass *thebrowser) {
198  // make sure we have a browser to add
199  if (thebrowser == NULL) return;
200
201  // add this browser to the list of browsers
202  browsers.addbrowser(thebrowser);
203}
204
205
206void receptionist::setdefaultbrowser (const text_t &browsername) {
207  browsers.setdefaultbrowser (browsername);
208}
209
210
211// configure should be called for each line in the
212// configuration files to configure the receptionist and everything
213// it contains. The configuration should take place after everything
214// has been added but before the initialisation.
215
216void receptionist::configure (const text_t &key, const text_tarray &cfgline) {
217  // configure the receptionist
218
219   
220   
221  if (cfgline.size() >= 1) {
222    cgiarginfo *info = NULL;
223    if (key == "gsdlhome") {
224      configinfo.gsdlhome = cfgline[0];
225      if (configinfo.dbhome.empty()) configinfo.dbhome = cfgline[0];
226    }
227    else if (key == "collecthome") configinfo.collecthome = cfgline[0];
228    else if (key == "gdbmhome") configinfo.dbhome = cfgline[0];
229    else if (key == "collection") {
230      configinfo.collection = cfgline[0];
231      // also need to set the default arg to this collection
232      if ((info = argsinfo.getarginfo("c")) != NULL) {
233    info->defaultstatus = cgiarginfo::good;
234    info->argdefault = cfgline[0];
235      }
236     
237    }
238    else if (key == "collectdir") configinfo.collectdir = cfgline[0];
239    else if (key == "httpprefix") configinfo.httpprefix = cfgline[0];
240    else if (key == "httpweb") configinfo.httpweb = cfgline[0];
241    else if (key == "gwcgi") configinfo.gwcgi = cfgline[0];
242    else if (key == "macrofiles") {
243      // want to append to macrofiles (i.e. may be several config files
244      // contributing, maybe from several collections).
245      text_tarray::const_iterator here = cfgline.begin();
246      text_tarray::const_iterator end = cfgline.end();
247      while (here != end) {
248    configinfo.macrofiles.insert (*here);
249    ++here;
250      }
251    }
252    else if (key == "saveconf") configinfo.saveconf = cfgline[0];
253    else if (key == "usecookies") configinfo.usecookies = (cfgline[0] == "true");
254    else if (key == "logcgiargs") configinfo.logcgiargs = (cfgline[0] == "true");
255    else if (key == "maintainer") configinfo.maintainer = cfgline[0];
256    else if (key == "MailServer") configinfo.MailServer = cfgline[0];
257    else if (key == "LogDateFormat") {
258      if (cfgline[0] == "UTCTime") configinfo.LogDateFormat = UTCTime;
259      else if (cfgline[0] == "Absolute") configinfo.LogDateFormat = Absolute;
260    }
261    else if (key == "LogEvents") {
262      if (cfgline[0] == "CollectorEvents") configinfo.LogEvents = CollectorEvents;
263      else if (cfgline[0] == "AllEvents") configinfo.LogEvents = AllEvents;
264    }
265    else if (key == "EmailEvents") {
266      if (cfgline[0] == "CollectorEvents") configinfo.EmailEvents = CollectorEvents;
267      else if (cfgline[0] == "AllEvents") configinfo.EmailEvents = AllEvents;
268    }
269    else if (key == "EmailUserEvents") configinfo.EmailUserEvents = (cfgline[0] == "true");
270    else if (key == "pageparam") {
271      if (cfgline.size() >= 2) configinfo.pageparams[cfgline[0]] = cfgline[1];
272      else configinfo.pageparams[cfgline[0]] = "";
273    }
274    else if (key == "macroprecedence") configinfo.macroprecedence = cfgline[0];
275    else if (key == "collectinfo") {
276      if (cfgline.size() == 3) {
277    // for backwards compatability with older collections that only use
278    // gsdlhome and dbhome
279    collectioninfo_t cinfo;
280    cinfo.gsdl_gsdlhome = cfgline[1];
281    cinfo.gsdl_collecthome = filename_cat(cfgline[1],"collect");
282    cinfo.gsdl_dbhome = cfgline[2];
283    configinfo.collectinfo[cfgline[0]] = cinfo;
284      }
285      else if (cfgline.size() >= 4) {
286    collectioninfo_t cinfo;
287    cinfo.gsdl_gsdlhome = cfgline[1];
288    cinfo.gsdl_collecthome = cfgline[2];
289    cinfo.gsdl_dbhome = cfgline[3];
290    configinfo.collectinfo[cfgline[0]] = cinfo;
291      }
292    }
293
294    // Read in the value for the site_auth directive either true or false
295    else if (key == "site_auth") configinfo.site_auth = (cfgline[0] == "true");
296
297    else if (key == "site_group")
298       joinchar(cfgline,',',configinfo.site_group);
299       
300    else if (key == "SiteFormat") {
301       if (cfgline[0] == "HomePageType") {
302      configinfo.HomePageType = cfgline[1];
303       } else if (cfgline[0] == "HomePageCols") {
304      configinfo.HomePageCols = cfgline[1].getint();
305       }
306    }
307       
308    else if (key == "cgiarg") {
309      // get shortname
310      bool seen_defaultstatus = false;
311      text_t subkey, subvalue;
312      text_t shortname;
313      text_t::const_iterator cfglinesub_here;
314      text_tarray::const_iterator cfgline_here = cfgline.begin();
315      text_tarray::const_iterator cfgline_end = cfgline.end();
316      while (cfgline_here != cfgline_end) {
317    cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
318                    (*cfgline_here).end(), '=', subkey);
319    if (subkey == "shortname") {
320      shortname = substr (cfglinesub_here, (*cfgline_here).end());
321    }
322    ++cfgline_here;
323      }
324
325      // if we found the shortname process the line again filling in values
326      if (!shortname.empty()) {
327    cgiarginfo &chinfo = argsinfo[shortname];
328    chinfo.shortname = shortname; // in case this is a new argument
329   
330    cfgline_here = cfgline.begin();
331    while (cfgline_here != cfgline_end) {
332      cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
333                      (*cfgline_here).end(), '=', subkey);
334      subvalue = substr (cfglinesub_here, (*cfgline_here).end());
335
336      if (subkey == "longname") chinfo.longname = subvalue;
337      else if (subkey == "multiplechar") chinfo.multiplechar = (subvalue == "true");
338      else if (subkey == "multiplevalue") chinfo.multiplevalue = (subvalue == "true");
339      else if (subkey == "defaultstatus") {
340        seen_defaultstatus = true;
341        if (subvalue == "none") chinfo.defaultstatus = cgiarginfo::none;
342        else if (subvalue == "weak") chinfo.defaultstatus = cgiarginfo::weak;
343        else if (subvalue == "good") chinfo.defaultstatus = cgiarginfo::good;
344        else if (subvalue == "config") chinfo.defaultstatus = cgiarginfo::config;
345        else if (subvalue == "imperative") chinfo.defaultstatus = cgiarginfo::imperative;
346      }
347      else if (subkey == "argdefault") {
348        chinfo.argdefault = subvalue;
349        if (!seen_defaultstatus) chinfo.defaultstatus = cgiarginfo::config;
350      }
351      else if (subkey == "savedarginfo") {
352        if (subvalue == "mustnot") chinfo.savedarginfo = cgiarginfo::mustnot;
353        else if (subvalue == "can") chinfo.savedarginfo = cgiarginfo::can;
354        else if (subvalue == "must") chinfo.savedarginfo = cgiarginfo::must;
355      }
356     
357      ++cfgline_here;
358    }
359      }
360
361    } else if (key == "Encoding") {
362
363      configure_encoding (cfgline);
364
365    } else if (key == "Language") {
366      text_t subkey, subvalue, shortname;
367      languageinfo_t lang;
368      text_t::const_iterator cfglinesub_here;
369      text_tarray::const_iterator cfgline_here = cfgline.begin();
370      text_tarray::const_iterator cfgline_end = cfgline.end();
371      while (cfgline_here != cfgline_end) {
372    cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
373                    (*cfgline_here).end(), '=', subkey);
374    if (subkey == "shortname") {
375      shortname = substr (cfglinesub_here, (*cfgline_here).end());
376    } else if (subkey == "longname") {
377      lang.longname = substr (cfglinesub_here, (*cfgline_here).end());
378    } else if (subkey == "default_encoding") {
379      lang.defaultencoding = substr (cfglinesub_here, (*cfgline_here).end());
380    }
381    ++cfgline_here;
382      }
383      if (!shortname.empty()) {
384    if (lang.longname.empty()) lang.longname = shortname;
385    configinfo.languages[shortname] = lang;
386      }
387    }
388  }
389 
390  // configure the actions
391  actionptrmap::iterator actionhere = actions.begin ();
392  actionptrmap::iterator actionend = actions.end ();
393
394  while (actionhere != actionend) {
395    assert ((*actionhere).second.a != NULL);
396    if ((*actionhere).second.a != NULL)
397      (*actionhere).second.a->configure(key, cfgline);
398
399    ++actionhere;
400  }
401
402  // configure the protocols
403  recptprotolistclass::iterator protohere = protocols.begin ();
404  recptprotolistclass::iterator protoend = protocols.end ();
405
406  while (protohere != protoend) {
407    assert ((*protohere).p != NULL);
408    comerror_t err;
409    if ((*protohere).p != NULL)
410      (*protohere).p->configure(key, cfgline, err);
411   
412    ++protohere;
413  }
414
415  // configure the browsers
416  browserptrmap::iterator browserhere = browsers.begin ();
417  browserptrmap::iterator browserend = browsers.end ();
418
419  while (browserhere != browserend) {
420    assert ((*browserhere).second.b != NULL);
421    if ((*browserhere).second.b != NULL)
422      (*browserhere).second.b->configure(key, cfgline);
423   
424    ++browserhere;
425  }
426}
427
428
429void receptionist::configure (const text_t &key, const text_t &value) {
430  text_tarray cfgline;
431  cfgline.push_back (value);
432  configure(key, cfgline);
433}
434
435
436// init should be called after all the actions and protocols have been
437// added to the receptionist and after everything has been configured but
438// before any pages are created.  It returns true on success and false on
439// failure. If false is returned getpage should not be called (without
440// producing meaningless output), instead an error page should be produced
441// by the calling code.
442bool receptionist::init (ostream &logout) {
443
444  // first configure collectdir
445  if (!configinfo.collection.empty()) {
446
447    // collection specific mode
448
449    text_t collectdir = configinfo.gsdlhome;
450
451    if (!configinfo.collectdir.empty()) {
452      // has already been configured
453      collectdir = configinfo.collectdir;
454    } else {
455
456      // decide where collectdir is by searching for collect.cfg
457      // look in $GSDLHOME/collect/collection-name/etc/collect.cfg and
458      // then $GSDLHOME/etc/collect.cfg
459      collectdir = filename_cat (configinfo.gsdlhome, "collect");
460      collectdir = filename_cat (collectdir, configinfo.collection);
461      text_t filename = filename_cat (collectdir, "etc");
462      filename = filename_cat (filename, "collect.cfg");
463     
464      if (!file_exists(filename)) collectdir = configinfo.gsdlhome;
465    }
466
467    configure("collectdir", collectdir);
468
469  }
470  else {
471
472    text_t collecthome;
473    if (configinfo.collecthome.empty()) {
474      collecthome = filename_cat(configinfo.gsdlhome,"collect");
475    }
476    else {
477      collecthome = configinfo.collecthome;
478    }
479
480    configure("collecthome", collecthome);
481
482    // for backwards compatability collectdir set to gsdlhome
483    // (possible it could now be removed)
484    configure("collectdir", configinfo.gsdlhome);
485  }
486
487
488  // read in the macro files
489  if (!read_macrofiles (logout)) return false;
490
491  // there must be at least one action defined
492  if (actions.empty()) {
493    logout << "Error: no actions have been added to the receptionist\n";
494    return false;
495  }
496
497  // there must be at least one browser defined
498  if (browsers.empty()) {
499    logout << "Error: no browsers have been added to the receptionist\n";
500    return false;
501  }
502
503  // create a saveconf string if there isn't one already
504  if (configinfo.saveconf.empty())
505    configinfo.saveconf = create_save_conf_str (argsinfo, logout);
506
507  // check the saveconf string
508  if (!check_save_conf_str (configinfo.saveconf, argsinfo, logout))
509    return false;
510
511  // set a random seed
512  srand (time(NULL));
513
514  // if maintainer email address is something dodgy (for now I'll define
515  // dodgy as being anything that doesn't contain '@') disable EmailEvents
516  // and EmailUserEvents (we don't strictly need to disable EmailUserEvents
517  // in this case but we will as it seems likely that MailServer will also
518  // be screwed up if maintainer is).
519  text_t::const_iterator maintainer_end = configinfo.maintainer.end ();
520  text_t::const_iterator maintainer_here = findchar ((text_t::const_iterator)configinfo.maintainer.begin(),
521                             maintainer_end, '@');
522  if (maintainer_here == maintainer_end) {
523    configinfo.EmailEvents = Disabled;
524    configinfo.EmailUserEvents = Disabled;
525  } else {
526    // if MailServer isn't set it should default to mail.maintainer-domain
527    if (configinfo.MailServer.empty()) {
528      configinfo.MailServer = "mail." + substr (maintainer_here+1, maintainer_end);
529    }
530  }
531
532  // init the actions
533  actionptrmap::iterator actionhere = actions.begin ();
534  actionptrmap::iterator actionend = actions.end ();
535  while (actionhere != actionend) {
536    if (((*actionhere).second.a == NULL) ||
537    !(*actionhere).second.a->init(logout)) return false;
538    ++actionhere;
539  }
540
541  // init the protocols
542  recptprotolistclass::iterator protohere = protocols.begin ();
543  recptprotolistclass::iterator protoend = protocols.end ();
544  while (protohere != protoend) {
545    comerror_t err;   
546    if (((*protohere).p == NULL) ||
547    !(*protohere).p->init(err, logout)) return false;
548    ++protohere;
549  }
550
551  // init the browsers
552  browserptrmap::iterator browserhere = browsers.begin ();
553  browserptrmap::iterator browserend = browsers.end ();
554  while (browserhere != browserend) {
555    if (((*browserhere).second.b == NULL) ||
556    !(*browserhere).second.b->init(logout)) return false;
557    ++browserhere;
558  }
559
560  return true;
561}
562
563// get the default encoding for the given language - if it fails for any
564// reason return ""
565text_t receptionist::get_default_encoding (const text_t &language) {
566 
567  // make sure language is valid
568  if (configinfo.languages.find(language) == configinfo.languages.end()) return "";
569
570  text_t default_encoding = configinfo.languages[language].defaultencoding;
571
572  // make sure the encoding is valid
573  if (converters.find(default_encoding) == converters.end()) {
574    // we don't support the encoding specified as default for this language
575    if (configinfo.encodings.size()==1) {
576      // only 1 encoding specified in main.cfg, so use it
577      return configinfo.encodings.begin()->second;
578    }
579    return "";
580  }
581
582  return default_encoding;
583}
584
585// parse_cgi_args parses cgi arguments into an argument class.
586// This function should be called for each page request. It returns false
587// if there was a major problem with the cgi arguments.
588bool receptionist::parse_cgi_args (const text_t &argstr,
589                   fileupload_tmap &fileuploads,
590                   cgiargsclass &args,
591                   ostream &logout, text_tmap &fcgienv) {
592
593  // get an initial list of cgi arguments
594  args.clear();
595  split_cgi_args (argsinfo, argstr, args);
596
597  // expand the compressed argument (if there was one)
598  if (!expand_save_args (argsinfo, configinfo.saveconf, args, logout)) return false;
599
600  // add the defaults
601  add_default_args (argsinfo, args, logout);
602
603  // add any file upload arguments
604  add_fileupload_args(argsinfo, args, fileuploads, logout);
605
606  // get the cookie
607  if (configinfo.usecookies) get_cookie(args["z"], fcgienv);
608 
609  // if we're changing languages, set the encoding to the default for the new language
610  if (args["nl"] == "1") {
611    args["nw"] = get_default_encoding(args["l"]);
612  }
613
614  // get the input encoding
615  // if encoding isn't set, set it to the default for the current language
616  if ((args.getarg("w") == NULL) || args["w"].empty()) {
617    args["w"] = get_default_encoding(args["l"]);
618  }
619
620  text_t &arg_w = args["w"];
621
622  inconvertclass defaultinconvert;
623  inconvertclass *inconvert = converters.get_inconverter (arg_w);
624  if (inconvert == NULL) inconvert = &defaultinconvert;
625
626  // see if the next page will have a different encoding
627  if (args.getarg("nw") != NULL) arg_w = args["nw"];
628
629  // convert arguments which aren't in unicode to unicode
630  args_tounicode (args, *inconvert);
631
632
633  // decide on the output conversion class (needed for checking the external
634  // cgi arguments)
635  rzwsoutconvertclass defaultoutconverter;
636  rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
637  if (outconverter == NULL) outconverter = &defaultoutconverter;
638  outconverter->reset();
639
640  // check the main cgi arguments
641  if (!check_mainargs (args, logout)) return false;
642
643  // check the arguments for the action
644  action *a = actions.getaction (args["a"]);
645  if (a != NULL) {
646    if (!a->check_cgiargs (argsinfo, args, &protocols, logout)) return false;
647  } else {
648    // the action was not found!!
649    outconvertclass text_t2ascii;
650    logout << text_t2ascii << "Error: the action \"" << args["a"]
651       << "\" could not be found.\n";
652    return false;
653  }
654
655  // check external cgi arguments for each action
656  actionptrmap::iterator actionhere = actions.begin ();
657  actionptrmap::iterator actionend = actions.end ();
658  while (actionhere != actionend) {
659    assert ((*actionhere).second.a != NULL);
660    if ((*actionhere).second.a != NULL) {
661      if (!(*actionhere).second.a->check_external_cgiargs (argsinfo, args, *outconverter,
662                               configinfo.saveconf, logout))
663    return false;
664    }
665    ++actionhere;
666  }
667
668  // the action might have changed but we will assume that
669  // the cgiargs were checked properly when the change was made
670
671  return true;
672}
673
674
675// Returns true if cookie already existed, false if it was generated
676bool receptionist::get_cookie (text_t &cookie, text_tmap &fcgienv)
677{
678  // See if we can get the GSDL_UID cookie
679  text_t cookiestring = gsdl_getenv ("HTTP_COOKIE", fcgienv);
680  if (!cookiestring.empty()) // This should really be handled by the findword function...
681  {
682    // Check if the cookie contains GSDL_UID
683    text_t gsdl_uid = "GSDL_UID=";
684    text_t::iterator gsdl_uid_start = findword(cookiestring.begin(), cookiestring.end(), gsdl_uid);
685    if (gsdl_uid_start != cookiestring.end())
686    {
687      // Yes, so extract its value
688      cookie = substr(gsdl_uid_start + gsdl_uid.size(), findchar(gsdl_uid_start + gsdl_uid.size(), cookiestring.end(), ';'));
689      return true;
690    }
691  }
692
693  // Generate a new key "[host]-[epoch time]", e.g. test.com-1256764496
694  cookie.clear();
695  text_t host = gsdl_getenv("REMOTE_ADDR", fcgienv);
696  time_t ttime = time(NULL);
697  if (!host.empty())
698  {
699    cookie += host;
700    cookie.push_back ('-');
701  }
702  cookie += text_t(ttime);
703
704  return false;
705}
706
707
708// Same as above but just tests if cookie exists
709bool receptionist::get_cookie (text_tmap &fcgienv)
710{
711  text_t cookie_jar = "";
712  return get_cookie(cookie_jar, fcgienv);
713}
714
715
716bool receptionist::log_cgi_args (cgiargsclass &args, ostream &logout, text_tmap &fcgienv) {
717
718  // see if we want to log the cgi arguments
719  if (!configinfo.logcgiargs) return true;
720 
721  text_t host = gsdl_getenv ("REMOTE_HOST", fcgienv);
722  text_t script_name = gsdl_getenv ("SCRIPT_NAME", fcgienv);
723  if (host.empty()) host = gsdl_getenv ("REMOTE_ADDR", fcgienv);
724  text_t browser = gsdl_getenv ("HTTP_USER_AGENT", fcgienv);
725
726  cgiargsclass::const_iterator args_here = args.begin();
727  cgiargsclass::const_iterator args_end = args.end();
728
729  text_t argstr;
730  bool first = true;
731  while (args_here != args_end) {
732    if (!first) argstr += ", ";
733    argstr += (*args_here).first + "=" + (*args_here).second.value;
734    first = false;
735    ++args_here;
736  }
737
738  text_t logfile = filename_cat (configinfo.dbhome, "etc", "usage.txt");
739
740  text_t logstr = script_name;
741  logstr += " " + host;
742  logstr += " [";
743  if (configinfo.LogDateFormat == UTCTime) {
744    logstr += get_date (false);
745  } else if (configinfo.LogDateFormat == Absolute) {
746    time_t ttime = time(NULL);
747    logstr += ttime;
748  } else {
749    // LocalTime
750    logstr += get_date (true);
751  }
752  logstr += "] (" + argstr + ") \"";
753  logstr += browser;
754  logstr += "\"\n";
755
756  return append_logstr (logfile, logstr, logout);
757}
758
759bool receptionist::append_logstr (const text_t &filename, const text_t &logstr,
760                  ostream &logout) {
761
762  char *lfile = filename.getcstr();
763
764  int fd = open(lfile, O_CREAT | O_WRONLY | O_APPEND, 0777);
765  //int fd = open(lfile, O_CREAT | O_RDWR | O_APPEND, 0777);
766 
767  if (fd == -1) {
768    logout << "Error: Couldn't open file " << lfile << "\n";
769    delete []lfile;
770    return false;
771  }
772
773  // lock_val is set to 0 if file is locked successfully
774  int lock_val = 1;
775  GSDL_LOCK_FILE (fd);
776  if (lock_val == 0) {
777    // Write the string out in UTF-8
778    text_t tmp_log_str_utf8 = to_utf8(logstr);
779    char *buffer = tmp_log_str_utf8.getcstr();
780    size_t num_chars = tmp_log_str_utf8.size();
781    write(fd, buffer, num_chars);
782    GSDL_UNLOCK_FILE (fd);
783    delete []buffer;
784  } else {
785    logout << "Error: Couldn't lock file " << lfile << "\n";
786    close(fd);
787    delete []lfile;
788    return false;
789  }
790
791  close(fd);
792       
793  delete []lfile;
794  return true;
795}
796
797text_t receptionist::expandmacros (const text_t &astring, cgiargsclass &args,
798                   ostream &logout) {
799  text_t outstring;
800  outconvertclass text_t2ascii;
801
802  action *a = actions.getaction (args["a"]);
803  if (a != NULL)
804  {
805    prepare_page (a, args, text_t2ascii, logout);
806  }
807  disp.expandstring (displayclass::defaultpackage, astring, outstring);
808  return outstring;
809}
810
811// produce_cgi_page will call get_cgihead_info and
812// produce_content in the appropriate way to output a cgi header and
813// the page content (if needed). If a page could not be created it
814// will return false
815bool receptionist::produce_cgi_page (cgiargsclass &args, ostream &contentout,
816                     ostream &logout, text_tmap &fcgienv) {
817  outconvertclass text_t2ascii;
818
819  response_t response;
820  text_t response_data;
821
822  // produce cgi header
823  get_cgihead_info (args, response, response_data, logout, fcgienv);
824  if (response == location) {
825    // location response (url may contain macros!!)
826    response_data = expandmacros (response_data, args, logout);
827
828    contentout << text_t2ascii << "Location: " << response_data << "\n\n";
829    contentout << flush;
830
831    return true;
832  } else if (response == content) {
833    // content response
834
835#ifdef GSDL_NOCACHE
836    contentout << "Expires: Mon, 26 Jul 1997 05:00:00 GMT\n"; // date in the past
837    tm *tm_ptr = NULL;
838    time_t t = time(NULL);
839    tm_ptr = gmtime (&t);
840    if (tm_ptr != NULL) {
841      char *timestr = new char[128];
842      strftime (timestr, 128, "%a, %d %b %Y %H:%M:%S", tm_ptr);
843      contentout << "Last-Modified: " << timestr << " GMT\n"; // always modified
844      delete []timestr;
845    }
846    contentout << "Cache-Control: no-cache, must-revalidate\n"; // HTTP/1.1
847    contentout << "Pragma: no-cache\n"; // HTTP/1.0
848
849#else
850
851    // use the later of build.cfg and collect.cfg modification times
852    // as the Last-Modified: header, for caching values
853    struct stat file_info;
854    time_t latest=0;
855
856    text_t collectname="";
857    collectname=args["c"];
858    if (collectname != "") {
859
860      text_t collecthome;
861      if (!configinfo.collecthome.empty()) {
862    collecthome = configinfo.collecthome;
863      }   
864      else {
865    collecthome=filename_cat(configinfo.gsdlhome,"collect");
866      }
867      text_t collectdir=filename_cat(collecthome,collectname);
868     
869      text_t buildcfg=filename_cat(collectdir,"index");
870      buildcfg=filename_cat(buildcfg,"build.cfg");
871      char *buildcfg_ptr=buildcfg.getcstr();
872      text_t collectcfg=filename_cat(collectdir,"etc");
873      collectcfg=filename_cat(collectcfg,"collect.cfg");
874      char *collectcfg_ptr=collectcfg.getcstr();
875
876      if (stat(buildcfg_ptr, &file_info)) {
877    // we got an error. Currently don't handle error :(
878    //  logout <<
879      } else {
880    latest=file_info.st_mtime;
881      }
882   
883      if (stat(collectcfg_ptr, &file_info)) {
884    // error - unhandled for now
885      } else {
886    if (latest<file_info.st_mtime) latest=file_info.st_mtime;
887      }
888      delete []buildcfg_ptr;
889      delete []collectcfg_ptr;
890
891      if (latest>0) {
892    // print out modified time, "DDD, dd MMM YYYY hh:mm:ss" format
893    // c library takes care of mem for this string... (has \n at end!!!!)
894    // latest is currently local time, convert to UTC.
895    struct tm* utc_latest;
896    utc_latest=gmtime(&latest);
897    contentout << "Last-Modified: " << asctime(utc_latest);
898      }
899    } // end of collection != ""
900
901#endif
902
903    contentout << text_t2ascii << "Content-type: " << response_data << "\n\n";
904  }
905  else if (response == undecided_location) {
906    // Wait until later to output the target location
907    // Used for the "I'm feeling lucky" functionality
908  }
909  else {
910    // unknown response
911    logout << "Error: get_cgihead_info returned an unknown response type.\n";
912    return false;
913  }
914
915  // produce cgi page
916  if (!produce_content (args, contentout, logout)) return false;
917
918  // flush contentout
919  contentout << flush;
920  return true;
921}
922
923
924// get_cgihead_info determines the cgi header information for
925// a set of cgi arguments. If response contains location then
926// response_data contains the redirect address. If reponse
927// contains content then reponse_data contains the content-type.
928// Note that images can now be produced by the receptionist.
929// Note also, alternative for get_cgihead_info below which
930// stores the information in a text_tmap so it is more easily digested
931
932void receptionist::get_cgihead_info (cgiargsclass &args, response_t &response,
933                     text_t &response_data, ostream &logout,
934                     text_tmap &fcgienv) {
935  outconvertclass text_t2ascii;
936
937  // get the action
938  action *a = actions.getaction (args["a"]);
939  if (a != NULL) {
940    a->get_cgihead_info (args, &protocols, response, response_data, logout);
941
942  } else {
943    // the action was not found!!
944    logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \""
945       << args["a"] << "\" could not be found.\n";
946    response = content;
947    response_data = "text/html";
948  }
949
950  // add the encoding information
951  if (response == content) {
952    if (converters.find(args["w"]) != converters.end()) {
953      response_data += "; charset=" + args["w"];
954    } else {
955      // default to latin 1
956      response_data += "; charset=ISO-8859-1";
957    }
958
959    // add cookie if required
960    if (configinfo.usecookies && !get_cookie(fcgienv))
961      response_data += "\nSet-Cookie: GSDL_UID=" + args["z"]
962    + "; expires=Fri, 25-Dec-2037 00:00:00 GMT";
963  }
964}
965
966
967// Alternative version of get_cgihead_info, stores fielded infomation
968// in text_tmap rather than concatenated string
969void receptionist::get_cgihead_info (cgiargsclass &args, text_tmap &headers,
970                     ostream &logout, text_tmap &fcgienv) {
971
972  response_t response;
973  text_t response_data;
974
975  // get the action
976  action *a = actions.getaction (args["a"]);
977  if (a != NULL) {
978    a->get_cgihead_info (args, &protocols, response, response_data, logout);
979
980  } else {
981    // the action was not found!!
982    outconvertclass text_t2ascii;
983    logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \""
984       << args["a"] << "\" could not be found.\n";
985    response = content;
986    response_data = "text/html";
987  }
988
989  if (response == location) {
990    response_data = expandmacros(response_data, args, logout);
991    headers["Location"] = response_data;
992    return;
993  }
994
995  // add the encoding information
996  if (response == content) {
997
998    if (converters.find(args["w"]) != converters.end()) {
999      headers["content-encoding"] = args["w"];
1000      response_data += "; charset=" + args["w"];
1001    } else {
1002      // default to utf-8
1003      headers["content-encoding"] = "utf-8";
1004      response_data += "; charset=utf-8";
1005    }
1006
1007    headers["content-type"] = response_data;
1008
1009  }
1010
1011}
1012
1013
1014
1015// produce the page content
1016bool receptionist::produce_content (cgiargsclass &args, ostream &contentout,
1017                    ostream &logout) {
1018
1019  // decide on the output conversion class
1020  text_t &arg_w = args["w"];
1021  rzwsoutconvertclass defaultoutconverter;
1022  rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
1023  if (outconverter == NULL) outconverter = &defaultoutconverter;
1024  outconverter->reset();
1025
1026  // needed for 16-bit unicode only - big endian marker 0xfeff (RFC 2781)
1027  if (arg_w=="utf-16be") {
1028    contentout << '\xfe' << '\xff' ;
1029  }
1030
1031  recptproto *collectproto = protocols.getrecptproto (args["c"], logout);
1032  if (collectproto != NULL) {
1033    // get browsers to process OID
1034    text_t OID = args["d"];
1035    if (OID.empty()) OID = args["cl"];
1036    if (!OID.empty()) {
1037      text_tset metadata;
1038      text_tarray OIDs;
1039      OIDs.push_back (OID);
1040      if (!is_top(OID)) OIDs.push_back (OID + ".pr");
1041      FilterResponse_t response;
1042      metadata.insert ("childtype");
1043      if (get_info (OIDs, args["c"], args["l"], metadata, false, collectproto, response, logout)) {
1044    text_t classifytype;
1045    if (!response.docInfo[0].metadata["childtype"].values[0].empty())
1046      classifytype = response.docInfo[0].metadata["childtype"].values[0];
1047    else if (!is_top (OID)) {
1048      // not sure why this is occasionally not set, but it will
1049      // cause a segfault... possibly if built with no_text? jrm21
1050      if (response.docInfo[1].metadata.find("childtype")
1051          == response.docInfo[1].metadata.end()) {
1052        cerr << "receptionist: no childtype element in metadata map!"
1053         << endl;
1054      } else {
1055        if (!response.docInfo[1].metadata["childtype"].values[0].empty())
1056          classifytype = response.docInfo[1].metadata["childtype"].values[0];
1057      }
1058    }
1059    browserclass *b = browsers.getbrowser (classifytype);
1060    b->processOID (args, collectproto, logout);
1061      }
1062    }
1063 
1064    // translate "d" and "cl" arguments if required
1065    translate_OIDs (args, collectproto, logout);
1066  }
1067 
1068  // produce the page using the desired action
1069  action *a = actions.getaction (args["a"]);
1070  if (a != NULL) {
1071    if (a->uses_display(args)) prepare_page (a, args, (*outconverter), logout);
1072    if (!a->do_action (args, &protocols, &browsers, disp, (*outconverter), contentout, logout))
1073      return false;
1074  } else {
1075    // the action was not found!!
1076    outconvertclass text_t2ascii;
1077
1078    logout << text_t2ascii << "Error receptionist::produce_content: the action \""
1079       << args["a"] << "\" could not be found.\n";
1080   
1081    contentout << (*outconverter)
1082           << "<html>\n"
1083           << "<head>\n"
1084           << "<title>Error</title>\n"
1085           << "</head>\n"
1086           << "<body>\n"
1087           << "<h2>Oops!</h2>\n"
1088           << "Undefined Page. The action \""
1089           << args["a"] << "\" could not be found.\n"
1090           << "</body>\n"
1091           << "</html>\n";
1092  }
1093  return true;
1094}
1095
1096
1097// returns the compressed argument ("e") corresponding to the argument
1098// list. This can be used to save preferences between sessions.
1099text_t receptionist::get_compressed_arg (cgiargsclass &args, ostream &logout) {
1100  // decide on the output conversion class
1101  text_t &arg_w = args["w"];
1102  rzwsoutconvertclass defaultoutconverter;
1103  rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
1104  if (outconverter == NULL) outconverter = &defaultoutconverter;
1105  outconverter->reset();
1106
1107  text_t compressed_args;
1108  if (compress_save_args (argsinfo, configinfo.saveconf, args,
1109              compressed_args, *outconverter, logout))
1110    return compressed_args;
1111
1112  return g_EmptyText;
1113}
1114
1115
1116// will read in all the macro files. If one is not found an
1117// error message will be written to logout and the method will
1118// return false.
1119bool receptionist::read_macrofiles (ostream &logout) {
1120  outconvertclass text_t2ascii;
1121
1122  // redirect the error output to logout
1123  ostream *savedlogout = disp.setlogout (&logout);
1124
1125  // unload any macros that were previously loaded - this allows us to call
1126  // this function a second time to reload all the macro files (useful for
1127  // reading in changed macro files in server versions of greenstone)
1128  disp.unloaddefaultmacros();
1129
1130  // load up the default macro files, the collection directory
1131  // is searched first for the file (if this is being used in
1132  // collection specific mode) and then the main directory(s)
1133  text_t colmacrodir = filename_cat (configinfo.collectdir, "macros");
1134
1135  text_tset maindirs;
1136  text_t gsdlmacrodir = filename_cat (configinfo.gsdlhome, "macros");
1137  maindirs.insert (gsdlmacrodir);
1138  colinfo_tmap::iterator colhere = configinfo.collectinfo.begin();
1139  colinfo_tmap::iterator colend = configinfo.collectinfo.end();
1140  while (colhere != colend) {
1141    if (!((*colhere).second.gsdl_gsdlhome).empty()) {
1142      gsdlmacrodir = filename_cat ((*colhere).second.gsdl_gsdlhome, "macros");
1143      maindirs.insert (gsdlmacrodir);
1144    }
1145    ++colhere;
1146  }
1147
1148  text_tset::iterator arrhere = configinfo.macrofiles.begin();
1149  text_tset::iterator arrend = configinfo.macrofiles.end();
1150  text_t filename;
1151  while (arrhere != arrend) {
1152    bool foundfile = false;
1153
1154    // try in the collection directory if this is being
1155    // run in collection specific mode
1156    if (!configinfo.collection.empty()) {
1157      filename = filename_cat (colmacrodir, *arrhere);
1158      if (file_exists (filename)) {
1159    disp.loaddefaultmacros(filename);
1160    foundfile = true;
1161      }
1162    }
1163
1164    // if we haven't found the macro file yet try in
1165    // the main macro directory(s)
1166    // if file is found in more than one main directory
1167    // we'll load all copies
1168    if (!foundfile) {
1169      text_tset::const_iterator dirhere = maindirs.begin();
1170      text_tset::const_iterator dirend = maindirs.end();
1171      while (dirhere != dirend) {
1172    filename = filename_cat (*dirhere, *arrhere);
1173    if (file_exists (filename)) {
1174      disp.loaddefaultmacros(filename);
1175      foundfile = true;
1176    }
1177    ++dirhere;
1178      }
1179    }
1180
1181    // see if we found the file or not
1182    if (!foundfile) {
1183      logout << text_t2ascii
1184         << "Error: the macro file \"" << *arrhere << "\" could not be found.\n";
1185      if (configinfo.collection.empty()) {
1186    text_t dirs;
1187    joinchar (maindirs, ", ", dirs);
1188    logout << text_t2ascii
1189           << "It should be in either of the following directories ("
1190           << dirs << ").\n\n";
1191
1192      } else {
1193    logout << text_t2ascii
1194           << "It should be in either " << colmacrodir << " or in "
1195           << gsdlmacrodir << ".\n\n";
1196      }
1197      // don't crap out if a macro file is missing
1198      //disp.setlogout (savedlogout);
1199      //return false;
1200    }
1201    ++arrhere;
1202  }
1203
1204  // success
1205
1206  // reset logout to what it was
1207  disp.setlogout (savedlogout);
1208  return true;
1209}
1210
1211
1212
1213
1214// Go through the list of macro files looking to see
1215// if any exist in the collectoin specific area.  If they
1216// do then read them in and add them to the set of existing
1217// current macros
1218
1219void receptionist::read_collection_macrofiles (const text_t& collection, ostream &logout)
1220{
1221  outconvertclass text_t2ascii;
1222
1223  // disp.unloadcollectionmacros();
1224
1225  // redirect the error output to logout
1226  ostream *savedlogout = disp.setlogout (&logout);
1227
1228  text_t colmacrodir
1229    = filename_cat (configinfo.collecthome,collection, "macros");
1230
1231  if (directory_exists (colmacrodir)) {
1232
1233    text_tset::iterator arrhere = configinfo.macrofiles.begin();
1234    text_tset::iterator arrend = configinfo.macrofiles.end();
1235    text_t filename;
1236    while (arrhere != arrend) {
1237
1238      filename = filename_cat (colmacrodir, *arrhere);
1239      if (file_exists (filename)) {
1240    disp.loadcollectionmacros(filename);
1241      }
1242     
1243      ++arrhere;
1244    }
1245  }
1246
1247  // reset logout to what it was
1248  disp.setlogout (savedlogout);
1249}
1250
1251
1252
1253
1254// check_mainargs will check all the main arguments. If a major
1255// error is found it will return false and no cgi page should
1256// be created using the arguments.
1257
1258bool receptionist::check_mainargs (cgiargsclass &args, ostream &logout) {
1259   
1260   if(configinfo.site_auth)
1261      {
1262     args["uan"] = "1";
1263     args["ug"] = configinfo.site_group;
1264      }
1265   
1266   
1267   // if this receptionist is running in collection dependant mode
1268   // then it should always set the collection argument to the
1269   // collection
1270   if (!configinfo.collection.empty()) args["c"] = configinfo.collection;
1271   
1272   // if current collection uses ccscols make sure
1273   // "ccs" argument is set and make "cc" default to
1274   // all collections in "ccs"
1275   if (args["a"] != "config" && !args["c"].empty()) {
1276     
1277      text_t &arg_c = args["c"];
1278      recptproto *collectproto = protocols.getrecptproto (arg_c, logout);
1279      if (collectproto == NULL) {
1280     // oops, this collection isn't valid
1281     outconvertclass text_t2ascii;
1282     logout << text_t2ascii << "ERROR: Invalid collection: " << arg_c << "\n";
1283     // args["c"].clear();
1284     
1285      } else {
1286     
1287     ColInfoResponse_t *cinfo = get_collectinfo_ptr (collectproto, arg_c, logout);
1288     
1289     if(cinfo->authenticate == "collection")
1290        {
1291           args["uan"] = "1";
1292           args["ug"] = cinfo->auth_group;
1293        }
1294     
1295     
1296      if (cinfo != NULL) {
1297     if (!cinfo->ccsCols.empty()) {
1298        args["ccs"] = 1;
1299        if (args["cc"].empty()) {
1300           text_tarray::const_iterator col_here = cinfo->ccsCols.begin();
1301           text_tarray::const_iterator col_end = cinfo->ccsCols.end();
1302           bool first = true;
1303           while (col_here != col_end) {
1304          // make sure it's a valid collection
1305          if (protocols.getrecptproto (*col_here, logout) != NULL) {
1306             if (!first) args["cc"].push_back (',');
1307             args["cc"] += *col_here;
1308             first = false;
1309          }
1310          ++col_here;
1311        }
1312      }
1313    }
1314      } else {
1315    logout << "ERROR (receptionist::check_mainargs): get_collectinfo_ptr returned NULL\n";
1316      }
1317    }
1318  }
1319
1320  // argument "v" can only be 0 or 1. Use the default value
1321  // if it is out of range
1322  int arg_v = args.getintarg ("v");
1323  if (arg_v != 0 && arg_v != 1) {
1324    cgiarginfo *vinfo = argsinfo.getarginfo ("v");
1325    if (vinfo != NULL) args["v"] = vinfo->argdefault;
1326  }
1327
1328  // argument "f" can only be 0 or 1. Use the default value
1329  // if it is out of range
1330  int arg_f = args.getintarg ("f");
1331  if (arg_f != 0 && arg_f != 1) {
1332    cgiarginfo *finfo = argsinfo.getarginfo ("f");
1333    if (finfo != NULL) args["f"] = finfo->argdefault;
1334  }
1335
1336  return true;
1337}
1338
1339// translate_OIDs translates the "d" and "cl" arguments to their correct values
1340// if they use the tricky ".fc", ".lc" type syntax.
1341void receptionist::translate_OIDs (cgiargsclass &args, recptproto *collectproto,
1342                   ostream &logout) {
1343
1344  FilterResponse_t response;
1345  FilterRequest_t request;
1346  comerror_t err;
1347  text_t &arg_d = args["d"];
1348  text_t &arg_cl = args["cl"];
1349  text_t &collection = args["c"];
1350 
1351  // do a call to translate OIDs if required
1352  request.filterName = "NullFilter";
1353  request.filterResultOptions = FROID;
1354  if (!arg_d.empty() && needs_translating (arg_d)) {
1355    request.docSet.push_back (arg_d);
1356    collectproto->filter (collection, request, response, err, logout);
1357    arg_d = response.docInfo[0].OID;
1358    request.clear();
1359  }
1360  // we'll also check here that the "cl" argument has a "classify" doctype
1361  // (in case ".fc" or ".lc" have screwed up)
1362  if (needs_translating (arg_cl)) {
1363    request.fields.insert ("doctype");
1364    request.docSet.push_back (arg_cl);
1365    request.filterResultOptions = FRmetadata;
1366    collectproto->filter (collection, request, response, err, logout);
1367    // set to original value (without .xx stuff) if doctype isn't "classify"
1368    if (response.docInfo[0].metadata["doctype"].values[0] != "classify")
1369      strip_suffix (arg_cl);
1370    else
1371      arg_cl = response.docInfo[0].OID;
1372  }
1373}
1374
1375// prepare_page sets up page parameters, sets display macros
1376// and opens the page ready for output
1377void receptionist::prepare_page (action *a, cgiargsclass &args,
1378                 outconvertclass &outconvert,
1379                 ostream &logout) {
1380  // set up page parameters
1381  text_t pageparams;
1382  bool first = true;
1383
1384  text_tmap::iterator params_here = configinfo.pageparams.begin();
1385  text_tmap::iterator params_end = configinfo.pageparams.end();
1386  while (params_here != params_end) {
1387    // page params are those from main.cfg (eg pageparam v 0) plus
1388    // two defaults set in recptconf.clear() (c="" and l=en)
1389    // This used to check if the current value of the page param
1390    // == the default value, then don't add in it the list
1391    // but if l=en, and there is a macro with [l=en], then it doesn't
1392    // find it.
1393    // so now all page params will go into the list. I assume this will
1394    // mean more attempts to find each macro, but nothing worsee than
1395    // that.  --kjdon
1396    //if (args[(*params_here).first] != (*params_here).second) {
1397      if (first)
1398    first = false;
1399      else
1400    pageparams += ",";
1401
1402      pageparams += (*params_here).first;
1403      pageparams += "=";
1404      pageparams += args[(*params_here).first];
1405      // }
1406   
1407    ++params_here;
1408  }
1409 
1410
1411  // open the page
1412  disp.openpage(pageparams, configinfo.macroprecedence);
1413
1414  disp.unloadcollectionmacros();
1415
1416  text_t collection = args["c"];
1417  if (!collection.empty()) {
1418    read_collection_macrofiles(collection,logout);
1419  }
1420
1421  // define external macros for each action
1422  actionptrmap::iterator actionhere = actions.begin ();
1423  actionptrmap::iterator actionend = actions.end ();
1424
1425  while (actionhere != actionend) {
1426    assert ((*actionhere).second.a != NULL);
1427    if ((*actionhere).second.a != NULL) {
1428      (*actionhere).second.a->define_external_macros (disp, args, &protocols, logout);
1429    }
1430    ++actionhere;
1431  }
1432
1433
1434  // define internal macros for the current action
1435  a->define_internal_macros (disp, args, &protocols, logout);
1436 
1437  // define general macros. the defining of general macros is done here so that
1438  // the last possible version of the cgi arguments are used
1439  define_general_macros (args, outconvert, logout);
1440}
1441
1442
1443void receptionist::define_general_macros (cgiargsclass &args, outconvertclass &/*outconvert*/,
1444                      ostream &logout) {
1445
1446  text_t &collection = args["c"];
1447
1448  disp.setmacro ("gsdlhome", displayclass::defaultpackage, dm_safe(configinfo.gsdlhome));
1449  disp.setmacro ("gwcgi", displayclass::defaultpackage, configinfo.gwcgi);
1450  disp.setmacro ("httpweb", displayclass::defaultpackage, configinfo.httpweb);
1451  disp.setmacro ("httpprefix", displayclass::defaultpackage, configinfo.httpprefix);
1452
1453  // This perhaps should be done with gsdl_getenv() which takes the
1454  // 'fcgienv' parameter (for fast-cgi), however if changed to this, this
1455  // additional parameter would need to be passed into here (not sure how
1456  // that would effect any virtual inheritence going on), or else moved
1457  // higher up the calling to chain to, e.g., produce_cgi_page()
1458
1459  char* remote_addr = getenv("REMOTE_ADDR");
1460
1461  if (remote_addr != NULL) {
1462     text_t remote_addr_t(remote_addr);
1463     disp.setmacro ("remoteAddr", displayclass::defaultpackage, remote_addr_t);
1464  }
1465
1466  char* remote_host = getenv("REMOTE_HOST");
1467  if (remote_host != NULL) {
1468     text_t remote_host_t(remote_host);
1469     disp.setmacro ("remoteHost", displayclass::defaultpackage, remote_host_t);
1470  }
1471  else {
1472     // setting this to "unknown" is easier to deal with in format/macro
1473     // statements, rather than testing for _remoteHost_
1474     disp.setmacro ("remoteHost", displayclass::defaultpackage, "unknown");
1475  }
1476
1477
1478  text_t compressedoptions = get_compressed_arg(args, logout);
1479  disp.setmacro ("compressedoptions", displayclass::defaultpackage, dm_safe(compressedoptions));
1480  //disp.setmacro ("compressedoptionsUrlsafe", displayclass::defaultpackage, encodeForURL(dm_safe(compressedoptions))); // seems to be unnecessary after testing e=hack or e=hack<collect>... or e=...<collect>hack in a live server
1481
1482  // need a decoded version of compressedoptions for use within forms
1483  // as browsers encode values from forms before sending to server
1484  // (e.g. %25 becomes %2525)
1485  decode_cgi_arg (compressedoptions);
1486  if (args["w"] == "utf-8") { // if the encoding was utf-8, then compressed options was utf-8, and we need unicode.
1487    // if encoding wasn't utf-8, then compressed opotions may be screwed up, but seems to work for 8 bit encodings?
1488    compressedoptions = to_uni(compressedoptions);
1489  }
1490
1491  text_t dmacrovalue = dm_safe(compressedoptions);
1492  disp.setmacro ("decodedcompressedoptions", displayclass::defaultpackage, dmacrovalue);
1493  disp.setmacro ("decodedcompressedoptionsAttrsafe", displayclass::defaultpackage, encodeForHTMLAttr(dmacrovalue));
1494
1495#if defined (__WIN32__)
1496  disp.setmacro ("win32", displayclass::defaultpackage, "1");
1497#endif
1498
1499  // set _cgiargX_ macros for each cgi argument
1500  cgiargsclass::const_iterator argshere = args.begin();
1501  cgiargsclass::const_iterator argsend = args.end();
1502  while (argshere != argsend) {
1503
1504    text_t macrovalue = (*argshere).second.value; // and stays like that if ((*argshere).first == "hp")
1505
1506    if (((*argshere).first == "q") ||
1507    ((*argshere).first == "qa") ||
1508    ((*argshere).first == "qtt") ||
1509    ((*argshere).first == "qty") ||
1510    ((*argshere).first == "qp") ||
1511    ((*argshere).first == "qpl") ||
1512    ((*argshere).first == "qr") ||
1513    ((*argshere).first == "q2")) {
1514
1515      // need to escape special characters from query string
1516      macrovalue = html_safe(macrovalue);
1517
1518    } else  if ((*argshere).first == "hp") {
1519      if(!isValidURLProtocol(macrovalue)) {
1520    macrovalue = encodeForURL(macrovalue); // URL has invalid protocol like javascript:, so URL encode it
1521      }
1522    }
1523    else {
1524      macrovalue = dm_safe(macrovalue);
1525    }   
1526
1527    // set the default value for the macro
1528    disp.setmacro ("cgiarg" + (*argshere).first, displayclass::defaultpackage, macrovalue);
1529
1530    // set macros for the encoded versions of the same value. Uses the functions in securitytools.h
1531    // https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet
1532
1533    text_t htmlsafe = encodeForHTML(macrovalue);
1534    text_t attrsafe = encodeForHTMLAttr(macrovalue);
1535    text_t urlsafe = encodeForURL(macrovalue);
1536    text_t jssafe = encodeForJavascript(macrovalue); // with default setting will return \\x and \\u for macro files
1537    text_t csssafe = encodeForCSS(macrovalue); // not yet used anywhere, but is available for use in macros
1538    text_t sqlsafe = encodeForSQL(macrovalue);
1539
1540    disp.setmacro ("cgiarg" + (*argshere).first + "Htmlsafe", displayclass::defaultpackage, htmlsafe);   
1541    disp.setmacro ("cgiarg" + (*argshere).first + "Attrsafe", displayclass::defaultpackage, attrsafe);
1542    disp.setmacro ("cgiarg" + (*argshere).first + "Urlsafe", displayclass::defaultpackage, urlsafe);
1543    disp.setmacro ("cgiarg" + (*argshere).first + "Jssafe", displayclass::defaultpackage, jssafe);
1544    disp.setmacro ("cgiarg" + (*argshere).first + "Csssafe", displayclass::defaultpackage, csssafe);
1545    disp.setmacro ("cgiarg" + (*argshere).first + "Sqlsafe", displayclass::defaultpackage, sqlsafe);
1546   
1547
1548    ++argshere;
1549  }
1550
1551  // set collection specific macros
1552  if (!collection.empty()) {
1553    recptproto *collectproto = protocols.getrecptproto (collection, logout);
1554    if (collectproto != NULL) {
1555      FilterResponse_t response;
1556      text_tset metadata;
1557      get_info ("collection", collection, args["l"], metadata, false,
1558        collectproto, response, logout);
1559     
1560      if (!response.docInfo[0].metadata.empty()) {
1561    MetadataInfo_tmap::const_iterator here = response.docInfo[0].metadata.begin();
1562    MetadataInfo_tmap::const_iterator end = response.docInfo[0].metadata.end();
1563    while (here != end) {
1564      if (((*here).first != "haschildren") && ((*here).first != "hasnext") &&
1565          ((*here).first != "hasprevious")) {
1566        // check for args in form name:lang
1567        text_t name = g_EmptyText;
1568        text_t lang = g_EmptyText;
1569        bool colonfound=false;
1570        text_t::const_iterator a = (*here).first.begin();
1571        text_t::const_iterator b = (*here).first.end();
1572        while (a !=b) {
1573          if (*a==':') {
1574        colonfound=true;
1575          }
1576          else {
1577        if (colonfound)
1578          lang.push_back(*a);
1579        else name.push_back(*a);
1580          }
1581          ++a;
1582        }
1583        if (!lang.empty()) {
1584          if (args["l"]==lang) {
1585        disp.setcollectionmacro(displayclass::defaultpackage, name, "", (*here).second.values[0]);
1586          }
1587        }
1588        else { // the default one
1589          disp.setcollectionmacro(displayclass::defaultpackage, (*here).first,  "", (*here).second.values[0]);
1590        }
1591      }
1592      ++here;
1593    }
1594      }
1595
1596      text_t iconcollection;
1597      disp.expandstring (displayclass::defaultpackage, "_iconcollection_", iconcollection);
1598      if (!iconcollection.empty())
1599    {
1600      ColInfoResponse_t cinfo;
1601      comerror_t err;
1602      collectproto->get_collectinfo (collection, cinfo, err, logout);
1603      if (iconcollection[0]=='/' && !cinfo.httpdomain.empty())
1604        {
1605          // local but with full path
1606          iconcollection = "http://" + cinfo.httpdomain + iconcollection;
1607          disp.setmacro("iconcollection", displayclass::defaultpackage, iconcollection);
1608        }
1609    }
1610    }
1611  }
1612 
1613  if (!collection.empty()) {
1614    ColInfoResponse_t cinfo;
1615    comerror_t err;
1616    recptproto *collectproto = protocols.getrecptproto (collection, logout);
1617    if (collectproto != NULL) {
1618      collectproto->get_collectinfo (collection, cinfo, err, logout);
1619
1620
1621      // This part of the code used to use "cinfo.httpprefix" regardless
1622      // of the value it contained.  Since
1623      // this can come back with an empty (in the case of gsdl_mod), the
1624      // URL produced was invalid.
1625      //
1626      // Changed to test for empty first, and use configinfo.httpprefix as
1627      // a "backup"
1628      //
1629      // Point to consider: since configinfo.httpprefix has been offically
1630      // set as "httpprefix" in macros, it seems to make more sense to use
1631      // always use that version and not the cinfo version at all.
1632
1633      text_t httpprefix
1634    = (!cinfo.httpprefix.empty()) ? cinfo.httpprefix : configinfo.httpprefix;
1635   
1636      text_t httpcollection;
1637      if (!cinfo.httpdomain.empty()) httpcollection = "http://";
1638      httpcollection += cinfo.httpdomain + httpprefix + "/collect/"
1639    + encodeForURL(collection);
1640      disp.setmacro ("httpcollection", displayclass::defaultpackage,
1641             httpcollection);
1642     
1643      // as of gsdl 2.53, collect.cfg can specify macros
1644      if (cinfo.collection_macros.size() > 0) {
1645    collectionmeta_map::const_iterator this_macro=cinfo.collection_macros.begin();
1646    collectionmeta_map::const_iterator done_macro=cinfo.collection_macros.end();
1647    while (this_macro != done_macro) {
1648      text_t package = "Global";
1649      text_t macroname = this_macro->first;
1650      // if this macro name is AAA:bbb then extract the package name
1651      text_t::const_iterator thischar, donechar;
1652      thischar = macroname.begin();
1653      donechar = macroname.end();
1654      while (thischar < donechar) {
1655        if (*thischar == ':') {
1656          package = substr(macroname.begin(),thischar);
1657          macroname = substr(thischar+1,donechar);
1658          break;
1659        }
1660        ++thischar;
1661      }
1662
1663      text_tmap params_map = this_macro->second;
1664      text_tmap::const_iterator this_param = params_map.begin();
1665      text_tmap::const_iterator done_param = params_map.end();
1666      while (this_param != done_param) {
1667        disp.setcollectionmacro(package,
1668                    macroname,
1669                    this_param->first,
1670                    this_param->second);
1671        ++this_param;
1672      }
1673     
1674      ++this_macro;
1675    }
1676      } // col macros
1677    } // collectproto != NULL
1678  }
1679
1680}
1681
1682// gets collection info from cache if found or
1683// calls collection server (and updates cache)
1684// returns NULL if there's an error
1685ColInfoResponse_t *receptionist::get_collectinfo_ptr (recptproto *collectproto,
1686                              const text_t &collection,
1687                              ostream &logout) {
1688 
1689  // check the cache
1690  colinfo_tmap::iterator it = configinfo.collectinfo.find (collection);
1691  if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) {
1692    // found it
1693    return &((*it).second.info);
1694  }
1695
1696  // not cached, get info from collection server
1697  if (collectproto == NULL) {
1698    logout << "ERROR: receptionist::get_collectinfo_ptr passed null collectproto\n";
1699    return NULL;
1700  }
1701   
1702  comerror_t err;
1703  if (it == configinfo.collectinfo.end()) {
1704    collectioninfo_t cinfo;
1705    collectproto->get_collectinfo (collection, cinfo.info, err, logout);
1706    if (err != noError) {
1707      outconvertclass text_t2ascii;
1708      logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \""
1709         << get_comerror_string (err) << "\"while getting collectinfo\n";
1710      return NULL;
1711    }
1712    cinfo.info_loaded = true;
1713    configinfo.collectinfo[collection] = cinfo;
1714    return &(configinfo.collectinfo[collection].info);
1715  } else {
1716    collectproto->get_collectinfo (collection, (*it).second.info, err, logout);
1717    if (err != noError) {
1718      outconvertclass text_t2ascii;
1719      logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \""
1720         << get_comerror_string (err) << "\"while getting collectinfo\n";
1721      return NULL;
1722    }
1723    (*it).second.info_loaded = true;
1724    return &((*it).second.info);
1725  }
1726}
1727
1728// removes a collection from the cache so that the next
1729// call to get_collectinfo_ptr() for that collection will
1730// retrieve the collection info from the collection server
1731void receptionist::uncache_collection (const text_t &collection) {
1732
1733  colinfo_tmap::iterator it = configinfo.collectinfo.find (collection);
1734  if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) {
1735
1736    (*it).second.info_loaded = false;
1737
1738  }
1739}
1740
1741// Handles an "Encoding" line from a configuration file - note that the
1742// configinfo.encodings map is a bit of a hack (to be fixed when the
1743// configuration files are tidied up).
1744void receptionist::configure_encoding (const text_tarray &cfgline) {
1745
1746  text_t subkey, subvalue, shortname, longname, mapfile;
1747  int multibyte = 0;
1748  text_t::const_iterator cfglinesub_here;
1749  text_tarray::const_iterator cfgline_here = cfgline.begin();
1750  text_tarray::const_iterator cfgline_end = cfgline.end();
1751  while (cfgline_here != cfgline_end) {
1752    if (*cfgline_here == "multibyte") {
1753      multibyte = 1;
1754    } else {
1755      cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
1756                      (*cfgline_here).end(), '=', subkey);
1757      if (subkey == "shortname") {
1758    shortname = substr (cfglinesub_here, (*cfgline_here).end());
1759      } else if (subkey == "longname") {
1760    longname = substr (cfglinesub_here, (*cfgline_here).end());
1761      } else if (subkey == "map") {
1762    mapfile = substr (cfglinesub_here, (*cfgline_here).end());
1763      }
1764    }
1765    ++cfgline_here;
1766  }
1767  if (!shortname.empty()) {
1768    if (longname.empty()) longname = shortname;
1769
1770    // add the converter
1771    if (shortname == "utf-8") {
1772      utf8inconvertclass *utf8inconvert = new utf8inconvertclass();
1773      utf8outconvertclass *utf8outconvert = new utf8outconvertclass();
1774      utf8outconvert->set_rzws(1);
1775      add_converter (shortname, utf8inconvert, utf8outconvert);
1776      configinfo.encodings[longname] = shortname;
1777
1778    } else if (shortname == "utf-16be") {
1779      // we use the default input converter as this shouldn't ever be used
1780      // for converting from unicode...
1781      inconvertclass *inconverter = new inconvertclass();
1782      utf16outconvertclass *outconverter = new utf16outconvertclass();
1783      add_converter (shortname, inconverter, outconverter);
1784      configinfo.encodings[longname] = shortname;
1785   
1786    } else if (!mapfile.empty()) {
1787
1788      if (mapfile == "8859_1.ump") {
1789    // iso-8859-1 is a special case as it'll always be supported by the
1790    // standard converter class and therefore doesn't need to use its
1791    // mapping file
1792    inconvertclass *inconvert = new inconvertclass();
1793    rzwsoutconvertclass *outconvert = new rzwsoutconvertclass();
1794    outconvert->set_rzws(1);
1795    add_converter (shortname, inconvert, outconvert); 
1796    configinfo.encodings[longname] = shortname;
1797
1798      } else {
1799    text_t to_uc_map = filename_cat(configinfo.gsdlhome, "mappings", "to_uc", mapfile);
1800    text_t from_uc_map = filename_cat(configinfo.gsdlhome, "mappings", "from_uc", mapfile);
1801    if (file_exists(to_uc_map) && file_exists(from_uc_map)) {
1802
1803      mapinconvertclass *mapinconvert = new mapinconvertclass();
1804      mapinconvert->setmapfile (to_uc_map, 0x003F);
1805      mapinconvert->set_multibyte (multibyte);
1806      mapoutconvertclass *mapoutconvert = new mapoutconvertclass();
1807      mapoutconvert->setmapfile (from_uc_map, 0x3F);
1808      mapoutconvert->set_multibyte (multibyte);
1809      mapoutconvert->set_rzws(1);
1810      add_converter (shortname, mapinconvert, mapoutconvert);
1811      configinfo.encodings[longname] = shortname;
1812    }
1813      }
1814    }
1815  }
1816}
Note: See TracBrowser for help on using the browser.