root/gsdl/trunk/runtime-src/src/recpt/receptionist.cpp @ 18824

Revision 18824, 56.1 KB (checked in by mdewsnip, 11 years ago)

Fixed get_cookie() so that it checks that cookiestring isn't empty before trying to fiddle with it, to prevent crashes on Windows.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * receptionist.cpp -- a web interface for the gsdl
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26// following line required to get fstream.filedesc() on darwin (Mac OS X)
27// gcc 2.91 automatically defines this in stream.h
28#define _STREAM_COMPAT  1
29
30#include "receptionist.h"
31#include "recptprototools.h"
32#include "fileutil.h"
33#include "cgiutils.h"
34#include "htmlutils.h"
35#include "gsdltools.h"
36#include "gsdltimes.h"
37#include "OIDtools.h"
38#include <assert.h>
39#include <time.h>
40#include <stdio.h> // for open()
41#include <fcntl.h> // for open() flags
42// following 2 are for printing Last-Modified http header.
43#include <sys/stat.h>
44#include <time.h>
45
46#if defined (GSDL_USE_IOS_H)
47#include <fstream.h>
48#else
49#include <fstream>
50#endif
51
52void recptconf::clear () {
53  gsdlhome.clear();
54  collecthome.clear();
55  dbhome.clear();
56  collectinfo.erase(collectinfo.begin(), collectinfo.end());
57  collection.clear();
58  collectdir.clear();
59  httpprefix.clear();
60  httpimg = "/images";
61  gwcgi.clear();
62  macrofiles.erase(macrofiles.begin(), macrofiles.end());
63  saveconf.clear();
64  usecookies = false;
65  logcgiargs = false;
66  LogDateFormat = LocalTime;
67
68  maintainer.clear();
69  MailServer.clear();
70  LogEvents = Disabled;
71  EmailEvents = Disabled;
72  EmailUserEvents = false;
73
74  languages.erase(languages.begin(), languages.end());
75  encodings.erase(encodings.begin(), encodings.end());
76
77  site_auth = false;
78  HomePageType = "images";
79  HomePageCols = 3;
80 
81  // these default page parameters can always be overriden
82  // in the configuration file
83  pageparams.erase(pageparams.begin(), pageparams.end());
84  pageparams["c"] = "";
85  pageparams["l"] = "en";
86
87#ifdef MACROPRECEDENCE
88  macroprecedence = MACROPRECEDENCE;
89#else
90  macroprecedence.clear();
91#endif
92}
93
94void collectioninfo_t::clear () {
95  gsdl_gsdlhome.clear();
96  gsdl_dbhome.clear();
97
98  info_loaded = false;
99  info.clear();
100}
101
102void languageinfo_t::clear () {
103  longname.clear();
104  defaultencoding.clear();
105}
106
107receptionist::receptionist () {
108  // create a list of cgi arguments
109  // this must be done before the configuration
110
111  cgiarginfo ainfo;
112
113  ainfo.shortname = "e";
114  ainfo.longname = "compressed arguments";
115  ainfo.multiplechar = true;
116  ainfo.defaultstatus = cgiarginfo::good;
117  ainfo.argdefault = g_EmptyText;
118  ainfo.savedarginfo = cgiarginfo::mustnot;
119  argsinfo.addarginfo (NULL, ainfo);
120
121  ainfo.shortname = "a";
122  ainfo.longname = "action";
123  ainfo.multiplechar = true;
124  ainfo.defaultstatus = cgiarginfo::none;
125  ainfo.argdefault = g_EmptyText;
126  ainfo.savedarginfo = cgiarginfo::must;
127  argsinfo.addarginfo (NULL, ainfo);
128
129  // w=western
130  ainfo.shortname = "w";
131  ainfo.longname = "encoding";
132  ainfo.multiplechar = true;
133  ainfo.defaultstatus = cgiarginfo::none;
134  ainfo.argdefault = g_EmptyText;
135  ainfo.savedarginfo = cgiarginfo::must;
136  argsinfo.addarginfo (NULL, ainfo);
137 
138  ainfo.shortname = "nw";
139  ainfo.longname = "new encoding";
140  ainfo.multiplechar = true;
141  ainfo.defaultstatus = cgiarginfo::none;
142  ainfo.argdefault = g_EmptyText;
143  ainfo.savedarginfo = cgiarginfo::mustnot;
144  argsinfo.addarginfo (NULL, ainfo);
145 
146  ainfo.shortname = "c";
147  ainfo.longname = "collection";
148  ainfo.multiplechar = true;
149  ainfo.defaultstatus = cgiarginfo::none;
150  ainfo.argdefault = g_EmptyText;
151  ainfo.savedarginfo = cgiarginfo::must;
152  argsinfo.addarginfo (NULL, ainfo);
153 
154  // the interface language name should use the ISO 639
155  // standard
156  ainfo.shortname = "l";
157  ainfo.longname = "interface language";
158  ainfo.multiplechar = true;
159  ainfo.defaultstatus = cgiarginfo::weak;
160  ainfo.argdefault = "en";
161  ainfo.savedarginfo = cgiarginfo::must;
162  argsinfo.addarginfo (NULL, ainfo);
163 
164  ainfo.shortname = "nl";
165  ainfo.longname = "new language";
166  ainfo.multiplechar = false;
167  ainfo.defaultstatus = cgiarginfo::none;
168  ainfo.argdefault = "0";
169  ainfo.savedarginfo = cgiarginfo::mustnot;
170  argsinfo.addarginfo (NULL, ainfo);
171 
172  // the GSDL_UID (cookie)
173  ainfo.shortname = "z";
174  ainfo.longname = "gsdl uid";
175  ainfo.multiplechar = true;
176  ainfo.defaultstatus = cgiarginfo::none;
177  ainfo.argdefault = g_EmptyText;
178  ainfo.savedarginfo = cgiarginfo::mustnot;
179  argsinfo.addarginfo (NULL, ainfo);
180}
181
182
183void receptionist::add_action (action *theaction) {
184  // make sure we have an action to add
185  if (theaction == NULL) return;
186
187  // add this action to the list of actions
188  actions.addaction(theaction);
189 
190  // add the cgi arguments from this action
191  argsinfo.addarginfo (NULL, *(theaction->getargsinfo()));
192}
193
194
195void receptionist::add_browser (browserclass *thebrowser) {
196  // make sure we have a browser to add
197  if (thebrowser == NULL) return;
198
199  // add this browser to the list of browsers
200  browsers.addbrowser(thebrowser);
201}
202
203
204void receptionist::setdefaultbrowser (const text_t &browsername) {
205  browsers.setdefaultbrowser (browsername);
206}
207
208
209// configure should be called for each line in the
210// configuration files to configure the receptionist and everything
211// it contains. The configuration should take place after everything
212// has been added but before the initialisation.
213
214void receptionist::configure (const text_t &key, const text_tarray &cfgline) {
215  // configure the receptionist
216
217   
218   
219  if (cfgline.size() >= 1) {
220    cgiarginfo *info = NULL;
221    if (key == "gsdlhome") {
222      configinfo.gsdlhome = cfgline[0];
223      if (configinfo.dbhome.empty()) configinfo.dbhome = cfgline[0];
224    }
225    else if (key == "collecthome") configinfo.collecthome = cfgline[0];
226    else if (key == "gdbmhome") configinfo.dbhome = cfgline[0];
227    else if (key == "collection") {
228      configinfo.collection = cfgline[0];
229      // also need to set the default arg to this collection
230      if ((info = argsinfo.getarginfo("c")) != NULL) {
231    info->defaultstatus = cgiarginfo::good;
232    info->argdefault = cfgline[0];
233      }
234     
235    }
236    else if (key == "collectdir") configinfo.collectdir = cfgline[0];
237    else if (key == "httpprefix") configinfo.httpprefix = cfgline[0];
238    else if (key == "httpimg") configinfo.httpimg = cfgline[0];
239    else if (key == "gwcgi") configinfo.gwcgi = cfgline[0];
240    else if (key == "macrofiles") {
241      // want to append to macrofiles (i.e. may be several config files
242      // contributing, maybe from several collections).
243      text_tarray::const_iterator here = cfgline.begin();
244      text_tarray::const_iterator end = cfgline.end();
245      while (here != end) {
246    configinfo.macrofiles.insert (*here);
247    ++here;
248      }
249    }
250    else if (key == "saveconf") configinfo.saveconf = cfgline[0];
251    else if (key == "usecookies") configinfo.usecookies = (cfgline[0] == "true");
252    else if (key == "logcgiargs") configinfo.logcgiargs = (cfgline[0] == "true");
253    else if (key == "maintainer") configinfo.maintainer = cfgline[0];
254    else if (key == "MailServer") configinfo.MailServer = cfgline[0];
255    else if (key == "LogDateFormat") {
256      if (cfgline[0] == "UTCTime") configinfo.LogDateFormat = UTCTime;
257      else if (cfgline[0] == "Absolute") configinfo.LogDateFormat = Absolute;
258    }
259    else if (key == "LogEvents") {
260      if (cfgline[0] == "CollectorEvents") configinfo.LogEvents = CollectorEvents;
261      else if (cfgline[0] == "AllEvents") configinfo.LogEvents = AllEvents;
262    }
263    else if (key == "EmailEvents") {
264      if (cfgline[0] == "CollectorEvents") configinfo.EmailEvents = CollectorEvents;
265      else if (cfgline[0] == "AllEvents") configinfo.EmailEvents = AllEvents;
266    }
267    else if (key == "EmailUserEvents") configinfo.EmailUserEvents = (cfgline[0] == "true");
268    else if (key == "pageparam") {
269      if (cfgline.size() >= 2) configinfo.pageparams[cfgline[0]] = cfgline[1];
270      else configinfo.pageparams[cfgline[0]] = "";
271    }
272    else if (key == "macroprecedence") configinfo.macroprecedence = cfgline[0];
273    else if (key == "collectinfo") {
274      if (cfgline.size() == 3) {
275    // for backwards compatability with older collections that only use
276    // gsdlhome and dbhome
277    collectioninfo_t cinfo;
278    cinfo.gsdl_gsdlhome = cfgline[1];
279    cinfo.gsdl_collecthome = filename_cat(cfgline[1],"collect");
280    cinfo.gsdl_dbhome = cfgline[2];
281    configinfo.collectinfo[cfgline[0]] = cinfo;
282      }
283      else if (cfgline.size() >= 4) {
284    collectioninfo_t cinfo;
285    cinfo.gsdl_gsdlhome = cfgline[1];
286    cinfo.gsdl_collecthome = cfgline[2];
287    cinfo.gsdl_dbhome = cfgline[3];
288    configinfo.collectinfo[cfgline[0]] = cinfo;
289      }
290    }
291
292    // Read in the value for the site_auth directive either true or false
293    else if (key == "site_auth") configinfo.site_auth = (cfgline[0] == "true");
294
295    else if (key == "site_group")
296       joinchar(cfgline,',',configinfo.site_group);
297       
298    else if (key == "SiteFormat") {
299       if (cfgline[0] == "HomePageType") {
300      configinfo.HomePageType = cfgline[1];
301       } else if (cfgline[0] == "HomePageCols") {
302      configinfo.HomePageCols = cfgline[1].getint();
303       }
304    }
305       
306    else if (key == "cgiarg") {
307      // get shortname
308      bool seen_defaultstatus = false;
309      text_t subkey, subvalue;
310      text_t shortname;
311      text_t::const_iterator cfglinesub_here;
312      text_tarray::const_iterator cfgline_here = cfgline.begin();
313      text_tarray::const_iterator cfgline_end = cfgline.end();
314      while (cfgline_here != cfgline_end) {
315    cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
316                    (*cfgline_here).end(), '=', subkey);
317    if (subkey == "shortname") {
318      shortname = substr (cfglinesub_here, (*cfgline_here).end());
319    }
320    ++cfgline_here;
321      }
322
323      // if we found the shortname process the line again filling in values
324      if (!shortname.empty()) {
325    cgiarginfo &chinfo = argsinfo[shortname];
326    chinfo.shortname = shortname; // in case this is a new argument
327   
328    cfgline_here = cfgline.begin();
329    while (cfgline_here != cfgline_end) {
330      cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
331                      (*cfgline_here).end(), '=', subkey);
332      subvalue = substr (cfglinesub_here, (*cfgline_here).end());
333
334      if (subkey == "longname") chinfo.longname = subvalue;
335      else if (subkey == "multiplechar") chinfo.multiplechar = (subvalue == "true");
336      else if (subkey == "defaultstatus") {
337        seen_defaultstatus = true;
338        if (subvalue == "none") chinfo.defaultstatus = cgiarginfo::none;
339        else if (subvalue == "weak") chinfo.defaultstatus = cgiarginfo::weak;
340        else if (subvalue == "good") chinfo.defaultstatus = cgiarginfo::good;
341        else if (subvalue == "config") chinfo.defaultstatus = cgiarginfo::config;
342        else if (subvalue == "imperative") chinfo.defaultstatus = cgiarginfo::imperative;
343      }
344      else if (subkey == "argdefault") {
345        chinfo.argdefault = subvalue;
346        if (!seen_defaultstatus) chinfo.defaultstatus = cgiarginfo::config;
347      }
348      else if (subkey == "savedarginfo") {
349        if (subvalue == "mustnot") chinfo.savedarginfo = cgiarginfo::mustnot;
350        else if (subvalue == "can") chinfo.savedarginfo = cgiarginfo::can;
351        else if (subvalue == "must") chinfo.savedarginfo = cgiarginfo::must;
352      }
353     
354      ++cfgline_here;
355    }
356      }
357
358    } else if (key == "Encoding") {
359
360      configure_encoding (cfgline);
361
362    } else if (key == "Language") {
363      text_t subkey, subvalue, shortname;
364      languageinfo_t lang;
365      text_t::const_iterator cfglinesub_here;
366      text_tarray::const_iterator cfgline_here = cfgline.begin();
367      text_tarray::const_iterator cfgline_end = cfgline.end();
368      while (cfgline_here != cfgline_end) {
369    cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
370                    (*cfgline_here).end(), '=', subkey);
371    if (subkey == "shortname") {
372      shortname = substr (cfglinesub_here, (*cfgline_here).end());
373    } else if (subkey == "longname") {
374      lang.longname = substr (cfglinesub_here, (*cfgline_here).end());
375    } else if (subkey == "default_encoding") {
376      lang.defaultencoding = substr (cfglinesub_here, (*cfgline_here).end());
377    }
378    ++cfgline_here;
379      }
380      if (!shortname.empty()) {
381    if (lang.longname.empty()) lang.longname = shortname;
382    configinfo.languages[shortname] = lang;
383      }
384    }
385  }
386 
387  // configure the actions
388  actionptrmap::iterator actionhere = actions.begin ();
389  actionptrmap::iterator actionend = actions.end ();
390
391  while (actionhere != actionend) {
392    assert ((*actionhere).second.a != NULL);
393    if ((*actionhere).second.a != NULL)
394      (*actionhere).second.a->configure(key, cfgline);
395
396    ++actionhere;
397  }
398
399  // configure the protocols
400  recptprotolistclass::iterator protohere = protocols.begin ();
401  recptprotolistclass::iterator protoend = protocols.end ();
402
403  while (protohere != protoend) {
404    assert ((*protohere).p != NULL);
405    comerror_t err;
406    if ((*protohere).p != NULL)
407      (*protohere).p->configure(key, cfgline, err);
408   
409    ++protohere;
410  }
411
412  // configure the browsers
413  browserptrmap::iterator browserhere = browsers.begin ();
414  browserptrmap::iterator browserend = browsers.end ();
415
416  while (browserhere != browserend) {
417    assert ((*browserhere).second.b != NULL);
418    if ((*browserhere).second.b != NULL)
419      (*browserhere).second.b->configure(key, cfgline);
420   
421    ++browserhere;
422  }
423}
424
425
426void receptionist::configure (const text_t &key, const text_t &value) {
427  text_tarray cfgline;
428  cfgline.push_back (value);
429  configure(key, cfgline);
430}
431
432
433// init should be called after all the actions and protocols have been
434// added to the receptionist and after everything has been configured but
435// before any pages are created.  It returns true on success and false on
436// failure. If false is returned getpage should not be called (without
437// producing meaningless output), instead an error page should be produced
438// by the calling code.
439bool receptionist::init (ostream &logout) {
440
441  // first configure collectdir
442  if (!configinfo.collection.empty()) {
443
444    // collection specific mode
445
446    text_t collectdir = configinfo.gsdlhome;
447
448    if (!configinfo.collectdir.empty()) {
449      // has already been configured
450      collectdir = configinfo.collectdir;
451    } else {
452
453      // decide where collectdir is by searching for collect.cfg
454      // look in $GSDLHOME/collect/collection-name/etc/collect.cfg and
455      // then $GSDLHOME/etc/collect.cfg
456      collectdir = filename_cat (configinfo.gsdlhome, "collect");
457      collectdir = filename_cat (collectdir, configinfo.collection);
458      text_t filename = filename_cat (collectdir, "etc");
459      filename = filename_cat (filename, "collect.cfg");
460     
461      if (!file_exists(filename)) collectdir = configinfo.gsdlhome;
462    }
463
464    configure("collectdir", collectdir);
465
466  }
467  else {
468
469    text_t collecthome;
470    if (configinfo.collecthome.empty()) {
471      collecthome = filename_cat(configinfo.gsdlhome,"collect");
472    }
473    else {
474      collecthome = configinfo.collecthome;
475    }
476
477    configure("collecthome", collecthome);
478
479    // for backwards compatability collectdir set to gsdlhome
480    // (possible it could now be removed)
481    configure("collectdir", configinfo.gsdlhome);
482  }
483
484
485  // read in the macro files
486  if (!read_macrofiles (logout)) return false;
487
488  // there must be at least one action defined
489  if (actions.empty()) {
490    logout << "Error: no actions have been added to the receptionist\n";
491    return false;
492  }
493
494  // there must be at least one browser defined
495  if (browsers.empty()) {
496    logout << "Error: no browsers have been added to the receptionist\n";
497    return false;
498  }
499
500  // create a saveconf string if there isn't one already
501  if (configinfo.saveconf.empty())
502    configinfo.saveconf = create_save_conf_str (argsinfo, logout);
503
504  // check the saveconf string
505  if (!check_save_conf_str (configinfo.saveconf, argsinfo, logout))
506    return false;
507
508  // set a random seed
509  srand (time(NULL));
510
511  // if maintainer email address is something dodgy (for now I'll define
512  // dodgy as being anything that doesn't contain '@') disable EmailEvents
513  // and EmailUserEvents (we don't strictly need to disable EmailUserEvents
514  // in this case but we will as it seems likely that MailServer will also
515  // be screwed up if maintainer is).
516  text_t::const_iterator maintainer_end = configinfo.maintainer.end ();
517  text_t::const_iterator maintainer_here = findchar ((text_t::const_iterator)configinfo.maintainer.begin(),
518                             maintainer_end, '@');
519  if (maintainer_here == maintainer_end) {
520    configinfo.EmailEvents = Disabled;
521    configinfo.EmailUserEvents = Disabled;
522  } else {
523    // if MailServer isn't set it should default to mail.maintainer-domain
524    if (configinfo.MailServer.empty()) {
525      configinfo.MailServer = "mail." + substr (maintainer_here+1, maintainer_end);
526    }
527  }
528
529  // init the actions
530  actionptrmap::iterator actionhere = actions.begin ();
531  actionptrmap::iterator actionend = actions.end ();
532  while (actionhere != actionend) {
533    if (((*actionhere).second.a == NULL) ||
534    !(*actionhere).second.a->init(logout)) return false;
535    ++actionhere;
536  }
537
538  // init the protocols
539  recptprotolistclass::iterator protohere = protocols.begin ();
540  recptprotolistclass::iterator protoend = protocols.end ();
541  while (protohere != protoend) {
542    comerror_t err;   
543    if (((*protohere).p == NULL) ||
544    !(*protohere).p->init(err, logout)) return false;
545    ++protohere;
546  }
547
548  // init the browsers
549  browserptrmap::iterator browserhere = browsers.begin ();
550  browserptrmap::iterator browserend = browsers.end ();
551  while (browserhere != browserend) {
552    if (((*browserhere).second.b == NULL) ||
553    !(*browserhere).second.b->init(logout)) return false;
554    ++browserhere;
555  }
556
557  return true;
558}
559
560// get the default encoding for the given language - if it fails for any
561// reason return ""
562text_t receptionist::get_default_encoding (const text_t &language) {
563 
564  // make sure language is valid
565  if (configinfo.languages.find(language) == configinfo.languages.end()) return "";
566
567  text_t default_encoding = configinfo.languages[language].defaultencoding;
568
569  // make sure the encoding is valid
570  if (converters.find(default_encoding) == converters.end()) {
571    // we don't support the encoding specified as default for this language
572    if (configinfo.encodings.size()==1) {
573      // only 1 encoding specified in main.cfg, so use it
574      return configinfo.encodings.begin()->second;
575    }
576    return "";
577  }
578
579  return default_encoding;
580}
581
582// parse_cgi_args parses cgi arguments into an argument class.
583// This function should be called for each page request. It returns false
584// if there was a major problem with the cgi arguments.
585bool receptionist::parse_cgi_args (const text_t &argstr,
586                   fileupload_tmap &fileuploads,
587                   cgiargsclass &args,
588                   ostream &logout, text_tmap &fcgienv) {
589
590  // get an initial list of cgi arguments
591  args.clear();
592  split_cgi_args (argsinfo, argstr, args);
593
594  // expand the compressed argument (if there was one)
595  if (!expand_save_args (argsinfo, configinfo.saveconf, args, logout)) return false;
596
597  // add the defaults
598  add_default_args (argsinfo, args, logout);
599
600  // add any file upload arguments
601  add_fileupload_args(argsinfo, args, fileuploads, logout);
602
603  // get the cookie
604  if (configinfo.usecookies) get_cookie(args["z"], fcgienv);
605 
606  // if we're changing languages, set the encoding to the default for the new language
607  if (args["nl"] == "1") {
608    args["nw"] = get_default_encoding(args["l"]);
609  }
610
611  // get the input encoding
612  // if encoding isn't set, set it to the default for the current language
613  if ((args.getarg("w") == NULL) || args["w"].empty()) {
614    args["w"] = get_default_encoding(args["l"]);
615  }
616
617  text_t &arg_w = args["w"];
618
619  inconvertclass defaultinconvert;
620  inconvertclass *inconvert = converters.get_inconverter (arg_w);
621  if (inconvert == NULL) inconvert = &defaultinconvert;
622
623  // see if the next page will have a different encoding
624  if (args.getarg("nw") != NULL) arg_w = args["nw"];
625
626  // convert arguments which aren't in unicode to unicode
627  args_tounicode (args, *inconvert);
628
629
630  // decide on the output conversion class (needed for checking the external
631  // cgi arguments)
632  rzwsoutconvertclass defaultoutconverter;
633  rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
634  if (outconverter == NULL) outconverter = &defaultoutconverter;
635  outconverter->reset();
636
637  // check the main cgi arguments
638  if (!check_mainargs (args, logout)) return false;
639
640  // check the arguments for the action
641  action *a = actions.getaction (args["a"]);
642  if (a != NULL) {
643    if (!a->check_cgiargs (argsinfo, args, &protocols, logout)) return false;
644  } else {
645    // the action was not found!!
646    outconvertclass text_t2ascii;
647    logout << text_t2ascii << "Error: the action \"" << args["a"]
648       << "\" could not be found.\n";
649    return false;
650  }
651
652  // check external cgi arguments for each action
653  actionptrmap::iterator actionhere = actions.begin ();
654  actionptrmap::iterator actionend = actions.end ();
655  while (actionhere != actionend) {
656    assert ((*actionhere).second.a != NULL);
657    if ((*actionhere).second.a != NULL) {
658      if (!(*actionhere).second.a->check_external_cgiargs (argsinfo, args, *outconverter,
659                               configinfo.saveconf, logout))
660    return false;
661    }
662    ++actionhere;
663  }
664
665  // the action might have changed but we will assume that
666  // the cgiargs were checked properly when the change was made
667
668  return true;
669}
670
671// returns true if cookie already existed, false
672// if it was generated
673bool receptionist::get_cookie (text_t &cookie, text_tmap &fcgienv) {
674
675  text_t cookiestring = gsdl_getenv ("HTTP_COOKIE", fcgienv);
676  if (!cookiestring.empty())
677  {
678    text_t::const_iterator end = cookiestring.end(); 
679    text_t::const_iterator here = findchar ((text_t::const_iterator)cookiestring.begin(), end, 'G');
680
681    while (here+9 < end) {
682   
683      if (substr(here, here+8) == "GSDL_UID") {
684    cookie = substr (here+9, findchar (here+9, end, ';'));
685    return true;
686      }
687      ++here;
688      here = findchar (here, end, 'G');
689    }
690  }
691
692  cookie.clear();
693  text_t host = gsdl_getenv("REMOTE_ADDR", fcgienv);
694  time_t ttime = time(NULL);
695  if (!host.empty()) {
696    cookie += host;
697    cookie.push_back ('-');
698  }
699  cookie += text_t(ttime);
700 
701  return false;
702}
703
704// as above but just tests if cookie exists
705bool receptionist::get_cookie (text_tmap &fcgienv) {
706
707  text_t c = gsdl_getenv("HTTP_COOKIE", fcgienv);
708  if (!c.empty()) {
709    text_t cookiestring = c;
710   
711    text_t::const_iterator end = cookiestring.end(); 
712    text_t::const_iterator here = findchar ((text_t::const_iterator)cookiestring.begin(), end, 'G');
713
714    while (here+9 < end) {
715      if (substr(here, here+8) == "GSDL_UID") return true;
716      ++here;
717      here = findchar (here, end, 'G');
718    }
719  }
720  return false;
721}
722
723bool receptionist::log_cgi_args (cgiargsclass &args, ostream &logout, text_tmap &fcgienv) {
724
725  // see if we want to log the cgi arguments
726  if (!configinfo.logcgiargs) return true;
727 
728  text_t host = gsdl_getenv ("REMOTE_HOST", fcgienv);
729  text_t script_name = gsdl_getenv ("SCRIPT_NAME", fcgienv);
730  if (host.empty()) host = gsdl_getenv ("REMOTE_ADDR", fcgienv);
731  text_t browser = gsdl_getenv ("HTTP_USER_AGENT", fcgienv);
732
733  cgiargsclass::const_iterator args_here = args.begin();
734  cgiargsclass::const_iterator args_end = args.end();
735
736  text_t argstr;
737  bool first = true;
738  while (args_here != args_end) {
739    if (!first) argstr += ", ";
740    argstr += (*args_here).first + "=" + (*args_here).second.value;
741    first = false;
742    ++args_here;
743  }
744
745  text_t logfile = filename_cat (configinfo.dbhome, "etc", "usage.txt");
746
747  text_t logstr = script_name;
748  logstr += " " + host;
749  logstr += " [";
750  if (configinfo.LogDateFormat == UTCTime) {
751    logstr += get_date (false);
752  } else if (configinfo.LogDateFormat == Absolute) {
753    time_t ttime = time(NULL);
754    logstr += ttime;
755  } else {
756    // LocalTime
757    logstr += get_date (true);
758  }
759  logstr += "] (" + argstr + ") \"";
760  logstr += browser;
761  logstr += "\"\n";
762
763  return append_logstr (logfile, logstr, logout);
764}
765
766bool receptionist::append_logstr (const text_t &filename, const text_t &logstr,
767                  ostream &logout) {
768
769  utf8outconvertclass text_t2utf8;
770  char *lfile = filename.getcstr();
771
772  int fd = open(lfile, O_WRONLY | O_APPEND);
773 
774  if (fd == -1) {
775    logout << "Error: Couldn't open file " << lfile << "\n";
776    delete []lfile;
777    return false;
778  }
779
780  // lock_val is set to 0 if file is locked successfully
781  int lock_val = 1;
782  GSDL_LOCK_FILE (fd);
783  if (lock_val == 0) {
784    text_t tmp_log_str(logstr); // so we don't pass a const to setinput...
785    text_t2utf8.setinput(&tmp_log_str);
786    char *buffer=new char[logstr.size()];
787    size_t num_chars;
788    convertclass::status_t status;
789    text_t2utf8.convert(buffer, logstr.size(), num_chars, status);
790    // ignore status - assume it is "finished" as buffer is big enough
791    write(fd, buffer, num_chars);
792    GSDL_UNLOCK_FILE (fd);
793    delete []buffer;
794  } else {
795    logout << "Error: Couldn't lock file " << lfile << "\n";
796    close(fd);
797    delete []lfile;
798    return false;
799  }
800
801  close(fd);
802       
803  delete []lfile;
804  return true;
805}
806
807text_t receptionist::expandmacros (const text_t &astring, cgiargsclass &args,
808                   ostream &logout) {
809  text_t outstring;
810  outconvertclass text_t2ascii;
811
812  action *a = actions.getaction (args["a"]);
813  prepare_page (a, args, text_t2ascii, logout);
814  disp.expandstring (displayclass::defaultpackage, astring, outstring);
815  return outstring;
816}
817
818// produce_cgi_page will call get_cgihead_info and
819// produce_content in the appropriate way to output a cgi header and
820// the page content (if needed). If a page could not be created it
821// will return false
822bool receptionist::produce_cgi_page (cgiargsclass &args, ostream &contentout,
823                     ostream &logout, text_tmap &fcgienv) {
824  outconvertclass text_t2ascii;
825
826  response_t response;
827  text_t response_data;
828
829  // produce cgi header
830  get_cgihead_info (args, response, response_data, logout, fcgienv);
831  if (response == location) {
832    // location response (url may contain macros!!)
833    response_data = expandmacros (response_data, args, logout);
834
835    contentout << text_t2ascii << "Location: " << response_data << "\n\n";
836    contentout << flush;
837
838    return true;
839  } else if (response == content) {
840    // content response
841
842#ifdef GSDL_NOCACHE
843    contentout << "Expires: Mon, 26 Jul 1997 05:00:00 GMT\n"; // date in the past
844    tm *tm_ptr = NULL;
845    time_t t = time(NULL);
846    tm_ptr = gmtime (&t);
847    if (tm_ptr != NULL) {
848      char *timestr = new char[128];
849      strftime (timestr, 128, "%a, %d %b %Y %H:%M:%S", tm_ptr);
850      contentout << "Last-Modified: " << timestr << " GMT\n"; // always modified
851      delete []timestr;
852    }
853    contentout << "Cache-Control: no-cache, must-revalidate\n"; // HTTP/1.1
854    contentout << "Pragma: no-cache\n"; // HTTP/1.0
855
856#else
857
858    // use the later of build.cfg and collect.cfg modification times
859    // as the Last-Modified: header, for caching values
860    struct stat file_info;
861    time_t latest=0;
862
863    text_t collectname="";
864    collectname=args["c"];
865    if (collectname != "") {
866
867      text_t collecthome;
868      if (!configinfo.collecthome.empty()) {
869    collecthome = configinfo.collecthome;
870      }   
871      else {
872    collecthome=filename_cat(configinfo.gsdlhome,"collect");
873      }
874      text_t collectdir=filename_cat(collecthome,collectname);
875     
876      text_t buildcfg=filename_cat(collectdir,"index");
877      buildcfg=filename_cat(buildcfg,"build.cfg");
878      char *buildcfg_ptr=buildcfg.getcstr();
879      text_t collectcfg=filename_cat(collectdir,"etc");
880      collectcfg=filename_cat(collectcfg,"collect.cfg");
881      char *collectcfg_ptr=collectcfg.getcstr();
882
883      if (stat(buildcfg_ptr, &file_info)) {
884    // we got an error. Currently don't handle error :(
885    //  logout <<
886      } else {
887    latest=file_info.st_mtime;
888      }
889   
890      if (stat(collectcfg_ptr, &file_info)) {
891    // error - unhandled for now
892      } else {
893    if (latest<file_info.st_mtime) latest=file_info.st_mtime;
894      }
895      delete []buildcfg_ptr;
896      delete []collectcfg_ptr;
897
898      if (latest>0) {
899    // print out modified time, "DDD, dd MMM YYYY hh:mm:ss" format
900    // c library takes care of mem for this string... (has \n at end!!!!)
901    // latest is currently local time, convert to UTC.
902    struct tm* utc_latest;
903    utc_latest=gmtime(&latest);
904    contentout << "Last-Modified: " << asctime(utc_latest);
905      }
906    } // end of collection != ""
907
908#endif
909
910    contentout << text_t2ascii << "Content-type: " << response_data << "\n\n";
911  }
912  else if (response == undecided_location) {
913    // Wait until later to output the target location
914    // Used for the "I'm feeling lucky" functionality
915  }
916  else {
917    // unknown response
918    logout << "Error: get_cgihead_info returned an unknown response type.\n";
919    return false;
920  }
921
922  // produce cgi page
923  if (!produce_content (args, contentout, logout)) return false;
924
925  // flush contentout
926  contentout << flush;
927  return true;
928}
929
930
931// get_cgihead_info determines the cgi header information for
932// a set of cgi arguments. If response contains location then
933// response_data contains the redirect address. If reponse
934// contains content then reponse_data contains the content-type.
935// Note that images can now be produced by the receptionist.
936// Note also, alternative for get_cgihead_info below which
937// stores the information in a text_tmap so it is more easily digested
938
939void receptionist::get_cgihead_info (cgiargsclass &args, response_t &response,
940                     text_t &response_data, ostream &logout,
941                     text_tmap &fcgienv) {
942  outconvertclass text_t2ascii;
943
944  // get the action
945  action *a = actions.getaction (args["a"]);
946  if (a != NULL) {
947    a->get_cgihead_info (args, &protocols, response, response_data, logout);
948
949  } else {
950    // the action was not found!!
951    logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \""
952       << args["a"] << "\" could not be found.\n";
953    response = content;
954    response_data = "text/html";
955  }
956
957  // add the encoding information
958  if (response == content) {
959    if (converters.find(args["w"]) != converters.end()) {
960      response_data += "; charset=" + args["w"];
961    } else {
962      // default to latin 1
963      response_data += "; charset=ISO-8859-1";
964    }
965
966    // add cookie if required
967    if (configinfo.usecookies && !get_cookie(fcgienv))
968      response_data += "\nSet-Cookie: GSDL_UID=" + args["z"]
969    + "; expires=Fri, 25-Dec-2037 00:00:00 GMT";
970  }
971}
972
973
974// Alternative version of get_cgihead_info, stores fielded infomation
975// in text_tmap rather than concatenated string
976void receptionist::get_cgihead_info (cgiargsclass &args, text_tmap &headers,
977                     ostream &logout, text_tmap &fcgienv) {
978
979  response_t response;
980  text_t response_data;
981
982  // get the action
983  action *a = actions.getaction (args["a"]);
984  if (a != NULL) {
985    a->get_cgihead_info (args, &protocols, response, response_data, logout);
986
987  } else {
988    // the action was not found!!
989    outconvertclass text_t2ascii;
990    logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \""
991       << args["a"] << "\" could not be found.\n";
992    response = content;
993    response_data = "text/html";
994  }
995
996  if (response == location) {
997    response_data = expandmacros(response_data, args, logout);
998    headers["Location"] = response_data;
999    return;
1000  }
1001
1002  // add the encoding information
1003  if (response == content) {
1004
1005    if (converters.find(args["w"]) != converters.end()) {
1006      headers["content-encoding"] = args["w"];
1007      response_data += "; charset=" + args["w"];
1008    } else {
1009      // default to utf-8
1010      headers["content-encoding"] = "utf-8";
1011      response_data += "; charset=utf-8";
1012    }
1013
1014    headers["content-type"] = response_data;
1015
1016  }
1017
1018}
1019
1020
1021
1022// produce the page content
1023bool receptionist::produce_content (cgiargsclass &args, ostream &contentout,
1024                    ostream &logout) {
1025
1026  // decide on the output conversion class
1027  text_t &arg_w = args["w"];
1028  rzwsoutconvertclass defaultoutconverter;
1029  rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
1030  if (outconverter == NULL) outconverter = &defaultoutconverter;
1031  outconverter->reset();
1032
1033  // needed for 16-bit unicode only - big endian marker 0xfeff (RFC 2781)
1034  if (arg_w=="utf-16be") {
1035    contentout << '\xfe' << '\xff' ;
1036  }
1037
1038  recptproto *collectproto = protocols.getrecptproto (args["c"], logout);
1039  if (collectproto != NULL) {
1040    // get browsers to process OID
1041    text_t OID = args["d"];
1042    if (OID.empty()) OID = args["cl"];
1043    if (!OID.empty()) {
1044      text_tset metadata;
1045      text_tarray OIDs;
1046      OIDs.push_back (OID);
1047      if (!is_top(OID)) OIDs.push_back (OID + ".pr");
1048      FilterResponse_t response;
1049      metadata.insert ("childtype");
1050      if (get_info (OIDs, args["c"], args["l"], metadata, false, collectproto, response, logout)) {
1051    text_t classifytype;
1052    if (!response.docInfo[0].metadata["childtype"].values[0].empty())
1053      classifytype = response.docInfo[0].metadata["childtype"].values[0];
1054    else if (!is_top (OID)) {
1055      // not sure why this is occasionally not set, but it will
1056      // cause a segfault... possibly if built with no_text? jrm21
1057      if (response.docInfo[1].metadata.find("childtype")
1058          == response.docInfo[1].metadata.end()) {
1059        cerr << "receptionist: no childtype element in metadata map!"
1060         << endl;
1061      } else {
1062        if (!response.docInfo[1].metadata["childtype"].values[0].empty())
1063          classifytype = response.docInfo[1].metadata["childtype"].values[0];
1064      }
1065    }
1066    browserclass *b = browsers.getbrowser (classifytype);
1067    b->processOID (args, collectproto, logout);
1068      }
1069    }
1070 
1071    // translate "d" and "cl" arguments if required
1072    translate_OIDs (args, collectproto, logout);
1073  }
1074 
1075  // produce the page using the desired action
1076  action *a = actions.getaction (args["a"]);
1077  if (a != NULL) {
1078    if (a->uses_display(args)) prepare_page (a, args, (*outconverter), logout);
1079    if (!a->do_action (args, &protocols, &browsers, disp, (*outconverter), contentout, logout))
1080      return false;
1081  } else {
1082    // the action was not found!!
1083    outconvertclass text_t2ascii;
1084
1085    logout << text_t2ascii << "Error receptionist::produce_content: the action \""
1086       << args["a"] << "\" could not be found.\n";
1087   
1088    contentout << (*outconverter)
1089           << "<html>\n"
1090           << "<head>\n"
1091           << "<title>Error</title>\n"
1092           << "</head>\n"
1093           << "<body>\n"
1094           << "<h2>Oops!</h2>\n"
1095           << "Undefined Page. The action \""
1096           << args["a"] << "\" could not be found.\n"
1097           << "</body>\n"
1098           << "</html>\n";
1099  }
1100  return true;
1101}
1102
1103
1104// returns the compressed argument ("e") corresponding to the argument
1105// list. This can be used to save preferences between sessions.
1106text_t receptionist::get_compressed_arg (cgiargsclass &args, ostream &logout) {
1107  // decide on the output conversion class
1108  text_t &arg_w = args["w"];
1109  rzwsoutconvertclass defaultoutconverter;
1110  rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
1111  if (outconverter == NULL) outconverter = &defaultoutconverter;
1112  outconverter->reset();
1113
1114  text_t compressed_args;
1115  if (compress_save_args (argsinfo, configinfo.saveconf, args,
1116              compressed_args, *outconverter, logout))
1117    return compressed_args;
1118
1119  return g_EmptyText;
1120}
1121
1122
1123// will read in all the macro files. If one is not found an
1124// error message will be written to logout and the method will
1125// return false.
1126bool receptionist::read_macrofiles (ostream &logout) {
1127  outconvertclass text_t2ascii;
1128
1129  // redirect the error output to logout
1130  ostream *savedlogout = disp.setlogout (&logout);
1131
1132  // unload any macros that were previously loaded - this allows us to call
1133  // this function a second time to reload all the macro files (useful for
1134  // reading in changed macro files in server versions of greenstone)
1135  disp.unloaddefaultmacros();
1136
1137  // load up the default macro files, the collection directory
1138  // is searched first for the file (if this is being used in
1139  // collection specific mode) and then the main directory(s)
1140  text_t colmacrodir = filename_cat (configinfo.collectdir, "macros");
1141
1142  text_tset maindirs;
1143  text_t gsdlmacrodir = filename_cat (configinfo.gsdlhome, "macros");
1144  maindirs.insert (gsdlmacrodir);
1145  colinfo_tmap::iterator colhere = configinfo.collectinfo.begin();
1146  colinfo_tmap::iterator colend = configinfo.collectinfo.end();
1147  while (colhere != colend) {
1148    if (!((*colhere).second.gsdl_gsdlhome).empty()) {
1149      gsdlmacrodir = filename_cat ((*colhere).second.gsdl_gsdlhome, "macros");
1150      maindirs.insert (gsdlmacrodir);
1151    }
1152    ++colhere;
1153  }
1154
1155  text_tset::iterator arrhere = configinfo.macrofiles.begin();
1156  text_tset::iterator arrend = configinfo.macrofiles.end();
1157  text_t filename;
1158  while (arrhere != arrend) {
1159    bool foundfile = false;
1160
1161    // try in the collection directory if this is being
1162    // run in collection specific mode
1163    if (!configinfo.collection.empty()) {
1164      filename = filename_cat (colmacrodir, *arrhere);
1165      if (file_exists (filename)) {
1166    disp.loaddefaultmacros(filename);
1167    foundfile = true;
1168      }
1169    }
1170
1171    // if we haven't found the macro file yet try in
1172    // the main macro directory(s)
1173    // if file is found in more than one main directory
1174    // we'll load all copies
1175    if (!foundfile) {
1176      text_tset::const_iterator dirhere = maindirs.begin();
1177      text_tset::const_iterator dirend = maindirs.end();
1178      while (dirhere != dirend) {
1179    filename = filename_cat (*dirhere, *arrhere);
1180    if (file_exists (filename)) {
1181      disp.loaddefaultmacros(filename);
1182      foundfile = true;
1183    }
1184    ++dirhere;
1185      }
1186    }
1187
1188    // see if we found the file or not
1189    if (!foundfile) {
1190      logout << text_t2ascii
1191         << "Error: the macro file \"" << *arrhere << "\" could not be found.\n";
1192      if (configinfo.collection.empty()) {
1193    text_t dirs;
1194    joinchar (maindirs, ", ", dirs);
1195    logout << text_t2ascii
1196           << "It should be in either of the following directories ("
1197           << dirs << ").\n\n";
1198
1199      } else {
1200    logout << text_t2ascii
1201           << "It should be in either " << colmacrodir << " or in "
1202           << gsdlmacrodir << ".\n\n";
1203      }
1204      // don't crap out if a macro file is missing
1205      //disp.setlogout (savedlogout);
1206      //return false;
1207    }
1208    ++arrhere;
1209  }
1210
1211  // success
1212
1213  // reset logout to what it was
1214  disp.setlogout (savedlogout);
1215  return true;
1216}
1217
1218
1219
1220
1221// Go through the list of macro files looking to see
1222// if any exist in the collectoin specific area.  If they
1223// do then read them in and add them to the set of existing
1224// current macros
1225
1226void receptionist::read_collection_macrofiles (const text_t& collection, ostream &logout)
1227{
1228  outconvertclass text_t2ascii;
1229
1230  // disp.unloadcollectionmacros();
1231
1232  // redirect the error output to logout
1233  ostream *savedlogout = disp.setlogout (&logout);
1234
1235  text_t colmacrodir
1236    = filename_cat (configinfo.collecthome,collection, "macros");
1237
1238  if (directory_exists (colmacrodir)) {
1239
1240    text_tset::iterator arrhere = configinfo.macrofiles.begin();
1241    text_tset::iterator arrend = configinfo.macrofiles.end();
1242    text_t filename;
1243    while (arrhere != arrend) {
1244
1245      filename = filename_cat (colmacrodir, *arrhere);
1246      if (file_exists (filename)) {
1247    disp.loadcollectionmacros(filename);
1248      }
1249     
1250      ++arrhere;
1251    }
1252  }
1253
1254  // reset logout to what it was
1255  disp.setlogout (savedlogout);
1256}
1257
1258
1259
1260
1261// check_mainargs will check all the main arguments. If a major
1262// error is found it will return false and no cgi page should
1263// be created using the arguments.
1264
1265bool receptionist::check_mainargs (cgiargsclass &args, ostream &logout) {
1266   
1267   if(configinfo.site_auth)
1268      {
1269     args["uan"] = "1";
1270     args["ug"] = configinfo.site_group;
1271      }
1272   
1273   
1274   // if this receptionist is running in collection dependant mode
1275   // then it should always set the collection argument to the
1276   // collection
1277   if (!configinfo.collection.empty()) args["c"] = configinfo.collection;
1278   
1279   // if current collection uses ccscols make sure
1280   // "ccs" argument is set and make "cc" default to
1281   // all collections in "ccs"
1282   if (args["a"] != "config" && !args["c"].empty()) {
1283     
1284      text_t &arg_c = args["c"];
1285      recptproto *collectproto = protocols.getrecptproto (arg_c, logout);
1286      if (collectproto == NULL) {
1287     // oops, this collection isn't valid
1288     outconvertclass text_t2ascii;
1289     logout << text_t2ascii << "ERROR: Invalid collection: " << arg_c << "\n";
1290     // args["c"].clear();
1291     
1292      } else {
1293     
1294     ColInfoResponse_t *cinfo = get_collectinfo_ptr (collectproto, arg_c, logout);
1295     
1296     if(cinfo->authenticate == "collection")
1297        {
1298           args["uan"] = "1";
1299           args["ug"] = cinfo->auth_group;
1300        }
1301     
1302     
1303      if (cinfo != NULL) {
1304     if (!cinfo->ccsCols.empty()) {
1305        args["ccs"] = 1;
1306        if (args["cc"].empty()) {
1307           text_tarray::const_iterator col_here = cinfo->ccsCols.begin();
1308           text_tarray::const_iterator col_end = cinfo->ccsCols.end();
1309           bool first = true;
1310           while (col_here != col_end) {
1311          // make sure it's a valid collection
1312          if (protocols.getrecptproto (*col_here, logout) != NULL) {
1313             if (!first) args["cc"].push_back (',');
1314             args["cc"] += *col_here;
1315             first = false;
1316          }
1317          ++col_here;
1318        }
1319      }
1320    }
1321      } else {
1322    logout << "ERROR (receptionist::check_mainargs): get_collectinfo_ptr returned NULL\n";
1323      }
1324    }
1325  }
1326
1327  // argument "v" can only be 0 or 1. Use the default value
1328  // if it is out of range
1329  int arg_v = args.getintarg ("v");
1330  if (arg_v != 0 && arg_v != 1) {
1331    cgiarginfo *vinfo = argsinfo.getarginfo ("v");
1332    if (vinfo != NULL) args["v"] = vinfo->argdefault;
1333  }
1334
1335  // argument "f" can only be 0 or 1. Use the default value
1336  // if it is out of range
1337  int arg_f = args.getintarg ("f");
1338  if (arg_f != 0 && arg_f != 1) {
1339    cgiarginfo *finfo = argsinfo.getarginfo ("f");
1340    if (finfo != NULL) args["f"] = finfo->argdefault;
1341  }
1342
1343  return true;
1344}
1345
1346// translate_OIDs translates the "d" and "cl" arguments to their correct values
1347// if they use the tricky ".fc", ".lc" type syntax.
1348void receptionist::translate_OIDs (cgiargsclass &args, recptproto *collectproto,
1349                   ostream &logout) {
1350
1351  FilterResponse_t response;
1352  FilterRequest_t request;
1353  comerror_t err;
1354  text_t &arg_d = args["d"];
1355  text_t &arg_cl = args["cl"];
1356  text_t &collection = args["c"];
1357 
1358  // do a call to translate OIDs if required
1359  request.filterName = "NullFilter";
1360  request.filterResultOptions = FROID;
1361  if (!arg_d.empty() && needs_translating (arg_d)) {
1362    request.docSet.push_back (arg_d);
1363    collectproto->filter (collection, request, response, err, logout);
1364    arg_d = response.docInfo[0].OID;
1365    request.clear();
1366  }
1367  // we'll also check here that the "cl" argument has a "classify" doctype
1368  // (in case ".fc" or ".lc" have screwed up)
1369  if (needs_translating (arg_cl)) {
1370    request.fields.insert ("doctype");
1371    request.docSet.push_back (arg_cl);
1372    request.filterResultOptions = FRmetadata;
1373    collectproto->filter (collection, request, response, err, logout);
1374    // set to original value (without .xx stuff) if doctype isn't "classify"
1375    if (response.docInfo[0].metadata["doctype"].values[0] != "classify")
1376      strip_suffix (arg_cl);
1377    else
1378      arg_cl = response.docInfo[0].OID;
1379  }
1380}
1381
1382// prepare_page sets up page parameters, sets display macros
1383// and opens the page ready for output
1384void receptionist::prepare_page (action *a, cgiargsclass &args,
1385                 outconvertclass &outconvert,
1386                 ostream &logout) {
1387  // set up page parameters
1388  text_t pageparams;
1389  bool first = true;
1390
1391  text_tmap::iterator params_here = configinfo.pageparams.begin();
1392  text_tmap::iterator params_end = configinfo.pageparams.end();
1393  while (params_here != params_end) {
1394    // page params are those from main.cfg (eg pageparam v 0) plus
1395    // two defaults set in recptconf.clear() (c="" and l=en)
1396    // This used to check if the current value of the page param
1397    // == the default value, then don't add in it the list
1398    // but if l=en, and there is a macro with [l=en], then it doesn't
1399    // find it.
1400    // so now all page params will go into the list. I assume this will
1401    // mean more attempts to find each macro, but nothing worsee than
1402    // that.  --kjdon
1403    //if (args[(*params_here).first] != (*params_here).second) {
1404      if (first)
1405    first = false;
1406      else
1407    pageparams += ",";
1408
1409      pageparams += (*params_here).first;
1410      pageparams += "=";
1411      pageparams += args[(*params_here).first];
1412      // }
1413   
1414    ++params_here;
1415  }
1416 
1417
1418  // open the page
1419  disp.openpage(pageparams, configinfo.macroprecedence);
1420
1421  disp.unloadcollectionmacros();
1422
1423  text_t collection = args["c"];
1424  if (!collection.empty()) {
1425    read_collection_macrofiles(collection,logout);
1426  }
1427
1428  // define external macros for each action
1429  actionptrmap::iterator actionhere = actions.begin ();
1430  actionptrmap::iterator actionend = actions.end ();
1431
1432  while (actionhere != actionend) {
1433    assert ((*actionhere).second.a != NULL);
1434    if ((*actionhere).second.a != NULL) {
1435      (*actionhere).second.a->define_external_macros (disp, args, &protocols, logout);
1436    }
1437    ++actionhere;
1438  }
1439
1440
1441  // define internal macros for the current action
1442  a->define_internal_macros (disp, args, &protocols, logout);
1443 
1444  // define general macros. the defining of general macros is done here so that
1445  // the last possible version of the cgi arguments are used
1446  define_general_macros (args, outconvert, logout);
1447}
1448
1449void receptionist::define_general_macros (cgiargsclass &args, outconvertclass &/*outconvert*/,
1450                      ostream &logout) {
1451
1452  text_t &collection = args["c"];
1453
1454  disp.setmacro ("gsdlhome", displayclass::defaultpackage, dm_safe(configinfo.gsdlhome));
1455  disp.setmacro ("gwcgi", displayclass::defaultpackage, configinfo.gwcgi);
1456  disp.setmacro ("httpimg", displayclass::defaultpackage, configinfo.httpimg);
1457  disp.setmacro ("httpprefix", displayclass::defaultpackage, configinfo.httpprefix);
1458
1459
1460  text_t compressedoptions = get_compressed_arg(args, logout);
1461  disp.setmacro ("compressedoptions", displayclass::defaultpackage, dm_safe(compressedoptions));
1462  // need a decoded version of compressedoptions for use within forms
1463  // as browsers encode values from forms before sending to server
1464  // (e.g. %25 becomes %2525)
1465  decode_cgi_arg (compressedoptions);
1466  if (args["w"] == "utf-8") { // if the encoding was utf-8, then compressed options was utf-8, and we need unicode.
1467    // if encoding wasn't utf-8, then compressed opotions may be screwed up, but seems to work for 8 bit encodings?
1468    compressedoptions = to_uni(compressedoptions);
1469  }
1470  disp.setmacro ("decodedcompressedoptions", displayclass::defaultpackage, dm_safe(compressedoptions));
1471
1472#if defined (__WIN32__)
1473  disp.setmacro ("win32", displayclass::defaultpackage, "1");
1474#endif
1475
1476  // set _cgiargX_ macros for each cgi argument
1477  cgiargsclass::const_iterator argshere = args.begin();
1478  cgiargsclass::const_iterator argsend = args.end();
1479  while (argshere != argsend) {
1480    if (((*argshere).first == "q") ||
1481    ((*argshere).first == "qa") ||
1482    ((*argshere).first == "qtt") ||
1483    ((*argshere).first == "qty") ||
1484    ((*argshere).first == "qp") ||
1485    ((*argshere).first == "qpl") ||
1486    ((*argshere).first == "qr") ||
1487    ((*argshere).first == "q2"))
1488      // need to escape special characters from query string
1489      disp.setmacro ("cgiarg" + (*argshere).first,
1490             displayclass::defaultpackage, html_safe((*argshere).second.value));
1491    else if ((*argshere).first == "hp") {
1492      disp.setmacro ("cgiarg" + (*argshere).first, displayclass::defaultpackage, (*argshere).second.value);
1493    } else {
1494      disp.setmacro ("cgiarg" + (*argshere).first, displayclass::defaultpackage, dm_safe((*argshere).second.value));
1495    }
1496    ++argshere;
1497  }
1498
1499  // set collection specific macros
1500  if (!collection.empty()) {
1501    recptproto *collectproto = protocols.getrecptproto (collection, logout);
1502    if (collectproto != NULL) {
1503      FilterResponse_t response;
1504      text_tset metadata;
1505      get_info ("collection", collection, args["l"], metadata, false,
1506        collectproto, response, logout);
1507     
1508      if (!response.docInfo[0].metadata.empty()) {
1509    MetadataInfo_tmap::const_iterator here = response.docInfo[0].metadata.begin();
1510    MetadataInfo_tmap::const_iterator end = response.docInfo[0].metadata.end();
1511    while (here != end) {
1512      if (((*here).first != "haschildren") && ((*here).first != "hasnext") &&
1513          ((*here).first != "hasprevious")) {
1514        // check for args in form name:lang
1515        text_t name = g_EmptyText;
1516        text_t lang = g_EmptyText;
1517        bool colonfound=false;
1518        text_t::const_iterator a = (*here).first.begin();
1519        text_t::const_iterator b = (*here).first.end();
1520        while (a !=b) {
1521          if (*a==':') {
1522        colonfound=true;
1523          }
1524          else {
1525        if (colonfound)
1526          lang.push_back(*a);
1527        else name.push_back(*a);
1528          }
1529          ++a;
1530        }
1531        if (!lang.empty()) {
1532          if (args["l"]==lang) {
1533        disp.setcollectionmacro(displayclass::defaultpackage, name, "", (*here).second.values[0]);
1534          }
1535        }
1536        else { // the default one
1537          disp.setcollectionmacro(displayclass::defaultpackage, (*here).first,  "", (*here).second.values[0]);
1538        }
1539      }
1540      ++here;
1541    }
1542      }
1543
1544      text_t iconcollection;
1545      disp.expandstring (displayclass::defaultpackage, "_iconcollection_", iconcollection);
1546      if (!iconcollection.empty())
1547    {
1548      ColInfoResponse_t cinfo;
1549      comerror_t err;
1550      collectproto->get_collectinfo (collection, cinfo, err, logout);
1551      if (iconcollection[0]=='/' && !cinfo.httpdomain.empty())
1552        {
1553          // local but with full path
1554          iconcollection = "http://" + cinfo.httpdomain + iconcollection;
1555          disp.setmacro("iconcollection", displayclass::defaultpackage, iconcollection);
1556        }
1557    }
1558    }
1559  }
1560
1561  if (!collection.empty()) {
1562    ColInfoResponse_t cinfo;
1563    comerror_t err;
1564    recptproto *collectproto = protocols.getrecptproto (collection, logout);
1565    if (collectproto != NULL) {
1566      collectproto->get_collectinfo (collection, cinfo, err, logout);
1567      text_t httpcollection;
1568      if (!cinfo.httpdomain.empty()) httpcollection = "http://";
1569      httpcollection += cinfo.httpdomain + cinfo.httpprefix + "/collect/"
1570    + collection;
1571      disp.setmacro ("httpcollection", displayclass::defaultpackage,
1572             httpcollection);
1573      // as of gsdl 2.53, collect.cfg can specify macros
1574      if (cinfo.collection_macros.size() > 0) {
1575    collectionmeta_map::const_iterator this_macro=cinfo.collection_macros.begin();
1576    collectionmeta_map::const_iterator done_macro=cinfo.collection_macros.end();
1577    while (this_macro != done_macro) {
1578      text_t package = "Global";
1579      text_t macroname = this_macro->first;
1580      // if this macro name is AAA:bbb then extract the package name
1581      text_t::const_iterator thischar, donechar;
1582      thischar = macroname.begin();
1583      donechar = macroname.end();
1584      while (thischar < donechar) {
1585        if (*thischar == ':') {
1586          package = substr(macroname.begin(),thischar);
1587          macroname = substr(thischar+1,donechar);
1588          break;
1589        }
1590        ++thischar;
1591      }
1592
1593      text_tmap params_map = this_macro->second;
1594      text_tmap::const_iterator this_param = params_map.begin();
1595      text_tmap::const_iterator done_param = params_map.end();
1596      while (this_param != done_param) {
1597        disp.setcollectionmacro(package,
1598                    macroname,
1599                    this_param->first,
1600                    this_param->second);
1601        ++this_param;
1602      }
1603     
1604      ++this_macro;
1605    }
1606      } // col macros
1607    } // collectproto != NULL
1608  }
1609
1610}
1611
1612// gets collection info from cache if found or
1613// calls collection server (and updates cache)
1614// returns NULL if there's an error
1615ColInfoResponse_t *receptionist::get_collectinfo_ptr (recptproto *collectproto,
1616                              const text_t &collection,
1617                              ostream &logout) {
1618 
1619  // check the cache
1620  colinfo_tmap::iterator it = configinfo.collectinfo.find (collection);
1621  if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) {
1622    // found it
1623    return &((*it).second.info);
1624  }
1625
1626  // not cached, get info from collection server
1627  if (collectproto == NULL) {
1628    logout << "ERROR: receptionist::get_collectinfo_ptr passed null collectproto\n";
1629    return NULL;
1630  }
1631   
1632  comerror_t err;
1633  if (it == configinfo.collectinfo.end()) {
1634    collectioninfo_t cinfo;
1635    collectproto->get_collectinfo (collection, cinfo.info, err, logout);
1636    if (err != noError) {
1637      outconvertclass text_t2ascii;
1638      logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \""
1639         << get_comerror_string (err) << "\"while getting collectinfo\n";
1640      return NULL;
1641    }
1642    cinfo.info_loaded = true;
1643    configinfo.collectinfo[collection] = cinfo;
1644    return &(configinfo.collectinfo[collection].info);
1645  } else {
1646    collectproto->get_collectinfo (collection, (*it).second.info, err, logout);
1647    if (err != noError) {
1648      outconvertclass text_t2ascii;
1649      logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \""
1650         << get_comerror_string (err) << "\"while getting collectinfo\n";
1651      return NULL;
1652    }
1653    (*it).second.info_loaded = true;
1654    return &((*it).second.info);
1655  }
1656}
1657
1658// removes a collection from the cache so that the next
1659// call to get_collectinfo_ptr() for that collection will
1660// retrieve the collection info from the collection server
1661void receptionist::uncache_collection (const text_t &collection) {
1662
1663  colinfo_tmap::iterator it = configinfo.collectinfo.find (collection);
1664  if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) {
1665
1666    (*it).second.info_loaded = false;
1667
1668  }
1669}
1670
1671// Handles an "Encoding" line from a configuration file - note that the
1672// configinfo.encodings map is a bit of a hack (to be fixed when the
1673// configuration files are tidied up).
1674void receptionist::configure_encoding (const text_tarray &cfgline) {
1675
1676  text_t subkey, subvalue, shortname, longname, mapfile;
1677  int multibyte = 0;
1678  text_t::const_iterator cfglinesub_here;
1679  text_tarray::const_iterator cfgline_here = cfgline.begin();
1680  text_tarray::const_iterator cfgline_end = cfgline.end();
1681  while (cfgline_here != cfgline_end) {
1682    if (*cfgline_here == "multibyte") {
1683      multibyte = 1;
1684    } else {
1685      cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
1686                      (*cfgline_here).end(), '=', subkey);
1687      if (subkey == "shortname") {
1688    shortname = substr (cfglinesub_here, (*cfgline_here).end());
1689      } else if (subkey == "longname") {
1690    longname = substr (cfglinesub_here, (*cfgline_here).end());
1691      } else if (subkey == "map") {
1692    mapfile = substr (cfglinesub_here, (*cfgline_here).end());
1693      }
1694    }
1695    ++cfgline_here;
1696  }
1697  if (!shortname.empty()) {
1698    if (longname.empty()) longname = shortname;
1699
1700    // add the converter
1701    if (shortname == "utf-8") {
1702      utf8inconvertclass *utf8inconvert = new utf8inconvertclass();
1703      utf8outconvertclass *utf8outconvert = new utf8outconvertclass();
1704      utf8outconvert->set_rzws(1);
1705      add_converter (shortname, utf8inconvert, utf8outconvert);
1706      configinfo.encodings[longname] = shortname;
1707
1708    } else if (shortname == "utf-16be") {
1709      // we use the default input converter as this shouldn't ever be used
1710      // for converting from unicode...
1711      inconvertclass *inconverter = new inconvertclass();
1712      utf16outconvertclass *outconverter = new utf16outconvertclass();
1713      add_converter (shortname, inconverter, outconverter);
1714      configinfo.encodings[longname] = shortname;
1715   
1716    } else if (!mapfile.empty()) {
1717
1718      if (mapfile == "8859_1.ump") {
1719    // iso-8859-1 is a special case as it'll always be supported by the
1720    // standard converter class and therefore doesn't need to use its
1721    // mapping file
1722    inconvertclass *inconvert = new inconvertclass();
1723    rzwsoutconvertclass *outconvert = new rzwsoutconvertclass();
1724    outconvert->set_rzws(1);
1725    add_converter (shortname, inconvert, outconvert); 
1726    configinfo.encodings[longname] = shortname;
1727
1728      } else {
1729    text_t to_uc_map = filename_cat(configinfo.gsdlhome, "mappings", "to_uc", mapfile);
1730    text_t from_uc_map = filename_cat(configinfo.gsdlhome, "mappings", "from_uc", mapfile);
1731    if (file_exists(to_uc_map) && file_exists(from_uc_map)) {
1732
1733      mapinconvertclass *mapinconvert = new mapinconvertclass();
1734      mapinconvert->setmapfile (to_uc_map, 0x003F);
1735      mapinconvert->set_multibyte (multibyte);
1736      mapoutconvertclass *mapoutconvert = new mapoutconvertclass();
1737      mapoutconvert->setmapfile (from_uc_map, 0x3F);
1738      mapoutconvert->set_multibyte (multibyte);
1739      mapoutconvert->set_rzws(1);
1740      add_converter (shortname, mapinconvert, mapoutconvert);
1741      configinfo.encodings[longname] = shortname;
1742    }
1743      }
1744    }
1745  }
1746}
Note: See TracBrowser for help on using the browser.