root/main/trunk/greenstone2/runtime-src/src/recpt/receptionist.cpp @ 24106

Revision 24106, 57.4 KB (checked in by davidb, 9 years ago)

Two new macros are set, _remoteAddr_ and _remoteHost_ (the latter only if the reverse lookup is known). These macros reflect the comparable environment variables that a CGI environment sets.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * receptionist.cpp -- a web interface for the gsdl
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26// following line required to get fstream.filedesc() on darwin (Mac OS X)
27// gcc 2.91 automatically defines this in stream.h
28#define _STREAM_COMPAT  1
29
30#include "receptionist.h"
31#include "recptprototools.h"
32#include "fileutil.h"
33#include "cgiutils.h"
34#include "htmlutils.h"
35#include "gsdltools.h"
36#include "gsdltimes.h"
37#include "OIDtools.h"
38#include <assert.h>
39#include <time.h>
40#include <stdio.h> // for open()
41#include <fcntl.h> // for open() flags
42// following 2 are for printing Last-Modified http header.
43#include <sys/stat.h>
44#include <time.h>
45
46#if defined (GSDL_USE_IOS_H)
47#include <fstream.h>
48#else
49#include <fstream>
50#endif
51
52void recptconf::clear () {
53  gsdlhome.clear();
54  collecthome.clear();
55  dbhome.clear();
56  collectinfo.erase(collectinfo.begin(), collectinfo.end());
57  collection.clear();
58  collectdir.clear();
59  httpprefix.clear();
60  httpweb.clear();
61  gwcgi.clear();
62  macrofiles.erase(macrofiles.begin(), macrofiles.end());
63  saveconf.clear();
64  usecookies = false;
65  logcgiargs = false;
66  LogDateFormat = LocalTime;
67
68  maintainer.clear();
69  MailServer.clear();
70  LogEvents = Disabled;
71  EmailEvents = Disabled;
72  EmailUserEvents = false;
73
74  languages.erase(languages.begin(), languages.end());
75  encodings.erase(encodings.begin(), encodings.end());
76
77  site_auth = false;
78  HomePageType = "images";
79  HomePageCols = 3;
80 
81  // these default page parameters can always be overriden
82  // in the configuration file
83  pageparams.erase(pageparams.begin(), pageparams.end());
84  pageparams["c"] = "";
85  pageparams["l"] = "en";
86
87#ifdef MACROPRECEDENCE
88  macroprecedence = MACROPRECEDENCE;
89#else
90  macroprecedence.clear();
91#endif
92}
93
94
95void collectioninfo_t::clear () {
96  gsdl_gsdlhome.clear();
97  gsdl_dbhome.clear();
98
99  info_loaded = false;
100  info.clear();
101}
102
103void languageinfo_t::clear () {
104  longname.clear();
105  defaultencoding.clear();
106}
107
108receptionist::receptionist () {
109  // create a list of cgi arguments
110  // this must be done before the configuration
111
112  cgiarginfo ainfo;
113
114  ainfo.shortname = "e";
115  ainfo.longname = "compressed arguments";
116  ainfo.multiplechar = true;
117  ainfo.defaultstatus = cgiarginfo::good;
118  ainfo.argdefault = g_EmptyText;
119  ainfo.savedarginfo = cgiarginfo::mustnot;
120  argsinfo.addarginfo (NULL, ainfo);
121
122  ainfo.shortname = "a";
123  ainfo.longname = "action";
124  ainfo.multiplechar = true;
125  ainfo.defaultstatus = cgiarginfo::none;
126  ainfo.argdefault = g_EmptyText;
127  ainfo.savedarginfo = cgiarginfo::must;
128  argsinfo.addarginfo (NULL, ainfo);
129
130  // w=western
131  ainfo.shortname = "w";
132  ainfo.longname = "encoding";
133  ainfo.multiplechar = true;
134  ainfo.defaultstatus = cgiarginfo::none;
135  ainfo.argdefault = g_EmptyText;
136  ainfo.savedarginfo = cgiarginfo::must;
137  argsinfo.addarginfo (NULL, ainfo);
138 
139  ainfo.shortname = "nw";
140  ainfo.longname = "new encoding";
141  ainfo.multiplechar = true;
142  ainfo.defaultstatus = cgiarginfo::none;
143  ainfo.argdefault = g_EmptyText;
144  ainfo.savedarginfo = cgiarginfo::mustnot;
145  argsinfo.addarginfo (NULL, ainfo);
146 
147  ainfo.shortname = "c";
148  ainfo.longname = "collection";
149  ainfo.multiplechar = true;
150  ainfo.defaultstatus = cgiarginfo::none;
151  ainfo.argdefault = g_EmptyText;
152  ainfo.savedarginfo = cgiarginfo::must;
153  argsinfo.addarginfo (NULL, ainfo);
154 
155  // the interface language name should use the ISO 639
156  // standard
157  ainfo.shortname = "l";
158  ainfo.longname = "interface language";
159  ainfo.multiplechar = true;
160  ainfo.defaultstatus = cgiarginfo::weak;
161  ainfo.argdefault = "en";
162  ainfo.savedarginfo = cgiarginfo::must;
163  argsinfo.addarginfo (NULL, ainfo);
164 
165  ainfo.shortname = "nl";
166  ainfo.longname = "new language";
167  ainfo.multiplechar = false;
168  ainfo.defaultstatus = cgiarginfo::none;
169  ainfo.argdefault = "0";
170  ainfo.savedarginfo = cgiarginfo::mustnot;
171  argsinfo.addarginfo (NULL, ainfo);
172 
173  // the GSDL_UID (cookie)
174  ainfo.shortname = "z";
175  ainfo.longname = "gsdl uid";
176  ainfo.multiplechar = true;
177  ainfo.defaultstatus = cgiarginfo::none;
178  ainfo.argdefault = g_EmptyText;
179  ainfo.savedarginfo = cgiarginfo::mustnot;
180  argsinfo.addarginfo (NULL, ainfo);
181}
182
183
184void receptionist::add_action (action *theaction) {
185  // make sure we have an action to add
186  if (theaction == NULL) return;
187
188  // add this action to the list of actions
189  actions.addaction(theaction);
190 
191  // add the cgi arguments from this action
192  argsinfo.addarginfo (NULL, *(theaction->getargsinfo()));
193}
194
195
196void receptionist::add_browser (browserclass *thebrowser) {
197  // make sure we have a browser to add
198  if (thebrowser == NULL) return;
199
200  // add this browser to the list of browsers
201  browsers.addbrowser(thebrowser);
202}
203
204
205void receptionist::setdefaultbrowser (const text_t &browsername) {
206  browsers.setdefaultbrowser (browsername);
207}
208
209
210// configure should be called for each line in the
211// configuration files to configure the receptionist and everything
212// it contains. The configuration should take place after everything
213// has been added but before the initialisation.
214
215void receptionist::configure (const text_t &key, const text_tarray &cfgline) {
216  // configure the receptionist
217
218   
219   
220  if (cfgline.size() >= 1) {
221    cgiarginfo *info = NULL;
222    if (key == "gsdlhome") {
223      configinfo.gsdlhome = cfgline[0];
224      if (configinfo.dbhome.empty()) configinfo.dbhome = cfgline[0];
225    }
226    else if (key == "collecthome") configinfo.collecthome = cfgline[0];
227    else if (key == "gdbmhome") configinfo.dbhome = cfgline[0];
228    else if (key == "collection") {
229      configinfo.collection = cfgline[0];
230      // also need to set the default arg to this collection
231      if ((info = argsinfo.getarginfo("c")) != NULL) {
232    info->defaultstatus = cgiarginfo::good;
233    info->argdefault = cfgline[0];
234      }
235     
236    }
237    else if (key == "collectdir") configinfo.collectdir = cfgline[0];
238    else if (key == "httpprefix") configinfo.httpprefix = cfgline[0];
239    else if (key == "httpweb") configinfo.httpweb = cfgline[0];
240    else if (key == "gwcgi") configinfo.gwcgi = cfgline[0];
241    else if (key == "macrofiles") {
242      // want to append to macrofiles (i.e. may be several config files
243      // contributing, maybe from several collections).
244      text_tarray::const_iterator here = cfgline.begin();
245      text_tarray::const_iterator end = cfgline.end();
246      while (here != end) {
247    configinfo.macrofiles.insert (*here);
248    ++here;
249      }
250    }
251    else if (key == "saveconf") configinfo.saveconf = cfgline[0];
252    else if (key == "usecookies") configinfo.usecookies = (cfgline[0] == "true");
253    else if (key == "logcgiargs") configinfo.logcgiargs = (cfgline[0] == "true");
254    else if (key == "maintainer") configinfo.maintainer = cfgline[0];
255    else if (key == "MailServer") configinfo.MailServer = cfgline[0];
256    else if (key == "LogDateFormat") {
257      if (cfgline[0] == "UTCTime") configinfo.LogDateFormat = UTCTime;
258      else if (cfgline[0] == "Absolute") configinfo.LogDateFormat = Absolute;
259    }
260    else if (key == "LogEvents") {
261      if (cfgline[0] == "CollectorEvents") configinfo.LogEvents = CollectorEvents;
262      else if (cfgline[0] == "AllEvents") configinfo.LogEvents = AllEvents;
263    }
264    else if (key == "EmailEvents") {
265      if (cfgline[0] == "CollectorEvents") configinfo.EmailEvents = CollectorEvents;
266      else if (cfgline[0] == "AllEvents") configinfo.EmailEvents = AllEvents;
267    }
268    else if (key == "EmailUserEvents") configinfo.EmailUserEvents = (cfgline[0] == "true");
269    else if (key == "pageparam") {
270      if (cfgline.size() >= 2) configinfo.pageparams[cfgline[0]] = cfgline[1];
271      else configinfo.pageparams[cfgline[0]] = "";
272    }
273    else if (key == "macroprecedence") configinfo.macroprecedence = cfgline[0];
274    else if (key == "collectinfo") {
275      if (cfgline.size() == 3) {
276    // for backwards compatability with older collections that only use
277    // gsdlhome and dbhome
278    collectioninfo_t cinfo;
279    cinfo.gsdl_gsdlhome = cfgline[1];
280    cinfo.gsdl_collecthome = filename_cat(cfgline[1],"collect");
281    cinfo.gsdl_dbhome = cfgline[2];
282    configinfo.collectinfo[cfgline[0]] = cinfo;
283      }
284      else if (cfgline.size() >= 4) {
285    collectioninfo_t cinfo;
286    cinfo.gsdl_gsdlhome = cfgline[1];
287    cinfo.gsdl_collecthome = cfgline[2];
288    cinfo.gsdl_dbhome = cfgline[3];
289    configinfo.collectinfo[cfgline[0]] = cinfo;
290      }
291    }
292
293    // Read in the value for the site_auth directive either true or false
294    else if (key == "site_auth") configinfo.site_auth = (cfgline[0] == "true");
295
296    else if (key == "site_group")
297       joinchar(cfgline,',',configinfo.site_group);
298       
299    else if (key == "SiteFormat") {
300       if (cfgline[0] == "HomePageType") {
301      configinfo.HomePageType = cfgline[1];
302       } else if (cfgline[0] == "HomePageCols") {
303      configinfo.HomePageCols = cfgline[1].getint();
304       }
305    }
306       
307    else if (key == "cgiarg") {
308      // get shortname
309      bool seen_defaultstatus = false;
310      text_t subkey, subvalue;
311      text_t shortname;
312      text_t::const_iterator cfglinesub_here;
313      text_tarray::const_iterator cfgline_here = cfgline.begin();
314      text_tarray::const_iterator cfgline_end = cfgline.end();
315      while (cfgline_here != cfgline_end) {
316    cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
317                    (*cfgline_here).end(), '=', subkey);
318    if (subkey == "shortname") {
319      shortname = substr (cfglinesub_here, (*cfgline_here).end());
320    }
321    ++cfgline_here;
322      }
323
324      // if we found the shortname process the line again filling in values
325      if (!shortname.empty()) {
326    cgiarginfo &chinfo = argsinfo[shortname];
327    chinfo.shortname = shortname; // in case this is a new argument
328   
329    cfgline_here = cfgline.begin();
330    while (cfgline_here != cfgline_end) {
331      cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
332                      (*cfgline_here).end(), '=', subkey);
333      subvalue = substr (cfglinesub_here, (*cfgline_here).end());
334
335      if (subkey == "longname") chinfo.longname = subvalue;
336      else if (subkey == "multiplechar") chinfo.multiplechar = (subvalue == "true");
337      else if (subkey == "multiplevalue") chinfo.multiplevalue = (subvalue == "true");
338      else if (subkey == "defaultstatus") {
339        seen_defaultstatus = true;
340        if (subvalue == "none") chinfo.defaultstatus = cgiarginfo::none;
341        else if (subvalue == "weak") chinfo.defaultstatus = cgiarginfo::weak;
342        else if (subvalue == "good") chinfo.defaultstatus = cgiarginfo::good;
343        else if (subvalue == "config") chinfo.defaultstatus = cgiarginfo::config;
344        else if (subvalue == "imperative") chinfo.defaultstatus = cgiarginfo::imperative;
345      }
346      else if (subkey == "argdefault") {
347        chinfo.argdefault = subvalue;
348        if (!seen_defaultstatus) chinfo.defaultstatus = cgiarginfo::config;
349      }
350      else if (subkey == "savedarginfo") {
351        if (subvalue == "mustnot") chinfo.savedarginfo = cgiarginfo::mustnot;
352        else if (subvalue == "can") chinfo.savedarginfo = cgiarginfo::can;
353        else if (subvalue == "must") chinfo.savedarginfo = cgiarginfo::must;
354      }
355     
356      ++cfgline_here;
357    }
358      }
359
360    } else if (key == "Encoding") {
361
362      configure_encoding (cfgline);
363
364    } else if (key == "Language") {
365      text_t subkey, subvalue, shortname;
366      languageinfo_t lang;
367      text_t::const_iterator cfglinesub_here;
368      text_tarray::const_iterator cfgline_here = cfgline.begin();
369      text_tarray::const_iterator cfgline_end = cfgline.end();
370      while (cfgline_here != cfgline_end) {
371    cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
372                    (*cfgline_here).end(), '=', subkey);
373    if (subkey == "shortname") {
374      shortname = substr (cfglinesub_here, (*cfgline_here).end());
375    } else if (subkey == "longname") {
376      lang.longname = substr (cfglinesub_here, (*cfgline_here).end());
377    } else if (subkey == "default_encoding") {
378      lang.defaultencoding = substr (cfglinesub_here, (*cfgline_here).end());
379    }
380    ++cfgline_here;
381      }
382      if (!shortname.empty()) {
383    if (lang.longname.empty()) lang.longname = shortname;
384    configinfo.languages[shortname] = lang;
385      }
386    }
387  }
388 
389  // configure the actions
390  actionptrmap::iterator actionhere = actions.begin ();
391  actionptrmap::iterator actionend = actions.end ();
392
393  while (actionhere != actionend) {
394    assert ((*actionhere).second.a != NULL);
395    if ((*actionhere).second.a != NULL)
396      (*actionhere).second.a->configure(key, cfgline);
397
398    ++actionhere;
399  }
400
401  // configure the protocols
402  recptprotolistclass::iterator protohere = protocols.begin ();
403  recptprotolistclass::iterator protoend = protocols.end ();
404
405  while (protohere != protoend) {
406    assert ((*protohere).p != NULL);
407    comerror_t err;
408    if ((*protohere).p != NULL)
409      (*protohere).p->configure(key, cfgline, err);
410   
411    ++protohere;
412  }
413
414  // configure the browsers
415  browserptrmap::iterator browserhere = browsers.begin ();
416  browserptrmap::iterator browserend = browsers.end ();
417
418  while (browserhere != browserend) {
419    assert ((*browserhere).second.b != NULL);
420    if ((*browserhere).second.b != NULL)
421      (*browserhere).second.b->configure(key, cfgline);
422   
423    ++browserhere;
424  }
425}
426
427
428void receptionist::configure (const text_t &key, const text_t &value) {
429  text_tarray cfgline;
430  cfgline.push_back (value);
431  configure(key, cfgline);
432}
433
434
435// init should be called after all the actions and protocols have been
436// added to the receptionist and after everything has been configured but
437// before any pages are created.  It returns true on success and false on
438// failure. If false is returned getpage should not be called (without
439// producing meaningless output), instead an error page should be produced
440// by the calling code.
441bool receptionist::init (ostream &logout) {
442
443  // first configure collectdir
444  if (!configinfo.collection.empty()) {
445
446    // collection specific mode
447
448    text_t collectdir = configinfo.gsdlhome;
449
450    if (!configinfo.collectdir.empty()) {
451      // has already been configured
452      collectdir = configinfo.collectdir;
453    } else {
454
455      // decide where collectdir is by searching for collect.cfg
456      // look in $GSDLHOME/collect/collection-name/etc/collect.cfg and
457      // then $GSDLHOME/etc/collect.cfg
458      collectdir = filename_cat (configinfo.gsdlhome, "collect");
459      collectdir = filename_cat (collectdir, configinfo.collection);
460      text_t filename = filename_cat (collectdir, "etc");
461      filename = filename_cat (filename, "collect.cfg");
462     
463      if (!file_exists(filename)) collectdir = configinfo.gsdlhome;
464    }
465
466    configure("collectdir", collectdir);
467
468  }
469  else {
470
471    text_t collecthome;
472    if (configinfo.collecthome.empty()) {
473      collecthome = filename_cat(configinfo.gsdlhome,"collect");
474    }
475    else {
476      collecthome = configinfo.collecthome;
477    }
478
479    configure("collecthome", collecthome);
480
481    // for backwards compatability collectdir set to gsdlhome
482    // (possible it could now be removed)
483    configure("collectdir", configinfo.gsdlhome);
484  }
485
486
487  // read in the macro files
488  if (!read_macrofiles (logout)) return false;
489
490  // there must be at least one action defined
491  if (actions.empty()) {
492    logout << "Error: no actions have been added to the receptionist\n";
493    return false;
494  }
495
496  // there must be at least one browser defined
497  if (browsers.empty()) {
498    logout << "Error: no browsers have been added to the receptionist\n";
499    return false;
500  }
501
502  // create a saveconf string if there isn't one already
503  if (configinfo.saveconf.empty())
504    configinfo.saveconf = create_save_conf_str (argsinfo, logout);
505
506  // check the saveconf string
507  if (!check_save_conf_str (configinfo.saveconf, argsinfo, logout))
508    return false;
509
510  // set a random seed
511  srand (time(NULL));
512
513  // if maintainer email address is something dodgy (for now I'll define
514  // dodgy as being anything that doesn't contain '@') disable EmailEvents
515  // and EmailUserEvents (we don't strictly need to disable EmailUserEvents
516  // in this case but we will as it seems likely that MailServer will also
517  // be screwed up if maintainer is).
518  text_t::const_iterator maintainer_end = configinfo.maintainer.end ();
519  text_t::const_iterator maintainer_here = findchar ((text_t::const_iterator)configinfo.maintainer.begin(),
520                             maintainer_end, '@');
521  if (maintainer_here == maintainer_end) {
522    configinfo.EmailEvents = Disabled;
523    configinfo.EmailUserEvents = Disabled;
524  } else {
525    // if MailServer isn't set it should default to mail.maintainer-domain
526    if (configinfo.MailServer.empty()) {
527      configinfo.MailServer = "mail." + substr (maintainer_here+1, maintainer_end);
528    }
529  }
530
531  // init the actions
532  actionptrmap::iterator actionhere = actions.begin ();
533  actionptrmap::iterator actionend = actions.end ();
534  while (actionhere != actionend) {
535    if (((*actionhere).second.a == NULL) ||
536    !(*actionhere).second.a->init(logout)) return false;
537    ++actionhere;
538  }
539
540  // init the protocols
541  recptprotolistclass::iterator protohere = protocols.begin ();
542  recptprotolistclass::iterator protoend = protocols.end ();
543  while (protohere != protoend) {
544    comerror_t err;   
545    if (((*protohere).p == NULL) ||
546    !(*protohere).p->init(err, logout)) return false;
547    ++protohere;
548  }
549
550  // init the browsers
551  browserptrmap::iterator browserhere = browsers.begin ();
552  browserptrmap::iterator browserend = browsers.end ();
553  while (browserhere != browserend) {
554    if (((*browserhere).second.b == NULL) ||
555    !(*browserhere).second.b->init(logout)) return false;
556    ++browserhere;
557  }
558
559  return true;
560}
561
562// get the default encoding for the given language - if it fails for any
563// reason return ""
564text_t receptionist::get_default_encoding (const text_t &language) {
565 
566  // make sure language is valid
567  if (configinfo.languages.find(language) == configinfo.languages.end()) return "";
568
569  text_t default_encoding = configinfo.languages[language].defaultencoding;
570
571  // make sure the encoding is valid
572  if (converters.find(default_encoding) == converters.end()) {
573    // we don't support the encoding specified as default for this language
574    if (configinfo.encodings.size()==1) {
575      // only 1 encoding specified in main.cfg, so use it
576      return configinfo.encodings.begin()->second;
577    }
578    return "";
579  }
580
581  return default_encoding;
582}
583
584// parse_cgi_args parses cgi arguments into an argument class.
585// This function should be called for each page request. It returns false
586// if there was a major problem with the cgi arguments.
587bool receptionist::parse_cgi_args (const text_t &argstr,
588                   fileupload_tmap &fileuploads,
589                   cgiargsclass &args,
590                   ostream &logout, text_tmap &fcgienv) {
591
592  // get an initial list of cgi arguments
593  args.clear();
594  split_cgi_args (argsinfo, argstr, args);
595
596  // expand the compressed argument (if there was one)
597  if (!expand_save_args (argsinfo, configinfo.saveconf, args, logout)) return false;
598
599  // add the defaults
600  add_default_args (argsinfo, args, logout);
601
602  // add any file upload arguments
603  add_fileupload_args(argsinfo, args, fileuploads, logout);
604
605  // get the cookie
606  if (configinfo.usecookies) get_cookie(args["z"], fcgienv);
607 
608  // if we're changing languages, set the encoding to the default for the new language
609  if (args["nl"] == "1") {
610    args["nw"] = get_default_encoding(args["l"]);
611  }
612
613  // get the input encoding
614  // if encoding isn't set, set it to the default for the current language
615  if ((args.getarg("w") == NULL) || args["w"].empty()) {
616    args["w"] = get_default_encoding(args["l"]);
617  }
618
619  text_t &arg_w = args["w"];
620
621  inconvertclass defaultinconvert;
622  inconvertclass *inconvert = converters.get_inconverter (arg_w);
623  if (inconvert == NULL) inconvert = &defaultinconvert;
624
625  // see if the next page will have a different encoding
626  if (args.getarg("nw") != NULL) arg_w = args["nw"];
627
628  // convert arguments which aren't in unicode to unicode
629  args_tounicode (args, *inconvert);
630
631
632  // decide on the output conversion class (needed for checking the external
633  // cgi arguments)
634  rzwsoutconvertclass defaultoutconverter;
635  rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
636  if (outconverter == NULL) outconverter = &defaultoutconverter;
637  outconverter->reset();
638
639  // check the main cgi arguments
640  if (!check_mainargs (args, logout)) return false;
641
642  // check the arguments for the action
643  action *a = actions.getaction (args["a"]);
644  if (a != NULL) {
645    if (!a->check_cgiargs (argsinfo, args, &protocols, logout)) return false;
646  } else {
647    // the action was not found!!
648    outconvertclass text_t2ascii;
649    logout << text_t2ascii << "Error: the action \"" << args["a"]
650       << "\" could not be found.\n";
651    return false;
652  }
653
654  // check external cgi arguments for each action
655  actionptrmap::iterator actionhere = actions.begin ();
656  actionptrmap::iterator actionend = actions.end ();
657  while (actionhere != actionend) {
658    assert ((*actionhere).second.a != NULL);
659    if ((*actionhere).second.a != NULL) {
660      if (!(*actionhere).second.a->check_external_cgiargs (argsinfo, args, *outconverter,
661                               configinfo.saveconf, logout))
662    return false;
663    }
664    ++actionhere;
665  }
666
667  // the action might have changed but we will assume that
668  // the cgiargs were checked properly when the change was made
669
670  return true;
671}
672
673
674// Returns true if cookie already existed, false if it was generated
675bool receptionist::get_cookie (text_t &cookie, text_tmap &fcgienv)
676{
677  // See if we can get the GSDL_UID cookie
678  text_t cookiestring = gsdl_getenv ("HTTP_COOKIE", fcgienv);
679  if (!cookiestring.empty()) // This should really be handled by the findword function...
680  {
681    // Check if the cookie contains GSDL_UID
682    text_t gsdl_uid = "GSDL_UID=";
683    text_t::iterator gsdl_uid_start = findword(cookiestring.begin(), cookiestring.end(), gsdl_uid);
684    if (gsdl_uid_start != cookiestring.end())
685    {
686      // Yes, so extract its value
687      cookie = substr(gsdl_uid_start + gsdl_uid.size(), findchar(gsdl_uid_start + gsdl_uid.size(), cookiestring.end(), ';'));
688      return true;
689    }
690  }
691
692  // Generate a new key "[host]-[epoch time]", e.g. test.com-1256764496
693  cookie.clear();
694  text_t host = gsdl_getenv("REMOTE_ADDR", fcgienv);
695  time_t ttime = time(NULL);
696  if (!host.empty())
697  {
698    cookie += host;
699    cookie.push_back ('-');
700  }
701  cookie += text_t(ttime);
702
703  return false;
704}
705
706
707// Same as above but just tests if cookie exists
708bool receptionist::get_cookie (text_tmap &fcgienv)
709{
710  text_t cookie_jar = "";
711  return get_cookie(cookie_jar, fcgienv);
712}
713
714
715bool receptionist::log_cgi_args (cgiargsclass &args, ostream &logout, text_tmap &fcgienv) {
716
717  // see if we want to log the cgi arguments
718  if (!configinfo.logcgiargs) return true;
719 
720  text_t host = gsdl_getenv ("REMOTE_HOST", fcgienv);
721  text_t script_name = gsdl_getenv ("SCRIPT_NAME", fcgienv);
722  if (host.empty()) host = gsdl_getenv ("REMOTE_ADDR", fcgienv);
723  text_t browser = gsdl_getenv ("HTTP_USER_AGENT", fcgienv);
724
725  cgiargsclass::const_iterator args_here = args.begin();
726  cgiargsclass::const_iterator args_end = args.end();
727
728  text_t argstr;
729  bool first = true;
730  while (args_here != args_end) {
731    if (!first) argstr += ", ";
732    argstr += (*args_here).first + "=" + (*args_here).second.value;
733    first = false;
734    ++args_here;
735  }
736
737  text_t logfile = filename_cat (configinfo.dbhome, "etc", "usage.txt");
738
739  text_t logstr = script_name;
740  logstr += " " + host;
741  logstr += " [";
742  if (configinfo.LogDateFormat == UTCTime) {
743    logstr += get_date (false);
744  } else if (configinfo.LogDateFormat == Absolute) {
745    time_t ttime = time(NULL);
746    logstr += ttime;
747  } else {
748    // LocalTime
749    logstr += get_date (true);
750  }
751  logstr += "] (" + argstr + ") \"";
752  logstr += browser;
753  logstr += "\"\n";
754
755  return append_logstr (logfile, logstr, logout);
756}
757
758bool receptionist::append_logstr (const text_t &filename, const text_t &logstr,
759                  ostream &logout) {
760
761  char *lfile = filename.getcstr();
762
763  int fd = open(lfile, O_WRONLY | O_APPEND);
764 
765  if (fd == -1) {
766    logout << "Error: Couldn't open file " << lfile << "\n";
767    delete []lfile;
768    return false;
769  }
770
771  // lock_val is set to 0 if file is locked successfully
772  int lock_val = 1;
773  GSDL_LOCK_FILE (fd);
774  if (lock_val == 0) {
775    // Write the string out in UTF-8
776    text_t tmp_log_str_utf8 = to_utf8(logstr);
777    char *buffer = tmp_log_str_utf8.getcstr();
778    size_t num_chars = tmp_log_str_utf8.size();
779    write(fd, buffer, num_chars);
780    GSDL_UNLOCK_FILE (fd);
781    delete []buffer;
782  } else {
783    logout << "Error: Couldn't lock file " << lfile << "\n";
784    close(fd);
785    delete []lfile;
786    return false;
787  }
788
789  close(fd);
790       
791  delete []lfile;
792  return true;
793}
794
795text_t receptionist::expandmacros (const text_t &astring, cgiargsclass &args,
796                   ostream &logout) {
797  text_t outstring;
798  outconvertclass text_t2ascii;
799
800  action *a = actions.getaction (args["a"]);
801  if (a != NULL)
802  {
803    prepare_page (a, args, text_t2ascii, logout);
804  }
805  disp.expandstring (displayclass::defaultpackage, astring, outstring);
806  return outstring;
807}
808
809// produce_cgi_page will call get_cgihead_info and
810// produce_content in the appropriate way to output a cgi header and
811// the page content (if needed). If a page could not be created it
812// will return false
813bool receptionist::produce_cgi_page (cgiargsclass &args, ostream &contentout,
814                     ostream &logout, text_tmap &fcgienv) {
815  outconvertclass text_t2ascii;
816
817  response_t response;
818  text_t response_data;
819
820  // produce cgi header
821  get_cgihead_info (args, response, response_data, logout, fcgienv);
822  if (response == location) {
823    // location response (url may contain macros!!)
824    response_data = expandmacros (response_data, args, logout);
825
826    contentout << text_t2ascii << "Location: " << response_data << "\n\n";
827    contentout << flush;
828
829    return true;
830  } else if (response == content) {
831    // content response
832
833#ifdef GSDL_NOCACHE
834    contentout << "Expires: Mon, 26 Jul 1997 05:00:00 GMT\n"; // date in the past
835    tm *tm_ptr = NULL;
836    time_t t = time(NULL);
837    tm_ptr = gmtime (&t);
838    if (tm_ptr != NULL) {
839      char *timestr = new char[128];
840      strftime (timestr, 128, "%a, %d %b %Y %H:%M:%S", tm_ptr);
841      contentout << "Last-Modified: " << timestr << " GMT\n"; // always modified
842      delete []timestr;
843    }
844    contentout << "Cache-Control: no-cache, must-revalidate\n"; // HTTP/1.1
845    contentout << "Pragma: no-cache\n"; // HTTP/1.0
846
847#else
848
849    // use the later of build.cfg and collect.cfg modification times
850    // as the Last-Modified: header, for caching values
851    struct stat file_info;
852    time_t latest=0;
853
854    text_t collectname="";
855    collectname=args["c"];
856    if (collectname != "") {
857
858      text_t collecthome;
859      if (!configinfo.collecthome.empty()) {
860    collecthome = configinfo.collecthome;
861      }   
862      else {
863    collecthome=filename_cat(configinfo.gsdlhome,"collect");
864      }
865      text_t collectdir=filename_cat(collecthome,collectname);
866     
867      text_t buildcfg=filename_cat(collectdir,"index");
868      buildcfg=filename_cat(buildcfg,"build.cfg");
869      char *buildcfg_ptr=buildcfg.getcstr();
870      text_t collectcfg=filename_cat(collectdir,"etc");
871      collectcfg=filename_cat(collectcfg,"collect.cfg");
872      char *collectcfg_ptr=collectcfg.getcstr();
873
874      if (stat(buildcfg_ptr, &file_info)) {
875    // we got an error. Currently don't handle error :(
876    //  logout <<
877      } else {
878    latest=file_info.st_mtime;
879      }
880   
881      if (stat(collectcfg_ptr, &file_info)) {
882    // error - unhandled for now
883      } else {
884    if (latest<file_info.st_mtime) latest=file_info.st_mtime;
885      }
886      delete []buildcfg_ptr;
887      delete []collectcfg_ptr;
888
889      if (latest>0) {
890    // print out modified time, "DDD, dd MMM YYYY hh:mm:ss" format
891    // c library takes care of mem for this string... (has \n at end!!!!)
892    // latest is currently local time, convert to UTC.
893    struct tm* utc_latest;
894    utc_latest=gmtime(&latest);
895    contentout << "Last-Modified: " << asctime(utc_latest);
896      }
897    } // end of collection != ""
898
899#endif
900
901    contentout << text_t2ascii << "Content-type: " << response_data << "\n\n";
902  }
903  else if (response == undecided_location) {
904    // Wait until later to output the target location
905    // Used for the "I'm feeling lucky" functionality
906  }
907  else {
908    // unknown response
909    logout << "Error: get_cgihead_info returned an unknown response type.\n";
910    return false;
911  }
912
913  // produce cgi page
914  if (!produce_content (args, contentout, logout)) return false;
915
916  // flush contentout
917  contentout << flush;
918  return true;
919}
920
921
922// get_cgihead_info determines the cgi header information for
923// a set of cgi arguments. If response contains location then
924// response_data contains the redirect address. If reponse
925// contains content then reponse_data contains the content-type.
926// Note that images can now be produced by the receptionist.
927// Note also, alternative for get_cgihead_info below which
928// stores the information in a text_tmap so it is more easily digested
929
930void receptionist::get_cgihead_info (cgiargsclass &args, response_t &response,
931                     text_t &response_data, ostream &logout,
932                     text_tmap &fcgienv) {
933  outconvertclass text_t2ascii;
934
935  // get the action
936  action *a = actions.getaction (args["a"]);
937  if (a != NULL) {
938    a->get_cgihead_info (args, &protocols, response, response_data, logout);
939
940  } else {
941    // the action was not found!!
942    logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \""
943       << args["a"] << "\" could not be found.\n";
944    response = content;
945    response_data = "text/html";
946  }
947
948  // add the encoding information
949  if (response == content) {
950    if (converters.find(args["w"]) != converters.end()) {
951      response_data += "; charset=" + args["w"];
952    } else {
953      // default to latin 1
954      response_data += "; charset=ISO-8859-1";
955    }
956
957    // add cookie if required
958    if (configinfo.usecookies && !get_cookie(fcgienv))
959      response_data += "\nSet-Cookie: GSDL_UID=" + args["z"]
960    + "; expires=Fri, 25-Dec-2037 00:00:00 GMT";
961  }
962}
963
964
965// Alternative version of get_cgihead_info, stores fielded infomation
966// in text_tmap rather than concatenated string
967void receptionist::get_cgihead_info (cgiargsclass &args, text_tmap &headers,
968                     ostream &logout, text_tmap &fcgienv) {
969
970  response_t response;
971  text_t response_data;
972
973  // get the action
974  action *a = actions.getaction (args["a"]);
975  if (a != NULL) {
976    a->get_cgihead_info (args, &protocols, response, response_data, logout);
977
978  } else {
979    // the action was not found!!
980    outconvertclass text_t2ascii;
981    logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \""
982       << args["a"] << "\" could not be found.\n";
983    response = content;
984    response_data = "text/html";
985  }
986
987  if (response == location) {
988    response_data = expandmacros(response_data, args, logout);
989    headers["Location"] = response_data;
990    return;
991  }
992
993  // add the encoding information
994  if (response == content) {
995
996    if (converters.find(args["w"]) != converters.end()) {
997      headers["content-encoding"] = args["w"];
998      response_data += "; charset=" + args["w"];
999    } else {
1000      // default to utf-8
1001      headers["content-encoding"] = "utf-8";
1002      response_data += "; charset=utf-8";
1003    }
1004
1005    headers["content-type"] = response_data;
1006
1007  }
1008
1009}
1010
1011
1012
1013// produce the page content
1014bool receptionist::produce_content (cgiargsclass &args, ostream &contentout,
1015                    ostream &logout) {
1016
1017  // decide on the output conversion class
1018  text_t &arg_w = args["w"];
1019  rzwsoutconvertclass defaultoutconverter;
1020  rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
1021  if (outconverter == NULL) outconverter = &defaultoutconverter;
1022  outconverter->reset();
1023
1024  // needed for 16-bit unicode only - big endian marker 0xfeff (RFC 2781)
1025  if (arg_w=="utf-16be") {
1026    contentout << '\xfe' << '\xff' ;
1027  }
1028
1029  recptproto *collectproto = protocols.getrecptproto (args["c"], logout);
1030  if (collectproto != NULL) {
1031    // get browsers to process OID
1032    text_t OID = args["d"];
1033    if (OID.empty()) OID = args["cl"];
1034    if (!OID.empty()) {
1035      text_tset metadata;
1036      text_tarray OIDs;
1037      OIDs.push_back (OID);
1038      if (!is_top(OID)) OIDs.push_back (OID + ".pr");
1039      FilterResponse_t response;
1040      metadata.insert ("childtype");
1041      if (get_info (OIDs, args["c"], args["l"], metadata, false, collectproto, response, logout)) {
1042    text_t classifytype;
1043    if (!response.docInfo[0].metadata["childtype"].values[0].empty())
1044      classifytype = response.docInfo[0].metadata["childtype"].values[0];
1045    else if (!is_top (OID)) {
1046      // not sure why this is occasionally not set, but it will
1047      // cause a segfault... possibly if built with no_text? jrm21
1048      if (response.docInfo[1].metadata.find("childtype")
1049          == response.docInfo[1].metadata.end()) {
1050        cerr << "receptionist: no childtype element in metadata map!"
1051         << endl;
1052      } else {
1053        if (!response.docInfo[1].metadata["childtype"].values[0].empty())
1054          classifytype = response.docInfo[1].metadata["childtype"].values[0];
1055      }
1056    }
1057    browserclass *b = browsers.getbrowser (classifytype);
1058    b->processOID (args, collectproto, logout);
1059      }
1060    }
1061 
1062    // translate "d" and "cl" arguments if required
1063    translate_OIDs (args, collectproto, logout);
1064  }
1065 
1066  // produce the page using the desired action
1067  action *a = actions.getaction (args["a"]);
1068  if (a != NULL) {
1069    if (a->uses_display(args)) prepare_page (a, args, (*outconverter), logout);
1070    if (!a->do_action (args, &protocols, &browsers, disp, (*outconverter), contentout, logout))
1071      return false;
1072  } else {
1073    // the action was not found!!
1074    outconvertclass text_t2ascii;
1075
1076    logout << text_t2ascii << "Error receptionist::produce_content: the action \""
1077       << args["a"] << "\" could not be found.\n";
1078   
1079    contentout << (*outconverter)
1080           << "<html>\n"
1081           << "<head>\n"
1082           << "<title>Error</title>\n"
1083           << "</head>\n"
1084           << "<body>\n"
1085           << "<h2>Oops!</h2>\n"
1086           << "Undefined Page. The action \""
1087           << args["a"] << "\" could not be found.\n"
1088           << "</body>\n"
1089           << "</html>\n";
1090  }
1091  return true;
1092}
1093
1094
1095// returns the compressed argument ("e") corresponding to the argument
1096// list. This can be used to save preferences between sessions.
1097text_t receptionist::get_compressed_arg (cgiargsclass &args, ostream &logout) {
1098  // decide on the output conversion class
1099  text_t &arg_w = args["w"];
1100  rzwsoutconvertclass defaultoutconverter;
1101  rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
1102  if (outconverter == NULL) outconverter = &defaultoutconverter;
1103  outconverter->reset();
1104
1105  text_t compressed_args;
1106  if (compress_save_args (argsinfo, configinfo.saveconf, args,
1107              compressed_args, *outconverter, logout))
1108    return compressed_args;
1109
1110  return g_EmptyText;
1111}
1112
1113
1114// will read in all the macro files. If one is not found an
1115// error message will be written to logout and the method will
1116// return false.
1117bool receptionist::read_macrofiles (ostream &logout) {
1118  outconvertclass text_t2ascii;
1119
1120  // redirect the error output to logout
1121  ostream *savedlogout = disp.setlogout (&logout);
1122
1123  // unload any macros that were previously loaded - this allows us to call
1124  // this function a second time to reload all the macro files (useful for
1125  // reading in changed macro files in server versions of greenstone)
1126  disp.unloaddefaultmacros();
1127
1128  // load up the default macro files, the collection directory
1129  // is searched first for the file (if this is being used in
1130  // collection specific mode) and then the main directory(s)
1131  text_t colmacrodir = filename_cat (configinfo.collectdir, "macros");
1132
1133  text_tset maindirs;
1134  text_t gsdlmacrodir = filename_cat (configinfo.gsdlhome, "macros");
1135  maindirs.insert (gsdlmacrodir);
1136  colinfo_tmap::iterator colhere = configinfo.collectinfo.begin();
1137  colinfo_tmap::iterator colend = configinfo.collectinfo.end();
1138  while (colhere != colend) {
1139    if (!((*colhere).second.gsdl_gsdlhome).empty()) {
1140      gsdlmacrodir = filename_cat ((*colhere).second.gsdl_gsdlhome, "macros");
1141      maindirs.insert (gsdlmacrodir);
1142    }
1143    ++colhere;
1144  }
1145
1146  text_tset::iterator arrhere = configinfo.macrofiles.begin();
1147  text_tset::iterator arrend = configinfo.macrofiles.end();
1148  text_t filename;
1149  while (arrhere != arrend) {
1150    bool foundfile = false;
1151
1152    // try in the collection directory if this is being
1153    // run in collection specific mode
1154    if (!configinfo.collection.empty()) {
1155      filename = filename_cat (colmacrodir, *arrhere);
1156      if (file_exists (filename)) {
1157    disp.loaddefaultmacros(filename);
1158    foundfile = true;
1159      }
1160    }
1161
1162    // if we haven't found the macro file yet try in
1163    // the main macro directory(s)
1164    // if file is found in more than one main directory
1165    // we'll load all copies
1166    if (!foundfile) {
1167      text_tset::const_iterator dirhere = maindirs.begin();
1168      text_tset::const_iterator dirend = maindirs.end();
1169      while (dirhere != dirend) {
1170    filename = filename_cat (*dirhere, *arrhere);
1171    if (file_exists (filename)) {
1172      disp.loaddefaultmacros(filename);
1173      foundfile = true;
1174    }
1175    ++dirhere;
1176      }
1177    }
1178
1179    // see if we found the file or not
1180    if (!foundfile) {
1181      logout << text_t2ascii
1182         << "Error: the macro file \"" << *arrhere << "\" could not be found.\n";
1183      if (configinfo.collection.empty()) {
1184    text_t dirs;
1185    joinchar (maindirs, ", ", dirs);
1186    logout << text_t2ascii
1187           << "It should be in either of the following directories ("
1188           << dirs << ").\n\n";
1189
1190      } else {
1191    logout << text_t2ascii
1192           << "It should be in either " << colmacrodir << " or in "
1193           << gsdlmacrodir << ".\n\n";
1194      }
1195      // don't crap out if a macro file is missing
1196      //disp.setlogout (savedlogout);
1197      //return false;
1198    }
1199    ++arrhere;
1200  }
1201
1202  // success
1203
1204  // reset logout to what it was
1205  disp.setlogout (savedlogout);
1206  return true;
1207}
1208
1209
1210
1211
1212// Go through the list of macro files looking to see
1213// if any exist in the collectoin specific area.  If they
1214// do then read them in and add them to the set of existing
1215// current macros
1216
1217void receptionist::read_collection_macrofiles (const text_t& collection, ostream &logout)
1218{
1219  outconvertclass text_t2ascii;
1220
1221  // disp.unloadcollectionmacros();
1222
1223  // redirect the error output to logout
1224  ostream *savedlogout = disp.setlogout (&logout);
1225
1226  text_t colmacrodir
1227    = filename_cat (configinfo.collecthome,collection, "macros");
1228
1229  if (directory_exists (colmacrodir)) {
1230
1231    text_tset::iterator arrhere = configinfo.macrofiles.begin();
1232    text_tset::iterator arrend = configinfo.macrofiles.end();
1233    text_t filename;
1234    while (arrhere != arrend) {
1235
1236      filename = filename_cat (colmacrodir, *arrhere);
1237      if (file_exists (filename)) {
1238    disp.loadcollectionmacros(filename);
1239      }
1240     
1241      ++arrhere;
1242    }
1243  }
1244
1245  // reset logout to what it was
1246  disp.setlogout (savedlogout);
1247}
1248
1249
1250
1251
1252// check_mainargs will check all the main arguments. If a major
1253// error is found it will return false and no cgi page should
1254// be created using the arguments.
1255
1256bool receptionist::check_mainargs (cgiargsclass &args, ostream &logout) {
1257   
1258   if(configinfo.site_auth)
1259      {
1260     args["uan"] = "1";
1261     args["ug"] = configinfo.site_group;
1262      }
1263   
1264   
1265   // if this receptionist is running in collection dependant mode
1266   // then it should always set the collection argument to the
1267   // collection
1268   if (!configinfo.collection.empty()) args["c"] = configinfo.collection;
1269   
1270   // if current collection uses ccscols make sure
1271   // "ccs" argument is set and make "cc" default to
1272   // all collections in "ccs"
1273   if (args["a"] != "config" && !args["c"].empty()) {
1274     
1275      text_t &arg_c = args["c"];
1276      recptproto *collectproto = protocols.getrecptproto (arg_c, logout);
1277      if (collectproto == NULL) {
1278     // oops, this collection isn't valid
1279     outconvertclass text_t2ascii;
1280     logout << text_t2ascii << "ERROR: Invalid collection: " << arg_c << "\n";
1281     // args["c"].clear();
1282     
1283      } else {
1284     
1285     ColInfoResponse_t *cinfo = get_collectinfo_ptr (collectproto, arg_c, logout);
1286     
1287     if(cinfo->authenticate == "collection")
1288        {
1289           args["uan"] = "1";
1290           args["ug"] = cinfo->auth_group;
1291        }
1292     
1293     
1294      if (cinfo != NULL) {
1295     if (!cinfo->ccsCols.empty()) {
1296        args["ccs"] = 1;
1297        if (args["cc"].empty()) {
1298           text_tarray::const_iterator col_here = cinfo->ccsCols.begin();
1299           text_tarray::const_iterator col_end = cinfo->ccsCols.end();
1300           bool first = true;
1301           while (col_here != col_end) {
1302          // make sure it's a valid collection
1303          if (protocols.getrecptproto (*col_here, logout) != NULL) {
1304             if (!first) args["cc"].push_back (',');
1305             args["cc"] += *col_here;
1306             first = false;
1307          }
1308          ++col_here;
1309        }
1310      }
1311    }
1312      } else {
1313    logout << "ERROR (receptionist::check_mainargs): get_collectinfo_ptr returned NULL\n";
1314      }
1315    }
1316  }
1317
1318  // argument "v" can only be 0 or 1. Use the default value
1319  // if it is out of range
1320  int arg_v = args.getintarg ("v");
1321  if (arg_v != 0 && arg_v != 1) {
1322    cgiarginfo *vinfo = argsinfo.getarginfo ("v");
1323    if (vinfo != NULL) args["v"] = vinfo->argdefault;
1324  }
1325
1326  // argument "f" can only be 0 or 1. Use the default value
1327  // if it is out of range
1328  int arg_f = args.getintarg ("f");
1329  if (arg_f != 0 && arg_f != 1) {
1330    cgiarginfo *finfo = argsinfo.getarginfo ("f");
1331    if (finfo != NULL) args["f"] = finfo->argdefault;
1332  }
1333
1334  return true;
1335}
1336
1337// translate_OIDs translates the "d" and "cl" arguments to their correct values
1338// if they use the tricky ".fc", ".lc" type syntax.
1339void receptionist::translate_OIDs (cgiargsclass &args, recptproto *collectproto,
1340                   ostream &logout) {
1341
1342  FilterResponse_t response;
1343  FilterRequest_t request;
1344  comerror_t err;
1345  text_t &arg_d = args["d"];
1346  text_t &arg_cl = args["cl"];
1347  text_t &collection = args["c"];
1348 
1349  // do a call to translate OIDs if required
1350  request.filterName = "NullFilter";
1351  request.filterResultOptions = FROID;
1352  if (!arg_d.empty() && needs_translating (arg_d)) {
1353    request.docSet.push_back (arg_d);
1354    collectproto->filter (collection, request, response, err, logout);
1355    arg_d = response.docInfo[0].OID;
1356    request.clear();
1357  }
1358  // we'll also check here that the "cl" argument has a "classify" doctype
1359  // (in case ".fc" or ".lc" have screwed up)
1360  if (needs_translating (arg_cl)) {
1361    request.fields.insert ("doctype");
1362    request.docSet.push_back (arg_cl);
1363    request.filterResultOptions = FRmetadata;
1364    collectproto->filter (collection, request, response, err, logout);
1365    // set to original value (without .xx stuff) if doctype isn't "classify"
1366    if (response.docInfo[0].metadata["doctype"].values[0] != "classify")
1367      strip_suffix (arg_cl);
1368    else
1369      arg_cl = response.docInfo[0].OID;
1370  }
1371}
1372
1373// prepare_page sets up page parameters, sets display macros
1374// and opens the page ready for output
1375void receptionist::prepare_page (action *a, cgiargsclass &args,
1376                 outconvertclass &outconvert,
1377                 ostream &logout) {
1378  // set up page parameters
1379  text_t pageparams;
1380  bool first = true;
1381
1382  text_tmap::iterator params_here = configinfo.pageparams.begin();
1383  text_tmap::iterator params_end = configinfo.pageparams.end();
1384  while (params_here != params_end) {
1385    // page params are those from main.cfg (eg pageparam v 0) plus
1386    // two defaults set in recptconf.clear() (c="" and l=en)
1387    // This used to check if the current value of the page param
1388    // == the default value, then don't add in it the list
1389    // but if l=en, and there is a macro with [l=en], then it doesn't
1390    // find it.
1391    // so now all page params will go into the list. I assume this will
1392    // mean more attempts to find each macro, but nothing worsee than
1393    // that.  --kjdon
1394    //if (args[(*params_here).first] != (*params_here).second) {
1395      if (first)
1396    first = false;
1397      else
1398    pageparams += ",";
1399
1400      pageparams += (*params_here).first;
1401      pageparams += "=";
1402      pageparams += args[(*params_here).first];
1403      // }
1404   
1405    ++params_here;
1406  }
1407 
1408
1409  // open the page
1410  disp.openpage(pageparams, configinfo.macroprecedence);
1411
1412  disp.unloadcollectionmacros();
1413
1414  text_t collection = args["c"];
1415  if (!collection.empty()) {
1416    read_collection_macrofiles(collection,logout);
1417  }
1418
1419  // define external macros for each action
1420  actionptrmap::iterator actionhere = actions.begin ();
1421  actionptrmap::iterator actionend = actions.end ();
1422
1423  while (actionhere != actionend) {
1424    assert ((*actionhere).second.a != NULL);
1425    if ((*actionhere).second.a != NULL) {
1426      (*actionhere).second.a->define_external_macros (disp, args, &protocols, logout);
1427    }
1428    ++actionhere;
1429  }
1430
1431
1432  // define internal macros for the current action
1433  a->define_internal_macros (disp, args, &protocols, logout);
1434 
1435  // define general macros. the defining of general macros is done here so that
1436  // the last possible version of the cgi arguments are used
1437  define_general_macros (args, outconvert, logout);
1438}
1439
1440
1441void receptionist::define_general_macros (cgiargsclass &args, outconvertclass &/*outconvert*/,
1442                      ostream &logout) {
1443
1444  text_t &collection = args["c"];
1445
1446  disp.setmacro ("gsdlhome", displayclass::defaultpackage, dm_safe(configinfo.gsdlhome));
1447  disp.setmacro ("gwcgi", displayclass::defaultpackage, configinfo.gwcgi);
1448  disp.setmacro ("httpweb", displayclass::defaultpackage, configinfo.httpweb);
1449  disp.setmacro ("httpprefix", displayclass::defaultpackage, configinfo.httpprefix);
1450
1451  // This perhaps should be done with gsdl_getenv() which takes the
1452  // 'fcgienv' parameter (for fast-cgi), however if changed to this, this
1453  // additional parameter would need to be passed into here (not sure how
1454  // that would effect any virtual inheritence going on), or else moved
1455  // higher up the calling to chain to, e.g., produce_cgi_page()
1456
1457  char* remote_addr = getenv("REMOTE_ADDR");
1458
1459  if (remote_addr != NULL) {
1460     text_t remote_addr_t(remote_addr);
1461     disp.setmacro ("remoteAddr", displayclass::defaultpackage, remote_addr_t);
1462  }
1463
1464  char* remote_host = getenv("REMOTE_HOST");
1465  if (remote_host != NULL) {
1466     text_t remote_host_t(remote_host);
1467     disp.setmacro ("remoteHost", displayclass::defaultpackage, remote_host_t);
1468  }
1469  else {
1470     // setting this to "unknown" is easier to deal with in format/macro
1471     // statements, rather than testing for _remoteHost_
1472     disp.setmacro ("remoteHost", displayclass::defaultpackage, "unknown");
1473  }
1474
1475
1476  text_t compressedoptions = get_compressed_arg(args, logout);
1477  disp.setmacro ("compressedoptions", displayclass::defaultpackage, dm_safe(compressedoptions));
1478  // need a decoded version of compressedoptions for use within forms
1479  // as browsers encode values from forms before sending to server
1480  // (e.g. %25 becomes %2525)
1481  decode_cgi_arg (compressedoptions);
1482  if (args["w"] == "utf-8") { // if the encoding was utf-8, then compressed options was utf-8, and we need unicode.
1483    // if encoding wasn't utf-8, then compressed opotions may be screwed up, but seems to work for 8 bit encodings?
1484    compressedoptions = to_uni(compressedoptions);
1485  }
1486  disp.setmacro ("decodedcompressedoptions", displayclass::defaultpackage, dm_safe(compressedoptions));
1487
1488#if defined (__WIN32__)
1489  disp.setmacro ("win32", displayclass::defaultpackage, "1");
1490#endif
1491
1492  // set _cgiargX_ macros for each cgi argument
1493  cgiargsclass::const_iterator argshere = args.begin();
1494  cgiargsclass::const_iterator argsend = args.end();
1495  while (argshere != argsend) {
1496    if (((*argshere).first == "q") ||
1497    ((*argshere).first == "qa") ||
1498    ((*argshere).first == "qtt") ||
1499    ((*argshere).first == "qty") ||
1500    ((*argshere).first == "qp") ||
1501    ((*argshere).first == "qpl") ||
1502    ((*argshere).first == "qr") ||
1503    ((*argshere).first == "q2"))
1504      // need to escape special characters from query string
1505      disp.setmacro ("cgiarg" + (*argshere).first,
1506             displayclass::defaultpackage, html_safe((*argshere).second.value));
1507    else if ((*argshere).first == "hp") {
1508      disp.setmacro ("cgiarg" + (*argshere).first, displayclass::defaultpackage, (*argshere).second.value);
1509    } else {
1510      disp.setmacro ("cgiarg" + (*argshere).first, displayclass::defaultpackage, dm_safe((*argshere).second.value));
1511    }
1512    ++argshere;
1513  }
1514
1515  // set collection specific macros
1516  if (!collection.empty()) {
1517    recptproto *collectproto = protocols.getrecptproto (collection, logout);
1518    if (collectproto != NULL) {
1519      FilterResponse_t response;
1520      text_tset metadata;
1521      get_info ("collection", collection, args["l"], metadata, false,
1522        collectproto, response, logout);
1523     
1524      if (!response.docInfo[0].metadata.empty()) {
1525    MetadataInfo_tmap::const_iterator here = response.docInfo[0].metadata.begin();
1526    MetadataInfo_tmap::const_iterator end = response.docInfo[0].metadata.end();
1527    while (here != end) {
1528      if (((*here).first != "haschildren") && ((*here).first != "hasnext") &&
1529          ((*here).first != "hasprevious")) {
1530        // check for args in form name:lang
1531        text_t name = g_EmptyText;
1532        text_t lang = g_EmptyText;
1533        bool colonfound=false;
1534        text_t::const_iterator a = (*here).first.begin();
1535        text_t::const_iterator b = (*here).first.end();
1536        while (a !=b) {
1537          if (*a==':') {
1538        colonfound=true;
1539          }
1540          else {
1541        if (colonfound)
1542          lang.push_back(*a);
1543        else name.push_back(*a);
1544          }
1545          ++a;
1546        }
1547        if (!lang.empty()) {
1548          if (args["l"]==lang) {
1549        disp.setcollectionmacro(displayclass::defaultpackage, name, "", (*here).second.values[0]);
1550          }
1551        }
1552        else { // the default one
1553          disp.setcollectionmacro(displayclass::defaultpackage, (*here).first,  "", (*here).second.values[0]);
1554        }
1555      }
1556      ++here;
1557    }
1558      }
1559
1560      text_t iconcollection;
1561      disp.expandstring (displayclass::defaultpackage, "_iconcollection_", iconcollection);
1562      if (!iconcollection.empty())
1563    {
1564      ColInfoResponse_t cinfo;
1565      comerror_t err;
1566      collectproto->get_collectinfo (collection, cinfo, err, logout);
1567      if (iconcollection[0]=='/' && !cinfo.httpdomain.empty())
1568        {
1569          // local but with full path
1570          iconcollection = "http://" + cinfo.httpdomain + iconcollection;
1571          disp.setmacro("iconcollection", displayclass::defaultpackage, iconcollection);
1572        }
1573    }
1574    }
1575  }
1576 
1577  if (!collection.empty()) {
1578    ColInfoResponse_t cinfo;
1579    comerror_t err;
1580    recptproto *collectproto = protocols.getrecptproto (collection, logout);
1581    if (collectproto != NULL) {
1582      collectproto->get_collectinfo (collection, cinfo, err, logout);
1583
1584
1585      // This part of the code used to use "cinfo.httpprefix" regardless
1586      // of the value it contained.  Since
1587      // this can come back with an empty (in the case of gsdl_mod), the
1588      // URL produced was invalid.
1589      //
1590      // Changed to test for empty first, and use configinfo.httpprefix as
1591      // a "backup"
1592      //
1593      // Point to consider: since configinfo.httpprefix has been offically
1594      // set as "httpprefix" in macros, it seems to make more sense to use
1595      // always use that version and not the cinfo version at all.
1596
1597      text_t httpprefix
1598    = (!cinfo.httpprefix.empty()) ? cinfo.httpprefix : configinfo.httpprefix;
1599   
1600      text_t httpcollection;
1601      if (!cinfo.httpdomain.empty()) httpcollection = "http://";
1602      httpcollection += cinfo.httpdomain + httpprefix + "/collect/"
1603    + collection;
1604      disp.setmacro ("httpcollection", displayclass::defaultpackage,
1605             httpcollection);
1606     
1607      // as of gsdl 2.53, collect.cfg can specify macros
1608      if (cinfo.collection_macros.size() > 0) {
1609    collectionmeta_map::const_iterator this_macro=cinfo.collection_macros.begin();
1610    collectionmeta_map::const_iterator done_macro=cinfo.collection_macros.end();
1611    while (this_macro != done_macro) {
1612      text_t package = "Global";
1613      text_t macroname = this_macro->first;
1614      // if this macro name is AAA:bbb then extract the package name
1615      text_t::const_iterator thischar, donechar;
1616      thischar = macroname.begin();
1617      donechar = macroname.end();
1618      while (thischar < donechar) {
1619        if (*thischar == ':') {
1620          package = substr(macroname.begin(),thischar);
1621          macroname = substr(thischar+1,donechar);
1622          break;
1623        }
1624        ++thischar;
1625      }
1626
1627      text_tmap params_map = this_macro->second;
1628      text_tmap::const_iterator this_param = params_map.begin();
1629      text_tmap::const_iterator done_param = params_map.end();
1630      while (this_param != done_param) {
1631        disp.setcollectionmacro(package,
1632                    macroname,
1633                    this_param->first,
1634                    this_param->second);
1635        ++this_param;
1636      }
1637     
1638      ++this_macro;
1639    }
1640      } // col macros
1641    } // collectproto != NULL
1642  }
1643
1644}
1645
1646// gets collection info from cache if found or
1647// calls collection server (and updates cache)
1648// returns NULL if there's an error
1649ColInfoResponse_t *receptionist::get_collectinfo_ptr (recptproto *collectproto,
1650                              const text_t &collection,
1651                              ostream &logout) {
1652 
1653  // check the cache
1654  colinfo_tmap::iterator it = configinfo.collectinfo.find (collection);
1655  if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) {
1656    // found it
1657    return &((*it).second.info);
1658  }
1659
1660  // not cached, get info from collection server
1661  if (collectproto == NULL) {
1662    logout << "ERROR: receptionist::get_collectinfo_ptr passed null collectproto\n";
1663    return NULL;
1664  }
1665   
1666  comerror_t err;
1667  if (it == configinfo.collectinfo.end()) {
1668    collectioninfo_t cinfo;
1669    collectproto->get_collectinfo (collection, cinfo.info, err, logout);
1670    if (err != noError) {
1671      outconvertclass text_t2ascii;
1672      logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \""
1673         << get_comerror_string (err) << "\"while getting collectinfo\n";
1674      return NULL;
1675    }
1676    cinfo.info_loaded = true;
1677    configinfo.collectinfo[collection] = cinfo;
1678    return &(configinfo.collectinfo[collection].info);
1679  } else {
1680    collectproto->get_collectinfo (collection, (*it).second.info, err, logout);
1681    if (err != noError) {
1682      outconvertclass text_t2ascii;
1683      logout << text_t2ascii << "ERROR (receptionist::getcollectinfo_ptr): \""
1684         << get_comerror_string (err) << "\"while getting collectinfo\n";
1685      return NULL;
1686    }
1687    (*it).second.info_loaded = true;
1688    return &((*it).second.info);
1689  }
1690}
1691
1692// removes a collection from the cache so that the next
1693// call to get_collectinfo_ptr() for that collection will
1694// retrieve the collection info from the collection server
1695void receptionist::uncache_collection (const text_t &collection) {
1696
1697  colinfo_tmap::iterator it = configinfo.collectinfo.find (collection);
1698  if ((it != configinfo.collectinfo.end()) && ((*it).second.info_loaded)) {
1699
1700    (*it).second.info_loaded = false;
1701
1702  }
1703}
1704
1705// Handles an "Encoding" line from a configuration file - note that the
1706// configinfo.encodings map is a bit of a hack (to be fixed when the
1707// configuration files are tidied up).
1708void receptionist::configure_encoding (const text_tarray &cfgline) {
1709
1710  text_t subkey, subvalue, shortname, longname, mapfile;
1711  int multibyte = 0;
1712  text_t::const_iterator cfglinesub_here;
1713  text_tarray::const_iterator cfgline_here = cfgline.begin();
1714  text_tarray::const_iterator cfgline_end = cfgline.end();
1715  while (cfgline_here != cfgline_end) {
1716    if (*cfgline_here == "multibyte") {
1717      multibyte = 1;
1718    } else {
1719      cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
1720                      (*cfgline_here).end(), '=', subkey);
1721      if (subkey == "shortname") {
1722    shortname = substr (cfglinesub_here, (*cfgline_here).end());
1723      } else if (subkey == "longname") {
1724    longname = substr (cfglinesub_here, (*cfgline_here).end());
1725      } else if (subkey == "map") {
1726    mapfile = substr (cfglinesub_here, (*cfgline_here).end());
1727      }
1728    }
1729    ++cfgline_here;
1730  }
1731  if (!shortname.empty()) {
1732    if (longname.empty()) longname = shortname;
1733
1734    // add the converter
1735    if (shortname == "utf-8") {
1736      utf8inconvertclass *utf8inconvert = new utf8inconvertclass();
1737      utf8outconvertclass *utf8outconvert = new utf8outconvertclass();
1738      utf8outconvert->set_rzws(1);
1739      add_converter (shortname, utf8inconvert, utf8outconvert);
1740      configinfo.encodings[longname] = shortname;
1741
1742    } else if (shortname == "utf-16be") {
1743      // we use the default input converter as this shouldn't ever be used
1744      // for converting from unicode...
1745      inconvertclass *inconverter = new inconvertclass();
1746      utf16outconvertclass *outconverter = new utf16outconvertclass();
1747      add_converter (shortname, inconverter, outconverter);
1748      configinfo.encodings[longname] = shortname;
1749   
1750    } else if (!mapfile.empty()) {
1751
1752      if (mapfile == "8859_1.ump") {
1753    // iso-8859-1 is a special case as it'll always be supported by the
1754    // standard converter class and therefore doesn't need to use its
1755    // mapping file
1756    inconvertclass *inconvert = new inconvertclass();
1757    rzwsoutconvertclass *outconvert = new rzwsoutconvertclass();
1758    outconvert->set_rzws(1);
1759    add_converter (shortname, inconvert, outconvert); 
1760    configinfo.encodings[longname] = shortname;
1761
1762      } else {
1763    text_t to_uc_map = filename_cat(configinfo.gsdlhome, "mappings", "to_uc", mapfile);
1764    text_t from_uc_map = filename_cat(configinfo.gsdlhome, "mappings", "from_uc", mapfile);
1765    if (file_exists(to_uc_map) && file_exists(from_uc_map)) {
1766
1767      mapinconvertclass *mapinconvert = new mapinconvertclass();
1768      mapinconvert->setmapfile (to_uc_map, 0x003F);
1769      mapinconvert->set_multibyte (multibyte);
1770      mapoutconvertclass *mapoutconvert = new mapoutconvertclass();
1771      mapoutconvert->setmapfile (from_uc_map, 0x3F);
1772      mapoutconvert->set_multibyte (multibyte);
1773      mapoutconvert->set_rzws(1);
1774      add_converter (shortname, mapinconvert, mapoutconvert);
1775      configinfo.encodings[longname] = shortname;
1776    }
1777      }
1778    }
1779  }
1780}
Note: See TracBrowser for help on using the browser.