source: trunk/gsdl/src/recpt/receptionist.cpp@ 806

Last change on this file since 806 was 806, checked in by sjboddie, 24 years ago

added arabic encoding

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 39.8 KB
Line 
1/**********************************************************************
2 *
3 * receptionist.cpp -- a web interface for the gsdl
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * $Id: receptionist.cpp 806 1999-12-06 01:14:16Z sjboddie $
25 *
26 *********************************************************************/
27
28/*
29 $Log$
30 Revision 1.43 1999/12/06 01:14:16 sjboddie
31 added arabic encoding
32
33 Revision 1.42 1999/12/05 21:21:04 sjboddie
34 added support for multiple gsdlhomes and gdbmhomes
35
36 Revision 1.41 1999/11/25 21:44:16 sjboddie
37 fixed bug in logout
38
39 Revision 1.40 1999/11/08 20:26:37 sjboddie
40 added multiplevalue option to cgiarginfo
41
42 Revision 1.39 1999/11/03 22:49:10 sjboddie
43 A location url may now contain macros
44
45 Revision 1.38 1999/11/01 21:49:34 sjboddie
46 changes to arguments of many functions
47
48 Revision 1.37 1999/10/20 03:55:03 sjboddie
49 yet another problem with calling browserclass
50 processOID functions correctly
51
52 Revision 1.36 1999/10/19 03:23:44 davidb
53 Collection building support through web pages
54 and internal and external link handling for collection documents
55
56 Revision 1.35 1999/10/18 20:07:05 sjboddie
57 tidied up a few things - moved processing of "hp" argument
58 to htmlbrowserclass
59
60 Revision 1.34 1999/10/14 23:00:52 sjboddie
61 finished changes to browsing support
62
63 Revision 1.33 1999/10/10 08:14:10 sjboddie
64 - metadata now returns mp rather than array
65 - redesigned browsing support (although it's not finished so
66 won't currently work ;-)
67
68 Revision 1.32 1999/09/21 11:28:45 sjboddie
69 tidied up file locking
70
71 Revision 1.31 1999/09/16 21:38:17 sjboddie
72 added some file locking stuff for logging. Windows still needs to
73 be done.
74
75 Revision 1.30 1999/09/07 04:56:58 sjboddie
76 added GPL notice
77
78 Revision 1.29 1999/09/03 10:02:30 rjmcnab
79 Made the page parameters configurable. Now the page parameters must
80 correspond to cgi arguments in name and value (ie language=zh should now
81 be l=zh) which makes things more consistent anyway. Removed a couple of
82 specialised NZDL page parameters.
83
84 Moved the combining of the cgi arguments so that the receptionist does
85 all the configuration now.
86
87 Made the macro precedence configurable.
88
89 Made cgi arguments totally configurable. Now any piece of information about
90 a cgi argument can be configured meaning that cgi arguments can be declared
91 from the configuration file.
92
93 Removed the argdefault configuration argument. This should now be done
94 using cgiarg.
95
96 Revision 1.28 1999/09/03 04:39:46 rjmcnab
97 Made cookies and logs optional (they are turned off by default). To
98 turn them on put
99
100 usecookies true
101 logcgiargs true
102
103 in your configuration file.
104
105 Revision 1.27 1999/09/02 00:27:21 rjmcnab
106 A few small things.
107
108 Revision 1.26 1999/08/25 04:43:06 sjboddie
109 made FilterRequest_t::docSet an array rather than a set
110
111 Revision 1.25 1999/08/20 00:59:01 sjboddie
112 -fixed up location redirection
113 -added some usage logging, also now set a GSDL_UID cookie. Logging
114 does NOT presently lock the log file while it's in use. That has yet
115 to be done.
116
117 Revision 1.24 1999/08/13 04:16:42 sjboddie
118 added some collection-level metadata stuff
119
120 Revision 1.23 1999/08/11 23:28:59 sjboddie
121 added support for html classifier (i.e. the hp argument now must be
122 translated too).
123
124 Revision 1.22 1999/08/10 22:45:21 sjboddie
125 format option ShowTopPages is now called DocumentTopPages
126
127 Revision 1.21 1999/08/09 04:25:17 sjboddie
128 moved OID translation stuff from documentaction::define_external_macros
129 to receptionist
130
131 Revision 1.20 1999/07/30 02:13:09 sjboddie
132 -added collectinfo argument to some functions
133 -made some function prototypes virtual
134
135 Revision 1.19 1999/07/15 06:02:05 rjmcnab
136 Moved the setting of argsinfo into the constructor. Added the configuration
137 command argdefault (as used by the actions). Added code to output the
138 correct charset based on the page encoding so that the user does not need
139 to specify the encoding used for a particular page.
140
141 Revision 1.18 1999/07/11 01:05:20 rjmcnab
142 Stored origin of cgiarg with argument.
143
144 Revision 1.17 1999/07/10 22:18:26 rjmcnab
145 Added calls to define_external_cgiargs.
146
147 Revision 1.16 1999/06/27 21:49:03 sjboddie
148 fixed a couple of version conflicts - tidied up some small things
149
150 Revision 1.15 1999/06/26 01:14:32 rjmcnab
151 Made a couple of changes to handle different encodings.
152
153 Revision 1.14 1999/06/09 00:08:36 sjboddie
154 query string macro (_cgiargq_) is now made html safe before being set
155
156 Revision 1.13 1999/06/08 04:29:31 sjboddie
157 added argsinfo to the call to check_cgiargs to make it easy to set
158 args to their default if they're found to be screwed up
159
160 Revision 1.12 1999/04/30 01:59:42 sjboddie
161 lots of stuff - getting documentaction working (documentaction replaces
162 old browseaction)
163
164 Revision 1.11 1999/03/25 03:06:43 sjboddie
165
166 altered receptionist slightly so it now passes *collectproto to
167 define_internal_macros and define_external_macros - need it
168 for browseaction
169
170 Revision 1.10 1999/03/05 03:53:54 sjboddie
171
172 fixed some bugs
173
174 Revision 1.9 1999/02/28 20:00:16 rjmcnab
175
176
177 Fixed a few things.
178
179 Revision 1.8 1999/02/25 21:58:59 rjmcnab
180
181 Merged sources.
182
183 Revision 1.7 1999/02/21 22:33:55 rjmcnab
184
185 Lots of stuff :-)
186
187 Revision 1.6 1999/02/11 01:24:05 rjmcnab
188
189 Fixed a few compiler warnings.
190
191 Revision 1.5 1999/02/08 01:28:02 rjmcnab
192
193 Got the receptionist producing something using the statusaction.
194
195 Revision 1.4 1999/02/05 10:42:46 rjmcnab
196
197 Continued working on receptionist
198
199 Revision 1.3 1999/02/04 10:00:56 rjmcnab
200
201 Developed the idea of an "action" and having them define the cgi arguments
202 which they need and how those cgi arguments function.
203
204 Revision 1.2 1999/02/04 01:17:27 rjmcnab
205
206 Got it outputting something.
207
208
209 */
210
211
212#include "receptionist.h"
213#include "fileutil.h"
214#include "cgiutils.h"
215#include "htmlutils.h"
216#include "OIDtools.h"
217#include <assert.h>
218#include <time.h>
219#include <stdio.h>
220#include <fstream.h>
221
222#if defined (__WIN32_)
223#include "wincgiutils.h"
224#endif
225
226void recptconf::clear () {
227 gsdlhome.clear();
228 gdbmhome.clear();
229 collectinfo.erase(collectinfo.begin(), collectinfo.end());
230 collection.clear();
231 collectdir.clear();
232 httpprefix.clear();
233 httpimg.clear();
234 gwcgi.clear();
235 macrofiles.erase(macrofiles.begin(), macrofiles.end());
236 saveconf.clear();
237 usecookies = false;
238 logcgiargs = false;
239
240 // these default page parameters can always be overriden
241 // in the configuration file
242 pageparams.erase(pageparams.begin(), pageparams.end());
243 pageparams["c"] = "";
244 pageparams["l"] = "en";
245
246#ifdef MACROPRECEDENCE
247 macroprecedence = MACROPRECEDENCE;
248#else
249 macroprecedence.clear();
250#endif
251}
252
253
254
255receptionist::receptionist () {
256 // create a list of cgi arguments
257 // this must be done before the configuration
258
259 cgiarginfo ainfo;
260
261 ainfo.shortname = "e";
262 ainfo.longname = "compressed arguments";
263 ainfo.multiplechar = true;
264 ainfo.defaultstatus = cgiarginfo::good;
265 ainfo.argdefault = "";
266 ainfo.savedarginfo = cgiarginfo::mustnot;
267 argsinfo.addarginfo (NULL, ainfo);
268
269 ainfo.shortname = "a";
270 ainfo.longname = "action";
271 ainfo.multiplechar = true;
272 ainfo.defaultstatus = cgiarginfo::none;
273 ainfo.argdefault = "";
274 ainfo.savedarginfo = cgiarginfo::must;
275 argsinfo.addarginfo (NULL, ainfo);
276
277 // w=western
278 ainfo.shortname = "w";
279 ainfo.longname = "encoding";
280 ainfo.multiplechar = true;
281 ainfo.defaultstatus = cgiarginfo::weak;
282 ainfo.argdefault = "w";
283 ainfo.savedarginfo = cgiarginfo::must;
284 argsinfo.addarginfo (NULL, ainfo);
285
286 ainfo.shortname = "nw";
287 ainfo.longname = "new encoding";
288 ainfo.multiplechar = true;
289 ainfo.defaultstatus = cgiarginfo::none;
290 ainfo.argdefault = "";
291 ainfo.savedarginfo = cgiarginfo::mustnot;
292 argsinfo.addarginfo (NULL, ainfo);
293
294 ainfo.shortname = "c";
295 ainfo.longname = "collection";
296 ainfo.multiplechar = true;
297 ainfo.defaultstatus = cgiarginfo::none;
298 ainfo.argdefault = "";
299 ainfo.savedarginfo = cgiarginfo::must;
300 argsinfo.addarginfo (NULL, ainfo);
301
302 // the interface language name should use the ISO 639
303 // standard
304 ainfo.shortname = "l";
305 ainfo.longname = "interface language";
306 ainfo.multiplechar = true;
307 ainfo.defaultstatus = cgiarginfo::weak;
308 ainfo.argdefault = "en";
309 ainfo.savedarginfo = cgiarginfo::must;
310 argsinfo.addarginfo (NULL, ainfo);
311
312 // the GSDL_UID (cookie)
313 ainfo.shortname = "z";
314 ainfo.longname = "gsdl uid";
315 ainfo.multiplechar = true;
316 ainfo.defaultstatus = cgiarginfo::none;
317 ainfo.argdefault = "";
318 ainfo.savedarginfo = cgiarginfo::mustnot;
319 argsinfo.addarginfo (NULL, ainfo);
320}
321
322
323void receptionist::add_action (action *theaction) {
324 // make sure we have an action to add
325 if (theaction == NULL) return;
326
327 // add this action to the list of actions
328 actions.addaction(theaction);
329
330 // add the cgi arguments from this action
331 argsinfo.addarginfo (NULL, theaction->getargsinfo());
332}
333
334
335void receptionist::add_browser (browserclass *thebrowser) {
336 // make sure we have a browser to add
337 if (thebrowser == NULL) return;
338
339 // add this browser to the list of browsers
340 browsers.addbrowser(thebrowser);
341}
342
343
344void receptionist::setdefaultbrowser (const text_t &browsername) {
345 browsers.setdefaultbrowser (browsername);
346}
347
348
349// configure should be called for each line in the
350// configuration files to configure the receptionist and everything
351// it contains. The configuration should take place after everything
352// has been added but before the initialisation.
353void receptionist::configure (const text_t &key, const text_tarray &cfgline) {
354 // configure the receptionist
355 if (cfgline.size() >= 1) {
356 cgiarginfo *info = NULL;
357 if (key == "gsdlhome") configinfo.gsdlhome = cfgline[0];
358 else if (key == "collection") {
359 configinfo.collection = cfgline[0];
360 // also need to set the default arg to this collection
361 if ((info = argsinfo.getarginfo("c")) != NULL) {
362 info->defaultstatus = cgiarginfo::good;
363 info->argdefault = cfgline[0];
364 }
365
366 } else if (key == "collectdir") configinfo.collectdir = cfgline[0];
367 else if (key == "httpprefix") configinfo.httpprefix = cfgline[0];
368 else if (key == "httpimg") configinfo.httpimg = cfgline[0];
369 else if (key == "gwcgi") configinfo.gwcgi = cfgline[0];
370 else if (key == "macrofiles") {
371 // want to append to macrofiles (i.e. may be several config files
372 // contributing, maybe from several collections).
373 text_tarray::const_iterator here = cfgline.begin();
374 text_tarray::const_iterator end = cfgline.end();
375 while (here != end) {
376 configinfo.macrofiles.insert (*here);
377 here ++;
378 }
379 }
380 else if (key == "saveconf") configinfo.saveconf = cfgline[0];
381 else if (key == "usecookies") configinfo.usecookies = (cfgline[0] == "true");
382 else if (key == "logcgiargs") configinfo.logcgiargs = (cfgline[0] == "true");
383 else if (key == "pageparam") {
384 if (cfgline.size() >= 2) configinfo.pageparams[cfgline[0]] = cfgline[1];
385 else configinfo.pageparams[cfgline[0]] = "";
386 }
387 else if (key == "macroprecedence") configinfo.macroprecedence = cfgline[0];
388 else if (key == "collectinfo") {
389 if (cfgline.size() >= 3) {
390 collectioninfo_t cinfo;
391 cinfo.gsdl_gsdlhome = cfgline[1];
392 cinfo.gsdl_gdbmhome = cfgline[2];
393 configinfo.collectinfo[cfgline[0]] = cinfo;
394 }
395 }
396
397 else if (key == "cgiarg") {
398 // get shortname
399 bool seen_defaultstatus = false;
400 text_t subkey, subvalue;
401 text_t shortname;
402 text_t::const_iterator cfglinesub_here;
403 text_tarray::const_iterator cfgline_here = cfgline.begin();
404 text_tarray::const_iterator cfgline_end = cfgline.end();
405 while (cfgline_here != cfgline_end) {
406 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
407 (*cfgline_here).end(), '=', subkey);
408 if (subkey == "shortname") {
409 shortname = substr (cfglinesub_here, (*cfgline_here).end());
410 }
411 cfgline_here++;
412 }
413
414 // if we found the shortname process the line again filling in values
415 if (!shortname.empty()) {
416 cgiarginfo &chinfo = argsinfo[shortname];
417 chinfo.shortname = shortname; // in case this is a new argument
418
419 cfgline_here = cfgline.begin();
420 while (cfgline_here != cfgline_end) {
421 cfglinesub_here = getdelimitstr((*cfgline_here).begin(),
422 (*cfgline_here).end(), '=', subkey);
423 subvalue = substr (cfglinesub_here, (*cfgline_here).end());
424
425 if (subkey == "longname") chinfo.longname = subvalue;
426 else if (subkey == "multiplechar") chinfo.multiplechar = (subvalue == "true");
427 else if (subkey == "defaultstatus") {
428 seen_defaultstatus = true;
429 if (subvalue == "none") chinfo.defaultstatus = cgiarginfo::none;
430 else if (subvalue == "weak") chinfo.defaultstatus = cgiarginfo::weak;
431 else if (subvalue == "good") chinfo.defaultstatus = cgiarginfo::good;
432 else if (subvalue == "config") chinfo.defaultstatus = cgiarginfo::config;
433 else if (subvalue == "imperative") chinfo.defaultstatus = cgiarginfo::imperative;
434 }
435 else if (subkey == "argdefault") {
436 chinfo.argdefault = subvalue;
437 if (!seen_defaultstatus) chinfo.defaultstatus = cgiarginfo::config;
438 }
439 else if (subkey == "savedarginfo") {
440 if (subvalue == "mustnot") chinfo.savedarginfo = cgiarginfo::mustnot;
441 else if (subvalue == "can") chinfo.savedarginfo = cgiarginfo::can;
442 else if (subvalue == "must") chinfo.savedarginfo = cgiarginfo::must;
443 }
444
445 cfgline_here++;
446 }
447 }
448 }
449 }
450
451 // configure the actions
452 actionptrmap::iterator actionhere = actions.begin ();
453 actionptrmap::iterator actionend = actions.end ();
454
455 while (actionhere != actionend) {
456 assert ((*actionhere).second.a != NULL);
457 if ((*actionhere).second.a != NULL)
458 (*actionhere).second.a->configure(key, cfgline);
459
460 actionhere++;
461 }
462
463 // configure the protocols
464 recptprotolistclass::iterator protohere = protocols.begin ();
465 recptprotolistclass::iterator protoend = protocols.end ();
466
467 while (protohere != protoend) {
468 assert ((*protohere).p != NULL);
469 if ((*protohere).p != NULL)
470 (*protohere).p->configure(key, cfgline);
471
472 protohere++;
473 }
474
475 // configure the browsers
476 browserptrmap::iterator browserhere = browsers.begin ();
477 browserptrmap::iterator browserend = browsers.end ();
478
479 while (browserhere != browserend) {
480 assert ((*browserhere).second.b != NULL);
481 if ((*browserhere).second.b != NULL)
482 (*browserhere).second.b->configure(key, cfgline);
483
484 browserhere++;
485 }
486}
487
488
489void receptionist::configure (const text_t &key, const text_t &value) {
490 text_tarray cfgline;
491 cfgline.push_back (value);
492 configure(key, cfgline);
493}
494
495
496// init should be called after all the actions, protocols, and
497// converters have been added to the receptionist and after everything
498// has been configured but before any pages are created.
499// It returns true on success and false on failure. If false is
500// returned getpage should not be called (without producing
501// meaningless output), instead an error page should be
502// produced by the calling code.
503bool receptionist::init (ostream &logout) {
504 // first configure collectdir
505 text_t thecollectdir = configinfo.gsdlhome;
506 if (!configinfo.collection.empty()) {
507 // collection specific mode
508 if (!configinfo.collectdir.empty()) {
509 // has already been configured
510 thecollectdir = configinfo.collectdir;
511 } else {
512 // decide where collectdir is by searching for collect.cfg
513 // look in $GSDLHOME/collect/collection-name/etc/collect.cfg and
514 // then $GSDLHOME/etc/collect.cfg
515 thecollectdir = filename_cat (configinfo.gsdlhome, "collect");
516 thecollectdir = filename_cat (thecollectdir, configinfo.collection);
517 text_t filename = filename_cat (thecollectdir, "etc");
518 filename = filename_cat (filename, "collect.cfg");
519
520 if (!file_exists(filename)) thecollectdir = configinfo.gsdlhome;
521 }
522 }
523 configure("collectdir", thecollectdir);
524
525 // read in the macro files
526 if (!read_macrofiles (logout)) return false;
527
528 // there must be at least one action defined
529 if (actions.empty()) {
530 logout << "Error: no actions have been added to the receptionist\n";
531 return false;
532 }
533
534 // there must be at least one browser defined
535 if (browsers.empty()) {
536 logout << "Error: no browsers have been added to the receptionist\n";
537 return false;
538 }
539
540 // create a saveconf string if there isn't one already
541 if (configinfo.saveconf.empty())
542 configinfo.saveconf = create_save_conf_str (argsinfo, logout);
543
544 // check the saveconf string
545 if (!check_save_conf_str (configinfo.saveconf, argsinfo, logout))
546 return false;
547
548 // set a random seed
549 srand (time(NULL));
550
551 // make the output converters remove all the zero-width spaces
552 convertinfoclass::iterator converthere = converters.begin ();
553 convertinfoclass::iterator convertend = converters.end ();
554 text_t defaultconvertname;
555 while (converthere != convertend) {
556 assert ((*converthere).second.outconverter != NULL);
557 if ((*converthere).second.outconverter != NULL) {
558 (*converthere).second.outconverter->set_rzws(1);
559 if (defaultconvertname.empty())
560 defaultconvertname = (*converthere).second.name;
561 }
562 converthere++;
563 }
564
565 // set default converter if no good one has been defined
566 if (!defaultconvertname.empty()) {
567 cgiarginfo *ainfo = argsinfo.getarginfo ("w");
568 if (ainfo->argdefault != "w") {
569 if ((ainfo != NULL) && (converters.get_outconverter(ainfo->argdefault) == NULL)) {
570 ainfo->defaultstatus = cgiarginfo::good;
571 ainfo->argdefault = defaultconvertname;
572 }
573 }
574 }
575
576 // init the actions
577 actionptrmap::iterator actionhere = actions.begin ();
578 actionptrmap::iterator actionend = actions.end ();
579 while (actionhere != actionend) {
580 if (((*actionhere).second.a == NULL) ||
581 !(*actionhere).second.a->init(logout)) return false;
582 actionhere++;
583 }
584
585 // init the protocols
586 recptprotolistclass::iterator protohere = protocols.begin ();
587 recptprotolistclass::iterator protoend = protocols.end ();
588 while (protohere != protoend) {
589 if (((*protohere).p == NULL) ||
590 !(*protohere).p->init(logout)) return false;
591 protohere++;
592 }
593
594 // init the browsers
595 browserptrmap::iterator browserhere = browsers.begin ();
596 browserptrmap::iterator browserend = browsers.end ();
597 while (browserhere != browserend) {
598 if (((*browserhere).second.b == NULL) ||
599 !(*browserhere).second.b->init(logout)) return false;
600 browserhere++;
601 }
602
603 return true;
604}
605
606
607// parse_cgi_args parses cgi arguments into an argument class.
608// This function should be called for each page request. It returns false
609// if there was a major problem with the cgi arguments.
610bool receptionist::parse_cgi_args (const text_t &argstr, cgiargsclass &args,
611 ostream &logout) {
612 outconvertclass text_t2ascii;
613
614 // get an initial list of cgi arguments
615 args.clear();
616 split_cgi_args (argsinfo, argstr, args);
617
618 // expand the compressed argument (if there was one)
619 if (!expand_save_args (argsinfo, configinfo.saveconf, args, logout)) return false;
620
621 // add the defaults
622 add_default_args (argsinfo, args, logout);
623
624 // get the cookie
625 if (configinfo.usecookies) get_cookie(args["z"]);
626
627 // get the input encoding
628 text_t &arg_w = args["w"];
629 inconvertclass defaultinconvert;
630 inconvertclass *inconvert = converters.get_inconverter (arg_w);
631 if (inconvert == NULL) inconvert = &defaultinconvert;
632
633 // see if the next page will have a different encoding
634 if (args.getarg("nw") != NULL) arg_w = args["nw"];
635
636 // convert arguments which aren't in unicode to unicode
637 args_tounicode (args, *inconvert);
638
639
640 // decide on the output conversion class (needed for checking the external
641 // cgi arguments)
642 rzwsoutconvertclass defaultoutconverter;
643 rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
644 if (outconverter == NULL) outconverter = &defaultoutconverter;
645 outconverter->reset();
646
647 // check the main cgi arguments
648 if (!check_mainargs (args, logout)) return false;
649
650 // check the arguments for the action
651 action *a = actions.getaction (args["a"]);
652 if (a != NULL) {
653 if (!a->check_cgiargs (argsinfo, args, logout)) return false;
654 } else {
655 // the action was not found!!
656 logout << text_t2ascii << "Error: the action \"" << args["a"]
657 << "\" could not be found.\n";
658 return false;
659 }
660
661 // check external cgi arguments for each action
662 actionptrmap::iterator actionhere = actions.begin ();
663 actionptrmap::iterator actionend = actions.end ();
664 while (actionhere != actionend) {
665 assert ((*actionhere).second.a != NULL);
666 if ((*actionhere).second.a != NULL) {
667 if (!(*actionhere).second.a->check_external_cgiargs (argsinfo, args, *outconverter,
668 configinfo.saveconf, logout))
669 return false;
670 }
671 actionhere++;
672 }
673
674 // the action might have changed but we will assume that
675 // the cgiargs were checked properly when the change was made
676
677 return true;
678}
679
680// returns true if cookie already existed, false
681// if it was generated
682bool receptionist::get_cookie (text_t &cookie) {
683
684 char *c = getenv("HTTP_COOKIE");
685 if (c != NULL) {
686 text_t cookiestring = c;
687
688 text_t::const_iterator end = cookiestring.end();
689 text_t::const_iterator here = findchar (cookiestring.begin(), end, 'G');
690
691 if (substr(here, here+8) == "GSDL_UID") {
692 cookie = substr (here+9, findchar (here+9, end, ';'));
693 return true;
694 }
695 }
696
697 cookie.clear();
698 char *host = getenv("REMOTE_ADDR");
699 time_t ttime = time(NULL);
700 if (host != NULL) {
701 cookie += host;
702 cookie.push_back ('-');
703 }
704 cookie += text_t(ttime);
705
706 return false;
707}
708
709// as above but just tests if cookie exists
710bool receptionist::get_cookie () {
711
712 char *c = getenv("HTTP_COOKIE");
713 if (c != NULL) {
714 text_t cookiestring = c;
715
716 text_t::const_iterator end = cookiestring.end();
717 text_t::const_iterator here = findchar (cookiestring.begin(), end, 'G');
718
719 if (substr(here, here+8) == "GSDL_UID")
720 return true;
721 }
722 return false;
723}
724
725bool receptionist::log_cgi_args (cgiargsclass &args, ostream &logout) {
726
727 // see if we want to log the cgi arguments
728 if (!configinfo.logcgiargs) return true;
729
730 char *host;
731 host = getenv("REMOTE_HOST");
732 if (host == NULL) host = getenv ("REMOTE_ADDR");
733 if (host == NULL) host = "";
734 char *browser = getenv("HTTP_USER_AGENT");
735 if (browser == NULL) browser = "";
736 time_t ttime = time(NULL);
737
738 cgiargsclass::const_iterator args_here = args.begin();
739 cgiargsclass::const_iterator args_end = args.end();
740
741 text_t argstr;
742 bool first = true;
743 while (args_here != args_end) {
744 if (!first) argstr += ", ";
745 argstr += (*args_here).first + "=" + (*args_here).second.value;
746 first = false;
747 args_here ++;
748 }
749
750 text_t logfile = filename_cat (configinfo.gsdlhome, "etc");
751 logfile = filename_cat (logfile, "usage.txt");
752
753 text_t logstr = host;
754 logstr += " [";
755 logstr += ttime;
756 logstr += "] (" + argstr + ") \"";
757 logstr += browser;
758 logstr += "\"\n";
759
760 return append_logstr (logfile, logstr, logout);
761}
762
763bool receptionist::append_logstr (const text_t &filename, const text_t &logstr,
764 ostream &logout) {
765
766 utf8outconvertclass text_t2utf8;
767 char *lfile = filename.getcstr();
768 ofstream log (lfile, ios::app);
769
770 if (!log) {
771 logout << "Error: Couldn't open file " << lfile << "\n";
772 delete lfile;
773 return false;
774 }
775
776 int fd = GSDL_GET_FILEDESC(log);
777
778 // lock_val is set to 0 if file is locked successfully
779 int lock_val = 1;
780 GSDL_LOCK_FILE (fd);
781 if (lock_val == 0) {
782 log << text_t2utf8 << logstr;
783 GSDL_UNLOCK_FILE (fd);
784 } else {
785 logout << "Error: Couldn't lock file " << lfile << "\n";
786 log.close();
787 delete lfile;
788 return false;
789 }
790
791 log.close();
792
793 delete lfile;
794 return true;
795}
796
797text_t receptionist::expandmacros (const text_t &astring, cgiargsclass &args,
798 ostream &logout) {
799 text_t outstring;
800 outconvertclass text_t2ascii;
801
802 action *a = actions.getaction (args["a"]);
803 prepare_page (a, args, text_t2ascii, logout);
804 disp.expandstring ("Global", astring, outstring);
805 return outstring;
806}
807
808// produce_cgi_page will call get_cgihead_info and
809// produce_content in the appropriate way to output a cgi header and
810// the page content (if needed). If a page could not be created it
811// will return false
812bool receptionist::produce_cgi_page (cgiargsclass &args, ostream &contentout,
813 ostream &logout) {
814 outconvertclass text_t2ascii;
815
816 response_t response;
817 text_t response_data;
818
819 // produce cgi header
820 get_cgihead_info (args, response, response_data, logout);
821 if (response == location) {
822 // location response (url may contain macros!!)
823 response_data = expandmacros (response_data, args, logout);
824 contentout << text_t2ascii << "Location: " << response_data << "\n\n";
825 contentout << flush;
826 return true;
827 } else if (response == content) {
828 // content response
829 contentout << text_t2ascii << "Content-type: " << response_data << "\n\n";
830 } else {
831 // unknown response
832 logout << "Error: get_cgihead_info returned an unknown response type.\n";
833 return false;
834 }
835
836 // produce cgi page
837 if (!produce_content (args, contentout, logout)) return false;
838
839 // flush contentout
840 contentout << flush;
841 return true;
842}
843
844
845// get_cgihead_info determines the cgi header information for
846// a set of cgi arguments. If response contains location then
847// response_data contains the redirect address. If reponse
848// contains content then reponse_data contains the content-type.
849// Note that images can now be produced by the receptionist.
850void receptionist::get_cgihead_info (cgiargsclass &args, response_t &response,
851 text_t &response_data, ostream &logout) {
852 outconvertclass text_t2ascii;
853
854 // get the action
855 action *a = actions.getaction (args["a"]);
856 if (a != NULL) {
857 a->get_cgihead_info (args, &protocols, response, response_data, logout);
858
859 } else {
860 // the action was not found!!
861 logout << text_t2ascii << "Error receptionist::get_cgihead_info: the action \""
862 << args["a"] << "\" could not be found.\n";
863 response = content;
864 response_data = "text/html";
865 }
866
867 // add the encoding information
868 if (response == content) {
869 if (args["w"] == "u") {
870 response_data += "; charset=UTF-8";
871 } else if (args["w"] == "g") {
872 response_data += "; charset=GBK";
873 } else if (args["w"] == "a") {
874 response_data += "; charset=ISO-8859-6";
875 } else {
876 response_data += "; charset=ISO-8859-1";
877 }
878
879 // add cookie if required
880 if (configinfo.usecookies && !get_cookie())
881 response_data += "\nSet-Cookie: GSDL_UID=" + args["z"]
882 + "; expires=25-Dec-37 00:00:00 GMT";
883 }
884}
885
886
887// produce the page content
888bool receptionist::produce_content (cgiargsclass &args, ostream &contentout,
889 ostream &logout) {
890
891 // decide on the output conversion class
892 text_t &arg_w = args["w"];
893 rzwsoutconvertclass defaultoutconverter;
894 rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
895 if (outconverter == NULL) outconverter = &defaultoutconverter;
896 outconverter->reset();
897
898
899 recptproto *collectproto = protocols.getrecptproto (args["c"], logout);
900 if (collectproto != NULL) {
901 // get browsers to process OID
902 text_t OID = args["d"];
903 if (OID.empty()) OID = args["cl"];
904 if (!OID.empty()) {
905 text_tset metadata;
906 text_tarray OIDs;
907 OIDs.push_back (OID);
908 if (!is_top(OID)) OIDs.push_back (OID + ".pr");
909 FilterResponse_t response;
910 metadata.insert ("childtype");
911 if (get_info (OIDs, args["c"], metadata, false, collectproto, response, logout)) {
912 text_t classifytype;
913 if (!response.docInfo[0].metadata["childtype"].values[0].empty())
914 classifytype = response.docInfo[0].metadata["childtype"].values[0];
915 else if (!is_top (OID)) {
916 if (!response.docInfo[1].metadata["childtype"].values[0].empty())
917 classifytype = response.docInfo[1].metadata["childtype"].values[0];
918 }
919 browserclass *b = browsers.getbrowser (classifytype);
920 b->processOID (args, collectproto, logout);
921 }
922 }
923
924 // translate "d" and "cl" arguments if required
925 translate_OIDs (args, collectproto, logout);
926 }
927
928 // produce the page using the desired action
929 action *a = actions.getaction (args["a"]);
930 if (a != NULL) {
931 if (a->uses_display(args)) prepare_page (a, args, (*outconverter), logout);
932 if (!a->do_action (args, &protocols, &browsers, disp, (*outconverter), contentout, logout))
933 return false;
934
935 } else {
936 // the action was not found!!
937 outconvertclass text_t2ascii;
938
939 logout << text_t2ascii << "Error receptionist::produce_content: the action \""
940 << args["a"] << "\" could not be found.\n";
941
942 contentout << (*outconverter)
943 << "<html>\n"
944 << "<head>\n"
945 << "<title>Error</title>\n"
946 << "</head>\n"
947 << "<body>\n"
948 << "<h2>Oops!</h2>\n"
949 << "Undefined Page. The action \""
950 << args["a"] << "\" could not be found.\n"
951 << "</body>\n"
952 << "</html>\n";
953 }
954 return true;
955}
956
957
958// returns the compressed argument ("e") corresponding to the argument
959// list. This can be used to save preferences between sessions.
960text_t receptionist::get_compressed_arg (cgiargsclass &args, ostream &logout) {
961 // decide on the output conversion class
962 text_t &arg_w = args["w"];
963 rzwsoutconvertclass defaultoutconverter;
964 rzwsoutconvertclass *outconverter = converters.get_outconverter (arg_w);
965 if (outconverter == NULL) outconverter = &defaultoutconverter;
966 outconverter->reset();
967
968 text_t compressed_args;
969 if (compress_save_args (argsinfo, configinfo.saveconf, args,
970 compressed_args, *outconverter, logout))
971 return compressed_args;
972
973 return "";
974}
975
976
977// will read in all the macro files. If one is not found an
978// error message will be written to logout and the method will
979// return false.
980bool receptionist::read_macrofiles (ostream &logout) {
981 outconvertclass text_t2ascii;
982
983 // redirect the error output to logout
984 ostream *savedlogout = disp.setlogout (&logout);
985
986 // load up the default macro files, the collection directory
987 // is searched first for the file (if this is being used in
988 // collection specific mode) and then the main directory(s)
989 text_t colmacrodir = filename_cat (configinfo.collectdir, "macros");
990
991 text_tarray maindirs;
992 text_t gsdlmacrodir = filename_cat (configinfo.gsdlhome, "macros");
993 maindirs.push_back (gsdlmacrodir);
994 colinfo_tmap::iterator colhere = configinfo.collectinfo.begin();
995 colinfo_tmap::iterator colend = configinfo.collectinfo.end();
996 while (colhere != colend) {
997 gsdlmacrodir = filename_cat ((*colhere).second.gsdl_gsdlhome, "macros");
998 maindirs.push_back (gsdlmacrodir);
999 colhere ++;
1000 }
1001
1002 text_tset::iterator arrhere = configinfo.macrofiles.begin();
1003 text_tset::iterator arrend = configinfo.macrofiles.end();
1004 text_t filename;
1005 while (arrhere != arrend) {
1006 bool foundfile = false;
1007
1008 // try in the collection directory if this is being
1009 // run in collection specific mode
1010 if (!configinfo.collection.empty()) {
1011 filename = filename_cat (colmacrodir, *arrhere);
1012 if (file_exists (filename)) {
1013 disp.loaddefaultmacros(filename);
1014 foundfile = true;
1015 }
1016 }
1017
1018 // if we haven't found the macro file yet try in
1019 // the main macro directory(s)
1020 // if file is found in more than one main directory
1021 // we'll load all copies
1022 if (!foundfile) {
1023 text_tarray::const_iterator dirhere = maindirs.begin();
1024 text_tarray::const_iterator dirend = maindirs.end();
1025 while (dirhere != dirend) {
1026 filename = filename_cat (*dirhere, *arrhere);
1027 if (file_exists (filename)) {
1028 disp.loaddefaultmacros(filename);
1029 foundfile = true;
1030 }
1031 dirhere ++;
1032 }
1033 }
1034
1035 // see if we found the file or not
1036 if (!foundfile) {
1037 logout << text_t2ascii
1038 << "Error: the macro file \"" << *arrhere << "\" could not be found.\n";
1039 if (configinfo.collection.empty()) {
1040 text_t dirs;
1041 joinchar (maindirs, ", ", dirs);
1042 logout << text_t2ascii
1043 << "It should be in either of the following directories ("
1044 << dirs << ").\n\n";
1045
1046 } else {
1047 logout << text_t2ascii
1048 << "It should be in either " << colmacrodir << " or in "
1049 << gsdlmacrodir << ".\n\n";
1050 }
1051 // reset logout to what it was
1052 disp.setlogout (savedlogout);
1053 return false;
1054 }
1055 arrhere++;
1056 }
1057
1058 // success
1059
1060 // reset logout to what it was
1061 disp.setlogout (savedlogout);
1062 return true;
1063}
1064
1065
1066// check_mainargs will check all the main arguments. If a major
1067// error is found it will return false and no cgi page should
1068// be created using the arguments.
1069bool receptionist::check_mainargs (cgiargsclass &args, ostream &logout) {
1070 // if this receptionist is running in collection dependant mode
1071 // then it should always set the collection argument to the
1072 // collection
1073 if (!configinfo.collection.empty()) args["c"] = configinfo.collection;
1074
1075 // if cross-collection is turned on make "cc" default to
1076 // all available collections and "c" default to any
1077 // valid collection
1078 if (args["ccs"] == "1") {
1079 if (args["cc"].empty()) {
1080 recptprotolistclass::iterator rprotolist_here = protocols.begin();
1081 recptprotolistclass::iterator rprotolist_end = protocols.end();
1082 bool first = true;
1083 while (rprotolist_here != rprotolist_end) {
1084 if ((*rprotolist_here).p != NULL) {
1085 text_tarray collist;
1086 comerror_t err;
1087 (*rprotolist_here).p->get_collection_list (collist, err, logout);
1088 if (err == noError) {
1089 text_tarray::iterator collist_here = collist.begin();
1090 text_tarray::iterator collist_end = collist.end();
1091 while (collist_here != collist_end) {
1092 if (!first) args["cc"].push_back (',');
1093 else if (args["c"].empty()) args["c"] = *collist_here;
1094 args["cc"] += *collist_here;
1095 first = false;
1096 collist_here ++;
1097 }
1098 }
1099 }
1100 rprotolist_here ++;
1101 }
1102 } else if (args["c"].empty()) {
1103 text_t argcc = args["cc"];
1104 decode_cgi_arg (argcc);
1105 text_t::const_iterator it = argcc.begin();
1106 args["c"] = substr (it, findchar (it, argcc.end(), ','));
1107 }
1108 }
1109
1110 // argument "v" can only be 0 or 1. Use the default value
1111 // if it is out of range
1112 int arg_v = args.getintarg ("v");
1113 if (arg_v != 0 && arg_v != 1) {
1114 cgiarginfo *vinfo = argsinfo.getarginfo ("v");
1115 if (vinfo != NULL) args["v"] = vinfo->argdefault;
1116 }
1117
1118 // argument "f" can only be 0 or 1. Use the default value
1119 // if it is out of range
1120 int arg_f = args.getintarg ("f");
1121 if (arg_f != 0 && arg_f != 1) {
1122 cgiarginfo *finfo = argsinfo.getarginfo ("f");
1123 if (finfo != NULL) args["f"] = finfo->argdefault;
1124 }
1125
1126 return true;
1127}
1128
1129// translate_OIDs translates the "d" and "cl" arguments to their correct values
1130// if they use the tricky ".fc", ".lc" type syntax.
1131void receptionist::translate_OIDs (cgiargsclass &args, recptproto *collectproto,
1132 ostream &logout) {
1133
1134 FilterResponse_t response;
1135 FilterRequest_t request;
1136 comerror_t err;
1137 text_t &arg_d = args["d"];
1138 text_t &arg_cl = args["cl"];
1139 text_t &collection = args["c"];
1140
1141 // do a call to translate OIDs if required
1142 request.filterName = "NullFilter";
1143 request.filterResultOptions = FROID;
1144 if (!arg_d.empty() && needs_translating (arg_d)) {
1145 request.docSet.push_back (arg_d);
1146 collectproto->filter (collection, request, response, err, logout);
1147 arg_d = response.docInfo[0].OID;
1148 request.clear();
1149 }
1150 // we'll also check here that the "cl" argument has a "classify" doctype
1151 // (in case ".fc" or ".lc" have screwed up)
1152 if (needs_translating (arg_cl)) {
1153 request.fields.insert ("doctype");
1154 request.docSet.push_back (arg_cl);
1155 request.filterResultOptions = FRmetadata;
1156 collectproto->filter (collection, request, response, err, logout);
1157 // set to original value (without .xx stuff) if doctype isn't "classify"
1158 if (response.docInfo[0].metadata["doctype"].values[0] != "classify")
1159 strip_suffix (arg_cl);
1160 else
1161 arg_cl = response.docInfo[0].OID;
1162 }
1163}
1164
1165// prepare_page sets up page parameters, sets display macros
1166// and opens the page ready for output
1167void receptionist::prepare_page (action *a, cgiargsclass &args,
1168 outconvertclass &outconvert,
1169 ostream &logout) {
1170 // set up page parameters
1171 text_t pageparams;
1172 bool first = true;
1173
1174 text_tmap::iterator params_here = configinfo.pageparams.begin();
1175 text_tmap::iterator params_end = configinfo.pageparams.end();
1176 while (params_here != params_end) {
1177 if (args[(*params_here).first] != (*params_here).second) {
1178 if (!first) pageparams += ",";
1179 first = false;
1180 pageparams += (*params_here).first;
1181 pageparams += "=";
1182 pageparams += args[(*params_here).first];
1183 }
1184
1185 params_here++;
1186 }
1187
1188
1189 // open the page
1190 disp.openpage(pageparams, configinfo.macroprecedence);
1191
1192
1193 // define external macros for each action
1194 actionptrmap::iterator actionhere = actions.begin ();
1195 actionptrmap::iterator actionend = actions.end ();
1196
1197 while (actionhere != actionend) {
1198 assert ((*actionhere).second.a != NULL);
1199 if ((*actionhere).second.a != NULL)
1200 (*actionhere).second.a->define_external_macros (disp, args, &protocols, logout);
1201 actionhere++;
1202 }
1203
1204 // define internal macros for the current action
1205 a->define_internal_macros (disp, args, &protocols, logout);
1206
1207 // define general macros. the defining of general macros is done here so that
1208 // the last possible version of the cgi arguments are used
1209 define_general_macros (args, outconvert, logout);
1210}
1211
1212void receptionist::define_general_macros (cgiargsclass &args, outconvertclass &/*outconvert*/,
1213 ostream &logout) {
1214
1215 text_t &collection = args["c"];
1216
1217 disp.setmacro ("gsdlhome", "Global", configinfo.gsdlhome);
1218 disp.setmacro ("gwcgi", "Global", configinfo.gwcgi);
1219 disp.setmacro ("httpimg", "Global", configinfo.httpimg);
1220 disp.setmacro ("httpprefix", "Global", configinfo.httpprefix);
1221 disp.setmacro ("compressedoptions", "Global", get_compressed_arg(args, logout));
1222
1223 // set _cgiargX_ macros for each cgi argument
1224 cgiargsclass::const_iterator argshere = args.begin();
1225 cgiargsclass::const_iterator argsend = args.end();
1226 while (argshere != argsend) {
1227 if (((*argshere).first == "q") ||
1228 ((*argshere).first == "qa") ||
1229 ((*argshere).first == "qtt") ||
1230 ((*argshere).first == "qty") ||
1231 ((*argshere).first == "qp") ||
1232 ((*argshere).first == "qpl") ||
1233 ((*argshere).first == "qr") ||
1234 ((*argshere).first == "q2"))
1235 // need to escape special characters from query string
1236 disp.setmacro ("cgiarg" + (*argshere).first,
1237 "Global", html_safe((*argshere).second.value));
1238 else
1239 disp.setmacro ("cgiarg" + (*argshere).first, "Global", (*argshere).second.value);
1240 argshere ++;
1241 }
1242
1243 // set collection specific macros
1244 if (!collection.empty()) {
1245 recptproto *collectproto = protocols.getrecptproto (collection, logout);
1246 if (collectproto != NULL) {
1247 FilterResponse_t response;
1248 text_tset metadata;
1249 get_info ("collection", collection, metadata, false,
1250 collectproto, response, logout);
1251
1252 if (!response.docInfo[0].metadata.empty()) {
1253 MetadataInfo_tmap::const_iterator here = response.docInfo[0].metadata.begin();
1254 MetadataInfo_tmap::const_iterator end = response.docInfo[0].metadata.end();
1255 while (here != end) {
1256 if (((*here).first != "haschildren") && ((*here).first != "hasnext") &&
1257 ((*here).first != "hasprevious")) {
1258 disp.setmacro ((*here).first, "Global", (*here).second.values[0]);
1259 }
1260 here ++;
1261 }
1262 }
1263 }
1264 }
1265}
Note: See TracBrowser for help on using the repository browser.