source: trunk/gsdl/src/recpt/cgiwrapper.cpp@ 1308

Last change on this file since 1308 was 1308, checked in by sjboddie, 24 years ago

fixed a minor bug that was causing problems for end-user collection
building

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.0 KB
Line 
1/**********************************************************************
2 *
3 * cgiwrapper.cpp -- output pages using the cgi protocol
4 * Copyright (C) 1999 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "gsdlconf.h"
27#include "cgiwrapper.h"
28#include "recptconfig.h"
29#include "fileutil.h"
30#include <stdlib.h>
31#include <assert.h>
32
33#if defined(GSDL_USE_OBJECTSPACE)
34# include <ospace/std/iostream>
35# include <ospace/std/fstream>
36#elif defined(GSDL_USE_IOS_H)
37# include <iostream.h>
38# include <fstream.h>
39#else
40# include <iostream>
41# include <fstream>
42#endif
43
44#ifdef USE_FASTCGI
45#include "fcgiapp.h"
46#endif
47
48
49#ifdef USE_FASTCGI
50// used to output the text from receptionist
51class fcgistreambuf : public streambuf {
52public:
53 fcgistreambuf ();
54 int sync ();
55 int overflow (int ch);
56 int underflow () {return EOF;}
57
58 void fcgisbreset() {fcgx_stream = NULL; other_ostream = NULL;};
59 void set_fcgx_stream(FCGX_Stream *newone) {fcgx_stream=newone;};
60 void set_other_ostream(ostream *newone) {other_ostream=newone;};
61
62private:
63 FCGX_Stream *fcgx_stream;
64 ostream *other_ostream;
65};
66
67fcgistreambuf::fcgistreambuf() {
68 fcgisbreset();
69 if (base() == ebuf()) allocate();
70 setp (base(), ebuf());
71};
72
73int fcgistreambuf::sync () {
74 if ((fcgx_stream != NULL) &&
75 (FCGX_PutStr (pbase(), out_waiting(), fcgx_stream) < 0)) {
76 fcgx_stream = NULL;
77 }
78
79 if (other_ostream != NULL) {
80 char *thepbase=pbase();
81 for (int i=0;i<out_waiting();i++) (*other_ostream).put(thepbase[i]);
82 }
83
84 setp (pbase(), epptr());
85
86 return 0;
87}
88
89int fcgistreambuf::overflow (int ch) {
90 if (sync () == EOF) return EOF;
91 if (ch != EOF) sputc (ch);
92 return 0;
93}
94
95#endif
96
97static void format_error_string (text_t &errorpage, const text_t &errortext, bool debug) {
98
99 errorpage.clear();
100
101 if (debug) {
102 errorpage += "\n";
103 errorpage += "ERROR: " + errortext;
104 errorpage += "\n";
105
106 } else {
107
108 errorpage += "Content-type: text/html\n\n";
109
110 errorpage += "<html>\n";
111 errorpage += "<head>\n";
112 errorpage += "<title>Error</title>\n";
113 errorpage += "</head>\n";
114 errorpage += "<body>\n";
115 errorpage += "<h2>Oops!</h2>\n";
116 errorpage += errortext;
117 errorpage += "</body>\n";
118 errorpage += "</html>\n";
119 }
120}
121
122static void page_errorcollect (const text_t &gsdlhome, text_t &errorpage, bool debug) {
123
124 text_t collectdir = filename_cat (gsdlhome, "collect");
125
126 text_t errortext = "No valid collections were found: Check that your collect directory\n";
127 errortext += "(" + collectdir + ") is readable and contains at least one valid collection.\n";
128 errortext += "Note that modelcol is NOT a valid collection.\n";
129 errortext += "If the path to your collect directory is wrong edit the 'gsdlhome' field\n";
130 errortext += "in your gsdlsite.cfg configuration file.\n";
131
132 format_error_string (errorpage, errortext, debug);
133}
134
135static void page_errorsitecfg (text_t &errorpage, bool debug, int mode) {
136
137 text_t errortext;
138
139 if (mode == 0) {
140 errortext += "The gsdlsite.cfg configuration file could not be found. This\n";
141 errortext += "file should contain configuration information relating to this\n";
142 errortext += "site's setup.\n";
143
144 } else if (mode == 1) {
145 errortext += "The gsdlsite.cfg configuration file does not contain a valid\n";
146 errortext += "gsdlhome entry.\n";
147 }
148
149 if (debug) {
150 errortext += "gsdlsite.cfg should reside in the directory from which the\n";
151 errortext += "library executable was run.\n";
152 } else {
153 errortext += "gsdlsite.cfg should reside in the same directory as the library\n";
154 errortext += "executable file.\n";
155 }
156
157 format_error_string (errorpage, errortext, debug);
158}
159
160
161static void page_errormaincfg (const text_t &gsdlhome, const text_t &collection,
162 bool debug, text_t &errorpage) {
163
164 text_t errortext;
165
166 if (collection.empty()) {
167 text_t main_cfg_file = filename_cat (gsdlhome, "etc", "main.cfg");
168 errortext += "The main.cfg configuration file could not be found. This file\n";
169 errortext += "should contain configuration information relating to the\n";
170 errortext += "setup of the interface. As this receptionist is not being run\n";
171 errortext += "in collection specific mode the file should reside at\n";
172 errortext += main_cfg_file + ".\n";
173 } else {
174 text_t collect_cfg_file = filename_cat (gsdlhome, "collect", collection, "etc", "collect.cfg");
175 text_t main_collect_cfg_file = filename_cat (gsdlhome, "etc", "collect.cfg");
176 text_t main_cfg_file = filename_cat (gsdlhome, "etc", "main.cfg");
177 errortext += "Either the collect.cfg or main.cfg configuration file could\n";
178 errortext += "not be found. This file should contain configuration information\n";
179 errortext += "relating to the setup of the interface. As this receptionist is\n";
180 errortext += "being run in collection specific mode the file should reside\n";
181 errortext += "at either " + collect_cfg_file + ",\n";
182 errortext += main_collect_cfg_file + " or " + main_cfg_file + ".\n";
183 }
184
185 format_error_string (errorpage, errortext, debug);
186}
187
188
189static void page_errorinit (const text_t &gsdlhome, bool debug, text_t &errorpage) {
190
191 text_t errortext = "An error occurred during the initialisation of the Greenstone Digital\n";
192 errortext += "Library software. It is likely that the software has not been setup\n";
193 errortext += "correctly.\n";
194
195 text_t init_file = filename_cat (gsdlhome, "etc", "initout.txt");
196 char *ifile = init_file.getcstr();
197 ifstream initin (ifile);
198 delete ifile;
199 if (initin) {
200 errortext += "The initialisation error log, " + init_file + ", contains the\n";
201 errortext += "following information:\n\n";
202 if (!debug) errortext += "<pre>\n";
203
204 char c;
205 initin.get(c);
206 while (!initin.eof ()) {
207 errortext.push_back(c);
208 initin.get(c);
209 }
210
211 if (!debug) errortext += "</pre>\n";
212
213 initin.close();
214
215 } else {
216 errortext += "Please consult " + init_file + " for more information.\n";
217 }
218
219 format_error_string (errorpage, errortext, debug);
220}
221
222static void page_errorparseargs (const text_t &gsdlhome, bool debug, text_t &errorpage) {
223
224 text_t errortext = "An error occurred during the parsing of the cgi arguments.\n";
225
226 text_t error_file = filename_cat (gsdlhome, "etc", "errout.txt");
227 char *efile = error_file.getcstr();
228 ifstream errin (efile);
229 delete efile;
230 if (errin) {
231 errortext += "The error log, " + error_file + ", contains the\n";
232 errortext += "following information:\n\n";
233 if (!debug) errortext += "<pre>\n";
234
235 char c;
236 errin.get(c);
237 while (!errin.eof ()) {
238 errortext.push_back(c);
239 errin.get(c);
240 }
241 if (!debug) errortext += "</pre>\n";
242 errin.close();
243
244 } else {
245 errortext += "Please consult " + error_file + " for more information.\n";
246 }
247
248 format_error_string (errorpage, errortext, debug);
249}
250
251static void page_errorcgipage (const text_t &gsdlhome, bool debug, text_t &errorpage) {
252
253 text_t errortext = "An error occurred during the construction of the cgi page.\n";
254
255 text_t error_file = filename_cat (gsdlhome, "etc", "errout.txt");
256 char *efile = error_file.getcstr();
257 ifstream errin (efile);
258 delete efile;
259 if (errin) {
260 errortext += "The error log, " + error_file + ", contains the\n";
261 errortext += "following information:\n\n";
262 if (!debug) errortext += "<pre>\n";
263
264 char c;
265 errin.get(c);
266 while (!errin.eof ()) {
267 errortext.push_back(c);
268 errin.get(c);
269 }
270 if (!debug) errortext += "</pre>\n";
271 errin.close();
272
273 } else {
274 errortext += "Please consult " + error_file + " for more information.\n";
275 }
276
277 format_error_string (errorpage, errortext, debug);
278}
279
280static void print_debug_info (receptionist &recpt) {
281
282 outconvertclass text_t2ascii;
283 recptconf configinfo = recpt.get_configinfo ();
284 text_t etc_dir = filename_cat (configinfo.gsdlhome, "etc");
285
286 cout << "\n";
287 cout << text_t2ascii
288 << "------------------------------------------------------------\n"
289 << "Configuration and initialization completed successfully.\n"
290 << " Note that more debug information may be available in the\n"
291 << " initialization and error logs initout.txt and errout.txt\n"
292 << " in " << etc_dir << ".\n"
293 << "------------------------------------------------------------\n\n";
294
295 bool colspec = false;
296 if (configinfo.collection.empty()) {
297 cout << "Receptionist is running in \"general\" (i.e. not \"collection\n"
298 << "specific\") mode.\n";
299 } else {
300 cout << text_t2ascii
301 << "Receptionist is running in \"collection specific\" mode.\n"
302 << " collection=" << configinfo.collection << "\n"
303 << " collection directory=" << configinfo.collectdir << "\n";
304 colspec = true;
305 }
306
307 cout << text_t2ascii << "gsdlhome=" << configinfo.gsdlhome << "\n";
308 if (!configinfo.gdbmhome.empty())
309 cout << text_t2ascii << "gdbmhome=" << configinfo.gdbmhome << "\n";
310 cout << text_t2ascii << "httpprefix=" << configinfo.httpprefix << "\n";
311 cout << text_t2ascii << "httpimg=" << configinfo.httpimg << "\n";
312 cout << text_t2ascii << "gwcgi=" << configinfo.gwcgi << "\n"
313 << " Note that unless gwcgi has been set from a configuration\n"
314 << " file it is dependent on environment variables set by your\n"
315 << " webserver. Therefore it may not have the same value when run\n"
316 << " from the command line as it would be when run from your\n"
317 << " web server.\n";
318 if (configinfo.usecookies)
319 cout << "cookies are enabled\n";
320 else
321 cout << "cookies are disabled\n";
322 if (configinfo.logcgiargs)
323 cout << "logging is enabled\n";
324 else
325 cout << "logging is disabled\n";
326 cout << "------------------------------------------------------------\n\n";
327
328 text_tset::const_iterator this_mfile = configinfo.macrofiles.begin();
329 text_tset::const_iterator end_mfile = configinfo.macrofiles.end();
330 cout << "Macro Files:\n"
331 << "------------\n";
332 text_t mfile;
333 bool found;
334 while (this_mfile != end_mfile) {
335 cout << text_t2ascii << *this_mfile;
336 int spaces = (22 - (*this_mfile).size());
337 if (spaces < 2) spaces = 2;
338 text_t outspaces;
339 for (int i = 0; i < spaces; i++) outspaces.push_back (' ');
340 cout << text_t2ascii << outspaces;
341
342 found = false;
343 if (colspec) {
344 // collection specific - try collectdir/macros first
345 mfile = filename_cat (configinfo.collectdir, "macros", *this_mfile);
346 if (file_exists (mfile)) {
347 cout << text_t2ascii << "found (" << mfile << ")\n";
348 found = true;
349 }
350 }
351
352 if (!found) {
353 // try main macro directory
354 mfile = filename_cat (configinfo.gsdlhome, "macros", *this_mfile);
355 if (file_exists (mfile)) {
356 cout << text_t2ascii << "found (" << mfile << ")\n";
357 found = true;
358 }
359 }
360
361 if (!found)
362 cout << text_t2ascii << "NOT FOUND\n";
363
364 this_mfile ++;
365 }
366
367 cout << "------------------------------------------------------------\n\n"
368 << "Collections:\n"
369 << "------------\n"
370 << " Note that collections will only appear as \"running\" if\n"
371 << " their build.cfg files exist, are readable, contain a valid\n"
372 << " builddate field (i.e. > 0), and are in the collection's\n"
373 << " index directory (i.e. NOT the building directory)\n\n";
374
375 recptprotolistclass *protos = recpt.get_recptprotolist_ptr();
376 recptprotolistclass::iterator rprotolist_here = protos->begin();
377 recptprotolistclass::iterator rprotolist_end = protos->end();
378 bool found_valid_col = false;
379 while (rprotolist_here != rprotolist_end) {
380 if ((*rprotolist_here).p != NULL) {
381
382 text_tarray collist;
383 comerror_t err;
384 (*rprotolist_here).p->get_collection_list (collist, err, cerr);
385 if (err == noError) {
386 text_tarray::iterator collist_here = collist.begin();
387 text_tarray::iterator collist_end = collist.end();
388
389 while (collist_here != collist_end) {
390
391 cout << text_t2ascii << *collist_here;
392
393 int spaces = (22 - (*collist_here).size());
394 if (spaces < 2) spaces = 2;
395 text_t outspaces;
396 for (int i = 0; i < spaces; i++) outspaces.push_back (' ');
397 cout << text_t2ascii << outspaces;
398
399 ColInfoResponse_t *cinfo = recpt.get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, cerr);
400 if (cinfo != NULL) {
401 if (cinfo->isPublic) cout << "public ";
402 else cout << "private";
403
404 if (cinfo->buildDate > 0) {
405 cout << " running ";
406 found_valid_col = true;
407 } else {
408 cout << " not running";
409 }
410 }
411
412 cout << "\n";
413
414 collist_here ++;
415 }
416 }
417 }
418 rprotolist_here ++;
419 }
420
421 if (!found_valid_col) {
422 cout << "WARNING: No \"running\" collections were found. You need to\n";
423 cout << " build one of the above collections\n";
424 }
425
426 cout << "\n------------------------------------------------------------\n";
427 cout << "------------------------------------------------------------\n\n";
428 cout << "receptionist running in command line debug mode\n";
429 cout << "enter cgi arguments as name=value pairs (e.g. 'a=p&p=home'):\n";
430
431}
432
433// cgiwrapper does everything necessary to output a page
434// using the cgi protocol. If this is being run for a particular
435// collection then "collection" should be set, otherwise it
436// should equal "".
437void cgiwrapper (receptionist &recpt, text_t collection) {
438
439 int numrequests = 0;
440 bool debug = false;
441 recptconf configinfo = recpt.get_configinfo ();
442
443 // find out whether this is being run as a cgi-script
444 // or a fastcgi script
445#ifdef USE_FASTCGI
446 fcgistreambuf outbuf;
447 int isfastcgi = !FCGX_IsCGI();
448 FCGX_Stream *fcgiin, *fcgiout, *fcgierr;
449 FCGX_ParamArray fcgienvp;
450#else
451 int isfastcgi = 0;
452#endif
453
454 // get the query string if it is not being run as a fastcgi
455 // script
456 text_t argstr = "";
457 cgiargsclass args;
458 char *aURIStr;
459 if (!isfastcgi) {
460 char *request_method_str = getenv("REQUEST_METHOD");
461 char *content_length_str = getenv("CONTENT_LENGTH");
462 if (request_method_str != NULL && strcmp(request_method_str, "POST") == 0 &&
463 content_length_str != NULL) {
464 // POST form data
465 int content_length = text_t(content_length_str).getint();
466 if (content_length > 0) {
467 char c;
468 do {
469 cin.get(c);
470 if (cin.eof()) break;
471 argstr.push_back (c);
472 content_length--;
473 } while (content_length > 0);
474 }
475
476 } else {
477 aURIStr = getenv("QUERY_STRING");
478 if ((request_method_str != NULL && strcmp(request_method_str, "GET") == 0)
479 || aURIStr != NULL) {
480 // GET form data
481 if (aURIStr != NULL) argstr = aURIStr;
482 } else {
483 // debugging from command line
484 debug = true;
485 }
486 }
487 }
488
489 if (debug) {
490 cout << "Configuring Greenstone...\n";
491 cout << flush;
492 }
493
494 // init stuff - we can't output error pages directly with
495 // fastcgi so the pages are stored until we can output them
496 text_t errorpage;
497 outconvertclass text_t2ascii;
498
499 // set defaults
500 int maxrequests = 10000;
501 recpt.configure ("collection", collection);
502 recpt.configure ("httpimg", "/gsdl/images");
503 char *script_name = getenv("SCRIPT_NAME");
504 if (script_name != NULL) recpt.configure("gwcgi", script_name);
505 else recpt.configure("gwcgi", "/cgi-bin/gw");
506
507 // read in the configuration files.
508 text_t gsdlhome;
509 if (!site_cfg_read (recpt, gsdlhome, maxrequests)) {
510 // couldn't find the site configuration file
511 page_errorsitecfg (errorpage, debug, 0);
512 } else if (gsdlhome.empty()) {
513 // no gsdlhome in gsdlsite.cfg
514 page_errorsitecfg (errorpage, debug, 1);
515 } else if (!main_cfg_read (recpt, gsdlhome, collection)) {
516 // couldn't find the main configuration file
517 page_errormaincfg (gsdlhome, collection, debug, errorpage);
518 } else if (configinfo.collectinfo.empty()) {
519 // don't have any collections
520 page_errorcollect (gsdlhome, errorpage, debug);
521 }
522
523 if (errorpage.empty()) {
524
525 // initialise the library software
526 if (debug) {
527 cout << "Initializing...\n";
528 cout << flush;
529 }
530
531 text_t init_file = filename_cat (gsdlhome, "etc", "initout.txt");
532 char *iout = init_file.getcstr();
533 ofstream initout (iout);
534 delete iout;
535 if (!recpt.init(initout)) {
536 // an error occurred during the initialisation
537 initout.close();
538 page_errorinit(gsdlhome, debug, errorpage);
539 }
540 initout.close();
541 }
542
543 if (debug && errorpage.empty()) {
544 // get query string from command line
545 print_debug_info (recpt);
546 char cinURIStr[1024];
547 cin.get(cinURIStr, 1024);
548 argstr = cinURIStr;
549 }
550
551 // cgi scripts only deal with one request
552 if (!isfastcgi) maxrequests = 1;
553
554 // Page-request loop. If this is not being run as a fastcgi
555 // process then only one request will be processed and then
556 // the process will exit.
557 while (numrequests < maxrequests) {
558#ifdef USE_FASTCGI
559 if (isfastcgi) {
560 if (FCGX_Accept(&fcgiin, &fcgiout, &fcgierr, &fcgienvp) < 0) break;
561
562 char *request_method_str = FCGX_GetParam ("REQUEST_METHOD", fcgienvp);
563 char *content_length_str = FCGX_GetParam ("CONTENT_LENGTH", fcgienvp);
564
565 if (request_method_str != NULL && strcmp(request_method_str, "POST") == 0 &&
566 content_length_str != NULL) {
567 // POST form data
568 int content_length = text_t(content_length_str).getint();
569 if (content_length > 0) {
570 argstr.clear();
571 int c;
572 do {
573 c = FCGX_GetChar (fcgiin);
574 if (c < 0) break;
575 argstr.push_back (c);
576 content_length--;
577 } while (content_length > 0);
578 }
579
580 } else {
581 // GET form data
582 aURIStr = FCGX_GetParam("QUERY_STRING", fcgienvp);
583 if (aURIStr != NULL) argstr = aURIStr;
584 else argstr = "";
585 }
586 }
587#endif
588
589 // get output streams ready
590#ifdef USE_FASTCGI
591 outbuf.fcgisbreset ();
592 if (isfastcgi) outbuf.set_fcgx_stream (fcgiout);
593 else outbuf.set_other_ostream (&cout);
594 ostream pageout (&outbuf);
595#else
596#define pageout cout
597#endif
598
599 // if using fastcgi we'll load environment into a map,
600 // otherwise simply pass empty map (can't get environment
601 // variables using getenv() while using FCGX versions
602 // of fastcgi - at least I can't ;-) - Stefan)
603 text_tmap fastcgienv;
604#ifdef USE_FASTCGI
605 if (isfastcgi) {
606 for(; *fcgienvp != NULL; fcgienvp++) {
607 text_t fvalue = *fcgienvp;
608 text_t::const_iterator begin = fvalue.begin();
609 text_t::const_iterator end = fvalue.end();
610 text_t::const_iterator equals_sign = findchar (begin, end, '=');
611 if (equals_sign != end)
612 fastcgienv[substr(begin, equals_sign)] = substr(equals_sign+1, end);
613 }
614 }
615#endif
616
617 // temporarily need to configure gwcgi here when using fastcgi as I can't
618 // get it to pass the SCRIPT_NAME environment variable to the initial
619 // environment (if anyone can work out how to do this using the apache
620 // server, let me know). Note that this overrides the gwcgi field in
621 // site.cfg (which it shouldn't do) but I can't at present set gwcgi
622 // from site.cfg as I have old receptionists laying around that wouldn't
623 // appreciate it. The following 5 lines of code should be deleted once
624 // I either a: get the server to pass SCRIPT_NAME at initialization
625 // time or b: convert all the collections using old receptionists over
626 // to this version and uncomment gwcgi in the site.cfg file -- Stefan.
627#ifdef USE_FASTCGI
628 if (isfastcgi) {
629 recpt.configure("gwcgi", fastcgienv["SCRIPT_NAME"]);
630 }
631#endif
632
633
634 if (errorpage.empty()) {
635 text_t error_file = filename_cat (gsdlhome, "etc", "errout.txt");
636 char *eout = error_file.getcstr();
637 ofstream errout (eout, ios::app);
638 delete eout;
639 // note that the following line appears to cause a runtime
640 // error using debug versions of VC++ 6.0 (on windows)
641 cerr = errout;
642
643 // parse the cgi arguments and produce the resulting page if there
644 // has been no errors so far
645 if (!recpt.parse_cgi_args (argstr, args, errout, fastcgienv)) {
646 errout.close ();
647 page_errorparseargs(gsdlhome, debug, errorpage);
648 } else {
649 if (!recpt.produce_cgi_page (args, pageout, errout, fastcgienv)) {
650 errout.close ();
651 page_errorcgipage(gsdlhome, debug, errorpage);
652 }
653 recpt.log_cgi_args (args, errout, fastcgienv);
654 errout.close ();
655 }
656 }
657 // there was an error, output the error page
658 if (!errorpage.empty()) {
659 pageout << text_t2ascii << errorpage;
660 errorpage.clear();
661 numrequests = maxrequests; // make this the last page
662 }
663 pageout << flush;
664
665 // finish with the output streams
666#ifdef USE_FASTCGI
667 if (isfastcgi) FCGX_Finish();
668#endif
669
670 numrequests++;
671 }
672
673 return;
674}
Note: See TracBrowser for help on using the repository browser.