root/main/trunk/greenstone2/runtime-src/src/recpt/cgiwrapper.cpp @ 23389

Revision 23389, 31.5 KB (checked in by mdewsnip, 10 years ago)

Commented out the code (3 occurrences) from cgiwrapper that reads and returns the error.txt file content, as I think it's a really bad idea. The error.txt file may be very large, causing out of memory problems and even crashing the machine in extreme cases where multiple processes are causing this type of error (e.g. automated processes that try to "hack" the Greenstone site by supplying values such as site URLs for the CGI arguments -- this actually happens). Also, the error.txt may contain information that shouldn't be exposed (such as usage or query information). Maybe this should be configurable through a main.cfg configuration setting, but I don't think it's worth it -- the only people who should need the contents of this file should have access to it through the file system. I think you can also view the contents of this file through the statusaction if you have a suitable login.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1/**********************************************************************
2 *
3 * cgiwrapper.cpp -- output pages using the cgi protocol
4 * Copyright (C) 1999  The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include <stdio.h>
27#include <cstring>
28#ifdef __WIN32__
29#include <fcntl.h>
30#endif
31
32#include "gsdlconf.h"
33#include "cgiwrapper.h"
34#include "gsdlsitecfg.h"
35#include "maincfg.h"
36#include "fileutil.h"
37#include "cgiutils.h"
38#include <stdlib.h>
39#include <assert.h>
40
41#if defined(GSDL_USE_OBJECTSPACE)
42#  include <ospace/std/iostream>
43#  include <ospace/std/fstream>
44#elif defined(GSDL_USE_IOS_H)
45#  include <iostream.h>
46#  include <fstream.h>
47#else
48#  include <iostream>
49#  include <fstream>
50#endif
51
52#ifdef USE_FASTCGI
53#include "fcgiapp.h"
54#endif
55
56#include "authenaction.h"
57#include "browseaction.h"
58#include "collectoraction.h"
59#include "depositoraction.h"
60#include "documentaction.h"
61#include "dynamicclassifieraction.h"
62#include "extlinkaction.h"
63#include "pageaction.h"
64#ifdef ENABLE_MGPP
65#include "phindaction.h"
66#endif
67#include "pingaction.h"
68#include "queryaction.h"
69
70#if defined(USE_SQLITE)
71#include "sqlqueryaction.h"
72#endif
73
74#include "tipaction.h"
75#include "statusaction.h"
76#include "usersaction.h"
77#include "configaction.h"
78
79#include "vlistbrowserclass.h"
80#include "hlistbrowserclass.h"
81#include "datelistbrowserclass.h"
82#include "invbrowserclass.h"
83#include "pagedbrowserclass.h"
84#include "htmlbrowserclass.h"
85#include "phindbrowserclass.h"
86
87
88#ifdef USE_FASTCGI
89// used to output the text from receptionist
90class fcgistreambuf : public streambuf {
91public:
92  fcgistreambuf ();
93  int sync ();
94  int overflow (int ch);
95  int underflow () {return EOF;}
96 
97  void fcgisbreset() {fcgx_stream = NULL; other_ostream = NULL;};
98  void set_fcgx_stream(FCGX_Stream *newone) {fcgx_stream=newone;};
99  void set_other_ostream(ostream *newone) {other_ostream=newone;};
100 
101private:
102  FCGX_Stream *fcgx_stream;
103  ostream *other_ostream;
104};
105
106fcgistreambuf::fcgistreambuf() {
107  fcgisbreset();
108  if (base() == ebuf()) allocate();
109  setp (base(), ebuf());
110};
111
112int fcgistreambuf::sync () {
113  if ((fcgx_stream != NULL) &&
114      (FCGX_PutStr (pbase(), out_waiting(), fcgx_stream) < 0)) {
115    fcgx_stream = NULL;
116  }
117
118  if (other_ostream != NULL) {
119    char *thepbase=pbase();
120    for (int i=0;i<out_waiting();++i) (*other_ostream).put(thepbase[i]);
121  }
122 
123  setp (pbase(), epptr());
124 
125  return 0;
126}
127
128int fcgistreambuf::overflow (int ch) {
129  if (sync () == EOF) return EOF;
130  if (ch != EOF) sputc (ch);
131  return 0;
132}
133
134#endif
135
136static void format_error_string (text_t &errorpage, const text_t &errortext, bool debug) {
137
138  errorpage.clear();
139
140  if (debug) {
141    errorpage += "\n";
142    errorpage += "ERROR: " + errortext;
143    errorpage += "\n";
144   
145  } else {
146
147    errorpage += "Content-type: text/html\n\n";
148   
149    errorpage += "<html>\n";
150    errorpage += "<head>\n";
151    errorpage += "<title>Error</title>\n";
152    errorpage += "</head>\n";
153    errorpage += "<body>\n";
154    errorpage += "<h2>Oops!</h2>\n";
155    errorpage += errortext;
156    errorpage += "</body>\n";
157    errorpage += "</html>\n";
158  }
159}
160
161static void page_errorcollect (const text_t &gsdlhome, text_t &errorpage, bool debug) {
162
163  text_t collecthome = filename_cat (gsdlhome, "collect");
164
165  text_t errortext = "No valid collections were found: Check that your collect directory\n";
166  errortext += "(" + collecthome + ") is readable and contains at least one valid collection.\n";
167  errortext += "Note that modelcol is NOT a valid collection.\n";
168  errortext += "If the path to your collect directory is wrong edit the 'gsdlhome' field\n";
169  errortext += "in your gsdlsite.cfg configuration file.\n";
170
171  format_error_string (errorpage, errortext, debug);
172}
173
174static void page_errorsitecfg (text_t &errorpage, bool debug, int mode) {
175
176  text_t errortext;
177
178  if (mode == 0) {
179    errortext += "The gsdlsite.cfg configuration file could not be found. This\n";
180    errortext += "file should contain configuration information relating to this\n";
181    errortext += "site's setup.\n";
182
183  } else if (mode == 1) {
184    errortext += "The gsdlsite.cfg configuration file does not contain a valid\n";
185    errortext += "gsdlhome entry.\n";
186  }
187
188  if (debug) {
189    errortext += "gsdlsite.cfg should reside in the directory from which the\n";
190    errortext += "library executable was run.\n";
191  } else {
192    errortext += "gsdlsite.cfg should reside in the same directory as the library\n";
193    errortext += "executable file.\n";
194  }
195
196  format_error_string (errorpage, errortext, debug);
197}
198
199
200static void page_errormaincfg (const text_t &gsdlhome, const text_t &collection,
201                   bool debug, text_t &errorpage) {
202
203  text_t errortext;
204
205  if (collection.empty()) {
206    text_t main_cfg_file = filename_cat (gsdlhome, "etc", "main.cfg");
207    errortext += "The main.cfg configuration file could not be found. This file\n";
208    errortext += "should contain configuration information relating to the\n";
209    errortext += "setup of the interface. As this receptionist is not being run\n";
210    errortext += "in collection specific mode the file should reside at\n";
211    errortext += main_cfg_file + ".\n";
212  } else {
213    text_t collect_cfg_file = filename_cat (gsdlhome, "collect", collection, "etc", "collect.cfg");
214    text_t main_collect_cfg_file = filename_cat (gsdlhome, "etc", "collect.cfg");
215    text_t main_cfg_file = filename_cat (gsdlhome, "etc", "main.cfg");
216    errortext += "Either the collect.cfg or main.cfg configuration file could\n";
217    errortext += "not be found. This file should contain configuration information\n";
218    errortext += "relating to the setup of the interface. As this receptionist is\n";
219    errortext += "being run in collection specific mode the file should reside\n";
220    errortext += "at either " + collect_cfg_file + ",\n";
221    errortext += main_collect_cfg_file + " or " + main_cfg_file + ".\n";
222  }
223
224  format_error_string (errorpage, errortext, debug);
225}
226
227
228static void page_errorinit (const text_t &gsdlhome, bool debug, text_t &errorpage) {
229
230  text_t errortext = "An error occurred during the initialisation of the Greenstone Digital\n";
231  errortext += "Library software. It is likely that the software has not been setup\n";
232  errortext += "correctly.\n";
233
234  text_t error_file = filename_cat (gsdlhome, "etc", "error.txt");
235  // This is all commented out because I think it's a really bad idea
236  // The error.txt file may be very large, causing out of memory problems and even crashing the machine in extreme
237  //   cases where multiple processes are causing this type of error (e.g. automated processes that try to "hack"
238  //   the Greenstone site by supplying values such as site URLs for the CGI arguments -- this has happened)
239  // Also, the error.txt may contain information that shouldn't be exposed (such as usage or query information)
240  // Maybe this should be configurable through a main.cfg configuration setting, but I don't think it's worth it
241  // The only people who should need the contents of this file should have access to it through the file system
242  // I think you can also view the contents of this file through the statusaction if you have a suitable login
243//   char *efile = error_file.getcstr();
244//   ifstream errin (efile);
245//   delete []efile;
246//   if (errin) {
247//     errortext += "The error log, " + error_file + ", contains the\n";
248//     errortext += "following information:\n\n";
249//     if (!debug) errortext += "<pre>\n";
250
251//     char c;
252//     errin.get(c);
253//     while (!errin.eof ()) {
254//       errortext.push_back(c);
255//       errin.get(c);
256//     }
257   
258//     if (!debug) errortext += "</pre>\n";
259
260//     errin.close();
261
262//   } else {
263    errortext += "Please consult " + error_file + " for more information.\n";
264//   }
265
266  format_error_string (errorpage, errortext, debug);
267}
268
269static void page_errorparseargs (const text_t &gsdlhome, bool debug, text_t &errorpage) {
270
271  text_t errortext = "An error occurred during the parsing of the cgi arguments.\n";
272
273  text_t error_file = filename_cat (gsdlhome, "etc", "error.txt");
274  // This is all commented out because I think it's a really bad idea
275  // The error.txt file may be very large, causing out of memory problems and even crashing the machine in extreme
276  //   cases where multiple processes are causing this type of error (e.g. automated processes that try to "hack"
277  //   the Greenstone site by supplying values such as site URLs for the CGI arguments -- this has happened)
278  // Also, the error.txt may contain information that shouldn't be exposed (such as usage or query information)
279  // Maybe this should be configurable through a main.cfg configuration setting, but I don't think it's worth it
280  // The only people who should need the contents of this file should have access to it through the file system
281  // I think you can also view the contents of this file through the statusaction if you have a suitable login
282//   char *efile = error_file.getcstr();
283//   ifstream errin (efile);
284//   delete []efile;
285//   if (errin) {
286//     errortext += "The error log, " + error_file + ", contains the\n";
287//     errortext += "following information:\n\n";
288//     if (!debug) errortext += "<pre>\n";
289
290//     char c;
291//     errin.get(c);
292//     while (!errin.eof ()) {
293//       errortext.push_back(c);
294//       errin.get(c);
295//     }
296//     if (!debug) errortext += "</pre>\n";
297//     errin.close();
298
299//   } else {
300    errortext += "Please consult " + error_file + " for more information.\n";
301//   }
302
303  format_error_string (errorpage, errortext, debug);
304}
305
306static void page_errorcgipage (const text_t &gsdlhome, bool debug, text_t &errorpage) {
307
308  text_t errortext = "An error occurred during the construction of the cgi page.\n";
309
310  text_t error_file = filename_cat (gsdlhome, "etc", "error.txt");
311  // This is all commented out because I think it's a really bad idea
312  // The error.txt file may be very large, causing out of memory problems and even crashing the machine in extreme
313  //   cases where multiple processes are causing this type of error (e.g. automated processes that try to "hack"
314  //   the Greenstone site by supplying values such as site URLs for the CGI arguments -- this has happened)
315  // Also, the error.txt may contain information that shouldn't be exposed (such as usage or query information)
316  // Maybe this should be configurable through a main.cfg configuration setting, but I don't think it's worth it
317  // The only people who should need the contents of this file should have access to it through the file system
318  // I think you can also view the contents of this file through the statusaction if you have a suitable login
319//   char *efile = error_file.getcstr();
320//   ifstream errin (efile);
321//   delete []efile;
322//   if (errin) {
323//     errortext += "The error log, " + error_file + ", contains the\n";
324//     errortext += "following information:\n\n";
325//     if (!debug) errortext += "<pre>\n";
326
327//     char c;
328//     errin.get(c);
329//     while (!errin.eof ()) {
330//       errortext.push_back(c);
331//       errin.get(c);
332//     }
333//     if (!debug) errortext += "</pre>\n";
334//     errin.close();
335
336//   } else {
337    errortext += "Please consult " + error_file + " for more information.\n";
338//   }
339
340  format_error_string (errorpage, errortext, debug);
341}
342
343static void print_debug_info (receptionist &recpt) {
344
345  outconvertclass text_t2ascii;
346  const recptconf &configinfo = recpt.get_configinfo ();
347  text_t etc_dir = filename_cat (configinfo.gsdlhome, "etc");
348
349  cout << "\n";
350  cout << text_t2ascii
351       << "------------------------------------------------------------\n"
352       << "Configuration and initialization completed successfully.\n"
353       << "  Note that more debug information may be available in the\n"
354       << "  initialization and error log error.txt in " << etc_dir << ".\n"
355       << "------------------------------------------------------------\n\n";
356
357  bool colspec = false;
358  if (configinfo.collection.empty()) {
359    cout << "Receptionist is running in \"general\" (i.e. not \"collection "
360     << "specific\") mode.\n";
361  } else {
362    cout << text_t2ascii
363     << "Receptionist is running in \"collection specific\" mode.\n"
364     << "  collection=" << configinfo.collection << "\n"
365     << "  collection directory=" << configinfo.collectdir << "\n";
366    colspec = true;
367  }
368 
369  cout << text_t2ascii << "  gsdlhome=" << configinfo.gsdlhome << "\n";
370  if (!configinfo.collecthome.empty())
371    cout << text_t2ascii << "  collecthome=" << configinfo.collecthome << "\n";
372  if (!configinfo.dbhome.empty())
373    cout << text_t2ascii << "  dbhome=" << configinfo.dbhome << "\n";
374  cout << text_t2ascii << "  httpprefix=" << configinfo.httpprefix << "\n";
375  cout << text_t2ascii << "  httpweb=" << configinfo.httpweb << "\n";
376  cout << text_t2ascii << "  gwcgi=" << configinfo.gwcgi << "\n\n"
377       << "  Note that unless gwcgi has been set from a configuration\n"
378       << "  file it is dependent on environment variables set by your\n"
379       << "  webserver. Therefore it may not have the same value when run\n"
380       << "  from the command line as it would be when run from your\n"
381       << "  web server.\n";
382  if (configinfo.usecookies)
383    cout << "cookies are enabled\n";
384  else
385    cout << "cookies are disabled\n";
386  if (configinfo.logcgiargs)
387    cout << "logging is enabled\n";
388  else
389    cout << "logging is disabled\n";
390  cout << "------------------------------------------------------------\n\n";
391
392  text_tset::const_iterator this_mfile = configinfo.macrofiles.begin();
393  text_tset::const_iterator end_mfile = configinfo.macrofiles.end();
394  cout << "Macro Files:\n"
395       << "------------\n";
396  text_t mfile;
397  bool found;
398  while (this_mfile != end_mfile) {
399    cout << text_t2ascii << *this_mfile;
400    int spaces = (22 - (*this_mfile).size());
401    if (spaces < 2) spaces = 2;
402    text_t outspaces;
403    for (int i = 0; i < spaces; ++i) outspaces.push_back (' ');
404    cout << text_t2ascii << outspaces;
405
406    found = false;
407    if (colspec) {
408      // collection specific - try collectdir/macros first
409      mfile = filename_cat (configinfo.collectdir, "macros", *this_mfile);
410      if (file_exists (mfile)) {
411    cout << text_t2ascii << "found (" << mfile << ")\n";
412    found = true;
413      }
414    }
415 
416    if (!found) {
417      // try main macro directory
418      mfile = filename_cat (configinfo.gsdlhome, "macros", *this_mfile);
419      if (file_exists (mfile)) {
420    cout << text_t2ascii << "found (" << mfile << ")\n";
421    found = true;
422      }
423    }
424
425    if (!found)
426      cout << text_t2ascii << "NOT FOUND\n";
427
428    ++this_mfile;
429  }
430
431  cout << "------------------------------------------------------------\n\n"
432       << "Collections:\n"
433       << "------------\n"
434       << "  Note that collections will only appear as \"running\" if\n"
435       << "  their build.cfg files exist, are readable, contain a valid\n"
436       << "  builddate field (i.e. > 0), and are in the collection's\n"
437       << "  index directory (i.e. NOT the building directory)\n\n";
438
439  recptprotolistclass *protos = recpt.get_recptprotolist_ptr();
440  recptprotolistclass::iterator rprotolist_here = protos->begin();
441  recptprotolistclass::iterator rprotolist_end = protos->end();
442
443  bool is_z3950 = false;
444  bool found_valid_col = false;
445
446
447  while (rprotolist_here != rprotolist_end) {
448    comerror_t err;   
449    if ((*rprotolist_here).p == NULL) continue;
450    else if (is_z3950==false &&
451         (*rprotolist_here).p->get_protocol_name(err) == "z3950proto") {
452      cout << "\nZ39.50 Servers:   (always public)\n"
453       << "---------------\n";
454      is_z3950=true;
455    }
456
457    text_tarray collist;
458    (*rprotolist_here).p->get_collection_list (collist, err, cerr);
459    if (err == noError) {
460      text_tarray::iterator collist_here = collist.begin();
461      text_tarray::iterator collist_end = collist.end();
462     
463      while (collist_here != collist_end) {
464   
465    cout << text_t2ascii << *collist_here;
466   
467    int spaces = (22 - (*collist_here).size());
468    if (spaces < 2) spaces = 2;
469    text_t outspaces;
470    for (int i = 0; i < spaces; ++i) outspaces.push_back (' ');
471    cout << text_t2ascii << outspaces;
472   
473      ColInfoResponse_t *cinfo = recpt.get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, cerr);
474      if (cinfo != NULL) {
475        if (cinfo->isPublic) cout << "public ";
476        else cout << "private";
477
478        if (cinfo->buildDate > 0) {
479          cout << "   running    ";
480          found_valid_col = true;
481        } else {
482          cout << "   not running";
483        }
484      }
485
486      cout << "\n";
487
488      ++collist_here;
489      }
490    }
491    is_z3950=false;
492    ++rprotolist_here;
493  } // end of while loop
494
495  if (!found_valid_col) {
496    cout << "WARNING: No \"running\" collections were found. You need to\n";
497    cout << "         build one of the above collections\n";
498  }
499
500  cout << "\n------------------------------------------------------------\n";
501  cout << "------------------------------------------------------------\n\n";
502  cout << "receptionist running in command line debug mode\n";
503  cout << "enter cgi arguments as name=value pairs (e.g. 'a=p&p=home'):\n";
504
505}
506
507
508
509
510void add_all_actions(receptionist& recpt, userdbclass* udb, keydbclass* kdb)
511{
512  // the list of actions.
513
514#ifdef GSDL_USE_TIP_ACTION
515  tipaction* atipaction = new tipaction();
516  recpt.add_action (atipaction);
517#endif
518
519#ifdef GSDL_USE_STATUS_ACTION
520  statusaction *astatusaction = new statusaction();
521  astatusaction->set_receptionist (&recpt);
522  recpt.add_action (astatusaction);
523#endif
524
525  pageaction *apageaction = new pageaction();
526  apageaction->set_receptionist (&recpt);
527  recpt.add_action (apageaction);
528
529#ifdef GSDL_USE_PING_ACTION
530  recpt.add_action (new pingaction());
531#endif
532
533  queryaction *aqueryaction = new queryaction();
534  aqueryaction->set_receptionist (&recpt);
535  recpt.add_action (aqueryaction);
536
537#if defined(USE_SQLITE)
538  sqlqueryaction *asqlqueryaction = new sqlqueryaction();
539  asqlqueryaction->set_receptionist (&recpt);
540  recpt.add_action (asqlqueryaction);
541#endif
542
543  documentaction *adocumentaction = new documentaction();
544  adocumentaction->set_receptionist (&recpt);
545  recpt.add_action (adocumentaction);
546
547#ifdef GSDL_USE_USERS_ACTION
548  usersaction *ausersaction = new usersaction();
549  ausersaction->set_userdb(udb);
550  recpt.add_action (ausersaction);
551#endif
552
553#ifdef GSDL_USE_EXTLINK_ACTION
554  extlinkaction *aextlinkaction = new extlinkaction();
555  aextlinkaction->set_receptionist(&recpt);
556  recpt.add_action (aextlinkaction);
557#endif
558       
559#ifdef GSDL_USE_AUTHEN_ACTION
560  authenaction *aauthenaction = new authenaction();
561  aauthenaction->set_userdb(udb);
562  aauthenaction->set_keydb(kdb);
563  aauthenaction->set_receptionist(&recpt);
564  recpt.add_action (aauthenaction);
565#endif
566
567#ifdef GSDL_USE_COLLECTOR_ACTION
568  collectoraction *acollectoraction = new collectoraction();
569  acollectoraction->set_receptionist (&recpt);
570  recpt.add_action(acollectoraction);
571#endif
572
573#ifdef GSDL_USE_DEPOSITOR_ACTION
574  depositoraction *adepositoraction = new depositoraction();
575  adepositoraction->set_receptionist (&recpt);
576  recpt.add_action(adepositoraction);
577#endif
578
579#ifdef GSDL_USE_BROWSE_ACTION
580  browseaction *abrowseaction = new browseaction();
581  abrowseaction->set_receptionist (&recpt);
582  recpt.add_action(abrowseaction);
583#endif
584
585#ifdef GSDL_USE_PHIND_ACTION
586  // Phind uses MPPP,do we also need to check if ENABLE_MGPP is set??
587  phindaction *aphindaction = new phindaction();
588  recpt.add_action(aphindaction);
589#endif
590
591#ifdef GSDL_USE_GTI_ACTION
592  gtiaction *agtiaction = new gtiaction();
593  agtiaction->set_receptionist(&recpt);
594  recpt.add_action(agtiaction);
595#endif
596
597  dynamicclassifieraction *adynamicclassifieraction = new dynamicclassifieraction();
598  adynamicclassifieraction->set_receptionist(&recpt);
599  recpt.add_action(adynamicclassifieraction); 
600
601#if defined(USE_MYSQL) || defined(USE_ACCESS)
602  orderaction *aorderaction = new orderaction();
603  aorderaction->set_receptionist(&recpt);
604  recpt.add_action(aorderaction);
605#endif
606
607  // action that allows collections to be added, released etc.  when server
608  // is persistent (e.g. fastcgi or when Greenstone is configured as an
609  // Apache module).  Presumably this includes Windows server.exe as well
610
611  // Want to always include it in list of actions even if compiling
612  // Greenstone to be used in a non-persistent way (e.g. library.cgi).
613  // This is so the e-variable that is formed is consistent between the
614  // persisent executable and the non-persistent executable
615  //
616
617  configaction *aconfigaction = new configaction();
618  aconfigaction->set_receptionist(&recpt);
619  recpt.add_action(aconfigaction);
620}
621
622
623
624void add_all_browsers(receptionist& recpt)
625{
626  // list of browsers
627  vlistbrowserclass *avlistbrowserclass = new vlistbrowserclass();
628  avlistbrowserclass->set_receptionist(&recpt);
629  recpt.add_browser (avlistbrowserclass);
630  recpt.setdefaultbrowser ("VList");
631
632  hlistbrowserclass *ahlistbrowserclass = new hlistbrowserclass();
633  ahlistbrowserclass->set_receptionist(&recpt);
634  recpt.add_browser (ahlistbrowserclass);
635
636#ifdef GSDL_USE_DATELIST_BROWSER
637  datelistbrowserclass *adatelistbrowserclass = new datelistbrowserclass();
638  recpt.add_browser (adatelistbrowserclass);
639#endif
640
641  invbrowserclass *ainvbrowserclass = new invbrowserclass();
642  recpt.add_browser (ainvbrowserclass);
643
644#ifdef GSDL_USE_PAGED_BROWSER
645  pagedbrowserclass *apagedbrowserclass = new pagedbrowserclass();
646  recpt.add_browser (apagedbrowserclass);
647#endif
648
649#ifdef GSDL_USE_HTML_BROWSER
650  htmlbrowserclass *ahtmlbrowserclass = new htmlbrowserclass();
651  recpt.add_browser (ahtmlbrowserclass);
652#endif
653
654#ifdef GSDL_USE_PHIND_BROWSER
655  phindbrowserclass *aphindbrowserclass = new phindbrowserclass();;
656  recpt.add_browser (aphindbrowserclass);
657#endif
658}
659
660
661// cgiwrapper does everything necessary to output a page
662// using the cgi protocol. If this is being run for a particular
663// collection then "collection" should be set, otherwise it
664// should equal "".
665void cgiwrapper (receptionist &recpt, text_t collection) {
666  int numrequests = 0;
667  bool debug = false;
668  const recptconf &configinfo = recpt.get_configinfo ();
669
670  // find out whether this is being run as a cgi-script
671  // or a fastcgi script
672#ifdef USE_FASTCGI
673  fcgistreambuf outbuf;
674  int isfastcgi = !FCGX_IsCGI();
675  FCGX_Stream *fcgiin, *fcgiout, *fcgierr;
676  FCGX_ParamArray fcgienvp;
677#else
678  int isfastcgi = 0;
679#endif
680
681  // we need gsdlhome to do fileupload stuff, so moved this configure stuff before the get argstr stuff
682  // init stuff - we can't output error pages directly with
683  // fastcgi so the pages are stored until we can output them
684  text_t errorpage;
685  outconvertclass text_t2ascii;
686
687  // set defaults
688  int maxrequests = 10000;
689  recpt.configure ("collection", collection);
690  char *script_name = getenv("SCRIPT_NAME");
691  if (script_name != NULL) recpt.configure("gwcgi", script_name);
692  else recpt.configure("gwcgi", "/gsdl");
693
694  // read in the configuration files.
695  text_t gsdlhome;
696  text_t collecthome;
697  configurator gsdlconfigurator(&recpt);
698  if (!site_cfg_read (gsdlconfigurator, gsdlhome, collecthome, maxrequests)) {
699    // couldn't find the site configuration file
700    page_errorsitecfg (errorpage, debug, 0);
701  } else if (gsdlhome.empty()) {
702    // no gsdlhome in gsdlsite.cfg
703    page_errorsitecfg (errorpage, debug, 1);
704  } else if (!directory_exists(gsdlhome)) {
705    // gsdlhome not a valid directory
706    page_errorsitecfg (errorpage, debug, 1);
707  } else if (!main_cfg_read (recpt, gsdlhome, collecthome, collection)) {
708    // couldn't find the main configuration file
709    page_errormaincfg (gsdlhome, collection, debug, errorpage);
710  } else  if (configinfo.collectinfo.empty() && false) { // commented out for corba
711    // don't have any collections
712    page_errorcollect (gsdlhome, errorpage, debug);
713  }
714
715  // set up the httpweb variable if it hasn't been defined yet
716  if (configinfo.httpweb.empty()) {
717    recpt.configure("httpweb", configinfo.httpprefix+"/web");
718  }
719 
720  // get the query string if it is not being run as a fastcgi
721  // script
722  text_t argstr = g_EmptyText;
723  fileupload_tmap fileuploads;
724  cgiargsclass args;
725  char *aURIStr;
726  if (!isfastcgi) {
727    char *request_method_str = getenv("REQUEST_METHOD");
728    char *content_length_str = getenv("CONTENT_LENGTH");
729    if (request_method_str != NULL && strcmp(request_method_str, "POST") == 0 &&
730    content_length_str != NULL)  {
731      // POST form data
732      long content_length = (content_length_str ? atoi(content_length_str) : 0);
733      if (content_length > 0) {
734#ifdef __WIN32__
735    // On Windows it is important that standard input be read in binary
736    // mode, otherwise end of line "<CR><LF>" is turned into <LF> only
737    // which breaks the MIME standard (and our parsing code!)
738
739    int result = _setmode( _fileno( stdin ), _O_BINARY );
740    if( result == -1 ) {
741      cerr << "Warning: Failed to set standard input to binary mode." << endl;
742      cerr << "         Parsing of multi-part MIME will most likely fail" << endl;
743    }
744#endif
745
746    long length = content_length;
747    unsigned char * buffer = new unsigned char[content_length];
748
749    int chars_read = fread(buffer,1,content_length,stdin);
750
751    if (chars_read != content_length) {
752      cerr << "Warning: mismatch between CONTENT_LENGTH and data read from standard in" << endl;
753    }
754
755    argstr.setcarr((char *)buffer, content_length);     
756
757    text_t content_type;
758    char *content_type_str = getenv("CONTENT_TYPE");
759    if (content_type_str) content_type = content_type_str;
760    argstr = parse_post_data(content_type, argstr, fileuploads, gsdlhome);
761      }
762    } else {
763      aURIStr = getenv("QUERY_STRING");
764      if ((request_method_str != NULL && strcmp(request_method_str, "GET") == 0)
765      || aURIStr != NULL) {
766    // GET form data
767    if (aURIStr != NULL) argstr = aURIStr;
768      } else {
769    // debugging from command line
770    debug = true;
771      }
772    }
773  }
774
775  if (debug) {
776    cout << "Configuring Greenstone...\n";
777    cout << flush;
778  }
779
780
781  if (errorpage.empty()) {
782
783    // initialise the library software
784    if (debug) {
785      cout << "Initializing...\n";
786      cout << flush;
787    }
788
789    text_t error_file = filename_cat (gsdlhome, "etc", "error.txt");
790    char *eout = error_file.getcstr();
791    ofstream errout (eout, ios::app);
792    delete []eout;
793    if (!recpt.init(errout)) {
794      // an error occurred during the initialisation
795      errout.close();
796      page_errorinit(gsdlhome, debug, errorpage);
797    }
798    errout.close();
799  }
800
801  if (debug && errorpage.empty()) {
802    // get query string from command line
803    print_debug_info (recpt);
804    char cinURIStr[1024];
805    cin.get(cinURIStr, 1024);
806    argstr = cinURIStr;
807  }
808
809  // cgi scripts only deal with one request
810  if (!isfastcgi) maxrequests = 1;
811
812  // Page-request loop. If this is not being run as a fastcgi
813  // process then only one request will be processed and then
814  // the process will exit.
815  while (numrequests < maxrequests) {
816#ifdef USE_FASTCGI
817    if (isfastcgi) {
818      if (FCGX_Accept(&fcgiin, &fcgiout, &fcgierr, &fcgienvp) < 0) break;
819
820      char *request_method_str = FCGX_GetParam ("REQUEST_METHOD", fcgienvp);
821      char *content_length_str = FCGX_GetParam ("CONTENT_LENGTH", fcgienvp);
822
823      if (request_method_str != NULL && strcmp(request_method_str, "POST") == 0 &&
824      content_length_str != NULL)  {
825    // POST form data
826    int content_length = text_t(content_length_str).getint();
827    if (content_length > 0) {
828      argstr.clear();
829      int c;
830      do {
831        c = FCGX_GetChar (fcgiin);
832        if (c < 0) break;
833        argstr.push_back (c);
834        --content_length;
835      } while (content_length > 0);
836    }
837
838      } else {
839    // GET form data
840    aURIStr = FCGX_GetParam("QUERY_STRING", fcgienvp);
841    if (aURIStr != NULL) argstr = aURIStr;
842    else argstr = g_EmptyText;
843      }
844    }
845#endif
846
847    // get output streams ready
848#ifdef USE_FASTCGI
849    outbuf.fcgisbreset ();
850    if (isfastcgi) outbuf.set_fcgx_stream (fcgiout);
851    else outbuf.set_other_ostream (&cout);
852    ostream pageout (&outbuf);
853#else
854#define pageout cout
855#endif
856
857    // if using fastcgi we'll load environment into a map,
858    // otherwise simply pass empty map (can't get environment
859    // variables using getenv() while using FCGX versions
860    // of fastcgi - at least I can't ;-) - Stefan)
861    text_tmap fastcgienv;
862#ifdef USE_FASTCGI
863    if (isfastcgi) {
864      for(; *fcgienvp != NULL; ++fcgienvp) {
865    text_t fvalue = *fcgienvp;
866    text_t::const_iterator begin = fvalue.begin();
867    text_t::const_iterator end = fvalue.end();
868    text_t::const_iterator equals_sign = findchar (begin, end, '=');
869    if (equals_sign != end)
870      fastcgienv[substr(begin, equals_sign)] = substr(equals_sign+1, end);
871      }
872    }
873#endif
874
875    // temporarily need to configure gwcgi here when using fastcgi as I can't
876    // get it to pass the SCRIPT_NAME environment variable to the initial
877    // environment (if anyone can work out how to do this using the apache
878    // server, let me know). Note that this overrides the gwcgi field in
879    // site.cfg (which it shouldn't do) but I can't at present set gwcgi
880    // from site.cfg as I have old receptionists lying around that wouldn't
881    // appreciate it. The following 5 lines of code should be deleted once
882    // I either a: get the server to pass SCRIPT_NAME at initialization
883    // time or b: convert all the collections using old receptionists over
884    // to this version and uncomment gwcgi in the site.cfg file -- Stefan.
885#ifdef USE_FASTCGI
886    if (isfastcgi) {
887      recpt.configure("gwcgi", fastcgienv["SCRIPT_NAME"]);
888    }
889#endif
890
891
892    // if there has been no error so far, perform the production of the
893    // output page
894    if (errorpage.empty()) {
895      text_t error_file = filename_cat (gsdlhome, "etc", "error.txt");
896      char *eout = error_file.getcstr();
897      ofstream errout (eout, ios::app);
898      delete []eout;
899
900#if defined(__WIN32__) && defined(GSDL_USE_IOS_H)
901      // old Windows compilers (VC++4.2)
902      cerr = errout;
903#else
904      // can't do this anymore according to c++ standard...
905      // cerr = errout;
906      // ... but can do this instead
907      streambuf* errbuf = cerr.rdbuf(errout.rdbuf());
908#endif
909
910      // parse the cgi arguments and produce the resulting page if there
911      // have been no errors so far
912      if (!recpt.parse_cgi_args (argstr, fileuploads, args, errout, fastcgienv)) {
913    errout.close ();
914    page_errorparseargs(gsdlhome, debug, errorpage);
915      } else {
916    // produce the output page
917
918    if (!recpt.produce_cgi_page (args, pageout, errout, fastcgienv)) {
919      errout.close ();
920      page_errorcgipage(gsdlhome, debug, errorpage);
921    }
922    recpt.log_cgi_args (args, errout, fastcgienv);
923    errout.close ();
924      }
925
926#if !defined(__WIN32__) || !defined(GSDL_USE_IOS_H)
927    // restore the cerr buffer
928    cerr.rdbuf(errbuf);
929#endif
930    }
931    // clean up any files that were uploaded
932    fileupload_tmap::const_iterator this_file = fileuploads.begin();
933    fileupload_tmap::const_iterator end_file = fileuploads.end();
934    while (this_file != end_file)
935      {
936    if (file_exists((*this_file).second.tmp_name))
937      {
938        char *thefile = (*this_file).second.tmp_name.getcstr();
939        unlink(thefile);
940        delete [] thefile;
941      }
942    ++this_file;
943      }
944   
945    // there was an error, output the error page
946    if (!errorpage.empty()) {
947      pageout << text_t2ascii << errorpage;
948      errorpage.clear();
949      numrequests = maxrequests; // make this the last page
950    }
951    pageout << flush;
952   
953    // finish with the output streams
954#ifdef USE_FASTCGI
955    if (isfastcgi) FCGX_Finish();
956#endif
957
958    ++numrequests;
959  }
960
961  return;
962}
Note: See TracBrowser for help on using the browser.