source: trunk/gsdl/src/recpt/collectoraction.cpp@ 7371

Last change on this file since 7371 was 7371, checked in by mdewsnip, 20 years ago

(Human Info) Allow some actions to be easily switched off.

  • Property svn:keywords set to Author Date Id Revision
File size: 67.2 KB
Line 
1/**********************************************************************
2 *
3 * collectoraction.cpp --
4 * Copyright (C) 2000 The New Zealand Digital Library Project
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "gsdl_modules_cfg.h"
27#ifdef GSDL_USE_COLLECTOR_ACTION
28
29// note that the collectoraction relies on having direct access to a
30// collections configuration file. this breaks the separation between
31// receptionist and collection server and so is not suitable (at least
32// in its current form) for use when collection servers are separate
33// from the receptionist (e.g. when using the CORBA protocol).
34
35// following line required to get fstream.filedesc() on darwin (Mac OS X)
36#define _STREAM_COMPAT 1
37// required for utsname on solaris???
38#define _XOPEN_SOURCE 1
39#define _XOPEN_SOURCE_EXTENDED 1
40
41#include "collectoraction.h"
42#include "OIDtools.h"
43#include "fileutil.h"
44#include "cfgread.h"
45#include "gsdltools.h"
46#include "gsdltimes.h"
47#include "nullproto.h"
48#include "argdb.h"
49#include "cgiutils.h"
50#include <stdio.h>
51#include <fcntl.h>
52
53#if !defined (__WIN32__)
54#include <sys/utsname.h>
55#include <unistd.h>
56#endif
57
58collectoraction::collectoraction () {
59
60 recpt = NULL;
61 disabled = true;
62 do_mkcol = false;
63 badsources = false;
64 failedsources.erase(failedsources.begin(), failedsources.end());
65 gsdlosc = NULL;
66 gsdlhomec = NULL;
67 pathc = NULL;
68
69 cgiarginfo arg_ainfo;
70 arg_ainfo.shortname = "a";
71 arg_ainfo.longname = "action";
72 arg_ainfo.multiplechar = true;
73 arg_ainfo.defaultstatus = cgiarginfo::weak;
74 arg_ainfo.argdefault = "collector";
75 arg_ainfo.savedarginfo = cgiarginfo::must;
76 argsinfo.addarginfo (NULL, arg_ainfo);
77
78 arg_ainfo.shortname = "p";
79 arg_ainfo.longname = "page";
80 arg_ainfo.multiplechar = true;
81 arg_ainfo.defaultstatus = cgiarginfo::weak;
82 arg_ainfo.argdefault = "intro";
83 arg_ainfo.savedarginfo = cgiarginfo::must;
84 argsinfo.addarginfo (NULL, arg_ainfo);
85
86 // temporary directory name for this collector
87 // session
88 arg_ainfo.shortname = "bc1tmp";
89 arg_ainfo.longname = "collector specific";
90 arg_ainfo.multiplechar = true;
91 arg_ainfo.defaultstatus = cgiarginfo::weak;
92 arg_ainfo.argdefault = "";
93 arg_ainfo.savedarginfo = cgiarginfo::must;
94 argsinfo.addarginfo (NULL, arg_ainfo);
95
96 arg_ainfo.shortname = "bc1fullname";
97 arg_ainfo.longname = "collector specific";
98 arg_ainfo.multiplechar = true;
99 arg_ainfo.defaultstatus = cgiarginfo::weak;
100 arg_ainfo.argdefault = "";
101 arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk
102 argsinfo.addarginfo (NULL, arg_ainfo);
103
104 arg_ainfo.shortname = "bc1dirname";
105 arg_ainfo.longname = "collector specific";
106 arg_ainfo.multiplechar = true;
107 arg_ainfo.defaultstatus = cgiarginfo::weak;
108 arg_ainfo.argdefault = "";
109 arg_ainfo.savedarginfo = cgiarginfo::must;
110 argsinfo.addarginfo (NULL, arg_ainfo);
111
112 arg_ainfo.shortname = "bc1contactemail";
113 arg_ainfo.longname = "collector specific";
114 arg_ainfo.multiplechar = true;
115 arg_ainfo.defaultstatus = cgiarginfo::weak;
116 arg_ainfo.argdefault = "";
117 arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk
118 argsinfo.addarginfo (NULL, arg_ainfo);
119
120 arg_ainfo.shortname = "bc1aboutdesc";
121 arg_ainfo.longname = "collector specific";
122 arg_ainfo.multiplechar = true;
123 arg_ainfo.defaultstatus = cgiarginfo::weak;
124 arg_ainfo.argdefault = "";
125 arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk
126 argsinfo.addarginfo (NULL, arg_ainfo);
127
128 arg_ainfo.shortname = "bc1clone";
129 arg_ainfo.longname = "collector specific";
130 arg_ainfo.multiplechar = false;
131 arg_ainfo.defaultstatus = cgiarginfo::weak;
132 arg_ainfo.argdefault = "0";
133 arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk
134 argsinfo.addarginfo (NULL, arg_ainfo);
135
136 arg_ainfo.shortname = "bc1clonecol";
137 arg_ainfo.longname = "collector specific";
138 arg_ainfo.multiplechar = true;
139 arg_ainfo.defaultstatus = cgiarginfo::weak;
140 arg_ainfo.argdefault = "";
141 arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk
142 argsinfo.addarginfo (NULL, arg_ainfo);
143
144 // set when cloning option has changed
145 arg_ainfo.shortname = "bc1clonechanged";
146 arg_ainfo.longname = "collector specific";
147 arg_ainfo.multiplechar = false;
148 arg_ainfo.defaultstatus = cgiarginfo::weak;
149 arg_ainfo.argdefault = "0";
150 arg_ainfo.savedarginfo = cgiarginfo::mustnot;
151 argsinfo.addarginfo (NULL, arg_ainfo);
152
153 // only set when one of the fields was changed in
154 // the "collection info" page
155 arg_ainfo.shortname = "bc1infochanged";
156 arg_ainfo.longname = "collector specific";
157 arg_ainfo.multiplechar = false;
158 arg_ainfo.defaultstatus = cgiarginfo::weak;
159 arg_ainfo.argdefault = "0";
160 arg_ainfo.savedarginfo = cgiarginfo::mustnot;
161 argsinfo.addarginfo (NULL, arg_ainfo);
162
163 // only set when cfg file is altered from within
164 // "configure collection" page
165 arg_ainfo.shortname = "bc1cfgchanged";
166 arg_ainfo.longname = "collector specific";
167 arg_ainfo.multiplechar = false;
168 arg_ainfo.defaultstatus = cgiarginfo::weak;
169 arg_ainfo.argdefault = "0";
170 arg_ainfo.savedarginfo = cgiarginfo::mustnot;
171 argsinfo.addarginfo (NULL, arg_ainfo);
172
173 arg_ainfo.shortname = "cfgfile";
174 arg_ainfo.longname = "configuration file contents";
175 arg_ainfo.multiplechar = true;
176 arg_ainfo.defaultstatus = cgiarginfo::weak;
177 arg_ainfo.argdefault = "";
178 arg_ainfo.savedarginfo = cgiarginfo::mustnot;
179 argsinfo.addarginfo (NULL, arg_ainfo);
180
181 arg_ainfo.shortname = "bc1dodelete";
182 arg_ainfo.longname = "collector specific";
183 arg_ainfo.multiplechar = false;
184 arg_ainfo.defaultstatus = cgiarginfo::weak;
185 arg_ainfo.argdefault = "0";
186 arg_ainfo.savedarginfo = cgiarginfo::mustnot;
187 argsinfo.addarginfo (NULL, arg_ainfo);
188
189 // will be set if we arrived at the "configure collection" page
190 // via the "changing an existing collection" page
191 arg_ainfo.shortname = "bc1econf";
192 arg_ainfo.longname = "collector specific";
193 arg_ainfo.multiplechar = false;
194 arg_ainfo.defaultstatus = cgiarginfo::weak;
195 arg_ainfo.argdefault = "0";
196 arg_ainfo.savedarginfo = cgiarginfo::must;
197 argsinfo.addarginfo (NULL, arg_ainfo);
198
199 // will be set if we arrived at the "source data" page
200 // via the "changing an existing collection" page
201 arg_ainfo.shortname = "bc1esrce";
202 arg_ainfo.longname = "collector specific";
203 arg_ainfo.multiplechar = false;
204 arg_ainfo.defaultstatus = cgiarginfo::weak;
205 arg_ainfo.argdefault = "0";
206 arg_ainfo.savedarginfo = cgiarginfo::must;
207 argsinfo.addarginfo (NULL, arg_ainfo);
208
209 arg_ainfo.shortname = "bc1inputnum";
210 arg_ainfo.longname = "collector specific";
211 arg_ainfo.multiplechar = true;
212 arg_ainfo.defaultstatus = cgiarginfo::weak;
213 arg_ainfo.argdefault = "3";
214 arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk
215 argsinfo.addarginfo (NULL, arg_ainfo);
216
217 arg_ainfo.shortname = "bc1input";
218 arg_ainfo.longname = "collector specific";
219 arg_ainfo.multiplechar = true;
220 arg_ainfo.multiplevalue = true;
221 arg_ainfo.defaultstatus = cgiarginfo::weak;
222 arg_ainfo.argdefault = "";
223 arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk
224 argsinfo.addarginfo (NULL, arg_ainfo);
225
226 arg_ainfo.shortname = "bc1inputtype";
227 arg_ainfo.longname = "collector specific";
228 arg_ainfo.multiplechar = true;
229 arg_ainfo.multiplevalue = true;
230 arg_ainfo.defaultstatus = cgiarginfo::weak;
231 arg_ainfo.argdefault = "";
232 arg_ainfo.savedarginfo = cgiarginfo::mustnot; // saved on disk
233 argsinfo.addarginfo (NULL, arg_ainfo);
234
235 // will be set when we've just come from the "source data" page
236 arg_ainfo.shortname = "bc1fromsrce";
237 arg_ainfo.longname = "collector specific";
238 arg_ainfo.multiplechar = false;
239 arg_ainfo.multiplevalue = false;
240 arg_ainfo.defaultstatus = cgiarginfo::weak;
241 arg_ainfo.argdefault = "0";
242 arg_ainfo.savedarginfo = cgiarginfo::mustnot;
243 argsinfo.addarginfo (NULL, arg_ainfo);
244}
245
246collectoraction::~collectoraction () {
247 if (gsdlosc != NULL) delete gsdlosc;
248 if (gsdlhomec != NULL) delete gsdlhomec;
249 if (pathc != NULL) delete pathc;
250}
251
252
253void collectoraction::configure (const text_t &key, const text_tarray &cfgline) {
254 if ((key == "collector") && (cfgline.size() == 1) &&
255 (cfgline[0] == "true" || cfgline[0] == "on" || cfgline[0] == "enabled")) {
256 disabled = false;
257 } else {
258 // call the parent class to deal with the things which
259 // are not dealt with here
260 action::configure (key, cfgline);
261 }
262}
263
264
265bool collectoraction::init (ostream & /*logout*/) {
266
267 // set up GSDLOS, GSDLHOME and PATH environment variables
268 text_t gsdlos, path;
269 unsigned int path_separator = ':';
270#if defined (__WIN32__)
271 gsdlos = "windows";
272 path_separator = ';';
273
274 path = filename_cat (gsdlhome, "bin", "windows", "perl", "bin;");
275
276#else
277 struct utsname *buf = new struct utsname();
278 int i = uname (buf);
279 if (i == -1) gsdlos = "linux"; // uname failed
280 else gsdlos.setcstr (buf->sysname);
281 delete buf;
282 lc (gsdlos);
283#endif
284
285 pathc = getenv ("PATH");
286 path += filename_cat (gsdlhome, "bin", gsdlos);
287 path.push_back (path_separator);
288 path += filename_cat (gsdlhome, "bin", "script");
289 if (pathc != NULL) {
290 path.push_back (path_separator);
291 path += pathc;
292 }
293 path = "PATH=" + path;
294
295 gsdlos = "GSDLOS=" + gsdlos;
296 text_t setgsdlhome = "GSDLHOME=" + gsdlhome;
297
298 // these will be cleaned up in the destructor
299 gsdlosc = gsdlos.getcstr();
300 gsdlhomec = setgsdlhome.getcstr();
301 pathc = path.getcstr();
302
303 putenv (gsdlosc);
304 putenv (gsdlhomec);
305 putenv (pathc);
306
307 return true;
308}
309
310bool collectoraction::check_cgiargs (cgiargsinfoclass &argsinfo, cgiargsclass &args,
311 recptprotolistclass * /*protos*/, ostream &logout) {
312
313 text_t &current_page = args["p"];
314
315 // note that the "bildstatus" and "bildframe1" pages don't actually do anything
316 // functional so we don't need to worry about authenticating them (it's the
317 // underlying "bild" page that does the building (and creates the frameset))
318 // This helps us overcome a bit of a problem we have with multiple pages trying
319 // to read from the key.db database at the same time.
320 if (current_page != "intro" && current_page != "bildstatus" && current_page != "bildframe1") {
321 // authenticate the user if authentication is available
322 args["uan"] = 1;
323 args["ug"] = "colbuilder";
324 }
325
326 if (current_page == "new" || current_page == "existing") {
327
328 // assign (and create) a temporary directory
329 if (assign_tmpname (args, logout)==false) {
330 // there was an error creating the tmp dir
331 message="tmpfail";
332 return true; // true because we could still parse the arguments
333 }
334
335 // clean up any old builds left laying about in the tmp directory
336 // (note that it's possible this could take some time if there's a huge
337 // partially built collection laying about so we'll make it an asynchronous
338 // system call)
339 gsdl_system ("perl -S cleantmp.pl", false, logout);
340 }
341
342 if (current_page != "intro" && current_page != "bildstatus" &&
343 current_page != "bildframe1" && current_page != "new") {
344 // update arguments that were saved to the harddrive
345 text_tmap saved_args;
346 saved_args["bc1fullname"] = "";
347 saved_args["bc1contactemail"] = "";
348 saved_args["bc1aboutdesc"] = "";
349 saved_args["bc1clone"] = "";
350 saved_args["bc1clonecol"] = "";
351 saved_args["bc1inputnum"] = "";
352 saved_args["bc1input"] = "";
353 saved_args["bc1inputtype"] = "";
354
355 // update the argdb database with any arguments that were set
356 // by previous page
357 text_tmap::iterator here = saved_args.begin();
358 text_tmap::iterator end = saved_args.end();
359 while (here != end) {
360 if (args.lookupcgiarg((*here).first).source != cgiarg_t::default_arg) {
361 (*here).second = args[(*here).first];
362 }
363 here++;
364 }
365
366 text_t argfile = filename_cat(gsdlhome, "tmp", args["bc1tmp"], "argdb.db");
367 argdb *args_on_disk = new argdb(argfile);
368 if (!args_on_disk->update_args(saved_args)) {
369 // error
370 logout << "collectoraction: argdb::update_args failed (" << argfile << ")\n";
371 }
372
373 // update args from argdb
374 saved_args.erase(saved_args.begin(), saved_args.end());
375 if (!args_on_disk->get_args(saved_args)) {
376 // error
377 logout << "collectoraction: argdb::get_args failed (" << argfile << ")\n";
378 }
379 delete args_on_disk;
380 here = saved_args.begin();
381 end = saved_args.end();
382 while (here != end) {
383 if (!(*here).second.empty()) {
384 args[(*here).first] = (*here).second;
385 }
386 here ++;
387 }
388 }
389
390 if (args["bc1infochanged"] == "1") {
391
392 if (args["bc1dirname"].empty()) {
393 // we've just come from the "collection information" page for the
394 // first time so we'll need to create the collection with mkcol.pl
395 // and set up bc1dirname - we do this part here instead of in do_action
396 // because the bc1dirname argument must be set to its new value before
397 // the compressedoptions macros are set.
398 args["bc1dirname"] = get_directory_name (args["bc1fullname"]);
399
400 text_t createfile = filename_cat (gsdlhome, "tmp", args["bc1tmp"], ".create");
401 if (!file_exists (createfile)) {
402 // we could do the mkcol.pl here but I guess it's nicer to do it in do_action()
403 do_mkcol = true;
404 } else {
405 // .create file already exists but bc1dirname wasn't set ... this should only be
406 // able to occur when the "reload" (and possibly the "back" and "forward" buttons)
407 // have been used to get us here.
408 // we'll check that the bc1dirname directory exists (in case of the unlikely
409 // possibility that get_directory_name returned a different value this time
410 // than it did originally).
411 text_t coldir = filename_cat (get_collectdir(args), args["bc1dirname"]);
412 if (!directory_exists (coldir)) {
413 message = "reloaderror";
414 return true;
415 }
416 }
417 } else {
418 // "collection information" has been changed after collection already exists
419 // so we'll need to update the cfg file.
420 update_cfgfile_partial (args, false, logout);
421 }
422 }
423
424 if (args["bc1cfgchanged"] == "1") {
425 // configuration file has been changed from the "configure collection"
426 // page. we need to update the file on disk and catch bc1 arguments up
427 // with changes.
428 update_cfgfile_complete (args, logout);
429 }
430
431 if (args["bc1clonechanged"] == "1") {
432 // cloning option has been changed on "source data" page. if it was turned
433 // on we want to create a new collect.cfg file using the bc1clonecol cfg file
434 // as a model (we'll save the old file as collect.cfg.org). if cloning was
435 // turned off we'll revert to using the collect.cfg.org file (which will need
436 // updating in case the bc1 arguments have been altered since cloning was
437 // turned on).
438 update_cfgfile_clone (args, logout);
439
440 // if cloning has just been turned on we'll also copy the rest of the files
441 // (excluding collect.cfg which we've already done) from the cloned collections
442 // etc directory to the new collection.
443 if (args["bc1clone"] == "1") {
444 text_t clone_etc = filename_cat(gsdlhome, "collect", args["bc1clonecol"], "etc");
445 text_t new_etc = filename_cat(get_collectdir(args), args["bc1dirname"], "etc");
446 text_tarray files;
447
448 if (read_dir (clone_etc, files)) {
449 text_tarray::const_iterator here = files.begin();
450 text_tarray::const_iterator end = files.end();
451 while (here != end) {
452 if (*here != "collect.cfg" && *here != "collect.cfg.org") {
453 file_copy (filename_cat(clone_etc, *here), filename_cat(new_etc, *here));
454 }
455 here ++;
456 }
457 } else {
458 outconvertclass text_t2ascii;
459 logout <<text_t2ascii << "collectoraction::check_cgiargs couldn't read from "
460 << clone_etc << " directory\n";
461 }
462 }
463 }
464
465 if (current_page == "bildstatus" || current_page == "bildcancel") {
466 // if .final file exists then build has finished
467 text_t fbld = filename_cat (gsdlhome, "tmp", args["bc1tmp"], args["bc1dirname"] + ".bld.final");
468 if (file_exists (fbld)) {
469 char *fbldc = fbld.getcstr();
470 ifstream fbld_in (fbldc);
471 if (fbld_in) {
472 failcode = fbld_in.get();
473 fbld_in.close();
474 if (failcode == '0') {
475 // success - we need to create and configure a collection server for the
476 // newly built collection (for fastcgi and local library where
477 // initialization isn't going to be redone when the user clicks the
478 // "view your new collection" button
479 create_colserver (args["bc1dirname"], logout);
480 current_page = "bilddone";
481 }
482 else current_page = "bildfail";
483 } else {
484 // assume build failed (we shouldn't get here though ... right?)
485 current_page = "bildfail";
486 }
487 delete fbldc;
488 }
489 }
490
491 if (args["bc1fromsrce"] == "1") {
492
493 // we've just come from the "source data" page so we need to check that
494 // input sources are valid
495 if (!check_sources(args, logout)) {
496 args["p"] = "srce";
497 }
498 }
499
500 return true;
501}
502
503void collectoraction::update_cfgfile_clone (cgiargsclass &args, ostream &logout) {
504
505 text_t tmpdir = filename_cat(gsdlhome, "tmp", args["bc1tmp"]);
506 text_t cfgfile = filename_cat(tmpdir, args["bc1dirname"], "etc", "collect.cfg");
507 if (!file_exists (cfgfile)) {
508 message = "tmpfail";
509 return;
510 }
511
512 text_t cfgfile_org = filename_cat (tmpdir, "collect.cfg.org");
513
514 if (args["bc1clone"] == "1") {
515 // cloning was turned on
516
517 text_t cfgfile_clone = filename_cat(gsdlhome, "collect", args["bc1clonecol"], "etc", "collect.cfg");
518 if (file_exists (cfgfile_clone)) {
519 // if .org file doesn't exist already create it
520 if (!file_exists (cfgfile_org)) {
521 if (!file_copy (cfgfile, cfgfile_org)) {
522 message = "tmpfail";
523 return;
524 }
525 }
526 // copy clone collections cfg file to new collection
527 if (!file_copy (cfgfile_clone, cfgfile)) {
528 message = "tmpfail";
529 return;
530 }
531 // update the new cfg file
532 update_cfgfile_partial (args, true, logout);
533
534 } else {
535 // can't clone non-existant or read-protected collection
536 message = "clonefail";
537 }
538
539 } else {
540 // cloning has been turned off having been on at some point. the .org file
541 // should exist, if it doesn't we'll bail out and leave the user with the
542 // cloned copy
543 if (file_exists (cfgfile_org)) {
544 // copy original back again and update it with any recent changes
545 if (file_copy (cfgfile_org, cfgfile)) {
546 update_cfgfile_partial (args, false, logout);
547 } else {
548 message = "tmpfail";
549 }
550 }
551 }
552}
553
554// update configuration file on disk to match bc1 arguments
555// there's a special case if the clone option is true as certain parts of a
556// config file should not be cloned (e.g. the iconcollection stuff)
557void collectoraction::update_cfgfile_partial (cgiargsclass &args, bool clone, ostream &logout) {
558
559 text_t cfgfile = filename_cat(get_collectdir(args), args["bc1dirname"], "etc", "collect.cfg");
560 char *cfgfilec = cfgfile.getcstr();
561
562#if defined (__WIN32__)
563 // make sure collect.cfg isn't read-only
564 _chmod (cfgfilec, _S_IREAD | _S_IWRITE);
565#endif
566
567 vector<text_tarray> cfgarray;
568
569 // read in cfg file
570 ifstream cfg_in (cfgfilec);
571 if (cfg_in) {
572 text_tarray cfgline;
573 while (read_cfg_line(cfg_in, cfgline) >= 0) {
574 if (cfgline.size () >= 2) {
575 if (cfgline[0] == "creator" || cfgline[0] == "maintainer") {
576 cfgline[1] = args["bc1contactemail"];
577 } else if (cfgline[0] == "collectionmeta") {
578 if (cfgline[1] == "collectionname") {
579 cfgline[2] = args["bc1fullname"];
580 } else if (cfgline[1] == "collectionextra") {
581 cfgline[2] = carriage_replace (args["bc1aboutdesc"], 0);
582 } else if (clone && (cfgline[1] == "iconcollection" ||
583 cfgline[1] == "iconcollectionsmall")) {
584 cfgline[2] = "";
585 }
586 }
587 }
588 cfgarray.push_back (cfgline);
589 }
590 cfg_in.close();
591
592 // now write cfg file back out
593 int fd=open(cfgfilec, O_WRONLY | O_CREAT | O_TRUNC
594#if defined(__WIN32__)
595 | O_BINARY
596#endif
597 );
598
599 if (fd != -1) {
600 // lock the file
601 int lock_val = 1;
602 GSDL_LOCK_FILE (fd);
603 if (lock_val != 0) {
604 logout << "Error: Couldn't lock file " << cfgfilec << "\n";
605 close(fd);
606 message = "tmpfail";
607
608 } else {
609
610 vector<text_tarray>::const_iterator this_line = cfgarray.begin();
611 vector<text_tarray>::const_iterator end_line = cfgarray.end();
612 while (this_line != end_line) {
613 write_cfg_line (fd, *this_line);
614 this_line ++;
615 }
616 GSDL_UNLOCK_FILE (fd);
617 close(fd);
618 }
619
620 } else {
621 logout << "collectoraction::update_cfgfile_partial: unable to open "
622 << cfgfilec << " for output\n";
623 message = "tmpfail";
624 }
625
626 } else {
627 logout << "collectoraction::update_cfgfile_partial: unable to open "
628 << cfgfilec << " for input\n";
629 message = "tmpfail";
630 }
631
632 delete cfgfilec;
633}
634
635// replace configuration file on disk with that in the cfgfile argument and
636// catch other bc1 arguments up with those the new cfgfile contains
637void collectoraction::update_cfgfile_complete (cgiargsclass &args, ostream &logout) {
638
639 text_t cfgfile = filename_cat(get_collectdir(args), args["bc1dirname"], "etc", "collect.cfg");
640 char *cfgfilec = cfgfile.getcstr();
641
642#ifdef __WIN32__
643 // make sure collect.cfg isn't read-only
644 _chmod (cfgfilec, _S_IREAD | _S_IWRITE);
645#endif
646
647 int fd=open(cfgfilec, O_WRONLY | O_CREAT | O_TRUNC
648#if defined(__WIN32__)
649 | O_BINARY
650#endif
651 );
652
653 if (fd) {
654 // lock the file
655 int lock_val = 1;
656 GSDL_LOCK_FILE (fd);
657 if (lock_val != 0) {
658 logout << "Error: Couldn't lock file " << cfgfilec << "\n";
659 close(fd);
660 message = "tmpfail";
661
662 } else {
663
664 outconvertclass text_t2ascii;
665 text_t2ascii.setinput(&args["cfgfile"]);
666 size_t buffersize=args["cfgfile"].size();
667 char *buffer=new char[buffersize];
668 buffer[0]='\n'; // just in case something goes wrong...
669 size_t num_chars;
670 convertclass::status_t status;
671 text_t2ascii.convert(buffer, buffersize, num_chars, status);
672 // ignore status - assume it is "finished" as buffer is big enough
673 write(fd, buffer, num_chars);
674 GSDL_UNLOCK_FILE (fd);
675 close(fd);
676 delete buffer;
677
678 // now that we've written the file we'll read it back again and
679 // update our bc1 arguments
680 ifstream cfg_in (cfgfilec);
681 if (cfg_in) {
682 text_tarray cfgline;
683 while (read_cfg_line(cfg_in, cfgline) >= 0) {
684 if (cfgline.size () >= 2) {
685 if (cfgline[0] == "creator") {
686 args["bc1contactemail"] = cfgline[1];
687 } else if (cfgline[0] == "collectionmeta") {
688 if (cfgline[1] == "collectionname") {
689 args["bc1fullname"] = cfgline[2];
690 } else if (cfgline[1] == "collectionextra") {
691 args["bc1aboutdesc"] = carriage_replace (cfgline[2], 1);
692 }
693 }
694 }
695 }
696 cfg_in.close();
697 } else {
698 logout << "collectoraction::update_cfgfile_complete: unable to open "
699 << cfgfilec << " for input\n";
700 message = "tmpfail";
701 }
702 }
703 } else {
704 logout << "collectoraction::update_cfgfile_complete: unable to open "
705 << cfgfilec << " for output\n";
706 message = "tmpfail";
707 }
708
709 delete cfgfilec;
710}
711
712void collectoraction::get_cgihead_info (cgiargsclass &/*args*/, recptprotolistclass * /*protos*/,
713 response_t &response,text_t &response_data,
714 ostream &/*logout*/) {
715 response = content;
716 response_data = "text/html";
717}
718
719// return html for buttons used in collector bar
720// color may be "green", "grey", or "yellow"
721// type may be:
722// "info" --> "collection information" button
723// "srce" --> "source data" button
724// "conf" --> "configure collection" button
725// "bild" --> "build collection" button
726// "view" --> "view collection" button
727// if enabled is true button will be flashy rollover type and
728// will be hyperlinked
729
730text_t collectoraction::get_button (const text_t &thispage, const text_t &color,
731 const text_t &type, bool enabled) {
732
733 if ((color != "green" && color != "grey" && color != "yellow") ||
734 (type != "info" && type != "srce" && type != "conf" && type != "bild" && type != "view"))
735 return "";
736
737 text_t prefix = "gc";
738 if (color == "grey") prefix = "nc";
739 else if (color == "yellow") prefix = "yc";
740
741 text_t httpicon = "httpicon" + prefix + type;
742
743 if (enabled) {
744 text_t gsmacro = "_gsimage_";
745 if (thispage == "info" || thispage == "srce" || thispage == "conf" ||
746 thispage == "bildcancel" || thispage == "bildfail") {
747 gsmacro = "_gsjimage_";
748 } else if (type == "view") {
749 // view button is special case as it needs a target=_top
750 gsmacro = "_gstimage_";
751 }
752 return "<td>" + gsmacro + "(_collector:http" + type + "_,_collector:" + httpicon +
753 "of_,_collector:" + httpicon + "on_," + type + ",_collector:text" + type + "_)</td>\n";
754 } else {
755 return "<td>_icon" + prefix + type + "of_</td>\n";
756 }
757}
758
759// set the _fullnamemenu_ macro (and _warnindex_ and _selectedindex_ if
760// we're on the "srce" page)
761void collectoraction::set_fullnamemenu (displayclass &disp, cgiargsclass &args,
762 recptprotolistclass *protos, ostream &logout) {
763
764 if (recpt == NULL) {
765 logout << "ERROR (collectoraction::set_fullnamemenu): This action does not contain\n"
766 << " information about any receptionists. The method set_receptionist was\n"
767 << " probably not called from the module which instantiated this action.\n";
768 return;
769 }
770
771 text_t &current_page = args["p"];
772 text_t currentname = args["bc1dirname"];
773 if (current_page == "srce") currentname = args["bc1clonecol"];
774
775 text_tarray dirnames;
776 text_tarray fullnames;
777 vector<bool> write_protected;
778 bool is_selected = false;
779 int selected_index = 0;
780 int index = 0;
781
782 recptprotolistclass::iterator rprotolist_here = protos->begin();
783 recptprotolistclass::iterator rprotolist_end = protos->end();
784 while (rprotolist_here != rprotolist_end) {
785 if ((*rprotolist_here).p != NULL) {
786
787 // don't include z39.50 collections
788 comerror_t err = noError;
789 if ((*rprotolist_here).p->get_protocol_name (err) == "z3950proto") {
790 rprotolist_here ++;
791 continue;
792 }
793
794 text_tarray collist;
795 (*rprotolist_here).p->get_collection_list (collist, err, logout);
796 if (err == noError) {
797 text_tarray::iterator collist_here = collist.begin();
798 text_tarray::iterator collist_end = collist.end();
799 FilterResponse_t response;
800 text_tset metadata;
801 metadata.insert ("collectionname");
802 while (collist_here != collist_end) {
803 ColInfoResponse_t *cinfo = recpt->get_collectinfo_ptr ((*rprotolist_here).p, *collist_here, logout);
804 if (cinfo != NULL) {
805 text_t collectionname = *collist_here;
806 if (!cinfo->collectionmeta["collectionname"].empty()) {
807 // get collection name from the collection cfg file
808 collectionname = cinfo->collectionmeta["collectionname"];
809 } else if (get_info ("collection", *collist_here, metadata, false,
810 (*rprotolist_here).p, response, logout)) {
811 // get collection name from gdbm file
812 collectionname = response.docInfo[0].metadata["collectionname"].values[0];
813 }
814 dirnames.push_back(*collist_here);
815 fullnames.push_back(collectionname);
816 // check to see if the collection is writable
817 if (collection_protected (*collist_here)) write_protected.push_back(true);
818 else write_protected.push_back(false);
819
820 if (*collist_here == currentname) {
821 is_selected = true;
822 selected_index = index;
823 }
824 index ++;
825 }
826 collist_here ++;
827 }
828 }
829 }
830 rprotolist_here ++;
831 }
832
833 bool first = true;
834 text_t warnindex;
835 text_t fullnamemenu = "<select name=\"bc1dirname\">\n";
836 if (current_page == "srce") {
837 fullnamemenu = "<select name=\"bc1clonecol\" onChange=\"menuchange();\">\n";
838 fullnamemenu += "<option value=defaultstructure";
839 if (!is_selected) fullnamemenu += " selected>";
840 else fullnamemenu.push_back('>');
841 fullnamemenu += "_collector:textdefaultstructure_\n";
842 }
843 for (int i = 0; i < index; i ++) {
844 // don't want write protected collections in list on "change existing
845 // collection" page
846 if (write_protected[i] && current_page == "existing") continue;
847 fullnamemenu += "<option value=\"" + dirnames[i] + "\"";
848 if ((i == 0 && !is_selected && current_page != "srce") ||
849 (is_selected && i == selected_index)) {
850 fullnamemenu += " selected";
851 selected_index++;
852 is_selected = false;
853 }
854 fullnamemenu.push_back ('>');
855 fullnamemenu += fullnames[i];
856 fullnamemenu.push_back ('\n');
857
858 // add to Warnindex if collection uses any dubious plugins
859 // (if creating clone collection list)
860 if (current_page == "srce") {
861 if (first) warnindex += "0,";
862 else warnindex.push_back(',');
863 if (uses_weird_plugin (dirnames[i])) {
864 warnindex += text_t (1);
865 } else {
866 warnindex += text_t (0);
867 }
868 }
869 first = false;
870 }
871 fullnamemenu += "</select>\n";
872
873 disp.setmacro ("fullnamemenu", "collector", fullnamemenu);
874 if (current_page == "srce") {
875 disp.setmacro ("warnindex", "collector", warnindex);
876 disp.setmacro ("selectedindex", "collector", text_t(selected_index));
877 }
878}
879
880// set _sourcelist_ and _badsources_ macros
881void collectoraction::set_inputsourceboxes (displayclass &disp, cgiargsclass &args,
882 ostream &logout) {
883
884 if (badsources) disp.setmacro ("badsources", "collector", "1");
885
886 text_t sourcelist = get_source_box(args["bc1input"], args["bc1inputnum"].getint(),
887 args["bc1inputtype"]);
888
889 disp.setmacro("sourcelist", "collector", sourcelist);
890
891 // reset badsources and failedsources variables
892 badsources = false;
893 failedsources.erase(failedsources.begin(), failedsources.end());
894}
895
896text_t collectoraction::get_source_box (text_t inputarglist, int numboxes,
897 text_t inputtypelist) {
898
899 text_tarray inputvalues;
900 splitchar (inputarglist.begin(), inputarglist.end(), ',', inputvalues);
901 // remove any empty values from the end of the array
902 if (inputvalues.size()) {
903 text_tarray::iterator l = inputvalues.end() - 1;
904 text_tarray::iterator b = inputvalues.begin();
905 while ((*l).empty() && l >= b) {
906 l--;
907 }
908 inputvalues.erase(l+1, inputvalues.end());
909 }
910
911 text_tarray inputtypes;
912 splitchar (inputtypelist.begin(), inputtypelist.end(), ',', inputtypes);
913
914 int numvalues = inputvalues.size();
915 int numtypes = inputtypes.size();
916
917 text_t last = "file://";
918 text_t rv;
919 for (int i = 0; i < numboxes; i++) {
920 rv += "<nobr><select name=\"bc1inputtype\">\n";
921 rv += "<option value=\"file://\"";
922 if ((i < numtypes && inputtypes[i] == "file://") ||
923 (numboxes == 3 && i == 0 && numvalues == 0) ||
924 (i >= 3 && i >= numvalues && last == "file://")) {
925 rv += " selected";
926 last = "file://";
927 }
928 rv += ">file://\n";
929 rv += "<option value=\"http://\"";
930 if ((i < numtypes && inputtypes[i] == "http://") ||
931 (numboxes == 3 && i == 1 && numvalues == 0) ||
932 (i >= 3 && i >= numvalues && last == "http://")) {
933 rv += " selected";
934 last = "http://";
935 }
936 rv += ">http://\n";
937 rv += "<option value=\"ftp://\"";
938 if ((i < numtypes && inputtypes[i] == "ftp://") ||
939 (numboxes == 3 && i == 2 && numvalues == 0) ||
940 (i >= 3 && i >= numvalues && last == "ftp://")) {
941 rv += " selected";
942 last = "ftp://";
943 }
944 rv += ">ftp://\n";
945 rv += "</select>\n";
946 rv += "<input type=text name=\"bc1input\" value=\"";
947 if (i < numvalues) {
948 rv += dm_safe(decode_commas(inputvalues[i]));
949 }
950 rv += "\" size=50>";
951 if (badsources) {
952 if ((i < numvalues) && (!inputvalues[i].empty())) {
953 if (failedsources[decode_commas(inputvalues[i])] == "1") {
954 rv += "_iconcross_";
955 } else {
956 rv += "_icontick_";
957 }
958 } else {
959 rv += "_iconblank_";
960 }
961 }
962 if (i+1 == numboxes) {
963 if (!badsources) rv += "_iconblank_";
964 rv += "_imagemore_</nobr><br>";
965 } else {
966 rv += "</nobr><br>\n";
967 }
968 }
969
970 return rv;
971}
972
973// set the _cfgfile_ macro
974void collectoraction::set_cfgfile (displayclass &disp, cgiargsclass &args, ostream &logout) {
975
976 text_t &collection = args["bc1dirname"];
977 if (collection.empty()) {
978 message = "nocollection";
979 return;
980 }
981
982 // read in collect.cfg
983 text_t cfgfile = filename_cat(get_collectdir(args), collection, "etc", "collect.cfg");
984 char *cfgfilec = cfgfile.getcstr();
985
986#ifdef GSDL_USE_IOS_H
987 ifstream cfg_ifs (cfgfilec, ios::in | ios::nocreate);
988#else
989 ifstream cfg_ifs (cfgfilec, ios::in);
990#endif
991
992 if (cfg_ifs) {
993 // read in collect.cfg
994 text_t cfgtext;
995 char c;
996 cfg_ifs.get(c);
997 while (!cfg_ifs.eof ()) {
998 cfgtext.push_back(c);
999 cfg_ifs.get(c);
1000 }
1001 cfg_ifs.close();
1002
1003 // define it as a macro
1004 disp.setmacro("cfgfile", "collector", dm_safe(cfgtext));
1005
1006 } else {
1007 logout << "collectoraction::set_cfgfile: couldn't open configuration file ("
1008 << cfgfilec << ") for reading\n";
1009 message = "tmpfail";
1010 }
1011 delete cfgfilec;
1012}
1013
1014// set the _statusline_ macro
1015void collectoraction::set_statusline (displayclass &disp, cgiargsclass &args, ostream & /*logout*/) {
1016
1017 // the build command creates .bld.download, .bld.import, and .bld.build files (in that
1018 // order) and deletes them (also in that order) when each stage is complete. the .bld
1019 // file is the concatenation of all these files.
1020 text_t bld_file = filename_cat (gsdlhome, "tmp", args["bc1tmp"], args["bc1dirname"] + ".bld");
1021 text_t statusline;
1022
1023 if (file_exists (bld_file + ".download")) {
1024 statusline = "_collector:textdownloadingfiles_<br>\n";
1025 statusline += dm_safe(file_tail(bld_file + ".download", 1, 0));
1026 } else if (file_exists (bld_file + ".import")) {
1027 statusline = "_collector:textimportingcollection_<br>\n";
1028 statusline += dm_safe(file_tail(bld_file + ".import", 1, 0));
1029 } else if (file_exists (bld_file + ".build")) {
1030 statusline = "_collector:textbuildingcollection_<br>\n";
1031 statusline += dm_safe(file_tail(bld_file + ".build", 1, 0));
1032 } else {
1033 statusline += "_collector:textcreatingcollection_<br>\n";
1034 statusline += dm_safe(file_tail(bld_file, 1, 0));
1035 }
1036
1037 disp.setmacro ("statusline", "collector", statusline);
1038
1039}
1040
1041void collectoraction::define_internal_macros (displayclass &disp, cgiargsclass &args,
1042 recptprotolistclass *protos, ostream &logout) {
1043
1044 // define_internal_macros sets the following macros:
1045 // _collectorbar_
1046 // _pagescriptextra_
1047 // _fullnamemenu_ -- if displaying the "source data" page or the "changing existing
1048 // collection" page
1049 // _cfgfile_ -- if displaying the "configure collection" page
1050 // _statusline_ -- if displaying the bildstatus page
1051 // _header_ -- may be set for pages that require it
1052 // _textfailmsg_ -- set to different messages depending on failcode returned
1053 // by build script (if build fails)
1054 // _faillog_ - set to last 6 lines of .bld file if build failed
1055 // _gsdlhome_ - the gsdlhome path (dm_safe)
1056 // _sourcelist_ -- "input source" text boxes
1057 // _badsources_ -- will be set to "1" if we've come from the
1058 // "source data" page and there's a problem
1059 // with the input sources
1060
1061 text_t &collector_page = args["p"];
1062 int esrce = args["bc1esrce"].getint();
1063 int econf = args["bc1econf"].getint();
1064
1065 // set _pagescriptextra_ macro to _cpagescriptextra_
1066 disp.setmacro ("pagescriptextra", "collector", "_" + collector_page + "scriptextra_");
1067
1068 if (collector_page == "bildstatus" || collector_page == "bilddone" ||
1069 collector_page == "bildfail" || collector_page == "bildframe1") {
1070 disp.setmacro ("header", "collector", "_" + collector_page + "header_");
1071 }
1072
1073 // set the collectorbar macro
1074 text_t collectorbar = "<table border=0 cellspacing=4 cellpadding=0><tr>\n";
1075
1076 if (collector_page == "new") {
1077 collectorbar += "<td>_icongreyarrow_</td>\n";
1078 collectorbar += get_button (collector_page, "green", "info", true);
1079 collectorbar += "<td>_icongreyarrow_</td>\n";
1080 collectorbar += get_button (collector_page, "grey", "srce", false);
1081 collectorbar += "<td>_icongreyarrow_</td>\n";
1082 collectorbar += get_button (collector_page, "grey", "conf", false);
1083 collectorbar += "<td>_icongreyarrow_</td>\n";
1084 collectorbar += get_button (collector_page, "grey", "bild", false);
1085 collectorbar += "<td>_icongreyarrow_</td>\n";
1086 collectorbar += get_button (collector_page, "grey", "view", false);
1087
1088 } else if (collector_page == "info") {
1089 collectorbar += "<td>_icongreyarrow_</td>\n";
1090 collectorbar += get_button (collector_page, "yellow", "info", false);
1091 collectorbar += "<td>_icongreyarrow_</td>\n";
1092 collectorbar += get_button (collector_page, "green", "srce", true);
1093 collectorbar += "<td>_icongreyarrow_</td>\n";
1094 collectorbar += get_button (collector_page, "grey", "conf", false);
1095 collectorbar += "<td>_icongreyarrow_</td>\n";
1096 collectorbar += get_button (collector_page, "grey", "bild", false);
1097 collectorbar += "<td>_icongreyarrow_</td>\n";
1098 collectorbar += get_button (collector_page, "grey", "view", false);
1099 collectorbar += "</tr><tr><td></td><td align=center>_icongreyuparrow_</td><td colspan=8></td>\n";
1100
1101 } else if (collector_page == "srce") {
1102 collectorbar += "<td>_icongreyarrow_</td>\n";
1103 if (esrce == 1) {
1104 // if we came from the "change an existing collection" page previous button(s)
1105 // are disabled
1106 collectorbar += get_button (collector_page, "grey", "info", false);
1107 } else {
1108 collectorbar += get_button (collector_page, "yellow", "info", true);
1109 }
1110 collectorbar += "<td>_icongreyarrow_</td>\n";
1111 collectorbar += get_button (collector_page, "yellow", "srce", false);
1112 collectorbar += "<td>_icongreyarrow_</td>\n";
1113 collectorbar += get_button (collector_page, "green", "conf", true);
1114 collectorbar += "<td>_icongreyarrow_</td>\n";
1115 collectorbar += get_button (collector_page, "green", "bild", true);
1116 collectorbar += "<td>_icongreyarrow_</td>\n";
1117 collectorbar += get_button (collector_page, "grey", "view", false);
1118 collectorbar += "</tr><tr><td colspan=3></td><td align=center>_icongreyuparrow_</td><td colspan=6></td>\n";
1119
1120 } else if (collector_page == "conf") {
1121 collectorbar += "<td>_icongreyarrow_</td>\n";
1122 // disable appropriate buttons if we came from "change an existing collection"
1123 // page
1124 if (esrce == 1 || econf == 1) {
1125 collectorbar += get_button (collector_page, "grey", "info", false);
1126 } else {
1127 collectorbar += get_button (collector_page, "yellow", "info", true);
1128 }
1129 collectorbar += "<td>_icongreyarrow_</td>\n";
1130 if (econf == 1) {
1131 collectorbar += get_button (collector_page, "grey", "srce", false);
1132 } else {
1133 collectorbar += get_button (collector_page, "yellow", "srce", true);
1134 }
1135 collectorbar += "<td>_icongreyarrow_</td>\n";
1136 collectorbar += get_button (collector_page, "yellow", "conf", false);
1137 collectorbar += "<td>_icongreyarrow_</td>\n";
1138 collectorbar += get_button (collector_page, "green", "bild", true);
1139 collectorbar += "<td>_icongreyarrow_</td>\n";
1140 collectorbar += get_button (collector_page, "grey", "view", false);
1141 collectorbar += "</tr><tr><td colspan=5></td><td align=center>_icongreyuparrow_</td><td colspan=4></td>\n";
1142
1143 } else if (collector_page == "bilddone") {
1144 collectorbar += "<td>_icongreyarrow_</td>\n";
1145 // all previous buttons grey after build was completed
1146 collectorbar += get_button (collector_page, "grey", "info", false);
1147 collectorbar += "<td>_icongreyarrow_</td>\n";
1148 collectorbar += get_button (collector_page, "grey", "srce", false);
1149 collectorbar += "<td>_icongreyarrow_</td>\n";
1150 collectorbar += get_button (collector_page, "grey", "conf", false);
1151 collectorbar += "<td>_icongreyarrow_</td>\n";
1152 collectorbar += get_button (collector_page, "yellow", "bild", false);
1153 collectorbar += "<td>_icongreyarrow_</td>\n";
1154 collectorbar += get_button (collector_page, "green", "view", true);
1155 collectorbar += "</tr><tr><td colspan=7></td><td align=center>_icongreyuparrow_</td><td colspan=2></td>\n";
1156
1157 } else if (collector_page == "bildcancel" || collector_page == "bildfail") {
1158 collectorbar += "<td>_icongreyarrow_</td>\n";
1159 // disable appropriate buttons if we came from "change an existing collection"
1160 // page
1161 if (esrce == 1 || econf == 1) {
1162 collectorbar += get_button (collector_page, "grey", "info", false);
1163 } else {
1164 collectorbar += get_button (collector_page, "yellow", "info", true);
1165 }
1166 collectorbar += "<td>_icongreyarrow_</td>\n";
1167 if (econf == 1) {
1168 collectorbar += get_button (collector_page, "grey", "srce", false);
1169 } else {
1170 collectorbar += get_button (collector_page, "yellow", "srce", true);
1171 }
1172 collectorbar += "<td>_icongreyarrow_</td>\n";
1173 collectorbar += get_button (collector_page, "yellow", "conf", true);
1174 collectorbar += "<td>_icongreyarrow_</td>\n";
1175 collectorbar += get_button (collector_page, "yellow", "bild", true);
1176 collectorbar += "<td>_icongreyarrow_</td>\n";
1177 collectorbar += get_button (collector_page, "grey", "view", false);
1178 }
1179
1180 collectorbar += "</tr></table>\n";
1181 disp.setmacro ("collectorbar", "collector", collectorbar);
1182
1183 if (collector_page == "bildfail") {
1184
1185 text_t textfailmsg = "_textfailmsg";
1186 textfailmsg.push_back(failcode);
1187 textfailmsg.push_back('_');
1188 disp.setmacro("textfailmsg", "collector", textfailmsg);
1189
1190 text_t bldlog = filename_cat(gsdlhome, "tmp", args["bc1tmp"], args["bc1dirname"] + ".bld");
1191 text_t rawlog = file_tail (bldlog, 6, 0);
1192 // we'll shove in some <br> tags where \n's occur
1193 text_t faillog;
1194 text_t::const_iterator here = rawlog.begin();
1195 text_t::const_iterator end = rawlog.end();
1196 while (here != end) {
1197 if (*here == '\n') faillog += "<br>";
1198 faillog.push_back (*here);
1199 here ++;
1200 }
1201 disp.setmacro ("faillog", "collector", dm_safe(faillog));
1202 }
1203
1204 if (collector_page == "srce" || collector_page == "existing")
1205 set_fullnamemenu (disp, args, protos, logout);
1206 if (collector_page == "conf")
1207 set_cfgfile (disp, args, logout);
1208 if (collector_page == "bildstatus")
1209 set_statusline (disp, args, logout);
1210 if (collector_page == "srce") {
1211 set_inputsourceboxes (disp, args, logout);
1212 }
1213
1214 disp.setmacro ("gsdlhome", "collector", dm_safe(gsdlhome));
1215}
1216
1217bool collectoraction::do_action (cgiargsclass &args, recptprotolistclass * /*protos*/,
1218 browsermapclass * /*browsers*/, displayclass &disp,
1219 outconvertclass &outconvert, ostream &textout,
1220 ostream &logout) {
1221
1222 // make sure the collector is enabled
1223 if (disabled) {
1224 textout << outconvert
1225 << "<html>\n"
1226 << "<head>\n"
1227 << "<title>Collector disabled</title>\n"
1228 << "</head>\n"
1229 << "<body bgcolor=\"#ffffff\" text=\"#000000\" link=\"#006666\" "
1230 << "alink=\"#cc9900\" vlink=\"#666633\">\n"
1231 << "<h2>Facility disabled</h2>\n"
1232 << "Sorry, the Collector end-user collection building facility is currently disabled\n"
1233 << "\n</body>\n"
1234 << "</html>\n";
1235 return true;
1236 }
1237
1238 text_t &collector_page = args["p"];
1239 text_t &collection = args["bc1dirname"];
1240
1241 // make sure we have perl (we won't bother with this check for the
1242 // building status pages to avoid slowing things down unneccessarily)
1243 if (collector_page != "bildstatus" && collector_page != "bildframe1" && !perl_ok(logout)) {
1244 textout << outconvert
1245 << "<html>\n"
1246 << "<head>\n"
1247 << "<title>Perl not found</title>\n"
1248 << "</head>\n"
1249 << "<body bgcolor=\"#ffffff\" text=\"#000000\" link=\"#006666\" "
1250 << "alink=\"#cc9900\" vlink=\"#666633\">\n"
1251 << "<h2>Perl not found</h2>\n"
1252 << "Greenstone could not detect perl on this system. It is therefore not\n"
1253 << "possible to build a Greenstone collection, either from the Collector or the \n"
1254 << "command-line tools, or to use the Collector for any other task.\n"
1255 << "<p>Please refer to the Greenstone Installer's Guide for details on\n"
1256 << "installing perl on your system.\n"
1257 << "\n</body>\n"
1258 << "</html>\n";
1259 return true;
1260
1261 }
1262
1263 if (collector_page == "bild") {
1264 // do the work (download, import, build)
1265 gsdl_build (args, logout);
1266
1267 if (message.empty()) {
1268 // bild page is a frameset so we don't want headers and stuff
1269 textout << outconvert << disp << ("_collector:bildcontent_\n");
1270 }
1271 }
1272
1273 if (do_mkcol == true) {
1274 // execute mkcol.pl (do_mkcol is set from within check_cgiargs)
1275 gsdl_mkcol (args, logout);
1276 do_mkcol = false; // reset for fast-cgi
1277 }
1278
1279 if (args["bc1dodelete"] == "1") {
1280 // delete bcidirname collection
1281 if (collection_protected (collection)) {
1282 message = "delinvalid";
1283
1284 } else {
1285
1286 const recptconf &rcinfo = recpt->get_configinfo ();
1287 bool emailuserevents = rcinfo.EmailUserEvents;
1288
1289 // get collection maintainer email from collect.cfg before we
1290 // delete it
1291 text_t colmaintainer;
1292 text_t cfgfile = filename_cat(gsdlhome, "collect", collection, "etc", "collect.cfg");
1293 char *cfgfilec = cfgfile.getcstr();
1294 ifstream cfg_in (cfgfilec);
1295 delete cfgfilec;
1296 if (cfg_in) {
1297 text_tarray cfgline;
1298 while (read_cfg_line(cfg_in, cfgline) >= 0) {
1299 if (cfgline.size () == 2 && cfgline[0] == "maintainer") {
1300 colmaintainer = cfgline[1];
1301 break;
1302 }
1303 }
1304 cfg_in.close();
1305 }
1306 if (colmaintainer.empty()) {
1307 logout << outconvert
1308 << "collectoraction::do_action WARNING: Collection being deleted ("
1309 << collection << ") has no maintainer address. EmailUserEvents "
1310 << "disabled\n";
1311 emailuserevents = false;
1312 }
1313
1314 // first we need to free up the collection's collection server
1315 // we must do this for the local library (and I guess when using
1316 // fastcgi too) as you can't delete the gdbm file while it's
1317 // being kept open by the collection server
1318 remove_colservr (collection, logout);
1319
1320 text_t delete_cmd = "perl -S delcol.pl -f " + collection;
1321 int rv = gsdl_system (delete_cmd, true, logout);
1322 if (rv != 0) {
1323 // deletion failed -- permissions?
1324 message = "delpermission";
1325 } else {
1326 message = "delsuccess";
1327 }
1328
1329 // log the event
1330 if (rcinfo.LogEvents == CollectorEvents || rcinfo.LogEvents == AllEvents) {
1331
1332 text_t eventlog = filename_cat (gsdlhome, "etc", "events.txt");
1333 char *eventlogt = eventlog.getcstr();
1334 ofstream eventl (eventlogt, ios::app);
1335 delete eventlogt;
1336
1337 if (eventl) {
1338 eventl << outconvert << "[Collector Event]\n"
1339 << "Date: " << get_date (true) << "\n"
1340 << "Greenstone Username: " << args["un"] << "\n"
1341 << "Collection: " << collection << "\n"
1342 << "Collection Maintainer: " << colmaintainer << "\n"
1343 << "GSDLHOME: " << gsdlhome << "\n";
1344
1345 if (message == "delsuccess") {
1346 eventl << outconvert
1347 << "The " << collection << " collection was successfully deleted\n\n";
1348 } else {
1349 eventl << outconvert
1350 << "Attempt to delete the " << collection << " collection failed\n\n";
1351 }
1352 eventl.close();
1353
1354 } else {
1355 logout << outconvert << "collectoraction::do_action ERROR: Couldn't open "
1356 << "event log file " << eventlog << " for appending during collection "
1357 << "deletion. LogEvents disabled\n";
1358 }
1359 }
1360
1361 if (rcinfo.EmailEvents == CollectorEvents || rcinfo.EmailEvents == AllEvents || emailuserevents) {
1362 // use sendmail.pl perl script to send email events
1363 text_t tmpmailfile = filename_cat (gsdlhome, "tmp", args["bc1tmp"], "event.txt");
1364 char *tmpmailfilec = tmpmailfile.getcstr();
1365 ofstream tmpfile (tmpmailfilec);
1366 delete tmpmailfilec;
1367 if (tmpfile) {
1368 tmpfile << outconvert << "[Collector Event]\n"
1369 << "Date: " << get_date (true) << "\n"
1370 << "Greenstone Username: " << args["un"] << "\n"
1371 << "Collection: " << collection << "\n"
1372 << "Collection Maintainer: " << colmaintainer << "\n"
1373 << "GSDLHOME: " << gsdlhome << "\n";
1374 if (message == "delsuccess") {
1375 tmpfile << outconvert
1376 << "The " << collection << " collection was successfully deleted\n\n";
1377 } else {
1378 tmpfile << outconvert
1379 << "Attempt to delete the " << collection << " collection failed\n\n";
1380 }
1381 tmpfile.close();
1382 text_t to;
1383 if (rcinfo.EmailEvents == CollectorEvents || rcinfo.EmailEvents == AllEvents) to += rcinfo.maintainer;
1384 if (emailuserevents) {
1385 if (!to.empty()) to.push_back (',');
1386 to += colmaintainer;
1387 }
1388 text_t sendmail_cmd = "perl -S sendmail.pl -to \"" + to + "\" -from \"" + rcinfo.maintainer;
1389 sendmail_cmd += "\" -smtp \"" + rcinfo.MailServer + "\" -subject \"Greenstone Collector Event\"";
1390 sendmail_cmd += " -msgfile \"" + tmpmailfile + "\"";
1391
1392 gsdl_system (sendmail_cmd, false, logout);
1393
1394 } else {
1395 logout << outconvert << "collectoraction::do_action ERROR: Couldn't open "
1396 << "temporary event log file " << tmpmailfile << " during collection "
1397 << "deletion. EmailEvents and EmailUserEvents disabled\n";
1398 }
1399 }
1400 }
1401 }
1402
1403 if (collector_page == "bildcancel" || collector_page == "bildfail") {
1404 // cancel the build (we'll also use the cancel_build script to tidy
1405 // up if the build failed)
1406 gsdl_cancel_build (args, logout);
1407 }
1408
1409 if (collector_page == "expt") {
1410
1411 // export the collection - we'll do a synchronous system call to
1412 // exportcol.pl as that's the easiest way to do it. if it becomes a
1413 // problem that it's taking too long to export a large collection then
1414 // we may have to revisit this.
1415 text_t tmpfile = filename_cat (gsdlhome, "tmp", collection + "_export.txt");
1416 text_t export_cmd = "perl -S exportcol.pl -out \"" + tmpfile + "\" " + collection;
1417 gsdl_system (export_cmd, true, logout);
1418 if (file_exists (tmpfile)) {
1419 text_t returnline = file_tail (tmpfile, 1, 0);
1420 if (returnline.size() > 23 && (substr(returnline.begin(), returnline.begin()+23) == "exportcol.pl succeeded:")) {
1421 // success
1422 message = "exptsuccess";
1423 } else {
1424 message = "exptfail";
1425 }
1426 } else {
1427 message = "exptfail";
1428 }
1429 }
1430
1431 if (message.empty()) {
1432 if (collector_page != "bild") {
1433 // output page ("bild" page was already output above)
1434 textout << outconvert << disp << ("_collector:header_\n")
1435 << ("_collector:" + collector_page + "content_\n")
1436 << ("_collector:footer_\n");
1437 }
1438 } else {
1439 // message was set somewhere (probably an error), output message page
1440 textout << outconvert << disp << ("_collector:header_\n")
1441 << ("_collector:" + message + "content_\n")
1442 << ("_collector:footer_\n");
1443 message.clear();
1444 }
1445 return true;
1446}
1447
1448// if sw = 0 replace all carriage returns in intext with the string "\n"
1449// else replace all occurances of "\n" with a carriage return
1450text_t collectoraction::carriage_replace (const text_t &intext, int sw) {
1451
1452 text_t outtext;
1453 text_t::const_iterator here = intext.begin();
1454 text_t::const_iterator end = intext.end();
1455 while (here != end) {
1456 if (sw == 0) {
1457 if (*here == '\n') {
1458 if ((here+1) != end && *(here+1) == '\r') here ++;
1459 outtext += "\\n";
1460 } else if (*here == '\r') {
1461 if ((here+1) != end && *(here+1) == '\n') here ++;
1462 outtext += "\\n";
1463 } else {
1464 outtext.push_back (*here);
1465 }
1466 } else if (*here == '\\' && (here+1) != end && *(here+1) == 'n') {
1467 outtext.push_back ('\n');
1468 here ++;
1469 } else {
1470 outtext.push_back (*here);
1471 }
1472 here ++;
1473 }
1474 return outtext;
1475}
1476
1477// create a short directory name from fullname
1478text_t collectoraction::get_directory_name (const text_t &fullname) {
1479
1480 text_t shortname;
1481 if (fullname.empty()) {
1482 shortname = "coll";
1483
1484 } else {
1485
1486 // first make all lowercase and remove any dodgy characters
1487 // (i.e. anything not [a-z]
1488 text_t::const_iterator here = fullname.begin();
1489 text_t::const_iterator end = fullname.end();
1490 while (here != end) {
1491 if ((*here >= 'A' && *here <= 'Z') || (*here >= 'a' && *here <= 'z') ||
1492 (*here == ' ')) {
1493 if (*here >= 'A' && *here <= 'Z') shortname.push_back (*here+32);
1494 else if (*here == ' ') {
1495 while ((*(here+1)) == ' ') here ++;
1496 shortname.push_back (*here);
1497 } else shortname.push_back (*here);
1498 }
1499 here ++;
1500 }
1501
1502 text_tarray words;
1503 splitchar (shortname.begin(), shortname.end(), ' ', words);
1504 int num_words = words.size();
1505
1506 if (num_words == 0) {
1507 shortname = "coll";
1508
1509 } else {
1510
1511 shortname.clear();
1512 int use_words = (num_words <= 6) ? num_words : 6;
1513 unsigned int substr_len = 6 / use_words;
1514
1515 for (int i = 0; i < use_words; i++) {
1516 if (words[i].size() < substr_len) shortname += words[i];
1517 else shortname += substr (words[i].begin(), words[i].begin()+substr_len);
1518 }
1519 }
1520 }
1521
1522 // check to see if shortname is unique
1523 text_t fulldirname = filename_cat (gsdlhome, "collect", shortname);
1524 if (directory_exists (fulldirname)) {
1525 int version = 0;
1526 text_t newname;
1527 do {
1528 version ++;
1529 newname = shortname;
1530 newname.push_back ('v');
1531 newname.appendint (version);
1532 fulldirname = filename_cat (gsdlhome, "collect", newname);
1533 } while (directory_exists (fulldirname));
1534
1535 shortname = newname;
1536 }
1537
1538 return shortname;
1539}
1540
1541// tests if collection is write protected (currently just checks if
1542// collect.cfg file is writable
1543bool collectoraction::collection_protected (const text_t &collection) {
1544 text_t cfgfile = filename_cat(gsdlhome, "collect", collection, "etc", "collect.cfg");
1545 if (file_writable(cfgfile)) return false;
1546 return true;
1547}
1548
1549// assigns a temporary directory name for this collector session
1550// and creates temporary directory
1551// returns false if it couldn't create the directory
1552bool collectoraction::assign_tmpname (cgiargsclass &args, ostream &logout) {
1553
1554 int i = 0;
1555 text_t tmpname = "tbuild";
1556 while (directory_exists (filename_cat (gsdlhome, "tmp", tmpname + text_t(i)))) {
1557 i++;
1558 }
1559 tmpname.appendint (i);
1560
1561 text_t fulltmpdir = filename_cat (gsdlhome, "tmp", tmpname);
1562 if (!mk_dir (fulltmpdir)) {
1563 outconvertclass text_t2ascii;
1564 logout << text_t2ascii << "collectoraction::assign_tmpname unable to create directory ("
1565 << fulltmpdir << ")\n";
1566 return false;
1567 }
1568
1569 args["bc1tmp"] = tmpname;
1570 return true;
1571}
1572
1573void collectoraction::gsdl_mkcol (cgiargsclass &args, ostream &logout) {
1574
1575 text_t tmpdir = filename_cat (gsdlhome, "tmp", args["bc1tmp"]);
1576 if (!directory_exists (tmpdir)) {
1577 message = "tmpfail";
1578 return;
1579 }
1580
1581 text_t &collection = args["bc1dirname"];
1582 if (collection.empty()) {
1583 message = "nocollection";
1584 return;
1585 }
1586
1587 // check for a .create file - if it exists then we've already created the collection
1588 text_t createfile = filename_cat (tmpdir, ".create");
1589 if (file_exists (createfile)) {
1590 return;
1591 }
1592
1593 // set up options
1594 text_t options = "-quiet -creator \"" + args["bc1contactemail"] + "\"";
1595 options += " -title \"" + args["bc1fullname"] + "\"";
1596 options += " -about \"" + carriage_replace (args["bc1aboutdesc"] + "_collectorextra_", 0) + "\"";
1597 options += " -collectdir \"" + remove_trailing_slashes(tmpdir) + "\" ";
1598
1599 text_t optionfile = filename_cat (tmpdir, "mkcol.opt");
1600 char *optionfilec = optionfile.getcstr();
1601 ofstream ofile_out (optionfilec);
1602 delete optionfilec;
1603 if (!ofile_out) {
1604 message = "tmpfail";
1605 return;
1606 }
1607 outconvertclass text_t2ascii;
1608 ofile_out << text_t2ascii << options << "\n";
1609 ofile_out.close();
1610
1611 // run mkcol.pl
1612 text_t mkcol_cmd = "perl -S mkcol.pl -optionfile \"" + optionfile;
1613 mkcol_cmd += "\" " + collection;
1614 gsdl_system (mkcol_cmd, true, logout);
1615
1616 // make sure it went ok
1617 text_t cfgfile = filename_cat (tmpdir, collection, "etc", "collect.cfg");
1618 if (!file_writable (cfgfile)) {
1619 message = "mkcolfail";
1620 } else {
1621 // create the .create file (this file is just a place holder to let any future
1622 // pages know that the collection already exists).
1623 char *createfilec = createfile.getcstr();
1624 ofstream cfile_out (createfilec);
1625 delete createfilec;
1626 if (cfile_out) {
1627 cfile_out << "collection created\n";
1628 cfile_out.close();
1629 } else {
1630 message = "tmpfail";
1631 return;
1632 }
1633 }
1634}
1635
1636void collectoraction::gsdl_build (cgiargsclass &args, ostream &logout) {
1637
1638 outconvertclass text_t2ascii;
1639
1640 text_t tmpdir = filename_cat (gsdlhome, "tmp", args["bc1tmp"]);
1641 if (!directory_exists (tmpdir)) {
1642 message = "tmpfail";
1643 return;
1644 }
1645
1646 text_t &collection = args["bc1dirname"];
1647 if (collection.empty()) {
1648 message = "nocollection";
1649 return;
1650 }
1651
1652 // check for a .build file - if it exists then we've already built
1653 // the collection (or are in the process of building it)
1654 text_t buildfile = filename_cat (tmpdir, ".build");
1655 if (file_exists (buildfile)) {
1656 return;
1657 } else {
1658 // create the .build file (this file is just a place holder to let any future
1659 // pages know that we've already been here)
1660 char *buildfilec = buildfile.getcstr();
1661 ofstream bfile_out (buildfilec);
1662 delete buildfilec;
1663 if (bfile_out) {
1664 bfile_out << "collection building\n";
1665 bfile_out.close();
1666 } else {
1667 message = "tmpfail";
1668 return;
1669 }
1670 }
1671
1672 const recptconf &rcinfo = recpt->get_configinfo ();
1673
1674 // create the event header file if LogEvents, EmailEvents or
1675 // EmailUserEvents options are turned on.
1676 bool logevents =
1677 (rcinfo.LogEvents == CollectorEvents || rcinfo.LogEvents == AllEvents ||
1678 rcinfo.EmailEvents == CollectorEvents || rcinfo.EmailEvents == AllEvents ||
1679 rcinfo.EmailUserEvents);
1680 text_t ehead_file = filename_cat (tmpdir, "ehead.txt");
1681 if (logevents) {
1682 if (!create_event_header_file (ehead_file, args, logout)) {
1683 logevents = false;
1684 }
1685 }
1686
1687 text_t collectdir = get_collectdir (args);
1688
1689 // set up build options
1690 text_t options = "-make_writable -remove_import -out \"";
1691 options += filename_cat (tmpdir, collection + ".bld");
1692 options += "\" -collectdir \"" + collectdir + "\" -statsfile \"";
1693 options += filename_cat(collectdir, collection, "etc", "import.log") + "\"";
1694
1695 if (args["bc1esrce"] == 1) {
1696 // we're adding data to an existing collection
1697 options += " -save_archives -append";
1698 }
1699
1700 text_tarray inputvalues, inputtypes;
1701 splitchar (args["bc1input"].begin(), args["bc1input"].end(), ',', inputvalues);
1702 splitchar (args["bc1inputtype"].begin(), args["bc1inputtype"].end(), ',', inputtypes);
1703 int numvalues = inputvalues.size();
1704 int numtypes = inputtypes.size();
1705 for (int i = 0; i < numvalues; i++) {
1706 if (!inputvalues[i].empty()) {
1707 text_t type = "file://"; // default
1708 if (i < numtypes) type = inputtypes[i];
1709 options += " -download \"" +
1710 remove_trailing_slashes(type + format_url(decode_commas(inputvalues[i]))) + "\"";
1711 }
1712 }
1713
1714 if (logevents) {
1715 if (rcinfo.LogEvents == CollectorEvents || rcinfo.LogEvents == AllEvents)
1716 options += " -log_events";
1717 if (rcinfo.EmailEvents == CollectorEvents || rcinfo.EmailEvents == AllEvents) {
1718 options += " -mail_server " + rcinfo.MailServer;
1719 options += " -email_events " + rcinfo.maintainer;
1720 if (rcinfo.EmailUserEvents) options += "," + args["bc1contactemail"];
1721 } else if (rcinfo.EmailUserEvents) {
1722 options += " -mail_server " + rcinfo.MailServer;
1723 options += " -email_events " + args["bc1contactemail"];
1724 }
1725 options += " -event_header " + ehead_file;
1726 }
1727
1728 text_t optionfile = filename_cat (tmpdir, "build.opt");
1729 char *optionfilec = optionfile.getcstr();
1730 ofstream ofile_out (optionfilec);
1731 delete optionfilec;
1732 if (!ofile_out) {
1733 message = "tmpfail";
1734 return;
1735 }
1736 ofile_out << text_t2ascii << options << "\n";
1737 ofile_out.close();
1738
1739 // if we're altering an existing collection we need to kill off
1740 // the existing collection server - we do this for the local library
1741 // (and any other persistent version of the library) as the existing
1742 // gdbm file can't be deleted while the collection server holds it open
1743 if ((args["bc1econf"] == 1) || (args["bc1esrce"] == 1)) {
1744 remove_colservr (collection, logout);
1745 }
1746
1747 // set up the build command - build.bat has some issues with quoting
1748 // on win2k when gsdlhome contains spaces so we'll avoid using
1749 // "perl -S" here in favor of calling the "build" perl script explicitly
1750 text_t build_cmd = "perl \"" + filename_cat (gsdlhome, "bin", "script", "build");
1751 build_cmd += "\" -optionfile \"" + optionfile + "\" " + collection;
1752 // run build command in background (i.e. asynchronously)
1753 gsdl_system (build_cmd, false, logout);
1754}
1755
1756void collectoraction::gsdl_cancel_build (cgiargsclass &args, ostream &logout) {
1757 // I really wanted to do what this perl script does from within the library
1758 // c++ code. I ran into some problems though (like how do you write a portable
1759 // "rm -r" in c++?). One day I'll spend some time sorting it out ... maybe.
1760 text_t cancel_cmd = "perl -S cancel_build.pl -collectdir \"";
1761 cancel_cmd += filename_cat (gsdlhome, "tmp", args["bc1tmp"]) + "\" ";
1762 cancel_cmd += args["bc1dirname"];
1763 // To be on the safe side we'll make this a synchronous call
1764 // so that all tidying up is done before the user has a chance
1765 // to do anything else (like start rebuilding their collection).
1766 // This means that for a big collection where there's lots of
1767 // stuff to delete etc. it might take a while before the "build
1768 // cancelled" page appears.
1769 gsdl_system (cancel_cmd, true, logout);
1770}
1771
1772text_t collectoraction::get_collectdir (cgiargsclass &args) {
1773
1774 if ((args["bc1econf"] == 1) || (args["bc1esrce"] == 1)) {
1775 // we're adding to a collection in place
1776 return filename_cat(gsdlhome, "collect");
1777
1778 } else {
1779 return filename_cat (gsdlhome, "tmp", args["bc1tmp"]);
1780 }
1781}
1782
1783// checks to see if any of the plugins in pluginset occur in
1784// collections configuration file
1785bool collectoraction::uses_weird_plugin (const text_t &collection) {
1786
1787 text_tset pluginset;
1788 pluginset.insert ("HBPlug");
1789
1790 text_t cfgfile_content;
1791 text_t cfgfile_name = filename_cat (gsdlhome, "collect", collection, "etc", "collect.cfg");
1792 text_t pluginstr, pluginname;
1793
1794 if (read_file (cfgfile_name, cfgfile_content)) {
1795 text_t::const_iterator here = cfgfile_content.begin();
1796 text_t::const_iterator end = cfgfile_content.end();
1797 while (here != end) {
1798 here = findchar (here, end, 'p');
1799 if (here == end) break;
1800 if ((here+6 < end) && (substr (here, here+6) == "plugin")) {
1801 getdelimitstr (here+6, end, '\n', pluginstr);
1802 text_t::const_iterator hp = pluginstr.begin();
1803 text_t::const_iterator ep = pluginstr.end();
1804 bool found = false;
1805 // remove any leading whitespace, trailing options etc.
1806 while (hp != ep) {
1807 if (*hp == '\t' || *hp == ' ' || *hp == '\n') {
1808 if (found) break;
1809 } else {
1810 pluginname.push_back (*hp);
1811 found = true;
1812 }
1813 hp ++;
1814 }
1815 text_tset::const_iterator it = pluginset.find (pluginname);
1816 if (it != pluginset.end()) return true; // found matching plugin
1817 pluginname.clear();
1818 }
1819 here ++;
1820 }
1821 }
1822 return false;
1823}
1824
1825// create and initialize a new collection server and
1826// add it to the null protocol.
1827void collectoraction::create_colserver (const text_t &collection, ostream &logout) {
1828
1829 recptprotolistclass *protos = recpt->get_recptprotolist_ptr();
1830 recptprotolistclass::iterator rprotolist_here = protos->begin();
1831 recptprotolistclass::iterator rprotolist_end = protos->end();
1832 while (rprotolist_here != rprotolist_end) {
1833 comerror_t err = noError;
1834 if ((*rprotolist_here).p != NULL) {
1835 if ((*rprotolist_here).p->get_protocol_name (err) == "nullproto") {
1836 // create collection server and add it to nullproto
1837 (*rprotolist_here).p->add_collection (collection, recpt, gsdlhome, gsdlhome);
1838 // make sure gsdlhome is configured
1839 text_tarray tmp;
1840 tmp.push_back (gsdlhome);
1841 (*rprotolist_here).p->configure ("gsdlhome", tmp, err);
1842 // re-initialize the null protocol
1843 if (!(*rprotolist_here).p->init (err, logout)) {
1844 logout << "collectoraction::create_colserver: nullproto init failed\n";
1845 }
1846 return;
1847 }
1848 }
1849 rprotolist_here ++;
1850 }
1851
1852 logout << "collectoraction::create_colserver: no valid nullproto found\n";
1853}
1854
1855// delete a collection server from the null protocol
1856void collectoraction::remove_colservr (const text_t &collection, ostream &logout) {
1857
1858 recpt->uncache_collection (collection);
1859
1860 recptprotolistclass *protos = recpt->get_recptprotolist_ptr();
1861 recptprotolistclass::iterator rprotolist_here = protos->begin();
1862 recptprotolistclass::iterator rprotolist_end = protos->end();
1863 while (rprotolist_here != rprotolist_end) {
1864 comerror_t err = noError;
1865 if ((*rprotolist_here).p != NULL) {
1866 if ((*rprotolist_here).p->get_protocol_name (err) == "nullproto") {
1867 (*rprotolist_here).p->remove_collection (collection, logout);
1868 return;
1869 }
1870 }
1871 rprotolist_here ++;
1872 }
1873
1874 logout << "collectoraction::create_colserver: no valid nullproto found\n";
1875}
1876
1877bool collectoraction::create_event_header_file (const text_t &filename, cgiargsclass &args,
1878 ostream &logout) {
1879
1880 outconvertclass text_t2ascii;
1881 char *filenamec = filename.getcstr();
1882 ofstream eheadfile (filenamec);
1883 delete filenamec;
1884
1885 if (eheadfile) {
1886 eheadfile << text_t2ascii << get_event_header (args);
1887 eheadfile.close();
1888 return true;
1889 }
1890
1891 logout << text_t2ascii << "collectoraction::create_event_header ERROR: Couldn't create "
1892 << "Event Header file " << filename << ". Event logging disabled\n";
1893 return false;
1894}
1895
1896text_t collectoraction::get_event_header (cgiargsclass &args) {
1897 text_t header = "Greenstone Username: " + args["un"] + "\n";
1898 header += "Collection: " + args["bc1dirname"] + "\n";
1899 header += "Collection Creator: " + args["bc1contactemail"] + "\n";
1900 header += "GSDLHOME: " + gsdlhome + "\n";
1901 header += "Build Location: " + get_collectdir(args) + "\n";
1902
1903 return header;
1904}
1905
1906bool collectoraction::check_sources (cgiargsclass &args, ostream &logout) {
1907
1908 bool found = false;
1909
1910 text_tarray inputvalues;
1911 splitchar (args["bc1input"].begin(), args["bc1input"].end(), ',', inputvalues);
1912
1913 text_tarray inputtypes;
1914 splitchar (args["bc1inputtype"].begin(), args["bc1inputtype"].end(), ',', inputtypes);
1915
1916 int numvalues = inputvalues.size();
1917 int numtypes = inputtypes.size();
1918
1919 for (int i = 0; i < numvalues; i++) {
1920 text_t value = format_url(decode_commas(inputvalues[i]));
1921 text_t type = "file://"; // default
1922 if (!value.empty()) {
1923 found = true;
1924 if (i >= numtypes || inputtypes[i].empty()) {
1925 logout << "collectoraction::check_sources: WARNING type not set\n";
1926 } else {
1927 type = inputtypes[i];
1928 }
1929 if (type == "file://") {
1930 if (!file_exists(value) && !directory_exists(value)) {
1931 failedsources[decode_commas(inputvalues[i])] = "1";
1932 badsources = true;
1933 }
1934 } else if (type == "http://") {
1935 if (gsdl_system ("perl -S ping.pl -quiet http://" + value, true, logout)) {
1936 failedsources[decode_commas(inputvalues[i])] = "1";
1937 badsources = true;
1938 }
1939 } else if (type == "ftp://") {
1940 if (gsdl_system ("perl -S ping.pl -quiet ftp://" + value, true, logout)) {
1941 failedsources[decode_commas(inputvalues[i])] = "1";
1942 badsources = true;
1943 }
1944 }
1945 }
1946 }
1947
1948 // set badsources if there weren't any sources at all
1949 if (!found) badsources = true;
1950
1951 if (badsources) return false;
1952 return true;
1953}
1954
1955// format_url simply strips "http://", "ftp://", or "file://" off the
1956// beginning of url if they're there
1957text_t collectoraction::format_url (const text_t &url) {
1958 text_t::const_iterator begin = url.begin();
1959 text_t::const_iterator end = url.end();
1960
1961 if (url.size() >= 7) {
1962 text_t prefix = substr(begin, begin+7);
1963 if (prefix == "http://" || prefix == "file://") {
1964 return substr(begin+7, end);
1965 }
1966 }
1967 if (url.size() >= 6) {
1968 if (substr(begin, begin+6) == "ftp://") {
1969 return substr(begin+6, end);
1970 }
1971 }
1972 return url;
1973}
1974
1975text_t collectoraction::remove_trailing_slashes (text_t str) {
1976
1977 while (*(str.end()-1) == '\\') {
1978 str.pop_back();
1979 }
1980 return str;
1981}
1982
1983#endif //GSDL_USE_COLLECTOR_ACTION
Note: See TracBrowser for help on using the repository browser.