source: trunk/gsdl/bin/script/buildcol.pl@ 6945

Last change on this file since 6945 was 6945, checked in by mdewsnip, 20 years ago

Updated the resource bundle handling code some more. Strings are first looked for in a language specific resource bundle (if specified). If not found there, the default resource bundle is checked. If still not found, the English resource bundle is checked. These resource bundles are loaded on an as-needed basis.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 17.5 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl -- This program will build a particular collection
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# 11/04/03 Added usage datastructure - John Thompson
29
30package buildcol;
31
# Compile-time setup: refuse to run outside a Greenstone environment and
# put the Greenstone perl library directories at the front of @INC so the
# "use" statements below can be resolved.
BEGIN {
    # Both variables are mandatory; GSDLHOME is checked first.
    foreach my $required (qw(GSDLHOME GSDLOS)) {
        die "$required not set\n" unless defined $ENV{$required};
    }

    # A single unshift, listed in final search order (classify first,
    # perllib last).
    my $perllib = "$ENV{'GSDLHOME'}/perllib";
    unshift (@INC,
             "$perllib/classify",
             "$perllib/plugins",
             "$perllib/cpan",
             $perllib);
}
40
41use colcfg;
42use parsargv;
43use util;
44use FileHandle;
45use gsprintf;
46use printusage;
47
# The values accepted by the -mode option. Each entry pairs the mode name
# with the resource-bundle key of its description.
my $mode_list =
    [ map { +{ 'name' => $_,
               'desc' => "{buildcol.mode.$_}" } }
      qw(all compress_text build_index infodb) ];
57
# Description of every documented command line option, in the order it is
# listed in the usage output. Each entry is a hash with:
#   name      - option name (without the leading dash)
#   desc      - resource-bundle key of the description
#   type      - flag | string | int | enum
#   list      - permitted values (enum options only)
#   deft      - default value shown in the usage text (optional)
#   reqd      - whether the option is required
#   modegli / hiddengli - hints carried along for the GLI
my $arguments = [
    { name      => 'allclassifications',
      desc      => '{buildcol.allclassifications}',
      type      => 'flag',
      reqd      => 'no',
      modegli   => '2' },

    { name      => 'archivedir',
      desc      => '{buildcol.archivedir}',
      type      => 'string',
      reqd      => 'no',
      hiddengli => 'yes' },

    { name      => 'builddir',
      desc      => '{buildcol.builddir}',
      type      => 'string',
      reqd      => 'no',
      hiddengli => 'yes' },

#   { name      => 'cachedir',
#     desc      => '{buildcol.cachedir}',
#     type      => 'string',
#     reqd      => 'no' },

    { name      => 'collectdir',
      desc      => '{buildcol.collectdir}',
      type      => 'string',
      deft      => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"),
      reqd      => 'no',
      hiddengli => 'yes' },

    { name      => 'create_images',
      desc      => '{buildcol.create_images}',
      type      => 'flag',
      reqd      => 'no',
      modegli   => '4' },

    { name      => 'debug',
      desc      => '{buildcol.debug}',
      type      => 'flag',
      reqd      => 'no',
      hiddengli => 'yes' },

    { name      => 'faillog',
      desc      => '{buildcol.faillog}',
      type      => 'string',
      deft      => &util::filename_cat("<collectdir>", "colname", "etc", "fail.log"),
      reqd      => 'no',
      modegli   => '4' },

    { name      => 'index',
      desc      => '{buildcol.index}',
      type      => 'string',
      reqd      => 'no',
      modegli   => '3' },

    { name      => 'keepold',
      desc      => '{buildcol.keepold}',
      type      => 'flag',
      reqd      => 'no',
      hiddengli => 'yes' },

    { name      => 'language',
      desc      => '{scripts.language}',
      type      => 'string',
      reqd      => 'no',
      modegli   => '4' },

    { name      => 'maxdocs',
      desc      => '{buildcol.maxdocs}',
      type      => 'int',
      reqd      => 'no',
      hiddengli => 'yes' },

    { name      => 'mode',
      desc      => '{buildcol.mode}',
      type      => 'enum',
      list      => $mode_list,
      deft      => 'all',
      reqd      => 'no',
      modegli   => '4' },

    { name      => 'no_strip_html',
      desc      => '{buildcol.no_strip_html}',
      type      => 'flag',
      reqd      => 'no',
      modegli   => '4' },

    { name      => 'no_text',
      desc      => '{buildcol.no_text}',
      type      => 'flag',
      reqd      => 'no',
      modegli   => '3' },

    { name      => 'out',
      desc      => '{buildcol.out}',
      type      => 'string',
      deft      => 'STDERR',
      reqd      => 'no',
      hiddengli => 'yes' },

    { name      => 'verbosity',
      desc      => '{buildcol.verbosity}',
      type      => 'int',
      deft      => '2',
      reqd      => 'no',
      modegli   => '4' },
];
149
# Top-level description of this script, handed to the PrintUsage routines
# when the usage text (plain or XML) has to be printed.
my $options = {
    name => 'buildcol.pl',
    desc => '{buildcol.desc}',
    args => $arguments,
};
153
# Convenience wrapper so the rest of this script can call &gsprintf(...)
# without the package qualifier. All arguments are forwarded unchanged to
# gsprintf::gsprintf and its result is returned as is.
sub gsprintf
{
    my @forwarded = @_;

    return &gsprintf::gsprintf(@forwarded);
}
158
159
160# sub print_usage {
161# print STDOUT "\n";
162# print STDOUT "buildcol.pl: Builds the indexes of a Greenstone collection.\n\n";
163# print STDOUT " usage: $0 [options] collection-name\n\n";
164# print STDOUT " options:\n";
165# print STDOUT " -verbosity number 0=none, 3=lots\n";
166# print STDOUT " -archivedir directory Where the archives live\n";
167# print STDOUT " -builddir directory Where to put the built indexes\n";
168# print STDOUT " -maxdocs number Maximum number of documents to build\n";
169# print STDOUT " -debug Print output to STDOUT\n";
170# print STDOUT " -mode all|compress_text|build_index|infodb\n";
171# print STDOUT " -index indexname Index to build (will build all in\n";
172# print STDOUT " config file if not set)\n";
173# print STDOUT " -keepold will not destroy the current contents of the\n";
174# print STDOUT " building directory\n";
175# print STDOUT " -no_text Don't store compressed text. This option is\n";
176# print STDOUT " useful for minimizing the size of the built\n";
177# print STDOUT " indexes if you intend always to display the\n";
178# print STDOUT " original documents at run time (i.e. you won't\n";
179# print STDOUT " be able to retrieve the compressed text version)\n";
180# print STDOUT " -allclassifications Don't remove empty classifications\n";
181# print STDOUT " -create_images Attempt to create default images for new\n";
182# print STDOUT " collection. This relies on the Gimp being\n";
183# print STDOUT " installed along with relevant perl modules\n";
184# print STDOUT " to allow scripting from perl\n";
185# print STDOUT " -collectdir directory Collection directory (defaults to " .
186# &util::filename_cat ($ENV{'GSDLHOME'}, "collect") . ")\n";
187# print STDOUT " -out Filename or handle to print output status to.\n";
188# print STDOUT " The default is STDERR\n";
189# print STDOUT " -no_strip_html Do not strip the html tags from the indexed text\n";
190# print STDOUT " (only used for mgpp collections).\n\n";
191# print STDOUT " -faillog name Fail log filename. This log receives the filenames\n";
192# print STDOUT " of any files which fail to be processed (defaults.\n";
193# print STDOUT " to " .
194# &util::filename_cat("<collectdir>", "colname", "etc", "fail.log") . ")\n";
195# print STDOUT " [Type \"perl -S buildcol.pl | more\" if this help text scrolled off your screen]";
196# print STDOUT "\n" unless $ENV{'GSDLOS'} =~ /^windows$/i;
197# }
198
199
# Script entry point: everything happens inside main() (defined below).
main();
201
202
# main -- entry point of buildcol.pl.
#
# Steps, in order:
#   1. parse the command line (printing the usage text and dying on error,
#      or printing the XML usage description when -xml is given);
#   2. open the output handle and the fail log;
#   3. read <collection>/etc/collect.cfg and use it to fill in every option
#      that was not supplied on the command line;
#   4. optionally create default collection images;
#   5. work out the archives/building directories (and the cache directory
#      when -cachedir is used);
#   6. load a builder class -- "<collection>builder" from the collection's
#      own perllib if present, otherwise mgppbuilder or mgbuilder -- and
#      run the build steps selected by -mode;
#   7. copy a cached build back and close the handles.
#
# Takes no arguments (reads @ARGV and %ENV) and dies on any fatal error.
#
# NOTE: $collection and $configfilename are deliberately left as package
# variables because create_images() reads them; the other undeclared
# variables ($collectcfg, $builder, ...) are left as package variables too
# so that nothing outside this sub that might look at them changes.
sub main
{
    my ($verbosity, $archivedir, $cachedir, $builddir, $maxdocs,
        $debug, $mode, $indexname, $keepold, $allclassifications,
        $create_images, $collectdir, $out, $buildtype, $textindex,
        $no_strip_html, $no_text, $faillog, $gli, $language);

    # ***** 11-04-03 - John Thompson *****
    my $xml = 0;
    # ************************************

    # note that no defaults are passed for most options as they're set
    # later (after we check the collect.cfg file)
    if (!parsargv::parse(\@ARGV,
                         'language/.*/', \$language,
                         'verbosity/\d+/', \$verbosity,
                         'archivedir/.*/', \$archivedir,
                         'cachedir/.*/', \$cachedir, # UNDOCUMENTED
                         'builddir/.*/', \$builddir,
                         'maxdocs/^\-?\d+/', \$maxdocs,
                         'debug', \$debug,
                         'mode/^(all|compress_text|build_index|infodb)$/', \$mode,
                         'index/.*/', \$indexname,
                         'no_text', \$no_text,
                         'keepold', \$keepold,
                         'allclassifications', \$allclassifications,
                         'create_images', \$create_images,
                         'collectdir/.*/', \$collectdir,
                         'out/.*/STDERR', \$out,
                         'no_strip_html', \$no_strip_html,
                         'faillog/.*/', \$faillog,
                         'gli', \$gli,
                         q^xml^, \$xml)) {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($xml) {
        &PrintUsage::print_xml_usage($options);
        die "\n";
    }

    $textindex = "";
    my $close_out = 0;
    if ($out !~ /^(STDERR|STDOUT)$/i) {
        # Three-argument open: the user-supplied file name can no longer
        # smuggle in an open mode. The die is unconditional; it used to
        # depend on the return value of gsprintf.
        open (OUT, '>', $out) or do {
            &gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out);
            die;
        };
        # From here on $out holds the *name* of the handle, which is what
        # gets passed to gsprintf and to the builder.
        $out = "buildcol::OUT";
        $close_out = 1;
    } else {
        # The check above is case-insensitive but handle names are not:
        # normalise "stderr"/"stdout" so autoflush and printing work.
        $out = uc ($out);
    }
    $out->autoflush(1);

    # get and check the collection
    if (($collection = &util::use_collection(@ARGV, $collectdir)) eq "") {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    if ($faillog eq "") {
        $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    }
    # note that we're appending to the faillog here (import.pl clears it each time)
    # this could potentially create a situation where the faillog keeps being added
    # to over multiple builds (if the import process is being skipped)
    open (FAILLOG, '>>', $faillog) or do {
        &gsprintf(STDERR, "{common.cannot_open_fail_log}\n", $faillog);
        die;
    };
    $faillog = 'buildcol::FAILLOG';
    $faillog->autoflush(1);

    # read the configuration file
    $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg");
    if (-e $configfilename) {
        $collectcfg = &colcfg::read_collect_cfg ($configfilename);

        # For each option below the command line wins; collect.cfg is only
        # consulted when the option was not given, and a built-in default
        # is used when neither supplies a usable value.
        if ($verbosity !~ /\d+/) {
            if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
                $verbosity = $collectcfg->{'verbosity'};
            } else {
                $verbosity = 2; # the default
            }
        }
        # we use searchtype for determining buildtype, but for old versions, use buildtype
        if (defined $collectcfg->{'searchtype'}) {
            $buildtype = "mgpp";
        }
        elsif (defined $collectcfg->{'buildtype'}) {
            $buildtype = $collectcfg->{'buildtype'};
        } else {
            $buildtype = "mg"; #mg is the default
        }
        if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
            $archivedir = $collectcfg->{'archivedir'};
        }
        if (defined $collectcfg->{'cachedir'} && $cachedir eq "") {
            $cachedir = $collectcfg->{'cachedir'};
        }
        if (defined $collectcfg->{'builddir'} && $builddir eq "") {
            $builddir = $collectcfg->{'builddir'};
        }
        if ($maxdocs !~ /\-?\d+/) {
            if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
                $maxdocs = $collectcfg->{'maxdocs'};
            } else {
                $maxdocs = -1; # the default
            }
        }
        if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
            $debug = 1;
        }
        if ($mode !~ /^(all|compress_text|build_index|infodb)$/) {
            if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ /^(all|compress_text|build_index|infodb)$/) {
                $mode = $collectcfg->{'mode'};
            } else {
                $mode = "all"; # the default
            }
        }
        if (defined $collectcfg->{'index'} && $indexname eq "") {
            $indexname = $collectcfg->{'index'};
        }
        if (defined $collectcfg->{'no_text'} && $no_text == 0) {
            if ($collectcfg->{'no_text'} =~ /^true$/i) {
                $no_text = 1;
            }
        }
        if (defined $collectcfg->{'allclassifications'} && $allclassifications == 0) {
            if ($collectcfg->{'allclassifications'} =~ /^true$/i) {
                $allclassifications = 1;
            }
        }
        if (defined $collectcfg->{'keepold'} && $collectcfg->{'keepold'} =~ /^true$/i) {
            $keepold = 1;
        }
        if (defined $collectcfg->{'create_images'} && $collectcfg->{'create_images'} =~ /^true$/i) {
            $create_images = 1;
        }
        if ($buildtype eq "mgpp" && defined $collectcfg->{'textcompress'}) {
            $textindex = $collectcfg->{'textcompress'};
        }
        if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) {
            $gli = 1;
        }

    } else {
        # Without a configuration file nothing below can work, so always
        # stop here (previously the die depended on gsprintf's return value).
        &gsprintf($out, "{common.cannot_find_cfg_file}\n", $configfilename);
        die;
    }

    $gli = 0 unless defined $gli;

    print STDERR "<Build>\n" if $gli;

    #set the text index
    if ($buildtype eq "mgpp") {
        if ($textindex eq "") {
            $textindex = "text";
        }
    }
    else {
        $textindex = "section:text";
    }

    # create default images if required
    if ($create_images) {
        my $collection_name = $collection;
        $collection_name = $collectcfg->{'collectionmeta'}->{'collectionname'}->{'default'}
            if defined $collectcfg->{'collectionmeta'}->{'collectionname'}->{'default'};
        # $out is lexical to this sub, so it is handed over explicitly;
        # create_images cannot see it otherwise.
        &create_images ($collection_name, $out);
    }

    # fill in the default archives and building directories if none
    # were supplied, turn all \ into / and remove trailing /
    $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives") if $archivedir eq "";
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;
    $builddir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "building") if $builddir eq "";
    $builddir =~ s/[\\\/]+/\//g;
    $builddir =~ s/\/$//;

    # update the archive cache if needed
    if ($cachedir) {
        &gsprintf($out, "{buildcol.updating_archive_cache}\n")
            if ($verbosity >= 1);

        $cachedir =~ s/[\\\/]+$//;
        # \Q..\E: the collection name is literal text, not a pattern
        $cachedir .= "/collect/$collection" unless
            $cachedir =~ /collect\/\Q$collection\E/;

        $realarchivedir = "$cachedir/archives";
        $realbuilddir = "$cachedir/building";
        &util::mk_all_dir ($realarchivedir);
        &util::mk_all_dir ($realbuilddir);
        &util::cachedir ($archivedir, $realarchivedir, $verbosity);

    } else {
        $realarchivedir = $archivedir;
        $realbuilddir = $builddir;
    }

    # build it in realbuilddir
    &util::mk_all_dir ($realbuilddir);


    # if a builder class has been created for this collection, use it
    # otherwise, use the mg or mgpp builder
    if (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = "${collection}builder";
    } else {
        $builderdir = "$ENV{'GSDLHOME'}/perllib";
        if ($buildtype eq "mgpp") {
            $buildertype = "mgppbuilder";
        }
        else {
            $buildertype = "mgbuilder";
        }
    }

    require "$builderdir/$buildertype.pm";

    # A class name held in a string can be used directly as the invocant,
    # so no string eval is needed; a failing constructor still dies with
    # its own message.
    $builder = $buildertype->new($collection,
                                 $realarchivedir, $realbuilddir, $verbosity,
                                 $maxdocs, $debug, $keepold, $allclassifications,
                                 $out, $no_text, $faillog, $gli);

    $builder->init();

    if ($buildertype eq "mgppbuilder" && $no_strip_html) {
        $builder->set_strip_html(0);
    }
    if ($mode =~ /^all$/i) {
        $builder->compress_text($textindex);
        $builder->build_indexes($indexname);
        $builder->make_infodatabase();
        $builder->collect_specific();
    } elsif ($mode =~ /^compress_text$/i) {
        $builder->compress_text($textindex);
    } elsif ($mode =~ /^build_index$/i) {
        $builder->build_indexes($indexname);
    } elsif ($mode =~ /^infodb$/i) {
        $builder->make_infodatabase();
    } else {
        &gsprintf(STDERR, "{buildcol.unknown_mode}\n", $mode);
        die;
    }

    $builder->make_auxiliary_files() if !$debug;
    $builder->deinit();

    # when building in a cache directory, move the result into place
    if (($realbuilddir ne $builddir) && !$debug) {
        &gsprintf($out, "{buildcol.copying_back_cached_build}\n")
            if ($verbosity >= 1);
        &util::rm_r ($builddir);
        &util::cp_r ($realbuilddir, $builddir);
    }

    close OUT if $close_out;
    close FAILLOG;

    print STDERR "</Build>\n" if $gli;
}
468
# create_images -- generate the default collection icons and link them
# into the collection's configuration file.
#
# Runs the gimp title_icon script twice (large and small icon) and then
# rewrites collect.cfg so that the "collectionmeta iconcollection" and
# "collectionmeta iconcollectionsmall" lines point at the generated files,
# appending either line if it is not present yet.
#
# Parameters:
#   $collection_name - text to render on the icons
#   $out             - (optional) handle, or handle name, for diagnostics;
#                      defaults to STDERR. main()'s $out is lexical and
#                      therefore not visible here unless it is passed in.
#
# Reads the package variables $collection and $configfilename, which
# main() sets before calling this sub. Returns nothing; problems are
# reported on $out and the sub returns early.
sub create_images {
    my ($collection_name, $out) = @_;
    $out = 'STDERR' unless defined $out;

    my $image_script = &util::filename_cat ($ENV{'GSDLHOME'}, "bin", "script", "gimp", "title_icon-1.2.pl");
    if (!-e $image_script) {
        &gsprintf($out, "{buildcol.no_image_script}", $image_script);
        &gsprintf($out, "{buildcol.no_default_images}\n\n");
        return;
    }

    my $imagedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "images");

    &util::mk_all_dir ($imagedir);

    # create the images
    # List-form system: every argument reaches the script verbatim, so
    # quotes or shell metacharacters in the collection name or the image
    # directory cannot break (or hijack) the command line.
    system ($image_script, "-size", "1.5", "-image_dir", $imagedir,
            "-filename", "$collection.gif", "-text", $collection_name);
    system ($image_script, "-image_dir", $imagedir,
            "-filename", "${collection}sm.gif", "-text", $collection_name);

    # update the collect.cfg configuration file (this will need
    # to be changed when the config file format changes)
    my $cfg_in;
    if (!open ($cfg_in, '<', $configfilename)) {
        &gsprintf($out, "{buildcol.cannot_open_cfg_file}\n", $configfilename);
        &gsprintf($out, "{buildcol.unlinked_col_images}\n");
        return;
    }

    # Copy the file into memory, replacing any existing icon lines.
    my $line = ""; my $file = "";
    my $found = 0; my $foundsm = 0;
    while (defined ($line = <$cfg_in>)) {
        if ($line =~ /collectionmeta\s+iconcollection\s+/) {
            $line = "collectionmeta iconcollection _httpprefix_/collect/$collection/images/$collection.gif\n";
            $found = 1;
        } elsif ($line =~ /collectionmeta\s+iconcollectionsmall\s+/) {
            $line = "collectionmeta iconcollectionsmall _httpprefix_/collect/$collection/images/${collection}sm.gif\n";
            $foundsm = 1;
        }
        $file .= $line;
    }
    close $cfg_in;

    # Append whichever icon line the file did not contain yet.
    $file .= "collectionmeta iconcollection _httpprefix_/collect/$collection/images/$collection.gif\n" if !$found;
    $file .= "collectionmeta iconcollectionsmall _httpprefix_/collect/$collection/images/${collection}sm.gif\n" if !$foundsm;

    my $cfg_out;
    if (!open ($cfg_out, '>', $configfilename)) {
        &gsprintf($out, "{buildcol.cannot_open_cfg_file}\n", $configfilename);
        &gsprintf($out, "{buildcol.unlinked_col_images}\n");
        return;
    }
    print $cfg_out $file;
    close $cfg_out;
}
Note: See TracBrowser for help on using the repository browser.