source: trunk/gsdl/bin/script/buildcol.pl@ 10215

Last change on this file since 10215 was 10215, checked in by kjdon, 19 years ago

Added Jeffrey's changes for parsing arguments: all parsing is now driven by the arguments structure rather than by parsargv.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 15.2 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl -- This program will build a particular collection
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# 11/04/03 Added usage datastructure - John Thompson
29
30package buildcol;
31
# Compile-time setup: refuse to run without the Greenstone environment
# variables, then put the Greenstone library directories at the front of
# @INC so the 'use' statements below can find their modules.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

    # Each directory is unshifted in turn, so the last one listed here
    # ends up first in @INC.
    foreach my $libdir ("perllib", "perllib/cpan", "perllib/plugins", "perllib/classify") {
        unshift (@INC, "$ENV{'GSDLHOME'}/$libdir");
    }
}
40
41use colcfg;
42use parsargv;
43use util;
44use FileHandle;
45use gsprintf;
46use printusage;
47use parse2;
48
# Values accepted by the "mode" option (used as its 'list' in $arguments).
# Each entry pairs the mode name with a brace-wrapped description key of
# the form "{buildcol.mode.<name>}".
my $mode_list =
    [ map { +{ 'name' => $_, 'desc' => "{buildcol.mode.$_}" } }
      qw(all compress_text build_index infodb) ];
58
# Command-line options understood by buildcol.pl.  This list is handed to
# parse2::parse() in main() and, wrapped in $options, to the PrintUsage
# routines.  Entries whose old default was the empty string carry no
# 'deft' key; the original defaults are kept below as comments.
my $arguments = [
    {   name    => 'remove_empty_classifications',
        desc    => '{buildcol.remove_empty_classifications}',
        type    => 'flag',
        reqd    => 'no',
        modegli => '3',
    },
    {   name      => 'archivedir',
        desc      => '{buildcol.archivedir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'builddir',
        desc      => '{buildcol.builddir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    # The cachedir option is currently disabled:
    # {   name => 'cachedir',
    #     desc => '{buildcol.cachedir}',
    #     type => 'string',
    #     reqd => 'no',
    # },
    {   name      => 'collectdir',
        desc      => '{buildcol.collectdir}',
        type      => 'string',
        # the earlier argument parser left "" as the default
        # deft => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"),
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'create_images',
        desc    => '{buildcol.create_images}',
        type    => 'flag',
        reqd    => 'no',
        modegli => '4',
    },
    {   name      => 'debug',
        desc      => '{buildcol.debug}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'faillog',
        desc    => '{buildcol.faillog}',
        type    => 'string',
        # the earlier argument parser left "" as the default
        # deft => &util::filename_cat("<collectdir>", "colname", "etc", "fail.log"),
        reqd    => 'no',
        modegli => '4',
    },
    {   name    => 'index',
        desc    => '{buildcol.index}',
        type    => 'string',
        reqd    => 'no',
        modegli => '3',
    },
    {   name      => 'keepold',
        desc      => '{buildcol.keepold}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'language',
        desc    => '{scripts.language}',
        type    => 'string',
        reqd    => 'no',
        modegli => '4',
    },
    {   name      => 'maxdocs',
        desc      => '{buildcol.maxdocs}',
        type      => 'int',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'mode',
        desc    => '{buildcol.mode}',
        type    => 'enum',
        list    => $mode_list,
        # the earlier argument parser left "" as the default
        # deft => "all",
        reqd    => 'no',
        modegli => '4',
    },
    {   name    => 'no_strip_html',
        desc    => '{buildcol.no_strip_html}',
        type    => 'flag',
        reqd    => 'no',
        modegli => '4',
    },
    {   name    => 'no_text',
        desc    => '{buildcol.no_text}',
        type    => 'flag',
        reqd    => 'no',
        modegli => '3',
    },
    {   name      => 'out',
        desc      => '{buildcol.out}',
        type      => 'string',
        deft      => 'STDERR',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'verbosity',
        desc    => '{buildcol.verbosity}',
        type    => 'int',
        # the earlier argument parser left "" as the default
        # deft => "2",
        reqd    => 'no',
        modegli => '4',
    },
    {   name      => 'gli',
        desc      => '',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'xml',
        desc      => '',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
];
164
# Top-level description of this script: its name, a description key and
# the option list above.  main() passes this to the PrintUsage routines.
my $options = {
    name => 'buildcol.pl',
    desc => '{buildcol.desc}',
    args => $arguments,
};
168
# Package-local shorthand for gsprintf::gsprintf: forwards every argument
# unchanged and hands back whatever that routine returns.
sub gsprintf {
    my $real_gsprintf = \&gsprintf::gsprintf;
    return $real_gsprintf->(@_);
}
173
174
# Script entry point: run the build with an empty argument list
# (main() reads its options from @ARGV).
main();
176
177
# main -- build one collection.
#
# Steps, in order:
#   1. parse @ARGV against $arguments with parse2::parse;
#   2. handle -language, -xml (print usage as XML and return) and -gli;
#   3. open the output handle and the fail log;
#   4. read etc/collect.cfg and use it to fill in any option that was not
#      given on the command line;
#   5. optionally create the default collection images;
#   6. load a builder class (collection-specific, lucene, mgpp or mg) and
#      run the phases selected by -mode;
#   7. copy a cached build back into place and close the handles.
#
# Takes no arguments.  Dies (after printing usage or an error message) on
# bad arguments, an unopenable output/fail-log file, a missing collect.cfg
# or an unknown mode.
#
# The following stay package variables, as before: $collection,
# $configfilename, $collectcfg, $realarchivedir, $realbuilddir,
# $builderdir, $buildertype and $builder.  create_images() reads
# $collection and $configfilename.
sub main
{
    my ($verbosity, $archivedir, $cachedir, $builddir, $maxdocs,
        $debug, $mode, $indexname, $keepold, $remove_empty_classifications,
        $create_images, $collectdir, $out, $buildtype, $textindex,
        $no_strip_html, $no_text, $faillog, $gli, $language);

    my $xml = 0;

    # No command-line option sets the cache directory (the cachedir
    # argument is disabled); only collect.cfg can supply one.
    $cachedir = "";

    # general options available to all plugins
    my $hashParsingResult = {};
    my $intArgLeftinAfterParsing =
        parse2::parse(\@ARGV, $arguments, $hashParsingResult, "allow_extra_options");

    # Copy the parsed values into the local variables by name.  An explicit
    # table replaces a string eval; note that the "index" option belongs in
    # $indexname, which is the variable the rest of this routine reads.
    # Names that are not in the table are ignored.
    my %variable_for =
        ( 'remove_empty_classifications' => \$remove_empty_classifications,
          'archivedir'    => \$archivedir,
          'builddir'      => \$builddir,
          'collectdir'    => \$collectdir,
          'create_images' => \$create_images,
          'debug'         => \$debug,
          'faillog'       => \$faillog,
          'index'         => \$indexname,
          'keepold'       => \$keepold,
          'language'      => \$language,
          'maxdocs'       => \$maxdocs,
          'mode'          => \$mode,
          'no_strip_html' => \$no_strip_html,
          'no_text'       => \$no_text,
          'out'           => \$out,
          'verbosity'     => \$verbosity,
          'gli'           => \$gli,
          'xml'           => \$xml );
    foreach my $strVariable (keys %$hashParsingResult) {
        next unless exists $variable_for{$strVariable};
        ${$variable_for{$strVariable}} = $hashParsingResult->{$strVariable};
    }

    # Exactly one argument (the collection name) must be left over after
    # parsing.  The -xml request only prints the usage description and
    # needs no collection, so it is exempt from this check.
    if ($intArgLeftinAfterParsing != 1 && !$xml) {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($xml) {
        &PrintUsage::print_xml_usage($options);
        print "\n";
        return;
    }

    if ($gli) { # the gli wants strings to be in UTF-8
        &gsprintf::output_strings_in_UTF8;
    }

    $textindex = "";

    # Work out where messages go.  $out ends up holding a filehandle
    # *name*, which is also what gets passed to the builder.
    my $close_out = 0;
    $out = "STDERR" unless defined $out && $out ne "";
    if ($out =~ /^(STDERR|STDOUT)$/i) {
        # Only the upper-case names are global handles inside a package.
        $out = uc ($out);
    } else {
        open (OUT, ">", $out) || do {
            &gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out);
            die;
        };
        $out = "buildcol::OUT";
        $close_out = 1;
    }
    $out->autoflush(1);

    # create_images() reads the package variable $out, which the lexical
    # $out above hides, so publish the handle name there as well.
    $buildcol::out = $out;

    # get and check the collection
    if (($collection = &util::use_collection(@ARGV, $collectdir)) eq "") {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    if (!defined $faillog || $faillog eq "") {
        $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    }
    # note that we're appending to the faillog here (import.pl clears it each time)
    # this could potentially create a situation where the faillog keeps being added
    # to over multiple builds (if the import process is being skipped)
    open (FAILLOG, ">>", $faillog) || do {
        &gsprintf(STDERR, "{common.cannot_open_fail_log}\n", $faillog);
        die;
    };
    $faillog = 'buildcol::FAILLOG';
    $faillog->autoflush(1);

    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");

    # read the configuration file; the build cannot go on without it
    $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg");
    if (!-e $configfilename) {
        &gsprintf($out, "{common.cannot_find_cfg_file}\n", $configfilename);
        die;
    }
    $collectcfg = &colcfg::read_collect_cfg ($configfilename);

    # From here on, a value from collect.cfg is used only where the
    # command line left the option unset (boolean settings in the file
    # can switch a flag on but never off).
    if ($verbosity !~ /\d+/) {
        if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
            $verbosity = $collectcfg->{'verbosity'};
        } else {
            $verbosity = 2; # the default
        }
    }
    # we use searchtype for determining buildtype, but for old versions, use buildtype
    if (defined $collectcfg->{'buildtype'}) {
        $buildtype = $collectcfg->{'buildtype'};
    } elsif (defined $collectcfg->{'searchtype'}) {
        $buildtype = "mgpp";
    } else {
        $buildtype = "mg"; #mg is the default
    }
    if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
        $archivedir = $collectcfg->{'archivedir'};
    }
    if (defined $collectcfg->{'cachedir'} && $cachedir eq "") {
        $cachedir = $collectcfg->{'cachedir'};
    }
    if (defined $collectcfg->{'builddir'} && $builddir eq "") {
        $builddir = $collectcfg->{'builddir'};
    }
    if ($maxdocs !~ /\-?\d+/) {
        if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
            $maxdocs = $collectcfg->{'maxdocs'};
        } else {
            $maxdocs = -1; # the default
        }
    }
    if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
        $debug = 1;
    }
    if ($mode !~ /^(all|compress_text|build_index|infodb)$/) {
        if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ /^(all|compress_text|build_index|infodb)$/) {
            $mode = $collectcfg->{'mode'};
        } else {
            $mode = "all"; # the default
        }
    }
    if (defined $collectcfg->{'index'} && (!defined $indexname || $indexname eq "")) {
        $indexname = $collectcfg->{'index'};
    }
    if (defined $collectcfg->{'no_text'} && $no_text == 0) {
        if ($collectcfg->{'no_text'} =~ /^true$/i) {
            $no_text = 1;
        }
    }
    if (defined $collectcfg->{'no_strip_html'} && $no_strip_html == 0) {
        if ($collectcfg->{'no_strip_html'} =~ /^true$/i) {
            $no_strip_html = 1;
        }
    }
    if (defined $collectcfg->{'remove_empty_classifications'} && $remove_empty_classifications == 0) {
        if ($collectcfg->{'remove_empty_classifications'} =~ /^true$/i) {
            $remove_empty_classifications = 1;
        }
    }
    if (defined $collectcfg->{'keepold'} && $collectcfg->{'keepold'} =~ /^true$/i) {
        $keepold = 1;
    }
    if (defined $collectcfg->{'create_images'} && $collectcfg->{'create_images'} =~ /^true$/i) {
        $create_images = 1;
    }
    if ($buildtype eq "mgpp" && defined $collectcfg->{'textcompress'}) {
        $textindex = $collectcfg->{'textcompress'};
    }
    if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) {
        $gli = 1;
    }

    $gli = 0 unless defined $gli;

    print STDERR "<Build>\n" if $gli;

    #set the text index
    if (($buildtype eq "mgpp") || ($buildtype eq "lucene")) {
        if ($textindex eq "") {
            $textindex = "text";
        }
    }
    else {
        $textindex = "section:text";
    }

    # create default images if required
    if ($create_images) {
        my $collection_name = $collection;
        $collection_name = $collectcfg->{'collectionmeta'}->{'collectionname'}->{'default'}
            if defined $collectcfg->{'collectionmeta'}->{'collectionname'}->{'default'};
        &create_images ($collection_name);
    }

    # fill in the default archives and building directories if none
    # were supplied, turn all \ into / and remove trailing /
    $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives") if $archivedir eq "";
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;
    $builddir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "building") if $builddir eq "";
    $builddir =~ s/[\\\/]+/\//g;
    $builddir =~ s/\/$//;

    # update the archive cache if needed
    if ($cachedir) {
        &gsprintf($out, "{buildcol.updating_archive_cache}\n")
            if ($verbosity >= 1);

        $cachedir =~ s/[\\\/]+$//;
        # \Q...\E: match the collection name literally, not as a pattern
        $cachedir .= "/collect/$collection" unless
            $cachedir =~ /collect\/\Q$collection\E/;

        $realarchivedir = "$cachedir/archives";
        $realbuilddir = "$cachedir/building";
        &util::mk_all_dir ($realarchivedir);
        &util::mk_all_dir ($realbuilddir);
        &util::cachedir ($archivedir, $realarchivedir, $verbosity);

    } else {
        $realarchivedir = $archivedir;
        $realbuilddir = $builddir;
    }

    # build it in realbuilddir
    &util::mk_all_dir ($realbuilddir);


    # if a builder class has been created for this collection, use it
    # otherwise, use the lucene, mgpp or mg builder
    if (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = "${collection}builder";
    } else {
        $builderdir = "$ENV{'GSDLHOME'}/perllib";
        if ($buildtype eq "lucene") {
            $buildertype = "lucenebuilder";
        }
        elsif ($buildtype eq "mgpp") {
            $buildertype = "mgppbuilder";
        }
        else {
            $buildertype = "mgbuilder";
        }
    }

    require "$builderdir/$buildertype.pm";

    # The class name is only known at run time; a method call on the
    # string does the same job as a string eval of "new <class>(...)".
    $builder = $buildertype->new ($collection,
                                  $realarchivedir, $realbuilddir, $verbosity,
                                  $maxdocs, $debug, $keepold, $remove_empty_classifications,
                                  $out, $no_text, $faillog, $gli);

    $builder->init();

    if (($buildertype eq "mgppbuilder") && $no_strip_html) {
        $builder->set_strip_html(0);
    }

    if ($mode =~ /^all$/i) {
        $builder->compress_text($textindex);
        $builder->build_indexes($indexname);
        $builder->make_infodatabase();
        $builder->collect_specific();
    } elsif ($mode =~ /^compress_text$/i) {
        $builder->compress_text($textindex);
    } elsif ($mode =~ /^build_index$/i) {
        $builder->build_indexes($indexname);
    } elsif ($mode =~ /^infodb$/i) {
        $builder->make_infodatabase();
    } else {
        &gsprintf(STDERR, "{buildcol.unknown_mode}\n", $mode);
        die;
    }

    $builder->make_auxiliary_files() if !$debug;
    $builder->deinit();

    # when building in a cache directory, copy the result back
    if (($realbuilddir ne $builddir) && !$debug) {
        &gsprintf($out, "{buildcol.copying_back_cached_build}\n")
            if ($verbosity >= 1);
        &util::rm_r ($builddir);
        &util::cp_r ($realbuilddir, $builddir);
    }

    close OUT if $close_out;
    close FAILLOG;

    print STDERR "</Build>\n" if $gli;
}
451
# create_images -- generate the default collection images and make
# collect.cfg refer to them.
#
# Arguments:
#   $collection_name  text passed to the image script via -text
#   $out_handle       optional; name of the filehandle that messages are
#                     written to.  When omitted, the package variable $out
#                     is used if it is set, otherwise STDERR.
#
# Reads the package variables $collection (used in the image file names
# and the config lines) and $configfilename (the collect.cfg to rewrite).
#
# Runs the title_icon-1.2.pl script twice (large and small image), then
# rewrites collect.cfg so that it has one "collectionmeta iconcollection"
# and one "collectionmeta iconcollectionsmall" line pointing at the new
# images.  Every failure is reported on the output handle and the routine
# returns without dying.
sub create_images {
    my ($collection_name, $out_handle) = @_;

    # The caller keeps its output handle in a lexical, so the package
    # variable $out may never have been set; fall back rather than print
    # to an undefined handle.
    $out_handle = $out if !defined $out_handle && defined $out;
    $out_handle = "STDERR" unless defined $out_handle && $out_handle ne "";

    my $image_script = &util::filename_cat ($ENV{'GSDLHOME'}, "bin", "script", "gimp", "title_icon-1.2.pl");
    if (!-e $image_script) {
        &gsprintf($out_handle, "{buildcol.no_image_script}", $image_script);
        &gsprintf($out_handle, "{buildcol.no_default_images}\n\n");
        return;
    }

    my $imagedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "images");

    &util::mk_all_dir ($imagedir);

    # create the images; the list form of system() passes each argument
    # as-is, so quotes or spaces in the collection name or in the paths
    # cannot break (or add to) the command line
    system ($image_script, "-size", "1.5", "-image_dir", $imagedir,
            "-filename", "$collection.gif", "-text", $collection_name);
    system ($image_script, "-image_dir", $imagedir,
            "-filename", "${collection}sm.gif", "-text", $collection_name);

    # update the collect.cfg configuration file (this will need
    # to be changed when the config file format changes)
    my $cfg_in;
    if (!open ($cfg_in, "<", $configfilename)) {
        &gsprintf($out_handle, "{buildcol.cannot_open_cfg_file}\n", $configfilename);
        &gsprintf($out_handle, "{buildcol.unlinked_col_images}\n");
        return;
    }

    my $icon_line = "collectionmeta iconcollection _httpprefix_/collect/$collection/images/$collection.gif\n";
    my $iconsm_line = "collectionmeta iconcollectionsmall _httpprefix_/collect/$collection/images/${collection}sm.gif\n";

    # copy the file into memory, replacing any existing icon lines
    my $file = "";
    my $found = 0; my $foundsm = 0;
    while (defined (my $line = <$cfg_in>)) {
        if ($line =~ /collectionmeta\s+iconcollection\s+/) {
            $line = $icon_line;
            $found = 1;
        } elsif ($line =~ /collectionmeta\s+iconcollectionsmall\s+/) {
            $line = $iconsm_line;
            $foundsm = 1;
        }
        $file .= $line;
    }
    close $cfg_in;

    # append whichever icon line the file did not already contain
    $file .= $icon_line if !$found;
    $file .= $iconsm_line if !$foundsm;

    my $cfg_out;
    if (!open ($cfg_out, ">", $configfilename)) {
        &gsprintf($out_handle, "{buildcol.cannot_open_cfg_file}\n", $configfilename);
        &gsprintf($out_handle, "{buildcol.unlinked_col_images}\n");
        return;
    }
    print $cfg_out $file;
    close $cfg_out;
}
Note: See TracBrowser for help on using the repository browser.