source: trunk/gsdl/bin/script/buildcol.pl@ 7101

Last change on this file since 7101 was 7101, checked in by kjdon, 20 years ago

Removed the old commented-out print-usage code; added the gli argument where it was missing; when the gli argument is set, strings are output in UTF-8.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 14.7 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl -- This program will build a particular collection
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# 11/04/03 Added usage datastructure - John Thompson
29
30package buildcol;
31
# Compile-time setup: refuse to run without the Greenstone environment
# variables, then put the Greenstone Perl library directories at the front
# of @INC so the "use" lines below can find the project modules.
BEGIN {
    defined $ENV{'GSDLHOME'} or die "GSDLHOME not set\n";
    defined $ENV{'GSDLOS'}   or die "GSDLOS not set\n";

    # A single unshift; the resulting search order is
    # classify, plugins, cpan, perllib (followed by the previous @INC).
    my $perllib = "$ENV{'GSDLHOME'}/perllib";
    unshift (@INC,
             "$perllib/classify",
             "$perllib/plugins",
             "$perllib/cpan",
             $perllib);
}
40
41use colcfg;
42use parsargv;
43use util;
44use FileHandle;
45use gsprintf;
46use printusage;
47
# Allowed values for the -mode option.  Each entry is a hash with the mode
# 'name' and a 'desc' resource-bundle key; the list is attached to the
# "mode" entry of the argument table as its enum list.
my $mode_list = [];
foreach my $mode_entry (
    [ 'all',           '{buildcol.mode.all}' ],
    [ 'compress_text', '{buildcol.mode.compress_text}' ],
    [ 'build_index',   '{buildcol.mode.build_index}' ],
    [ 'infodb',        '{buildcol.mode.infodb}' ],
) {
    my ($mode_name, $mode_desc) = @$mode_entry;
    push (@$mode_list, { 'name' => $mode_name, 'desc' => $mode_desc });
}
57
# Usage description of every documented command-line option.  Each entry
# carries the option name, a resource-bundle key for its description, its
# type, whether it is required, an optional default ('deft'), and either a
# 'modegli' level or a 'hiddengli' marker.  The table is handed to the
# PrintUsage routines through $options.
my $arguments = [
    {   name      => 'allclassifications',
        desc      => '{buildcol.allclassifications}',
        type      => 'flag',
        reqd      => 'no',
        modegli   => '2',
    },
    {   name      => 'archivedir',
        desc      => '{buildcol.archivedir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'builddir',
        desc      => '{buildcol.builddir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    # cachedir is accepted by the argument parser in main but is
    # intentionally left out of the usage table:
    # {   name => 'cachedir',
    #     desc => '{buildcol.cachedir}',
    #     type => 'string',
    #     reqd => 'no',
    # },
    {   name      => 'collectdir',
        desc      => '{buildcol.collectdir}',
        type      => 'string',
        deft      => &util::filename_cat ($ENV{'GSDLHOME'}, 'collect'),
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'create_images',
        desc      => '{buildcol.create_images}',
        type      => 'flag',
        reqd      => 'no',
        modegli   => '4',
    },
    {   name      => 'debug',
        desc      => '{buildcol.debug}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'faillog',
        desc      => '{buildcol.faillog}',
        type      => 'string',
        # placeholder path shown in the usage text
        deft      => &util::filename_cat ('<collectdir>', 'colname', 'etc', 'fail.log'),
        reqd      => 'no',
        modegli   => '4',
    },
    {   name      => 'index',
        desc      => '{buildcol.index}',
        type      => 'string',
        reqd      => 'no',
        modegli   => '3',
    },
    {   name      => 'keepold',
        desc      => '{buildcol.keepold}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'language',
        desc      => '{scripts.language}',
        type      => 'string',
        reqd      => 'no',
        modegli   => '4',
    },
    {   name      => 'maxdocs',
        desc      => '{buildcol.maxdocs}',
        type      => 'int',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'mode',
        desc      => '{buildcol.mode}',
        type      => 'enum',
        list      => $mode_list,
        deft      => 'all',
        reqd      => 'no',
        modegli   => '4',
    },
    {   name      => 'no_strip_html',
        desc      => '{buildcol.no_strip_html}',
        type      => 'flag',
        reqd      => 'no',
        modegli   => '4',
    },
    {   name      => 'no_text',
        desc      => '{buildcol.no_text}',
        type      => 'flag',
        reqd      => 'no',
        modegli   => '3',
    },
    {   name      => 'out',
        desc      => '{buildcol.out}',
        type      => 'string',
        deft      => 'STDERR',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'verbosity',
        desc      => '{buildcol.verbosity}',
        type      => 'int',
        deft      => '2',
        reqd      => 'no',
        modegli   => '4',
    },
];
149
# Top-level usage record for this script: its name, the resource-bundle key
# of its description, and the argument table.  Passed to the PrintUsage
# routines when usage text is printed.
my $options = {
    name => 'buildcol.pl',
    desc => '{buildcol.desc}',
    args => $arguments,
};
153
# gsprintf -- local shorthand for gsprintf::gsprintf.
# Forwards all arguments untouched and returns whatever
# gsprintf::gsprintf returns.
sub gsprintf {
    return &gsprintf::gsprintf(@_);
}
158
159
&main();


# main -- build the collection named on the command line.
#
# Takes no arguments; the options and the collection name are read from
# @ARGV.  Options that were not given on the command line are filled in from
# the collection's etc/collect.cfg, and otherwise from built-in defaults.
# A builder object (a collection-specific one if the collection provides it,
# otherwise mgppbuilder or mgbuilder) then runs the stages selected by -mode.
#
# Dies when the arguments cannot be parsed, when -xml usage has been printed,
# when the output file, the fail log or collect.cfg cannot be used, when the
# builder cannot be constructed, or when the mode is unknown.
#
# Note: $language, $collection, $configfilename, $collectcfg, $builder and
# the $real*/$builder* variables are package variables (they are not declared
# with "my").  create_images() reads $collection and $configfilename.
sub main
{
    my ($verbosity, $archivedir, $cachedir, $builddir, $maxdocs,
        $debug, $mode, $indexname, $keepold, $allclassifications,
        $create_images, $collectdir, $out, $buildtype, $textindex,
        $no_strip_html, $no_text, $faillog, $gli);

    # set by -xml: print the usage description as XML and stop
    # (added 11-04-03 by John Thompson)
    my $xml = 0;

    # note that no defaults are passed for most options as they're set
    # later (after we check the collect.cfg file)
    if (!parsargv::parse(\@ARGV,
                         'language/.*/', \$language,
                         'verbosity/\d+/', \$verbosity,
                         'archivedir/.*/', \$archivedir,
                         'cachedir/.*/', \$cachedir, # UNDOCUMENTED
                         'builddir/.*/', \$builddir,
                         'maxdocs/^\-?\d+/', \$maxdocs,
                         'debug', \$debug,
                         'mode/^(all|compress_text|build_index|infodb)$/', \$mode,
                         'index/.*/', \$indexname,
                         'no_text', \$no_text,
                         'keepold', \$keepold,
                         'allclassifications', \$allclassifications,
                         'create_images', \$create_images,
                         'collectdir/.*/', \$collectdir,
                         'out/.*/STDERR', \$out,
                         'no_strip_html', \$no_strip_html,
                         'faillog/.*/', \$faillog,
                         'gli', \$gli,
                         q^xml^, \$xml)) {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($xml) {
        &PrintUsage::print_xml_usage($options);
        die "\n";
    }

    if ($gli) { # the gli wants strings to be in UTF-8
        # explicit () so the current @_ is not forwarded implicitly
        &gsprintf::output_strings_in_UTF8();
    }

    $textindex = "";
    my $close_out = 0;
    if ($out !~ /^(STDERR|STDOUT)$/i) {
        # Three-argument open: the file name is never parsed for a mode.
        # The die is unconditional; it must not depend on whether printing
        # the error message succeeded.
        if (!open (OUT, '>', $out)) {
            &gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out);
            die;
        }
        # from here on $out holds the qualified name of the handle
        $out = "buildcol::OUT";
        $close_out = 1;
    }
    $out->autoflush(1);

    # get and check the collection
    if (($collection = &util::use_collection(@ARGV, $collectdir)) eq "") {
        &PrintUsage::print_txt_usage($options, "{buildcol.params}");
        die "\n";
    }

    if ($faillog eq "") {
        $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    }
    # note that we're appending to the faillog here (import.pl clears it each time)
    # this could potentially create a situation where the faillog keeps being added
    # to over multiple builds (if the import process is being skipped)
    if (!open (FAILLOG, '>>', $faillog)) {
        &gsprintf(STDERR, "{common.cannot_open_fail_log}\n", $faillog);
        die;
    }
    $faillog = 'buildcol::FAILLOG';
    $faillog->autoflush(1);

    # read the configuration file; command-line values take precedence,
    # then collect.cfg, then the built-in defaults
    $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg");
    if (-e $configfilename) {
        $collectcfg = &colcfg::read_collect_cfg ($configfilename);

        if ($verbosity !~ /\d+/) {
            if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
                $verbosity = $collectcfg->{'verbosity'};
            } else {
                $verbosity = 2; # the default
            }
        }
        # we use searchtype for determining buildtype, but for old versions, use buildtype
        if (defined $collectcfg->{'searchtype'}) {
            $buildtype = "mgpp";
        }
        elsif (defined $collectcfg->{'buildtype'}) {
            $buildtype = $collectcfg->{'buildtype'};
        } else {
            $buildtype = "mg"; #mg is the default
        }
        if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
            $archivedir = $collectcfg->{'archivedir'};
        }
        if (defined $collectcfg->{'cachedir'} && $cachedir eq "") {
            $cachedir = $collectcfg->{'cachedir'};
        }
        if (defined $collectcfg->{'builddir'} && $builddir eq "") {
            $builddir = $collectcfg->{'builddir'};
        }
        if ($maxdocs !~ /\-?\d+/) {
            if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
                $maxdocs = $collectcfg->{'maxdocs'};
            } else {
                $maxdocs = -1; # the default
            }
        }
        if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
            $debug = 1;
        }
        if ($mode !~ /^(all|compress_text|build_index|infodb)$/) {
            if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ /^(all|compress_text|build_index|infodb)$/) {
                $mode = $collectcfg->{'mode'};
            } else {
                $mode = "all"; # the default
            }
        }
        if (defined $collectcfg->{'index'} && $indexname eq "") {
            $indexname = $collectcfg->{'index'};
        }
        if (defined $collectcfg->{'no_text'} && $no_text == 0) {
            if ($collectcfg->{'no_text'} =~ /^true$/i) {
                $no_text = 1;
            }
        }
        if (defined $collectcfg->{'allclassifications'} && $allclassifications == 0) {
            if ($collectcfg->{'allclassifications'} =~ /^true$/i) {
                $allclassifications = 1;
            }
        }
        if (defined $collectcfg->{'keepold'} && $collectcfg->{'keepold'} =~ /^true$/i) {
            $keepold = 1;
        }
        if (defined $collectcfg->{'create_images'} && $collectcfg->{'create_images'} =~ /^true$/i) {
            $create_images = 1;
        }
        if ($buildtype eq "mgpp" && defined $collectcfg->{'textcompress'}) {
            $textindex = $collectcfg->{'textcompress'};
        }
        if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) {
            $gli = 1;
        }

    } else {
        # as above: always die, whatever the result of printing the message
        &gsprintf($out, "{common.cannot_find_cfg_file}\n", $configfilename);
        die;
    }

    $gli = 0 unless defined $gli;

    print STDERR "<Build>\n" if $gli;

    #set the text index
    if ($buildtype eq "mgpp") {
        if ($textindex eq "") {
            $textindex = "text";
        }
    }
    else {
        $textindex = "section:text";
    }

    # create default images if required
    if ($create_images) {
        my $collection_name = $collection;
        $collection_name = $collectcfg->{'collectionmeta'}->{'collectionname'}->{'default'}
            if defined $collectcfg->{'collectionmeta'}->{'collectionname'}->{'default'};
        # create_images() reports through the package variable $out, which
        # the lexical "my $out" above hides.  Publish the handle name so its
        # messages are not sent to an undefined handle.
        $buildcol::out = $out;
        &create_images ($collection_name);
    }

    # fill in the default archives and building directories if none
    # were supplied, turn all \ into / and remove trailing /
    $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives") if $archivedir eq "";
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;
    $builddir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "building") if $builddir eq "";
    $builddir =~ s/[\\\/]+/\//g;
    $builddir =~ s/\/$//;

    # update the archive cache if needed
    if ($cachedir) {
        &gsprintf($out, "{buildcol.updating_archive_cache}\n")
            if ($verbosity >= 1);

        $cachedir =~ s/[\\\/]+$//;
        # \Q..\E: match the collection name literally, even if it contains
        # characters that are special in a regular expression
        $cachedir .= "/collect/$collection" unless
            $cachedir =~ /collect\/\Q$collection\E/;

        $realarchivedir = "$cachedir/archives";
        $realbuilddir = "$cachedir/building";
        &util::mk_all_dir ($realarchivedir);
        &util::mk_all_dir ($realbuilddir);
        &util::cachedir ($archivedir, $realarchivedir, $verbosity);

    } else {
        $realarchivedir = $archivedir;
        $realbuilddir = $builddir;
    }

    # build it in realbuilddir
    &util::mk_all_dir ($realbuilddir);


    # if a builder class has been created for this collection, use it
    # otherwise, use the mg or mgpp builder
    if (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = "${collection}builder";
    } else {
        $builderdir = "$ENV{'GSDLHOME'}/perllib";
        if ($buildtype eq "mgpp") {
            $buildertype = "mgppbuilder";
        }
        else {
            $buildertype = "mgbuilder";
        }
    }

    require "$builderdir/$buildertype.pm";

    # Call the constructor as a class method on the class name held in
    # $buildertype; no string eval is needed for that.  Any exception is
    # re-thrown as a plain string, as before.
    $builder = eval {
        $buildertype->new($collection,
                          $realarchivedir, $realbuilddir, $verbosity,
                          $maxdocs, $debug, $keepold, $allclassifications,
                          $out, $no_text, $faillog, $gli);
    };
    die "$@" if $@;

    $builder->init();

    if ($buildertype eq "mgppbuilder" && $no_strip_html) {
        $builder->set_strip_html(0);
    }
    if ($mode =~ /^all$/i) {
        $builder->compress_text($textindex);
        $builder->build_indexes($indexname);
        $builder->make_infodatabase();
        $builder->collect_specific();
    } elsif ($mode =~ /^compress_text$/i) {
        $builder->compress_text($textindex);
    } elsif ($mode =~ /^build_index$/i) {
        $builder->build_indexes($indexname);
    } elsif ($mode =~ /^infodb$/i) {
        $builder->make_infodatabase();
    } else {
        &gsprintf(STDERR, "{buildcol.unknown_mode}\n", $mode);
        die;
    }

    $builder->make_auxiliary_files() if !$debug;
    $builder->deinit();

    # when building in a cache directory, copy the result back
    if (($realbuilddir ne $builddir) && !$debug) {
        &gsprintf($out, "{buildcol.copying_back_cached_build}\n")
            if ($verbosity >= 1);
        &util::rm_r ($builddir);
        &util::cp_r ($realbuilddir, $builddir);
    }

    close OUT if $close_out;
    close FAILLOG;

    print STDERR "</Build>\n" if $gli;
}
432
# create_images -- generate the default collection icons and make
# collect.cfg point at them.
#
#   $collection_name  text passed to the icon script with -text
#
# Reads the package variables $collection and $configfilename, which main
# sets before calling this routine.  Messages go to the handle named by the
# package variable $buildcol::out, or to STDERR when that is not set.
# Returns nothing.  It returns early, after printing a message, when the
# icon script is missing or collect.cfg cannot be opened.
sub create_images {
    my ($collection_name) = @_;

    # The package variable $out may never have been assigned (main keeps
    # its output handle in a lexical), so fall back to STDERR rather than
    # printing to an undefined handle.
    my $msg_out = defined $buildcol::out ? $buildcol::out : "STDERR";

    my $image_script = &util::filename_cat ($ENV{'GSDLHOME'}, "bin", "script", "gimp", "title_icon-1.2.pl");
    if (!-e $image_script) {
        &gsprintf($msg_out, "{buildcol.no_image_script}", $image_script);
        &gsprintf($msg_out, "{buildcol.no_default_images}\n\n");
        return;
    }

    my $imagedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "images");

    &util::mk_all_dir ($imagedir);

    # create the images
    # List form of system: every argument reaches the script verbatim, so
    # spaces in the paths or quote characters in the collection name cannot
    # break (or inject into) a shell command line.  As before, the exit
    # status is not checked.
    system ($image_script, "-size", "1.5", "-image_dir", $imagedir,
            "-filename", "$collection.gif", "-text", $collection_name);
    system ($image_script, "-image_dir", $imagedir,
            "-filename", "${collection}sm.gif", "-text", $collection_name);

    # update the collect.cfg configuration file (this will need
    # to be changed when the config file format changes)
    if (!open (CFGFILE, '<', $configfilename)) {
        &gsprintf($msg_out, "{buildcol.cannot_open_cfg_file}\n", $configfilename);
        &gsprintf($msg_out, "{buildcol.unlinked_col_images}\n");
        return;
    }

    # Copy the file into memory, replacing any existing icon lines.
    my $line = ""; my $file = "";
    my $found = 0; my $foundsm = 0;
    while (defined ($line = <CFGFILE>)) {
        if ($line =~ /collectionmeta\s+iconcollection\s+/) {
            $line = "collectionmeta iconcollection _httpprefix_/collect/$collection/images/$collection.gif\n";
            $found = 1;
        } elsif ($line =~ /collectionmeta\s+iconcollectionsmall\s+/) {
            $line = "collectionmeta iconcollectionsmall _httpprefix_/collect/$collection/images/${collection}sm.gif\n";
            $foundsm = 1;
        }
        $file .= $line;
    }
    close CFGFILE;

    # append the icon lines that were not already present
    $file .= "collectionmeta iconcollection _httpprefix_/collect/$collection/images/$collection.gif\n" if !$found;
    $file .= "collectionmeta iconcollectionsmall _httpprefix_/collect/$collection/images/${collection}sm.gif\n" if !$foundsm;

    # write the updated configuration back over the original file
    if (!open (CFGFILE, '>', $configfilename)) {
        &gsprintf($msg_out, "{buildcol.cannot_open_cfg_file}\n", $configfilename);
        &gsprintf($msg_out, "{buildcol.unlinked_col_images}\n");
        return;
    }
    print CFGFILE $file;
    close CFGFILE;
}
Note: See TracBrowser for help on using the repository browser.