source: trunk/gsdl/bin/script/buildcol.pl@ 2447

Last change on this file since 2447 was 2359, checked in by sjboddie, 23 years ago

Altered the help text a little for mkcol.pl, import.pl, buildcol.pl, and
build so that they now suggest using the "more" pager if the help text
scrolls off the screen (brought about by usability studies under DOS).
Note that this means some debug info that was once printed to STDERR is
now being printed to STDOUT.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.9 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl -- This program will build a particular collection
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
package buildcol;

# Compile-time setup: refuse to run without the Greenstone environment and
# make the Greenstone perl libraries loadable before the "use" lines below
# are processed.
BEGIN {
    foreach my $required ('GSDLHOME', 'GSDLOS') {
        die "$required not set\n" unless defined $ENV{$required};
    }

    # Each directory is pushed onto the FRONT of @INC in turn, so the last
    # one listed here (perllib/classify) is the first one searched.
    foreach my $libdir ("perllib", "perllib/plugins", "perllib/classify") {
        unshift (@INC, "$ENV{'GSDLHOME'}/$libdir");
    }
}

use colcfg;
use parsargv;
use util;
use FileHandle;

# Run the build straight away; main() is defined further down the file.
main();
44
# print_usage
#
# Writes the command-line help for buildcol.pl to STDOUT (not STDERR, so
# that it can be piped through a pager such as "more").  Takes no arguments.
sub print_usage {
    # Default value advertised for the -collectdir option.
    my $default_collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");

    my @help_text = (
        "\n",
        "buildcol.pl: Builds the indexes of a Greenstone collection.\n\n",
        " usage: $0 [options] collection-name\n\n",
        " options:\n",
        " -verbosity number 0=none, 3=lots\n",
        " -archivedir directory Where the archives live\n",
        " -builddir directory Where to put the built indexes\n",
        " -maxdocs number Maximum number of documents to build\n",
        " -debug Print output to STDOUT\n",
        " -mode all|compress_text|build_index|infodb\n",
        " -index indexname Index to build (will build all in\n",
        " config file if not set)\n",
        " -keepold will not destroy the current contents of the\n",
        " building directory\n",
        " -no_text Don't store compressed text. This option is\n",
        " useful for minimizing the size of the built\n",
        " indexes if you intend always to display the\n",
        " original documents at run time (i.e. you won't\n",
        " be able to retrieve the compressed text version)\n",
        " -allclassifications Don't remove empty classifications\n",
        " -create_images Attempt to create default images for new\n",
        " collection. This relies on the Gimp being\n",
        " installed along with relevant perl modules\n",
        " to allow scripting from perl\n",
        " -collectdir directory Collection directory (defaults to " .
            $default_collectdir . ")\n",
        " -out Filename or handle to print output status to.\n",
        " The default is STDERR\n",
        " -buildtype mg|mgpp This will override the config file setting.\n",
        " (default is mg)\n",
        " -no_strip_html Do not strip the html tags from the indexed text\n",
        " (only used for mgpp collections).\n\n",
        " [Type \"perl -S buildcol.pl | more\" if this help text scrolled off your screen]",
    );

    # The closing newline is left off when GSDLOS says we are on windows.
    if ($ENV{'GSDLOS'} !~ /^windows$/i) {
        push (@help_text, "\n");
    }

    print STDOUT join ("", @help_text);
}
81
82
# main
#
# Drives a complete collection build:
#   1. parse the command line with parsargv::parse;
#   2. locate the collection and read its etc/collect.cfg, taking values
#      from the config file for options not given on the command line
#      (debug, keepold and create_images are switched on if EITHER source
#      asks for them);
#   3. optionally create default images and refresh the archive cache;
#   4. load "<collection>builder.pm" from the collection's perllib if it
#      exists, otherwise mgbuilder/mgppbuilder, and run the phases selected
#      by -mode;
#   5. if a cache directory was used, copy the build back to -builddir.
#
# Takes no arguments (works from @ARGV and %ENV).  Dies with a message on
# any fatal problem; prints the usage text first when the arguments or the
# collection are unusable.
#
# $collection and $configfilename are deliberately left as package
# variables because create_images() reads them.
sub main
{
    my ($verbosity, $archivedir, $cachedir, $builddir, $maxdocs,
        $debug, $mode, $indexname, $keepold, $allclassifications,
        $create_images, $collectdir, $out, $buildtype, $textindex,
        $no_strip_html, $no_text);

    # note that no defaults are passed for most options as they're set
    # later (after we check the collect.cfg file)
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/', \$verbosity,
                         'archivedir/.*/', \$archivedir,
                         'cachedir/.*/', \$cachedir,
                         'builddir/.*/', \$builddir,
                         'maxdocs/^\-?\d+/', \$maxdocs,
                         'debug', \$debug,
                         'mode/^(all|compress_text|build_index|infodb)$/', \$mode,
                         'index/.*/', \$indexname,
                         'no_text', \$no_text,
                         'keepold', \$keepold,
                         'allclassifications', \$allclassifications,
                         'create_images', \$create_images,
                         'collectdir/.*/', \$collectdir,
                         'out/.*/STDERR', \$out,
                         'no_strip_html', \$no_strip_html,
                         'buildtype/^(mg|mgpp)$/', \$buildtype)) {
        print_usage();
        die "\n";
    }

    $textindex = "";
    my $close_out = 0;
    if ($out !~ /^(STDERR|STDOUT)$/i) {
        # three-argument open: the file name is never parsed for mode
        # characters
        open (OUT, '>', $out) || die "Couldn't open output file $out\n";
        $out = "buildcol::OUT";
        $close_out = 1;
    } else {
        # The test above ignores case but handle names do not, so
        # "-out stderr" must be turned into the real handle name.
        $out = uc ($out);
    }
    $out->autoflush(1);

    # get and check the collection.  Only the first remaining argument is
    # the collection name; passing the whole of @ARGV would let an empty
    # argument list shift $collectdir into the collection-name slot.
    # NOTE(review): assumes parsargv::parse has removed the options it
    # recognised from @ARGV -- confirm against parsargv.pm
    my $collection_arg = @ARGV ? $ARGV[0] : "";
    if (($collection = &util::use_collection($collection_arg, $collectdir)) eq "") {
        print_usage();
        die "\n";
    }

    # read the configuration file
    my $collectcfg;
    $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg");
    if (-e $configfilename) {
        $collectcfg = &colcfg::read_collect_cfg ($configfilename);

        if ($verbosity !~ /\d+/) {
            if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
                $verbosity = $collectcfg->{'verbosity'};
            } else {
                $verbosity = 2; # the default
            }
        }
        if (defined $collectcfg->{'buildtype'} && $buildtype eq "") {
            $buildtype = $collectcfg->{'buildtype'};
        }
        if ($buildtype eq "") {
            $buildtype = "mg"; # mg is the default
        }
        if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
            $archivedir = $collectcfg->{'archivedir'};
        }
        if (defined $collectcfg->{'cachedir'} && $cachedir eq "") {
            $cachedir = $collectcfg->{'cachedir'};
        }
        if (defined $collectcfg->{'builddir'} && $builddir eq "") {
            $builddir = $collectcfg->{'builddir'};
        }
        if ($maxdocs !~ /\-?\d+/) {
            if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
                $maxdocs = $collectcfg->{'maxdocs'};
            } else {
                $maxdocs = -1; # the default
            }
        }
        if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
            $debug = 1;
        }
        if ($mode !~ /^(all|compress_text|build_index|infodb)$/) {
            if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ /^(all|compress_text|build_index|infodb)$/) {
                $mode = $collectcfg->{'mode'};
            } else {
                $mode = "all"; # the default
            }
        }
        if (defined $collectcfg->{'index'} && $indexname eq "") {
            $indexname = $collectcfg->{'index'};
        }
        if (defined $collectcfg->{'no_text'} && $no_text == 0) {
            if ($collectcfg->{'no_text'} =~ /^true$/i) {
                $no_text = 1;
            }
        }
        if (defined $collectcfg->{'allclassifications'} && $allclassifications == 0) {
            if ($collectcfg->{'allclassifications'} =~ /^true$/i) {
                $allclassifications = 1;
            }
        }
        if (defined $collectcfg->{'keepold'} && $collectcfg->{'keepold'} =~ /^true$/i) {
            $keepold = 1;
        }
        if (defined $collectcfg->{'create_images'} && $collectcfg->{'create_images'} =~ /^true$/i) {
            $create_images = 1;
        }
        if ($buildtype eq "mgpp" && defined $collectcfg->{'textcompress'}) {
            $textindex = $collectcfg->{'textcompress'};
        }

    } else {
        die "Couldn't find the configuration file $configfilename\n";
    }

    #mgpp doesn't work yet on windows
    # (matched case-insensitively, the same way print_usage tests GSDLOS)
    if ($buildtype eq "mgpp" && $ENV{'GSDLOS'} =~ /^windows$/i) {
        die "mgpp doesn't work on windows\n";
    }

    #set the text index
    if ($buildtype eq "mgpp") {
        if ($textindex eq "") {
            $textindex = "text";
        }
    }
    else {
        $textindex = "section:text";
    }

    # create default images if required
    if ($create_images) {
        my $collection_name = $collection;
        $collection_name = $collectcfg->{'collectionmeta'}->{'collectionname'}
            if defined $collectcfg->{'collectionmeta'}->{'collectionname'};

        # $out is lexical to this sub, so it is handed over explicitly for
        # any warnings create_images needs to print
        create_images ($collection_name, $out);
    }

    # fill in the default archives and building directories if none
    # were supplied, turn all \ into / and remove trailing /
    $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives") if $archivedir eq "";
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;
    $builddir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "building") if $builddir eq "";
    $builddir =~ s/[\\\/]+/\//g;
    $builddir =~ s/\/$//;

    # update the archive cache if needed
    my ($realarchivedir, $realbuilddir);
    if ($cachedir) {
        print $out "Updating archive cache\n" if ($verbosity >= 1);

        $cachedir =~ s/[\\\/]+$//;
        # \Q..\E: the collection name is literal text, not a pattern
        $cachedir .= "/collect/$collection" unless
            $cachedir =~ /collect\/\Q$collection\E/;

        $realarchivedir = "$cachedir/archives";
        $realbuilddir = "$cachedir/building";
        &util::mk_all_dir ($realarchivedir);
        &util::mk_all_dir ($realbuilddir);
        &util::cachedir ($archivedir, $realarchivedir, $verbosity);

    } else {
        $realarchivedir = $archivedir;
        $realbuilddir = $builddir;
    }

    # build it in realbuilddir
    &util::mk_all_dir ($realbuilddir);


    # if a builder class has been created for this collection, use it
    # otherwise, use the mg or mgpp builder
    my ($builderdir, $buildertype);
    if (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = "${collection}builder";
    } else {
        $builderdir = "$ENV{'GSDLHOME'}/perllib";
        if ($buildtype eq "mgpp") {
            $buildertype = "mgppbuilder";
        }
        else {
            $buildertype = "mgbuilder";
        }
    }

    require "$builderdir/$buildertype.pm";

    # a class name held in a scalar can be used directly as the invocant,
    # so no string eval is needed to construct the builder
    my $builder = $buildertype->new ($collection,
                                     $realarchivedir, $realbuilddir, $verbosity,
                                     $maxdocs, $debug, $keepold, $allclassifications,
                                     $out, $no_text);

    $builder->init();

    if ($buildertype eq "mgppbuilder" && $no_strip_html) {
        $builder->set_strip_html(0);
    }
    if ($mode =~ /^all$/i) {
        $builder->compress_text($textindex);
        $builder->build_indexes($indexname);
        $builder->make_infodatabase();
        $builder->collect_specific();
    } elsif ($mode =~ /^compress_text$/i) {
        $builder->compress_text($textindex);
    } elsif ($mode =~ /^build_index$/i) {
        $builder->build_indexes($indexname);
    } elsif ($mode =~ /^infodb$/i) {
        $builder->make_infodatabase();
    } else {
        die "unknown mode: $mode\n";
    }

    $builder->make_auxiliary_files() if !$debug;
    $builder->deinit();

    # the two directories only differ when a cache directory was in use
    if (($realbuilddir ne $builddir) && !$debug) {
        print $out "Copying back the cached build\n" if ($verbosity >= 1);
        &util::rm_r ($builddir);
        &util::cp_r ($realbuilddir, $builddir);
    }

    close OUT if $close_out;
}
309
# create_images ($collection_name [, $out])
#
# Runs bin/script/gimp/title_icon.pl twice to generate "<collection>.gif"
# and "<collection>sm.gif" in the collection's images directory, then
# rewrites collect.cfg so that the iconcollection and iconcollectionsmall
# collectionmeta entries point at those files (existing entries are
# replaced, missing ones are appended).
#
#   $collection_name  text passed to the image script with -text
#   $out              optional handle (or handle name) for warnings;
#                     defaults to STDERR.  main()'s $out is a lexical and is
#                     not visible here, so without this parameter every
#                     warning below would be printed to an undefined handle.
#
# Reads the package variables $collection and $configfilename, which main()
# sets before calling.  Never dies: each failure is reported as a WARNING
# and the sub returns early where it cannot continue.
sub create_images {
    my ($collection_name, $out) = @_;

    $out = \*STDERR unless defined $out;

    my $image_script = &util::filename_cat ($ENV{'GSDLHOME'}, "bin", "script", "gimp", "title_icon.pl");
    if (!-e $image_script) {
        print $out "WARNING: Image making script ($image_script) could not be found\n";
        print $out " Default images will not be generated\n\n";
        return;
    }

    my $imagedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "images");

    &util::mk_all_dir ($imagedir);

    # create the images.  The list form of system() hands each argument to
    # the script untouched, so quotes or shell metacharacters in the
    # collection name or image directory cannot break the command.
    my @image_jobs = (
        ["-size", "1.5", "-image_dir", $imagedir,
         "-filename", "$collection.gif", "-text", $collection_name],
        ["-image_dir", $imagedir,
         "-filename", "${collection}sm.gif", "-text", $collection_name],
    );
    foreach my $job (@image_jobs) {
        if (system ($image_script, @$job) != 0) {
            # best effort, as before: report the failure and carry on
            print $out "WARNING: Image making script ($image_script) did not complete successfully\n";
        }
    }

    # update the collect.cfg configuration file (this will need
    # to be changed when the config file format changes)
    my $cfg_in;
    if (!open ($cfg_in, '<', $configfilename)) {
        print $out "WARNING: Couldn't open config file ($configfilename)\n";
        print $out " for updating so collection images may not be linked correctly\n";
        return;
    }

    my $icon_line = "collectionmeta iconcollection _httpprefix_/collect/$collection/images/$collection.gif\n";
    my $iconsm_line = "collectionmeta iconcollectionsmall _httpprefix_/collect/$collection/images/${collection}sm.gif\n";

    my $file = "";
    my $found = 0; my $foundsm = 0;
    while (defined (my $line = <$cfg_in>)) {
        if ($line =~ /collectionmeta\s+iconcollection\s+/) {
            $line = $icon_line;
            $found = 1;
        } elsif ($line =~ /collectionmeta\s+iconcollectionsmall\s+/) {
            $line = $iconsm_line;
            $foundsm = 1;
        }
        $file .= $line;
    }
    close $cfg_in;

    # if the file's last line has no newline, an appended entry would be
    # glued onto the end of it
    if ((!$found || !$foundsm) && $file ne "" && $file !~ /\n\z/) {
        $file .= "\n";
    }
    $file .= $icon_line if !$found;
    $file .= $iconsm_line if !$foundsm;

    my $cfg_out;
    if (!open ($cfg_out, '>', $configfilename)) {
        print $out "WARNING: Couldn't open config file ($configfilename)\n";
        print $out " for updating so collection images may not be linked correctly\n";
        return;
    }
    print $cfg_out $file;

    # buffered write errors (e.g. a full disk) only show up at close
    if (!close ($cfg_out)) {
        print $out "WARNING: Couldn't finish writing config file ($configfilename): $!\n";
    }
    return;
}
361
Note: See TracBrowser for help on using the repository browser.