source: trunk/gsdl/bin/script/buildcol.pl@ 2524

Last change on this file since 2524 was 2524, checked in by kjm18, 23 years ago

removed "exit if mgpp & windows"

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 12.8 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl -- This program will build a particular collection
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package buildcol;
29
# Compile-time setup: refuse to run unless the GSDLHOME and GSDLOS
# environment variables are defined, then put the Greenstone perl library
# directories at the front of the module search path so that the "use"
# statements below can be resolved.
BEGIN {
    defined $ENV{'GSDLHOME'} or die "GSDLHOME not set\n";
    defined $ENV{'GSDLOS'} or die "GSDLOS not set\n";

    # search order ends up as: classify, plugins, perllib, <previous @INC>
    my $perllib = "$ENV{'GSDLHOME'}/perllib";
    unshift (@INC, "$perllib/classify", "$perllib/plugins", $perllib);
}
37
38use colcfg;
39use parsargv;
40use util;
41use FileHandle;
42
# run the build; main() is defined further down in this file
main();
44
# print_usage -- write the command line help text to STDOUT.
#
# Takes no arguments. The default shown for -collectdir is built from
# $ENV{'GSDLHOME'}. The closing newline is left off when $ENV{'GSDLOS'}
# is "windows" (compared case-insensitively).
sub print_usage {
    my $default_collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");

    # the terminator of the here-document must stay in the first column
    print STDOUT <<"END_OF_USAGE";

buildcol.pl: Builds the indexes of a Greenstone collection.

 usage: $0 [options] collection-name

 options:
 -verbosity number 0=none, 3=lots
 -archivedir directory Where the archives live
 -builddir directory Where to put the built indexes
 -maxdocs number Maximum number of documents to build
 -debug Print output to STDOUT
 -mode all|compress_text|build_index|infodb
 -index indexname Index to build (will build all in
 config file if not set)
 -keepold will not destroy the current contents of the
 building directory
 -no_text Don't store compressed text. This option is
 useful for minimizing the size of the built
 indexes if you intend always to display the
 original documents at run time (i.e. you won't
 be able to retrieve the compressed text version)
 -allclassifications Don't remove empty classifications
 -create_images Attempt to create default images for new
 collection. This relies on the Gimp being
 installed along with relevant perl modules
 to allow scripting from perl
 -collectdir directory Collection directory (defaults to $default_collectdir)
 -out Filename or handle to print output status to.
 The default is STDERR
 -buildtype mg|mgpp This will override the config file setting.
 (default is mg)
 -no_strip_html Do not strip the html tags from the indexed text
 (only used for mgpp collections).

END_OF_USAGE

    print STDOUT " [Type \"perl -S buildcol.pl | more\" if this help text scrolled off your screen]";
    if ($ENV{'GSDLOS'} !~ /^windows$/i) {
        print STDOUT "\n";
    }
}
81
82
# main -- entry point of buildcol.pl.
#
# Parses the command line in @ARGV, selects the collection, merges in any
# settings from the collection's etc/collect.cfg that were not given on the
# command line, loads a builder class and runs the build stages selected
# by -mode.
#
# Takes no arguments and returns nothing useful.
#
# Dies when: the arguments do not parse or no collection is selected (the
# usage text is printed first), the -out file cannot be opened, collect.cfg
# does not exist, or the mode is not one of the known values.
#
# Package globals ($collection, $configfilename, $collectcfg, $builder, ...)
# are deliberately left as globals: create_images() reads $collection and
# $configfilename.
sub main
{
    my ($verbosity, $archivedir, $cachedir, $builddir, $maxdocs,
        $debug, $mode, $indexname, $keepold, $allclassifications,
        $create_images, $collectdir, $out, $buildtype, $textindex,
        $no_strip_html, $no_text);

    # note that no defaults are passed for most options as they're set
    # later (after we check the collect.cfg file)
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/', \$verbosity,
                         'archivedir/.*/', \$archivedir,
                         'cachedir/.*/', \$cachedir,
                         'builddir/.*/', \$builddir,
                         'maxdocs/^\-?\d+/', \$maxdocs,
                         'debug', \$debug,
                         'mode/^(all|compress_text|build_index|infodb)$/', \$mode,
                         'index/.*/', \$indexname,
                         'no_text', \$no_text,
                         'keepold', \$keepold,
                         'allclassifications', \$allclassifications,
                         'create_images', \$create_images,
                         'collectdir/.*/', \$collectdir,
                         'out/.*/STDERR', \$out,
                         'no_strip_html', \$no_strip_html,
                         'buildtype/^(mg|mgpp)$/', \$buildtype)) {
        &print_usage();
        die "\n";
    }

    $textindex = "";
    my $close_out = 0;
    if ($out =~ /^(STDERR|STDOUT)$/i) {
        # the test is case-insensitive but only the upper-case names are
        # real filehandles, so normalise (e.g. "-out stderr")
        $out = uc ($out);
    } else {
        # three-argument open: the file name is never parsed for a mode
        open (OUT, ">", $out) || die "Couldn't open output file $out: $!\n";
        # from here on $out is the (string) name of the handle; it is
        # passed around and used as a symbolic filehandle
        $out = "buildcol::OUT";
        $close_out = 1;
    }
    $out->autoflush(1);

    # get and check the collection
    if (($collection = &util::use_collection(@ARGV, $collectdir)) eq "") {
        &print_usage();
        die "\n";
    }

    # read the configuration file
    # (GSDLCOLLECTDIR is presumably set by util::use_collection -- confirm)
    $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg");
    if (!-e $configfilename) {
        die "Couldn't find the configuration file $configfilename\n";
    }
    $collectcfg = &colcfg::read_collect_cfg ($configfilename);

    # For each option: a value given on the command line wins, otherwise
    # the collect.cfg value is used, otherwise the built-in default.
    if ($verbosity !~ /\d+/) {
        if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
            $verbosity = $collectcfg->{'verbosity'};
        } else {
            $verbosity = 2; # the default
        }
    }
    if (defined $collectcfg->{'buildtype'} && $buildtype eq "") {
        $buildtype = $collectcfg->{'buildtype'};
    }
    if ($buildtype eq "") {
        $buildtype = "mg"; # mg is the default
    }
    if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
        $archivedir = $collectcfg->{'archivedir'};
    }
    if (defined $collectcfg->{'cachedir'} && $cachedir eq "") {
        $cachedir = $collectcfg->{'cachedir'};
    }
    if (defined $collectcfg->{'builddir'} && $builddir eq "") {
        $builddir = $collectcfg->{'builddir'};
    }
    if ($maxdocs !~ /\-?\d+/) {
        if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
            $maxdocs = $collectcfg->{'maxdocs'};
        } else {
            $maxdocs = -1; # the default
        }
    }
    # boolean options can only be switched on by collect.cfg, never off
    if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
        $debug = 1;
    }
    if ($mode !~ /^(all|compress_text|build_index|infodb)$/) {
        if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ /^(all|compress_text|build_index|infodb)$/) {
            $mode = $collectcfg->{'mode'};
        } else {
            $mode = "all"; # the default
        }
    }
    if (defined $collectcfg->{'index'} && $indexname eq "") {
        $indexname = $collectcfg->{'index'};
    }
    if (defined $collectcfg->{'no_text'} && $no_text == 0) {
        if ($collectcfg->{'no_text'} =~ /^true$/i) {
            $no_text = 1;
        }
    }
    if (defined $collectcfg->{'allclassifications'} && $allclassifications == 0) {
        if ($collectcfg->{'allclassifications'} =~ /^true$/i) {
            $allclassifications = 1;
        }
    }
    if (defined $collectcfg->{'keepold'} && $collectcfg->{'keepold'} =~ /^true$/i) {
        $keepold = 1;
    }
    if (defined $collectcfg->{'create_images'} && $collectcfg->{'create_images'} =~ /^true$/i) {
        $create_images = 1;
    }
    if ($buildtype eq "mgpp" && defined $collectcfg->{'textcompress'}) {
        $textindex = $collectcfg->{'textcompress'};
    }

    #set the text index
    if ($buildtype eq "mgpp") {
        if ($textindex eq "") {
            $textindex = "text";
        }
    }
    else {
        $textindex = "section:text";
    }

    # create default images if required
    if ($create_images) {
        my $collection_name = $collection;
        $collection_name = $collectcfg->{'collectionmeta'}->{'collectionname'}
            if defined $collectcfg->{'collectionmeta'}->{'collectionname'};

        # hand over the output handle as well: $out is lexical to this
        # sub, so create_images cannot see it otherwise
        &create_images ($collection_name, $out);
    }

    # fill in the default archives and building directories if none
    # were supplied, turn all \ into / and remove trailing /
    $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives") if $archivedir eq "";
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;
    $builddir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "building") if $builddir eq "";
    $builddir =~ s/[\\\/]+/\//g;
    $builddir =~ s/\/$//;

    # update the archive cache if needed
    if ($cachedir) {
        print $out "Updating archive cache\n" if ($verbosity >= 1);

        $cachedir =~ s/[\\\/]+$//;
        # \Q..\E: the collection name is literal text, not a pattern
        $cachedir .= "/collect/$collection" unless
            $cachedir =~ /collect\/\Q$collection\E/;

        $realarchivedir = "$cachedir/archives";
        $realbuilddir = "$cachedir/building";
        &util::mk_all_dir ($realarchivedir);
        &util::mk_all_dir ($realbuilddir);
        &util::cachedir ($archivedir, $realarchivedir, $verbosity);

    } else {
        $realarchivedir = $archivedir;
        $realbuilddir = $builddir;
    }

    # build it in realbuilddir
    &util::mk_all_dir ($realbuilddir);


    # if a builder class has been created for this collection, use it
    # otherwise, use the mg or mgpp builder
    if (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = "${collection}builder";
    } else {
        $builderdir = "$ENV{'GSDLHOME'}/perllib";
        if ($buildtype eq "mgpp") {
            $buildertype = "mgppbuilder";
        }
        else {
            $buildertype = "mgbuilder";
        }
    }

    require "$builderdir/$buildertype.pm";

    # the class name is held in a string, so call the constructor through
    # it directly; no string eval is needed and any error raised by the
    # constructor simply propagates
    $builder = $buildertype->new($collection,
                                 $realarchivedir, $realbuilddir, $verbosity,
                                 $maxdocs, $debug, $keepold, $allclassifications,
                                 $out, $no_text);

    $builder->init();

    if ($buildertype eq "mgppbuilder" && $no_strip_html) {
        $builder->set_strip_html(0);
    }
    if ($mode =~ /^all$/i) {
        $builder->compress_text($textindex);
        $builder->build_indexes($indexname);
        $builder->make_infodatabase();
        $builder->collect_specific();
    } elsif ($mode =~ /^compress_text$/i) {
        $builder->compress_text($textindex);
    } elsif ($mode =~ /^build_index$/i) {
        $builder->build_indexes($indexname);
    } elsif ($mode =~ /^infodb$/i) {
        $builder->make_infodatabase();
    } else {
        die "unknown mode: $mode\n";
    }

    $builder->make_auxiliary_files() if !$debug;
    $builder->deinit();

    # when a cache directory was used the build happened there, so the
    # result replaces the contents of the real building directory
    if (($realbuilddir ne $builddir) && !$debug) {
        print $out "Copying back the cached build\n" if ($verbosity >= 1);
        &util::rm_r ($builddir);
        &util::cp_r ($realbuilddir, $builddir);
    }

    close OUT if $close_out;
}
304
# create_images -- generate the default collection icons and point the
# collection's config file at them.
#
# Arguments:
#   $collection_name  text passed to the image script via -text
#   $out              (optional) filehandle, or name of a filehandle, that
#                     warnings are printed to; defaults to STDERR
#
# Reads the package globals $collection and $configfilename, which main()
# sets before calling this sub.
#
# Runs bin/script/gimp/title_icon.pl twice (large and small icon), then
# rewrites the config file so that the "collectionmeta iconcollection" and
# "collectionmeta iconcollectionsmall" lines refer to the new images,
# appending those lines if they were not present. Every failure is reported
# as a warning on $out and the sub returns; nothing here dies.
sub create_images {
    my ($collection_name, $out) = @_;

    # main()'s $out is a lexical and is not visible here. Without this
    # default the warnings below would be printed to an unset package
    # variable, which is a fatal error instead of a warning.
    $out = "STDERR" unless defined $out;

    my $image_script = &util::filename_cat ($ENV{'GSDLHOME'}, "bin", "script", "gimp", "title_icon.pl");
    if (!-e $image_script) {
        print $out "WARNING: Image making script ($image_script) could not be found\n";
        print $out " Default images will not be generated\n\n";
        return;
    }

    my $imagedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "images");

    &util::mk_all_dir ($imagedir);

    # create the images
    # NOTE(review): the collection name and title are interpolated into a
    # shell command line; a name containing quotes or shell metacharacters
    # will break (or alter) the command. Left as is because the script is
    # run through the shell on purpose.
    system ("$image_script -size 1.5 -image_dir \"$imagedir\" -filename $collection.gif -text \"$collection_name\"");
    system ("$image_script -image_dir \"$imagedir\" -filename ${collection}sm.gif -text \"$collection_name\"");

    # update the collect.cfg configuration file (this will need
    # to be changed when the config file format changes)
    if (!open (CFGFILE, "<", $configfilename)) {
        print $out "WARNING: Couldn't open config file ($configfilename)\n";
        print $out " for updating so collection images may not be linked correctly\n";
        return;
    }

    # copy the file into memory, replacing any existing icon lines
    my $line = ""; my $file = "";
    my $found = 0; my $foundsm = 0;
    while (defined ($line = <CFGFILE>)) {
        if ($line =~ /collectionmeta\s+iconcollection\s+/) {
            $line = "collectionmeta iconcollection _httpprefix_/collect/$collection/images/$collection.gif\n";
            $found = 1;
        } elsif ($line =~ /collectionmeta\s+iconcollectionsmall\s+/) {
            $line = "collectionmeta iconcollectionsmall _httpprefix_/collect/$collection/images/${collection}sm.gif\n";
            $foundsm = 1;
        }
        $file .= $line;
    }
    close CFGFILE;

    # add whichever icon lines the file did not already have
    $file .= "collectionmeta iconcollection _httpprefix_/collect/$collection/images/$collection.gif\n" if !$found;
    $file .= "collectionmeta iconcollectionsmall _httpprefix_/collect/$collection/images/${collection}sm.gif\n" if !$foundsm;

    if (!open (CFGFILE, ">", $configfilename)) {
        print $out "WARNING: Couldn't open config file ($configfilename)\n";
        print $out " for updating so collection images may not be linked correctly\n";
        return;
    }
    print CFGFILE $file;
    # buffered write errors only show up when the handle is closed
    if (!close (CFGFILE)) {
        print $out "WARNING: Couldn't finish writing config file ($configfilename): $!\n";
    }
}
356
Note: See TracBrowser for help on using the repository browser.