source: trunk/gsdl/bin/script/buildcol.pl@ 1970

Last change on this file since 1970 was 1970, checked in by sjboddie, 23 years ago

Added more usage information to all perl programs and removed a few
programs that are no longer useful.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 10.7 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl -- This program will build a particular collection
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
package buildcol;

# Compile-time setup: refuse to run without the Greenstone environment
# variables, then put the Greenstone perl library directories at the
# front of @INC so the "use" lines below can be resolved.
BEGIN {
    if (!defined $ENV{'GSDLHOME'}) {
        die "GSDLHOME not set\n";
    }
    if (!defined $ENV{'GSDLOS'}) {
        die "GSDLOS not set\n";
    }

    # a single unshift keeps the list order, so classify ends up first,
    # then plugins, then perllib itself
    my $perllib = "$ENV{'GSDLHOME'}/perllib";
    unshift (@INC, "$perllib/classify", "$perllib/plugins", $perllib);
}

use colcfg;
use parsargv;
use util;
use FileHandle;

# run the script; main is defined further down the file
&main();
44
# print_usage -- write the command line help for buildcol.pl to STDERR.
# Takes no arguments.
sub print_usage {
    # default shown for -collectdir
    my $default_collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");

    my @usage_text = (
        "\n",
        "buildcol.pl: Builds the indexes of a Greenstone collection.\n\n",
        " usage: $0 [options] collection-name\n\n",
        " options:\n",
        " -verbosity number 0=none, 3=lots\n",
        " -archivedir directory Where the archives live\n",
        " -builddir directory Where to put the built indexes\n",
        " -maxdocs number Maximum number of documents to build\n",
        " -debug Print output to STDOUT\n",
        " -mode all|compress_text|build_index|infodb\n",
        " -index indexname Index to build (will build all in\n",
        " config file if not set)\n",
        " -keepold will not destroy the current contents of the\n",
        " building directory\n",
        " -allclassifications Don't remove empty classifications\n",
        " -create_images Attempt to create default images for new\n",
        " collection. This relies on the Gimp being\n",
        " installed along with relevant perl modules\n",
        " to allow scripting from perl\n",
        " -collectdir directory Collection directory (defaults to " .
            $default_collectdir . ")\n",
        " -out Filename or handle to print output status to.\n",
        " The default is STDERR\n",
        " -buildtype mg|mgpp This will override the config file setting.\n",
        " (default is mg)\n",
        " -no_strip_html Do not strip the html tags from the indexed text\n",
        " (only used for mgpp collections).\n\n",
    );

    print STDERR @usage_text;
}
74
75
# main -- entry point of buildcol.pl.
#
# Parses the command line, reads the collection's etc/collect.cfg, works
# out the archives/building directories (optionally going through a cache
# directory), loads a builder class and runs the build steps selected by
# -mode.
#
# Takes no arguments; options are read from @ARGV.  Dies (after printing
# the usage text where appropriate) on bad arguments, a missing
# configuration file, an unwritable -out file, or an unsupported
# buildtype/OS combination.
#
# Note: $collection and $configfilename are deliberately left as package
# variables because create_images() reads them.
sub main
{
    my ($verbosity, $archivedir, $cachedir, $builddir, $maxdocs,
        $debug, $mode, $indexname, $keepold, $allclassifications,
        $create_images, $collectdir, $out, $buildtype, $textindex,
        $no_strip_html);
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'archivedir/.*/', \$archivedir,
                         'cachedir/.*/', \$cachedir,
                         'builddir/.*/', \$builddir,
                         'maxdocs/^\-?\d+/-1', \$maxdocs,
                         'debug', \$debug,
                         'mode/^(all|compress_text|build_index|infodb)$/all', \$mode,
                         'index/.*/', \$indexname,
                         'keepold', \$keepold,
                         'allclassifications', \$allclassifications,
                         'create_images', \$create_images,
                         'collectdir/.*/', \$collectdir,
                         'out/.*/STDERR', \$out,
                         'no_strip_html', \$no_strip_html,
                         'buildtype/^(mg|mgpp)$/', \$buildtype)) {
        &print_usage();
        die "\n";
    }

    $textindex = "";
    my $close_out = 0;
    if ($out !~ /^(STDERR|STDOUT)$/i) {
        # three-argument open so that the user supplied file name can
        # never be interpreted as an open mode or a command
        open (OUT, '>', $out) || die "Couldn't open output file $out: $!\n";
        $out = "buildcol::OUT";
        $close_out = 1;
    } else {
        # the check above is case-insensitive but only the upper-case
        # names are global handles from inside this package, so
        # normalise "stdout"/"stderr" before using them as handle names
        $out = uc ($out);
    }
    $out->autoflush(1);

    # get and check the collection
    # (pass the collection name explicitly: flattening @ARGV into the
    # argument list would let $collectdir slide into the collection slot
    # when no name was given, and would drop $collectdir when extra
    # arguments were given)
    my $collection_arg = (@ARGV) ? $ARGV[0] : "";
    if (($collection = &util::use_collection($collection_arg, $collectdir)) eq "") {
        &print_usage();
        die "\n";
    }

    # read the configuration file; command line values take precedence
    # over the ones found in collect.cfg

    $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg");
    if (-e $configfilename) {
        $collectcfg = &colcfg::read_collect_cfg ($configfilename);

        if (defined $collectcfg->{'buildtype'} && $buildtype eq "") {
            $buildtype = $collectcfg->{'buildtype'};
        }
        if ($buildtype eq "") {
            $buildtype = "mg"; # mg is the default
        }
        if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
            $archivedir = $collectcfg->{'archivedir'};
        }
        if (defined $collectcfg->{'cachedir'} && $cachedir eq "") {
            $cachedir = $collectcfg->{'cachedir'};
        }
        if (defined $collectcfg->{'builddir'} && $builddir eq "") {
            $builddir = $collectcfg->{'builddir'};
        }
        if ($buildtype eq "mgpp" && defined $collectcfg->{'textcompress'}) {
            $textindex = $collectcfg->{'textcompress'};
        }

    } else {
        die "Couldn't find the configuration file $configfilename\n";
    }

    #mgpp doesn't work yet on windows
    if ($buildtype eq "mgpp" && $ENV{'GSDLOS'} =~ /^windows$/) {
        die "mgpp doesn't work on windows\n";
    }

    #set the text index
    if ($buildtype eq "mgpp") {
        if ($textindex eq "") {
            $textindex = "text";
        }
    }
    else {
        $textindex = "section:text";
    }

    # create default images if required
    if ($create_images) {
        my $collection_name = $collection;
        $collection_name = $collectcfg->{'collectionmeta'}->{'collectionname'}
            if defined $collectcfg->{'collectionmeta'}->{'collectionname'};

        # hand over the status handle as well: $out is lexical to main,
        # so create_images cannot see it on its own
        &create_images ($collection_name, $out);
    }

    # fill in the default archives and building directories if none
    # were supplied, turn all \ into / and remove trailing /
    $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives") if $archivedir eq "";
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;
    $builddir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "building") if $builddir eq "";
    $builddir =~ s/[\\\/]+/\//g;
    $builddir =~ s/\/$//;

    # update the archive cache if needed
    if ($cachedir) {
        print $out "Updating archive cache\n" if ($verbosity >= 1);

        $cachedir =~ s/[\\\/]+$//;
        # \Q..\E: the collection name is literal text, not a pattern
        $cachedir .= "/collect/$collection" unless
            $cachedir =~ /collect\/\Q$collection\E/;

        $realarchivedir = "$cachedir/archives";
        $realbuilddir = "$cachedir/building";
        &util::mk_all_dir ($realarchivedir);
        &util::mk_all_dir ($realbuilddir);
        &util::cachedir ($archivedir, $realarchivedir, $verbosity);

    } else {
        $realarchivedir = $archivedir;
        $realbuilddir = $builddir;
    }

    # build it in realbuilddir
    &util::mk_all_dir ($realbuilddir);


    # if a builder class has been created for this collection, use it
    # otherwise, use the mg or mgpp builder
    if (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = "${collection}builder";
    } else {
        $builderdir = "$ENV{'GSDLHOME'}/perllib";
        if ($buildtype eq "mgpp") {
            $buildertype = "mgppbuilder";
        }
        else {
            $buildertype = "mgbuilder";
        }
    }

    require "$builderdir/$buildertype.pm";

    # the class name is held in a variable, so call its constructor
    # directly; no string eval is needed and any error raised by the
    # constructor propagates unchanged
    $builder = $buildertype->new ($collection,
                                  $realarchivedir, $realbuilddir, $verbosity,
                                  $maxdocs, $debug, $keepold,
                                  $allclassifications, $out);

    $builder->init();

    if ($buildertype eq "mgppbuilder" && $no_strip_html) {
        $builder->set_strip_html(0);
    }
    if ($mode =~ /^all$/i) {
        $builder->compress_text($textindex);
        $builder->build_indexes($indexname);
        $builder->make_infodatabase();
        $builder->collect_specific();
    } elsif ($mode =~ /^compress_text$/i) {
        $builder->compress_text($textindex);
    } elsif ($mode =~ /^build_index$/i) {
        $builder->build_indexes($indexname);
    } elsif ($mode =~ /^infodb$/i) {
        $builder->make_infodatabase();
    } else {
        die "unknown mode: $mode\n";
    }

    $builder->make_auxiliary_files() if !$debug;
    $builder->deinit();

    # when building went through the cache directory, copy the result
    # back to the directory the caller asked for
    if (($realbuilddir ne $builddir) && !$debug) {
        print $out "Copying back the cached build\n" if ($verbosity >= 1);
        &util::rm_r ($builddir);
        &util::cp_r ($realbuilddir, $builddir);
    }

    close OUT if $close_out;
}
255
# create_images -- generate the default collection icons and point the
# collection's collect.cfg at them.
#
# Arguments:
#   $collection_name  text passed to the image script via -text
#   $status_out       (optional) handle, or handle name, that warnings are
#                     printed to.  Defaults to the package variable $out
#                     if that is defined, otherwise to STDERR.  Without
#                     this fallback every warning below would be printed
#                     to an undefined handle, because main keeps its $out
#                     in a lexical variable.
#
# Reads the package variables $collection and $configfilename, which are
# set by main.  Runs bin/script/gimp/title_icon.pl twice (large and small
# icon) and then rewrites the iconcollection / iconcollectionsmall
# collectionmeta lines of the config file, appending them if absent.
# Returns nothing useful; problems are reported as warnings only.
sub create_images {
    my ($collection_name, $status_out) = @_;

    if (!defined $status_out) {
        $status_out = defined $out ? $out : "STDERR";
    }

    my $image_script = &util::filename_cat ($ENV{'GSDLHOME'}, "bin", "script", "gimp", "title_icon.pl");
    if (!-e $image_script) {
        print $status_out "WARNING: Image making script ($image_script) could not be found\n";
        print $status_out " Default images will not be generated\n\n";
        return;
    }

    my $imagedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "images");

    &util::mk_all_dir ($imagedir);

    # create the images
    # (list form of system: the collection name comes from collect.cfg and
    # may contain quotes or other shell metacharacters, so no shell command
    # string is built from it)
    system ($image_script, "-size", "1.5", "-image_dir", $imagedir,
            "-filename", "$collection.gif", "-text", $collection_name);
    system ($image_script, "-image_dir", $imagedir,
            "-filename", "${collection}sm.gif", "-text", $collection_name);

    # update the collect.cfg configuration file (this will need
    # to be changed when the config file format changes)
    if (!open (CFGFILE, '<', $configfilename)) {
        print $status_out "WARNING: Couldn't open config file ($configfilename)\n";
        print $status_out " for updating so collection images may not be linked correctly\n";
        return;
    }

    my $icon_line = "collectionmeta iconcollection _httpprefix_/collect/$collection/images/$collection.gif\n";
    my $iconsm_line = "collectionmeta iconcollectionsmall _httpprefix_/collect/$collection/images/${collection}sm.gif\n";

    my $line = ""; my $file = "";
    my $found = 0; my $foundsm = 0;
    while (defined ($line = <CFGFILE>)) {
        # the trailing \s+ keeps "iconcollection" from also matching
        # "iconcollectionsmall"
        if ($line =~ /collectionmeta\s+iconcollection\s+/) {
            $line = $icon_line;
            $found = 1;
        } elsif ($line =~ /collectionmeta\s+iconcollectionsmall\s+/) {
            $line = $iconsm_line;
            $foundsm = 1;
        }
        $file .= $line;
    }
    close CFGFILE;

    # append whichever entries the file did not already contain
    $file .= $icon_line if !$found;
    $file .= $iconsm_line if !$foundsm;

    if (!open (CFGFILE, '>', $configfilename)) {
        print $status_out "WARNING: Couldn't open config file ($configfilename)\n";
        print $status_out " for updating so collection images may not be linked correctly\n";
        return;
    }

    # the file has just been truncated, so a failed write must not go
    # unnoticed (buffered write errors may only surface at close)
    my $written = print CFGFILE $file;
    my $closed = close CFGFILE;
    if (!$written || !$closed) {
        print $status_out "WARNING: Couldn't write config file ($configfilename): $!\n";
    }
}
307
Note: See TracBrowser for help on using the repository browser.