source: trunk/gsdl/bin/script/buildcol.pl@ 1853

Last change on this file since 1853 was 1853, checked in by kjm18, 23 years ago

Adapted to work with both mg and mgpp building. The default is to use mg.
You can select mgpp in the collect.cfg ("buildtype mgpp") or pass it as
an option ("-buildtype mgpp").

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 10.5 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# buildcol.pl -- This program will build a particular collection
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
package buildcol;

# Compile-time setup: refuse to run without the Greenstone environment
# and put the Greenstone perl library directories at the front of @INC
# so that the modules below can be found.
BEGIN {
    defined $ENV{'GSDLHOME'} or die "GSDLHOME not set\n";
    defined $ENV{'GSDLOS'} or die "GSDLOS not set\n";

    # A single unshift keeps the list order, so the search order is
    # classify, then plugins, then perllib itself.
    my $perllib = "$ENV{'GSDLHOME'}/perllib";
    unshift (@INC, "$perllib/classify", "$perllib/plugins", $perllib);
}

use colcfg;
use parsargv;
use util;
use FileHandle;

# run the build; everything else in this file is a subroutine
&main();
44
# print_usage ()
#
# Writes the command line help for this script to STDERR.
sub print_usage {
    # help text that comes before the -collectdir entry
    my @leading_text = (
        "\n usage: $0 [options] collection-name\n\n",
        " options:\n",
        " -verbosity number 0=none, 3=lots\n",
        " -archivedir directory Where the archives live\n",
        " -builddir directory Where to put the built indexes\n",
        " -maxdocs number Maximum number of documents to build\n",
        " -debug Print output to STDOUT\n",
        " -mode all|compress_text|build_index|infodb\n",
        " -index indexname Index to build (will build all in\n",
        " config file if not set)\n",
        " -keepold will not destroy the current contents of the\n",
        " building directory\n",
        " -allclassifications Don't remove empty classifications\n",
        " -create_images Attempt to create default images for new\n",
        " collection. This relies on the Gimp being\n",
        " installed along with relevant perl modules\n",
        " to allow scripting from perl\n",
    );

    # help text that comes after the -collectdir entry
    my @trailing_text = (
        " -out Filename or handle to print output status to.\n",
        " The default is STDERR\n",
        " -buildtype mg|mgpp THis will override the config file setting. (default is mg)\n",
        " -no_strip_html Do not strip the html tags from the indexed text (only used for mgpp collections).\n\n",
    );

    foreach my $text (@leading_text) {
        print STDERR $text;
    }

    # the default collect directory is worked out from GSDLHOME at run time
    my $default_collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");
    print STDERR " -collectdir directory Collection directory (defaults to $default_collectdir)\n";

    foreach my $text (@trailing_text) {
        print STDERR $text;
    }
}
70
71
# main ()
#
# Entry point: parses the command line, reads the collection's
# collect.cfg, chooses a builder class (a collection specific one if it
# exists, otherwise mgbuilder or mgppbuilder) and runs the build steps
# selected by -mode.
#
# Dies (after printing the usage where appropriate) on bad arguments, an
# unknown collection, a missing collect.cfg, or an mgpp build on windows.
#
# $collection and $configfilename are deliberately left as package
# variables because create_images() reads them.
sub main
{
    my ($verbosity, $archivedir, $cachedir, $builddir, $maxdocs,
        $debug, $mode, $indexname, $keepold, $allclassifications,
        $create_images, $collectdir, $out, $buildtype, $textindex,
        $no_strip_html);
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'archivedir/.*/', \$archivedir,
                         'cachedir/.*/', \$cachedir,
                         'builddir/.*/', \$builddir,
                         'maxdocs/^\-?\d+/-1', \$maxdocs,
                         'debug', \$debug,
                         'mode/^(all|compress_text|build_index|infodb)$/all', \$mode,
                         'index/.*/', \$indexname,
                         'keepold', \$keepold,
                         'allclassifications', \$allclassifications,
                         'create_images', \$create_images,
                         'collectdir/.*/', \$collectdir,
                         'out/.*/STDERR', \$out,
                         'no_strip_html', \$no_strip_html,
                         'buildtype/^(mg|mgpp)$/', \$buildtype)) {
        &print_usage();
        die "\n";
    }

    $textindex = "";

    # work out where status output goes; $out ends up holding the name
    # of a filehandle, which is also what gets handed to the builder
    my $close_out = 0;
    if ($out =~ /^(STDERR|STDOUT)$/i) {
        # the match is case insensitive, but only the upper case names
        # resolve to the standard handles from inside this package
        $out = uc ($out);
    } else {
        open (OUT, '>', $out) || die "Couldn't open output file $out: $!\n";
        $out = "buildcol::OUT";
        $close_out = 1;
    }
    $out->autoflush(1);

    # get and check the collection
    if (($collection = &util::use_collection(@ARGV, $collectdir)) eq "") {
        &print_usage();
        die "\n";
    }

    # read the configuration file
    $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg");
    if (!-e $configfilename) {
        die "Couldn't find the configuration file $configfilename\n";
    }
    my $collectcfg = &colcfg::read_collect_cfg ($configfilename);

    # command line options take priority over the config file settings
    if (defined $collectcfg->{'buildtype'} && $buildtype eq "") {
        $buildtype = $collectcfg->{'buildtype'};
    }
    if ($buildtype eq "") {
        $buildtype = "mg"; # mg is the default
    }
    if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
        $archivedir = $collectcfg->{'archivedir'};
    }
    if (defined $collectcfg->{'cachedir'} && $cachedir eq "") {
        $cachedir = $collectcfg->{'cachedir'};
    }
    if (defined $collectcfg->{'builddir'} && $builddir eq "") {
        $builddir = $collectcfg->{'builddir'};
    }
    if ($buildtype eq "mgpp" && defined $collectcfg->{'textcompress'}) {
        $textindex = $collectcfg->{'textcompress'};
    }

    # mgpp doesn't work yet on windows
    if ($buildtype eq "mgpp" && $ENV{'GSDLOS'} =~ /^windows$/) {
        die "mgpp doesn't work on windows\n";
    }

    # set the text index
    if ($buildtype eq "mgpp") {
        $textindex = "text" if $textindex eq "";
    } else {
        $textindex = "section:text";
    }

    # create default images if required
    if ($create_images) {
        my $collection_name = $collection;
        $collection_name = $collectcfg->{'collectionmeta'}->{'collectionname'}
            if defined $collectcfg->{'collectionmeta'}->{'collectionname'};

        # $out is lexical to this sub, so it is passed along explicitly
        &create_images ($collection_name, $out);
    }

    # fill in the default archives and building directories if none
    # were supplied, turn all \ into / and remove trailing /
    $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives") if $archivedir eq "";
    $builddir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "building") if $builddir eq "";
    foreach my $dir ($archivedir, $builddir) {
        # $dir aliases the variable itself, so both are tidied in place
        $dir =~ s/[\\\/]+/\//g;
        $dir =~ s/\/$//;
    }

    # update the archive cache if needed
    my ($realarchivedir, $realbuilddir);
    if ($cachedir) {
        print $out "Updating archive cache\n" if ($verbosity >= 1);

        $cachedir =~ s/[\\\/]+$//;
        # \Q..\E: the collection name is literal text, not a pattern
        $cachedir .= "/collect/$collection" unless
            $cachedir =~ /collect\/\Q$collection\E/;

        $realarchivedir = "$cachedir/archives";
        $realbuilddir = "$cachedir/building";
        &util::mk_all_dir ($realarchivedir);
        &util::mk_all_dir ($realbuilddir);
        &util::cachedir ($archivedir, $realarchivedir, $verbosity);

    } else {
        $realarchivedir = $archivedir;
        $realbuilddir = $builddir;
    }

    # build it in realbuilddir
    &util::mk_all_dir ($realbuilddir);

    # if a builder class has been created for this collection, use it
    # otherwise, use the mg or mgpp builder
    my ($builderdir, $buildertype);
    if (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = "${collection}builder";
    } else {
        $builderdir = "$ENV{'GSDLHOME'}/perllib";
        $buildertype = ($buildtype eq "mgpp") ? "mgppbuilder" : "mgbuilder";
    }

    require "$builderdir/$buildertype.pm";

    # a class name held in a string can be used directly as the invocant,
    # so no string eval is needed; any error from the constructor
    # propagates as before
    my $builder = $buildertype->new ($collection,
                                     $realarchivedir, $realbuilddir, $verbosity,
                                     $maxdocs, $debug, $keepold, $allclassifications, $out);

    $builder->init();

    if ($buildertype eq "mgppbuilder" && $no_strip_html) {
        $builder->set_strip_html(0);
    }

    # run the requested build steps
    if ($mode =~ /^all$/i) {
        $builder->compress_text($textindex);
        $builder->build_indexes($indexname);
        $builder->make_infodatabase();
        $builder->collect_specific();
    } elsif ($mode =~ /^compress_text$/i) {
        $builder->compress_text($textindex);
    } elsif ($mode =~ /^build_index$/i) {
        $builder->build_indexes($indexname);
    } elsif ($mode =~ /^infodb$/i) {
        $builder->make_infodatabase();
    } else {
        die "unknown mode: $mode\n";
    }

    $builder->make_auxiliary_files() if !$debug;
    $builder->deinit();

    # when building in a cache directory, move the result into place
    if (($realbuilddir ne $builddir) && !$debug) {
        print $out "Copying back the cached build\n" if ($verbosity >= 1);
        &util::rm_r ($builddir);
        &util::cp_r ($realbuilddir, $builddir);
    }

    if ($close_out) {
        # buffered write errors only show up when the file is closed
        close (OUT) || print STDERR "WARNING: Couldn't close output file: $!\n";
    }
}
251
# create_images ($collection_name, $out)
#
# Runs the gimp title_icon.pl script to make the default collection
# icons, then rewrites collect.cfg so that its iconcollection and
# iconcollectionsmall entries point at the new images.
#
#   $collection_name  text to draw on the images
#   $out              optional filehandle (or handle name) for warnings;
#                     STDERR is used when it is not supplied
#
# Reads the package variables $collection and $configfilename, which
# main() sets before calling this.  Every failure here is reported as a
# warning only; the build itself carries on.
sub create_images {
    my ($collection_name, $out) = @_;

    # main() keeps its output handle in a lexical variable, so unless it
    # is passed in there is no handle visible here to print warnings to
    $out = \*STDERR unless defined $out;

    my $image_script = &util::filename_cat ($ENV{'GSDLHOME'}, "bin", "script", "gimp", "title_icon.pl");
    if (!-e $image_script) {
        print $out "WARNING: Image making script ($image_script) could not be found\n";
        print $out " Default images will not be generated\n\n";
        return;
    }

    my $imagedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "images");

    &util::mk_all_dir ($imagedir);

    # create the images; the list form of system passes each argument
    # as is, so quotes or $ in a collection name cannot break the command
    my @image_jobs = (
        ["-size", "1.5", "-image_dir", $imagedir,
         "-filename", "$collection.gif", "-text", $collection_name],
        ["-image_dir", $imagedir,
         "-filename", "${collection}sm.gif", "-text", $collection_name],
    );
    foreach my $job (@image_jobs) {
        if (system ($image_script, @$job) != 0) {
            print $out "WARNING: Image making script ($image_script) failed (status $?)\n";
        }
    }

    # update the collect.cfg configuration file (this will need
    # to be changed when the config file format changes)
    my $cfg_in;
    if (!open ($cfg_in, '<', $configfilename)) {
        print $out "WARNING: Couldn't open config file ($configfilename)\n";
        print $out " for updating so collection images may not be linked correctly\n";
        return;
    }

    my $icon_line = "collectionmeta iconcollection _httpprefix_/collect/$collection/images/$collection.gif\n";
    my $iconsm_line = "collectionmeta iconcollectionsmall _httpprefix_/collect/$collection/images/${collection}sm.gif\n";

    # copy the file, replacing any existing icon entries on the way
    my $file = "";
    my $found = 0; my $foundsm = 0;
    while (defined (my $line = <$cfg_in>)) {
        if ($line =~ /collectionmeta\s+iconcollection\s+/) {
            $line = $icon_line;
            $found = 1;
        } elsif ($line =~ /collectionmeta\s+iconcollectionsmall\s+/) {
            $line = $iconsm_line;
            $foundsm = 1;
        }
        $file .= $line;
    }
    close ($cfg_in);

    # entries that were not already present are added at the end
    $file .= $icon_line if !$found;
    $file .= $iconsm_line if !$foundsm;

    my $cfg_out;
    if (!open ($cfg_out, '>', $configfilename)) {
        print $out "WARNING: Couldn't open config file ($configfilename)\n";
        print $out " for updating so collection images may not be linked correctly\n";
        return;
    }

    # a failed write may only be reported by close, so check both
    my $written = print $cfg_out $file;
    my $closed = close ($cfg_out);
    if (!$written || !$closed) {
        print $out "WARNING: Couldn't write config file ($configfilename)\n";
    }
}
303
Note: See TracBrowser for help on using the repository browser.