source: trunk/gsdl/bin/script/build@ 1678

Last change on this file since 1678 was 1678, checked in by sjboddie, 23 years ago

Re-added some recent changes that got lost when the cvs repository was
moved. This was mostly changes to the collector and building code

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 11.8 KB
Line 
1#!/usr/bin/perl
2
3# This perl script may be called directly or by running build.bat on
4# windows (build.bat is in bin\windows)
5
6package build;
7
8use FileHandle;
9use File::Copy;
10
11BEGIN {
12
13 die "GSDLHOME not set - did you remember to source setup.bash (unix) or " .
14 "run setup.bat (windows)?\n" unless defined $ENV{'GSDLHOME'};
15 die "GSDLOS not set - did you remember to source setup.bash (unix) or " .
16 "run setup.bat (windows)?\n" unless defined $ENV{'GSDLOS'};
17 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
18
19 STDOUT->autoflush(1);
20 STDERR->autoflush(1);
21}
22
23use parsargv;
24use util;
25use cfgread;
26
27# set up path - this allows for paths not to be supplied to system calls
28# and overcomes problems when GSDLHOME contains spaces (double quoting
29# the call doesn't work on win2k and probably other variants of winnt)
30my $path_separator = ":";
31$path_separator = ";" if $ENV{'GSDLOS'} =~ /^windows$/;
32$ENV{'PATH'} = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}) .
33 $path_separator . &util::filename_cat($ENV{'GSDLHOME'}, "bin", "script") .
34 $path_separator . $ENV{'PATH'};
35
36&parse_args (\@ARGV);
37
38my ($collection) = @ARGV;
39
40if (!defined $collection || $collection !~ /\w/) {
41 print STDERR "You must specify a collection to build\n";
42 &print_usage();
43 die "\n";
44}
45
46if ($optionfile =~ /\w/) {
47 open (OPTIONS, $optionfile) || die "Couldn't open $optionfile\n";
48 my $line = [];
49 my $options = [];
50 while (defined ($line = &cfgread::read_cfg_line ('build::OPTIONS'))) {
51 push (@$options, @$line);
52 }
53 close OPTIONS;
54 &parse_args ($options);
55}
56
57if ($maxdocs == -1) {
58 $maxdocs = "";
59} else {
60 $maxdocs = "-maxdocs $maxdocs";
61}
62
63my $cdir = $collectdir;
64$cdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect") unless $collectdir =~ /\w/;
65my $importdir = &util::filename_cat ($cdir, $collection, "import");
66my $archivedir = &util::filename_cat ($cdir, $collection, "archives");
67my $buildingdir = &util::filename_cat ($cdir, $collection, "building");
68my $indexdir = &util::filename_cat ($cdir, $collection, "index");
69my $bindir = &util::filename_cat ($ENV{'GSDLHOME'}, "bin");
70
71my $use_out = 0;
72my $outfile = $out;
73if ($out !~ /^(STDERR|STDOUT)$/i) {
74 open (OUT, ">$out") || die "Couldn't open output file $out\n";
75 $out = "OUT";
76
77 # delete any existing .final file
78 &util::rm ("$outfile.final") if -e "$outfile.final";
79
80 $use_out = 1;
81}
82$out->autoflush(1);
83
84# delete any .kill file left laying around from a previously aborted build
85if (-e &util::filename_cat ($cdir, $collection, ".kill")) {
86 &util::rm (&util::filename_cat ($cdir, $collection, ".kill"));
87}
88
89&main();
90
91close OUT if $use_out;
92
93sub print_usage {
94 print STDERR "\n usage: $0 [options] collection-name\n\n";
95 print STDERR " options:\n";
96 print STDERR " -optionfile file Get options from file, useful on systems where\n";
97 print STDERR " long command lines may cause problems\n";
98 print STDERR " -append Add new files to existing collection\n";
99 print STDERR " -remove_archives Remove archives directory after successfully\n";
100 print STDERR " building the collection.\n";
101 print STDERR " -remove_import Remove import directory after successfully\n";
102 print STDERR " importing the collection.\n";
103 print STDERR " -buildtype build|import If 'build' attempt to build directly\n";
104 print STDERR " from archives directory (bypassing import\n";
105 print STDERR " stage). Defaults to 'import'\n";
106 print STDERR " -maxdocs number Maximum number of documents to build\n";
107 print STDERR " -download directory Directory (or file) to get import documents from.\n";
108 print STDERR " There may be multiple download directories and they\n";
109 print STDERR " may be of type http://, ftp://, or file://\n";
110 print STDERR " Note that any existing import directory will be\n";
111 print STDERR " deleted to make way for the downloaded data if\n";
112 print STDERR " a -download option is supplied\n";
113 print STDERR " -collectdir directory Collection directory (defaults to " .
114 &util::filename_cat ($ENV{'GSDLHOME'}, "collect") . ")\n";
115 print STDERR " -dontinstall Only applicable if -collectdir is set to something\n";
116 print STDERR " other than the default. -dontinstall will suppress the\n";
117 print STDERR " default behaviour which is to install the collection to\n";
118 print STDERR " the gsdl/collect directory once it has been built.\n";
119 print STDERR " -save_archives Create a copy of the existing archives directory called\n";
120 print STDERR " archives.org\n";
121 print STDERR " -out Filename or handle to print output status to.\n";
122 print STDERR " The default is STDERR\n\n";
123}
124
125sub main {
126
127 if ($save_archives && -d $archivedir) {
128 print $out "caching original archives to ${archivedir}.org\n";
129 &util::cp_r ($archivedir, "${archivedir}.org");
130 }
131
132 # do the download thing if we have any -download options
133 if (scalar (@download)) {
134 # remove any existing import data
135 if (&has_content ($importdir)) {
136 print $out "build: WARNING: removing contents of $importdir\n";
137 &util::rm_r ($importdir);
138 }
139
140 foreach $download_dir (@download) {
141
142 # remove any leading or trailing whitespace from filenames (just in case)
143 $download_dir =~ s/^\s+//;
144 $download_dir =~ s/\s+$//;
145
146 if ($download_dir =~ /^http:\/\//) {
147 # http download
148
149 } elsif ($download_dir =~ /^ftp:\/\//) {
150 # ftp download
151
152 } else {
153 # we assume anything not beginning with http:// or ftp://
154 # is a file or directory on the local file system.
155 $download_dir =~ s/^file:(\/\/)?//;
156 $download_dir =~ s/^\s+//; # may be whitespace between "file://" and the rest
157
158 if (-e $download_dir) {
159 # copy download_dir and all it contains to the import directory
160 my $download_cmd = "perl -S filecopy.pl";
161 $download_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
162 $download_cmd .= " -out \"$outfile.download\"" if $use_out;
163 $download_cmd .= " \"" . $download_dir . "\" " . $collection;
164 system ($download_cmd);
165 # if using output directory append the file download output to it
166 &append_file ($out, "$outfile.download");
167 } else {
168 print $out "WARNING: '$download_dir' does not exist\n";
169 }
170 }
171 }
172 }
173
174 if (-e &util::filename_cat ($archivedir, "archives.inf")) {
175 if (&has_content ($importdir)) {
176 if ($buildtype eq "build") {
177 &gsdl_build();
178 } else {
179 &gsdl_import();
180 &gsdl_build();
181 }
182 } else {
183 # there are archives but no import, build directly from archives
184 print $out "build: no import material was found, building directly\n";
185 print $out " from archives\n";
186 &gsdl_build();
187 }
188 } else {
189 if (&has_content ($importdir)) {
190 if ($buildtype eq "build") {
191 print $out "build: can't build directly from archives as no\n";
192 print $out " imported archives exist (did you forget to\n";
193 print $out " move the contents of $collection/import to\n";
194 print $out " collection/archives?)\n";
195 }
196 &gsdl_import();
197 &gsdl_build();
198 } else {
199 # no import or archives
200 print $out "build: ERROR: The $collection collection has no import or archives data.\n";
201 &final_out (1) if $use_out;
202 die "\n";
203 }
204 }
205
206 if ($collectdir ne "" && !$dontinstall) {
207 my $install_collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");
208 if (!&util::filenames_equal ($collectdir, $install_collectdir)) {
209
210 # install collection to gsdl/collect
211 print $out "installing the $collection collection\n";
212 my $newdir = &util::filename_cat ($install_collectdir, $collection);
213 my $olddir = &util::filename_cat ($collectdir, $collection);
214 if (-d $newdir) {
215 print $out "build: Could not install collection as $newdir\n";
216 print $out " already exists. Collection will remain at\n";
217 print $out " $olddir\n";
218 &final_out (4) if $use_out;
219 die "\n";
220 }
221 if (!&File::Copy::move ($olddir, $newdir)) {
222 print $out "build: Failed to install collection to $newdir\n";
223 print $out " Collection will remain at $olddir\n";
224 &final_out (5) if $use_out;
225 die "\n";
226 }
227 }
228 }
229
230 &final_out (0) if $use_out;
231}
232
233sub gsdl_import {
234
235 print $out "importing the $collection collection\n\n";
236
237 my $import_cmd = "perl -S import.pl";
238 $import_cmd .= " -out \"$outfile.import\"" if $use_out;
239 $import_cmd .= " -removeold" unless $append;
240 $import_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
241 $import_cmd .= " $maxdocs $collection";
242 system ($import_cmd);
243 # if using output directory append the import output to it
244 &append_file ($out, "$outfile.import");
245
246 if (-e &util::filename_cat ($archivedir, "archives.inf")) {
247 print $out "$collection collection imported successfully\n\n";
248 if ($remove_import) {
249 print $out "removing import directory ($importdir)\n";
250 &util::rm_r ($importdir);
251 }
252 } else {
253 &final_out (2) if $use_out;
254 print $out "\nimport.pl failed\n";
255 die "\n";
256 }
257}
258
259sub gsdl_build {
260
261 print $out "building the $collection collection\n\n";
262
263 my $build_cmd = "perl -S buildcol.pl";
264 $build_cmd .= " -out \"$outfile.build\"" if $use_out;
265 $build_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
266 $build_cmd .= " $maxdocs $collection";
267 system ($build_cmd);
268 # if using output directory append the buildcol output to it
269 &append_file ($out, "$outfile.build");
270
271 if (-e &util::filename_cat ($buildingdir, "text", "$collection.ldb") ||
272 -e &util::filename_cat ($buildingdir, "text", "$collection.bdb")) {
273 print $out "$collection collection built successfully\n\n";
274 if ($remove_archives) {
275 print $out "removing archives directory ($archivedir)\n";
276 &util::rm_r ($archivedir);
277 }
278 } else {
279 &final_out (3) if $use_out;
280 print $out "\nbuildcol.pl failed\n";
281 die "\n";
282 }
283
284 # replace old indexes with new ones
285 if (&has_content ($indexdir)) {
286 print $out "removing old indexes\n";
287 &util::rm_r ($indexdir);
288 }
289 rmdir ($indexdir) if -d $indexdir;
290 &File::Copy::move ($buildingdir, $indexdir);
291
292 # remove the cached arhives
293 if ($save_archives && -d "${archivedir}.org") {
294 &util::rm_r ("${archivedir}.org");
295 }
296}
297
298sub has_content {
299 my ($dir) = @_;
300
301 if (!-d $dir) {return 0;}
302
303 opendir (DIR, $dir) || return 0;
304 my @files = readdir DIR;
305 close DIR;
306
307 foreach my $file (@files) {
308 if ($file !~ /^\.{1,2}$/) {
309 return 1;
310 }
311 }
312 return 0;
313}
314
315sub append_file {
316 my ($handle, $file) = @_;
317
318 open (FILE, $file) || return;
319 undef $/;
320 print $handle <FILE>;
321 $/ = "\n";
322 close FILE;
323 &util::rm ($file);
324}
325
326# creates a file called $outfile.final (should only be called if -out option
327# is used and isn't STDERR or STDOUT) and writes an output code to it.
328# An output code of 0 specifies that there was no error
329sub final_out {
330 my ($exit_code) = @_;
331
332 if (open (FINAL, ">$outfile.final")) {
333 print FINAL $exit_code;
334 close FINAL;
335 }
336}
337
338sub parse_args {
339 my ($argref) = @_;
340
341 if (!parsargv::parse($argref,
342 'optionfile/.*/', \$optionfile,
343 'append', \$append,
344 'remove_archives', \$remove_archives,
345 'remove_import', \$remove_import,
346 'buildtype/^(build|import)$/import', \$buildtype,
347 'maxdocs/^\-?\d+/-1', \$maxdocs,
348 'download/.+', \@download,
349 'collectdir/.*/', \$collectdir,
350 'dontinstall', \$dontinstall,
351 'save_archives', \$save_archives,
352 'out/.*/STDERR', \$out)) {
353
354 &print_usage();
355 die "\n";
356 }
357}
Note: See TracBrowser for help on using the repository browser.