source: trunk/gsdl/bin/script/build@ 1507

Last change on this file since 1507 was 1507, checked in by sjboddie, 24 years ago

More minor changes to the collector

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 11.4 KB
RevLine 
[1184]1#!/usr/bin/perl
2
[1198]3# This perl script may be called directly or by running build.bat on
4# windows (build.bat is in bin\windows)
[1184]5
[1454]6package build;
7
[1438]8use FileHandle;
[1454]9use File::Copy;
[1438]10
BEGIN {

    # Both environment variables must be set before anything else happens;
    # the messages point the user at the setup scripts that define them.
    foreach my $required ('GSDLHOME', 'GSDLOS') {
        next if defined $ENV{$required};
        die "$required not set - did you remember to source setup.bash (unix) or " .
            "run setup.bat (windows)?\n";
    }

    # put GSDLHOME/perllib at the front of the module search path
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");

    # turn on autoflush for both standard output streams
    STDOUT->autoflush(1);
    STDERR->autoflush(1);
}
22
23use parsargv;
24use util;
[1454]25use cfgread;
[1184]26
# ----------------------------------------------------------------------
# Script body: parse the options, work out the collection's directories,
# set up the status output handle, run main() and tidy up.
# ----------------------------------------------------------------------

# Command line options end up in package variables (see parse_args).
# NOTE(review): the code below relies on parsargv::parse leaving only the
# non-option arguments in @ARGV -- confirm against parsargv.
&parse_args (\@ARGV);

my ($collection) = @ARGV;

if (!defined $collection || $collection !~ /\w/) {
    print STDERR "You must specify a collection to build\n";
    &print_usage();
    die "\n";
}

# Options may also be read from a file (-optionfile); every configuration
# line read from it is appended to a list that is parsed like a command line.
if ($optionfile =~ /\w/) {
    # three-argument open: the file name can never be mistaken for a mode
    open (OPTIONS, '<', $optionfile) || die "Couldn't open $optionfile\n";
    my $options = [];
    my $line;
    while (defined ($line = &cfgread::read_cfg_line ('build::OPTIONS'))) {
        push (@$options, @$line);
    }
    close OPTIONS;
    &parse_args ($options);
}

# $maxdocs is turned into the literal command line fragment handed to
# import.pl and buildcol.pl (-1 means "no limit", i.e. no option at all)
if ($maxdocs == -1) {
    $maxdocs = "";
} else {
    $maxdocs = "-maxdocs $maxdocs";
}

# directories belonging to this collection
my $cdir = $collectdir;
$cdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect") unless $collectdir =~ /\w/;
my $importdir = &util::filename_cat ($cdir, $collection, "import");
my $archivedir = &util::filename_cat ($cdir, $collection, "archives");
my $buildingdir = &util::filename_cat ($cdir, $collection, "building");
my $indexdir = &util::filename_cat ($cdir, $collection, "index");
my $bindir = &util::filename_cat ($ENV{'GSDLHOME'}, "bin");

# Status output.  $out holds the NAME of the handle to print to; $outfile
# keeps the file name given to -out and $use_out records whether output
# goes to a file of our own rather than to STDERR/STDOUT.
my $use_out = 0;
my $outfile = $out;
if ($out =~ /^(STDERR|STDOUT)$/i) {
    # the test above ignores case but handle names do not, so normalise
    # e.g. "stderr" to "STDERR" before it is used as a handle
    $out = uc ($out);
} else {
    open (OUT, '>', $out) || die "Couldn't open output file $out\n";
    $out = "OUT";

    # delete any existing .final file
    &util::rm ("$outfile.final") if -e "$outfile.final";

    $use_out = 1;
}
$out->autoflush(1);

# delete any .kill file left lying around from a previously aborted build
if (-e &util::filename_cat ($cdir, $collection, ".kill")) {
    &util::rm (&util::filename_cat ($cdir, $collection, ".kill"));
}

&main();

close OUT if $use_out;
[1424]83
# Prints the command line synopsis and a description of every option to
# STDERR.  Takes no arguments.
sub print_usage {
    # default collection directory, quoted in the -collectdir description
    my $default_collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");

    my @usage = (
        "\n usage: $0 [options] collection-name\n\n",
        " options:\n",
        " -optionfile file Get options from file, useful on systems where\n",
        " long command lines may cause problems\n",
        " -append Add new files to existing collection\n",
        " -remove_archives Remove archives directory after successfully\n",
        " building the collection.\n",
        " -remove_import Remove import directory after successfully\n",
        " importing the collection.\n",
        " -buildtype build|import If 'build' attempt to build directly\n",
        " from archives directory (bypassing import\n",
        " stage). Defaults to 'import'\n",
        " -maxdocs number Maximum number of documents to build\n",
        " -download directory Directory (or file) to get import documents from.\n",
        " There may be multiple download directories and they\n",
        " may be of type http://, ftp://, or file://\n",
        " Note that any existing import directory will be\n",
        " deleted to make way for the downloaded data if\n",
        " a -download option is supplied\n",
        " -collectdir directory Collection directory (defaults to " .
            $default_collectdir . ")\n",
        " -dontinstall Only applicable if -collectdir is set to something\n",
        " other than the default. -dontinstall will suppress the\n",
        " default behaviour which is to install the collection to\n",
        " the gsdl/collect directory once it has been built.\n",
        " -save_archives Create a copy of the existing archives directory called\n",
        " archives.org\n",
        " -out Filename or handle to print output status to.\n",
        " The default is STDERR\n\n",
    );

    foreach my $usage_line (@usage) {
        print STDERR $usage_line;
    }
}
115
# Drives a complete build of $collection.  Takes no arguments; it works on
# the option variables filled in by parse_args and on the directory names
# computed at file scope.
#
# Steps, in order:
#   1. with -save_archives, copy the archives directory to archives.org
#   2. with -download, empty the import directory and copy each local
#      download source into it using filecopy.pl
#   3. run the import and/or build stage depending on which of the import
#      and archives directories hold data
#   4. if the collection was built outside GSDLHOME/collect and
#      -dontinstall was not given, move it there
#
# Fatal problems end the script with die.  When -out names a file a status
# code is written through final_out first: 0 success, 1 no import or
# archives data, 4 install target already exists, 5 install move failed.
sub main {

    if ($save_archives && -d $archivedir) {
        print $out "caching original archives to ${archivedir}.org\n";
        &util::cp_r ($archivedir, "${archivedir}.org");
    }

    # do the download thing if we have any -download options
    if (scalar (@download)) {
        # remove any existing import data
        if (&has_content ($importdir)) {
            print $out "build: WARNING: removing contents of $importdir\n";
            &util::rm_r ($importdir);
        }

        # the loop variable aliases the elements of @download, so the
        # clean-ups below are applied to the list itself
        foreach my $download_dir (@download) {

            # remove any leading or trailing whitespace from filenames (just in case)
            $download_dir =~ s/^\s+//;
            $download_dir =~ s/\s+$//;

            if ($download_dir =~ /^http:\/\//) {
                # http download: nothing is implemented for this case, so say
                # so rather than silently dropping the source
                print $out "build: WARNING: http download is not implemented, " .
                    "skipping '$download_dir'\n";

            } elsif ($download_dir =~ /^ftp:\/\//) {
                # ftp download: not implemented either
                print $out "build: WARNING: ftp download is not implemented, " .
                    "skipping '$download_dir'\n";

            } else {
                # we assume anything not beginning with http:// or ftp://
                # is a file or directory on the local file system.
                $download_dir =~ s/^file:(\/\/)?//;
                $download_dir =~ s/^\s+//; # may be whitespace between "file://" and the rest

                if (-e $download_dir) {
                    # copy download_dir and all it contains to the import directory
                    my $download_cmd = "perl " . &util::filename_cat ($bindir, "script", "filecopy.pl");
                    $download_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
                    $download_cmd .= " -out \"$outfile.download\"" if $use_out;
                    $download_cmd .= " \"" . $download_dir . "\" " . $collection;
                    system ($download_cmd);
                    # if using output directory append the file download output to it
                    &append_file ($out, "$outfile.download");
                } else {
                    print $out "WARNING: '$download_dir' does not exist\n";
                }
            }
        }
    }

    if (-e &util::filename_cat ($archivedir, "archives.inf")) {
        if (&has_content ($importdir)) {
            if ($buildtype eq "build") {
                &gsdl_build();
            } else {
                &gsdl_import();
                &gsdl_build();
            }
        } else {
            # there are archives but no import, build directly from archives
            print $out "build: no import material was found, building directly\n";
            print $out " from archives\n";
            &gsdl_build();
        }
    } else {
        if (&has_content ($importdir)) {
            if ($buildtype eq "build") {
                # explain why the request cannot be honoured, then fall
                # through to a normal import followed by a build
                print $out "build: can't build directly from archives as no\n";
                print $out " imported archives exist (did you forget to\n";
                print $out " move the contents of $collection/import to\n";
                print $out " $collection/archives?)\n";
            }
            &gsdl_import();
            &gsdl_build();
        } else {
            # no import or archives
            print $out "build: ERROR: The $collection collection has no import or archives data.\n";
            &final_out (1) if $use_out;
            die "\n";
        }
    }

    if ($collectdir ne "" && !$dontinstall) {
        my $install_collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");
        if (!&util::filenames_equal ($collectdir, $install_collectdir)) {

            # install collection to gsdl/collect
            print $out "installing the $collection collection\n";
            my $newdir = &util::filename_cat ($install_collectdir, $collection);
            my $olddir = &util::filename_cat ($collectdir, $collection);
            if (-d $newdir) {
                print $out "build: Could not install collection as $newdir\n";
                print $out " already exists. Collection will remain at\n";
                print $out " $olddir\n";
                &final_out (4) if $use_out;
                die "\n";
            }
            if (!&File::Copy::move ($olddir, $newdir)) {
                print $out "build: Failed to install collection to $newdir\n";
                print $out " Collection will remain at $olddir\n";
                &final_out (5) if $use_out;
                die "\n";
            }
        }
    }

    &final_out (0) if $use_out;
}
223
# Runs import.pl for $collection and checks the outcome.  Takes no
# arguments.
#
# The import counts as successful when archives.inf exists in the archives
# directory afterwards; with -remove_import the import directory is then
# deleted.  On failure the message is printed, status code 2 is written
# through final_out (only when -out names a file) and the script dies.
sub gsdl_import {

    print $out "importing the $collection collection\n\n";

    my $import_cmd = "perl " . &util::filename_cat ($bindir, "script", "import.pl");
    $import_cmd .= " -out \"$outfile.import\"" if $use_out;
    $import_cmd .= " -removeold" unless $append;
    $import_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
    $import_cmd .= " $maxdocs $collection";
    system ($import_cmd);
    # if using output directory append the import output to it
    &append_file ($out, "$outfile.import");

    if (!-e &util::filename_cat ($archivedir, "archives.inf")) {
        # log the failure BEFORE writing the .final status file, as main
        # does, so the message is already in the output when .final appears
        print $out "\nimport.pl failed\n";
        &final_out (2) if $use_out;
        die "\n";
    }

    print $out "$collection collection imported successfully\n\n";
    if ($remove_import) {
        print $out "removing import directory ($importdir)\n";
        &util::rm_r ($importdir);
    }
}
249
# Runs buildcol.pl for $collection, checks the outcome and swaps the new
# indexes into place.  Takes no arguments.
#
# The build counts as successful when text/$collection.ldb or
# text/$collection.bdb exists in the building directory afterwards.  On
# failure the message is printed, status code 3 is written through
# final_out (only when -out names a file) and the script dies.  On success
# the old index directory is replaced by the building directory, and the
# archives.org copy made for -save_archives is removed.
sub gsdl_build {

    print $out "building the $collection collection\n\n";

    my $build_cmd = "perl " . &util::filename_cat ($bindir, "script", "buildcol.pl");
    $build_cmd .= " -out \"$outfile.build\"" if $use_out;
    $build_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
    $build_cmd .= " $maxdocs $collection";
    system ($build_cmd);
    # if using output directory append the buildcol output to it
    &append_file ($out, "$outfile.build");

    my $built = -e &util::filename_cat ($buildingdir, "text", "$collection.ldb") ||
        -e &util::filename_cat ($buildingdir, "text", "$collection.bdb");

    if (!$built) {
        # log the failure BEFORE writing the .final status file, as main
        # does, so the message is already in the output when .final appears
        print $out "\nbuildcol.pl failed\n";
        &final_out (3) if $use_out;
        die "\n";
    }

    print $out "$collection collection built successfully\n\n";
    if ($remove_archives) {
        print $out "removing archives directory ($archivedir)\n";
        &util::rm_r ($archivedir);
    }

    # replace old indexes with new ones
    if (&has_content ($indexdir)) {
        print $out "removing old indexes\n";
        &util::rm_r ($indexdir);
    }
    rmdir ($indexdir) if -d $indexdir;
    if (!&File::Copy::move ($buildingdir, $indexdir)) {
        # previously ignored; the build carries on as before but the
        # problem is now visible in the output
        print $out "build: WARNING: could not move $buildingdir to $indexdir: $!\n";
    }

    # remove the cached archives
    if ($save_archives && -d "${archivedir}.org") {
        &util::rm_r ("${archivedir}.org");
    }
}
288
# Returns 1 if $dir is a directory holding at least one entry other than
# "." and "..", and 0 otherwise (including when $dir is undefined, is not
# a directory or cannot be opened).
sub has_content {
    my ($dir) = @_;

    return 0 unless defined $dir && -d $dir;

    opendir (my $dh, $dir) || return 0;

    # stop at the first real entry instead of reading the whole directory
    my $found = 0;
    while (defined (my $entry = readdir ($dh))) {
        next if $entry eq '.' || $entry eq '..';
        $found = 1;
        last;
    }

    # a handle from opendir must be released with closedir, not close
    closedir ($dh);

    return $found;
}
[1431]305
# Copies the whole of $file to $handle and then deletes $file with
# util::rm.  Returns quietly, doing nothing, when $file cannot be opened.
#   $handle - filehandle, or the name of one, to print to
#   $file   - name of the file to append and then remove
sub append_file {
    my ($handle, $file) = @_;

    open (my $in, '<', $file) || return;
    {
        # slurp mode for this read only; "local" puts the caller's record
        # separator back instead of forcing it to "\n"
        local $/;
        print {$handle} <$in>;
    }
    close ($in);
    &util::rm ($file);
}
[1438]316
# Creates a file called $outfile.final and writes $exit_code to it.
# Should only be called if the -out option is used and isn't STDERR or
# STDOUT (callers guard it with $use_out).  An output code of 0 specifies
# that there was no error.  If the file cannot be opened nothing is written.
sub final_out {
    my ($exit_code) = @_;

    # three-argument open: characters in the name are never read as a mode
    my $final_file = "$outfile.final";
    if (open (my $final, '>', $final_file)) {
        print {$final} $exit_code;
        close ($final);
    }
}
[1454]328
# Hands the argument list referenced by $argref to parsargv::parse together
# with the option specifications below; each specification is followed by
# a reference to the package variable that receives the value.  If
# parsargv::parse returns false the usage text is printed and the script
# dies.
sub parse_args {
    my ($argref) = @_;

    my @option_spec = (
        'optionfile/.*/',                    \$optionfile,
        'append',                            \$append,
        'remove_archives',                   \$remove_archives,
        'remove_import',                     \$remove_import,
        'buildtype/^(build|import)$/import', \$buildtype,
        'maxdocs/^\-?\d+/-1',                \$maxdocs,
        'download/.+',                       \@download,
        'collectdir/.*/',                    \$collectdir,
        'dontinstall',                       \$dontinstall,
        'save_archives',                     \$save_archives,
        'out/.*/STDERR',                     \$out,
    );

    my $parsed_ok = parsargv::parse ($argref, @option_spec);
    unless ($parsed_ok) {
        &print_usage();
        die "\n";
    }
}
Note: See TracBrowser for help on using the repository browser.