source: trunk/gsdl/bin/script/build@ 1507

Last change on this file since 1507 was 1507, checked in by sjboddie, 24 years ago

More minor changes to the collector

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 11.4 KB
Line 
1#!/usr/bin/perl
2
3# This perl script may be called directly or by running build.bat on
4# windows (build.bat is in bin\windows)
5
6package build;
7
8use FileHandle;
9use File::Copy;
10
BEGIN {

    # Refuse to start unless both environment variables are defined; the
    # message points the user at the setup scripts that define them.
    foreach my $required ('GSDLHOME', 'GSDLOS') {
        next if defined $ENV{$required};
        die "$required not set - did you remember to source setup.bash (unix) or " .
            "run setup.bat (windows)?\n";
    }

    # Make the modules under $GSDLHOME/perllib visible to the "use"
    # statements that follow this block.
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");

    # Unbuffered output on both standard streams.
    STDOUT->autoflush(1);
    STDERR->autoflush(1);
}
22
23use parsargv;
24use util;
25use cfgread;
26
# Parse the command line.  parse_args() prints the usage text and dies if
# the options cannot be parsed.
&parse_args (\@ARGV);

# The first remaining argument is taken as the collection name
# (presumably parsargv has removed the options from @ARGV - confirm).
my ($collection) = @ARGV;

if (!defined $collection || $collection !~ /\w/) {
    print STDERR "You must specify a collection to build\n";
    &print_usage();
    die "\n";
}

# Additional options may be supplied in a file (-optionfile); they are read
# with cfgread and then run through the same option parser.
if ($optionfile =~ /\w/) {
    # Three-argument open: the file name comes from the command line, so it
    # must never be interpreted as an open mode or a pipe specification.
    open (OPTIONS, '<', $optionfile) || die "Couldn't open $optionfile\n";
    my $options = [];
    my $line;
    # read_cfg_line is handed the handle by name, so OPTIONS has to stay a
    # package (bareword) handle.
    while (defined ($line = &cfgread::read_cfg_line ('build::OPTIONS'))) {
        push (@$options, @$line);
    }
    close OPTIONS;
    &parse_args ($options);
}
47
# From here on $maxdocs holds a ready-made command line fragment: empty when
# no limit was requested (-1), otherwise "-maxdocs N".
$maxdocs = ($maxdocs == -1) ? "" : "-maxdocs $maxdocs";

# Directory holding the collections: the -collectdir value when one was
# given, otherwise $GSDLHOME/collect.
my $cdir = ($collectdir =~ /\w/)
    ? $collectdir
    : &util::filename_cat ($ENV{'GSDLHOME'}, "collect");

# Working directories of the collection being built.
my ($importdir, $archivedir, $buildingdir, $indexdir) =
    map { scalar &util::filename_cat ($cdir, $collection, $_) }
    qw(import archives building index);

my $bindir = &util::filename_cat ($ENV{'GSDLHOME'}, "bin");
61
# Work out where status output goes.  $out ends up holding the NAME of the
# handle to print to; $outfile keeps the value given on the command line and
# is used as the prefix for the .final/.download/.import/.build files.
my $use_out = 0;
my $outfile = $out;
if ($out =~ /^(STDERR|STDOUT)$/i) {
    # The match is case-insensitive but handle names are not: inside a
    # package other than main, "stderr"/"stdout" do not refer to the
    # standard streams.  Normalise so that "-out stderr" works as well.
    $out = uc ($out);
} else {
    # Three-argument open so the user-supplied file name can never be
    # interpreted as a mode or a pipe.
    open (OUT, '>', $out) || die "Couldn't open output file $out\n";
    $out = "OUT";

    # delete any existing .final file
    &util::rm ("$outfile.final") if -e "$outfile.final";

    $use_out = 1;
}
$out->autoflush(1);
74
# A ".kill" file in the collection directory may be left over from a
# previously aborted build; remove it before starting.
my $kill_file = &util::filename_cat ($cdir, $collection, ".kill");
&util::rm ($kill_file) if -e $kill_file;

&main();

# OUT is only open when -out named a file rather than STDERR/STDOUT.
if ($use_out) {
    close OUT;
}
83
# Prints the command line synopsis and the description of every option to
# STDERR.  The default collection directory shown for -collectdir is
# $GSDLHOME/collect.
sub print_usage {
    my $default_collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");

    my @usage = (
        "\n usage: $0 [options] collection-name\n\n",
        " options:\n",
        " -optionfile file Get options from file, useful on systems where\n",
        " long command lines may cause problems\n",
        " -append Add new files to existing collection\n",
        " -remove_archives Remove archives directory after successfully\n",
        " building the collection.\n",
        " -remove_import Remove import directory after successfully\n",
        " importing the collection.\n",
        " -buildtype build|import If 'build' attempt to build directly\n",
        " from archives directory (bypassing import\n",
        " stage). Defaults to 'import'\n",
        " -maxdocs number Maximum number of documents to build\n",
        " -download directory Directory (or file) to get import documents from.\n",
        " There may be multiple download directories and they\n",
        " may be of type http://, ftp://, or file://\n",
        " Note that any existing import directory will be\n",
        " deleted to make way for the downloaded data if\n",
        " a -download option is supplied\n",
        " -collectdir directory Collection directory (defaults to " .
            $default_collectdir . ")\n",
        " -dontinstall Only applicable if -collectdir is set to something\n",
        " other than the default. -dontinstall will suppress the\n",
        " default behaviour which is to install the collection to\n",
        " the gsdl/collect directory once it has been built.\n",
        " -save_archives Create a copy of the existing archives directory called\n",
        " archives.org\n",
        " -out Filename or handle to print output status to.\n",
        " The default is STDERR\n\n",
    );

    print STDERR @usage;
}
115
# Drives the whole run: optionally caches the archives, copies any
# -download sources into the collection, runs the import and/or build
# stages depending on what material exists, and finally installs the
# collection when it was built outside $GSDLHOME/collect.  Fatal problems
# are reported on $out, recorded through final_out() when an output file is
# in use, and end the script with die.
sub main {

    # keep a copy of the current archives when -save_archives was given
    if ($save_archives && -d $archivedir) {
        my $archive_cache = "${archivedir}.org";
        print $out "caching original archives to $archive_cache\n";
        &util::cp_r ($archivedir, $archive_cache);
    }

    # do the download thing if we have any -download options
    if (@download) {
        # remove any existing import data
        if (&has_content ($importdir)) {
            print $out "build: WARNING: removing contents of $importdir\n";
            &util::rm_r ($importdir);
        }

        SOURCE: foreach my $src (@download) {

            # remove any leading or trailing whitespace from filenames (just in case)
            $src =~ s/^\s+//;
            $src =~ s/\s+$//;

            # http and ftp sources are recognised but nothing is done for them
            next SOURCE if $src =~ /^http:\/\//;
            next SOURCE if $src =~ /^ftp:\/\//;

            # anything else is assumed to be a file or directory on the
            # local file system, optionally written as file://...
            $src =~ s/^file:(\/\/)?//;
            $src =~ s/^\s+//; # may be whitespace between "file://" and the rest

            unless (-e $src) {
                print $out "WARNING: '$src' does not exist\n";
                next SOURCE;
            }

            # copy the source and all it contains to the import directory
            my $copy_script = &util::filename_cat ($bindir, "script", "filecopy.pl");
            my $download_cmd = "perl " . $copy_script;
            if ($collectdir =~ /\w/) {
                $download_cmd .= " -collectdir \"$collectdir\"";
            }
            if ($use_out) {
                $download_cmd .= " -out \"$outfile.download\"";
            }
            $download_cmd .= " \"" . $src . "\" " . $collection;
            system ($download_cmd);

            # if using output directory append the file download output to it
            &append_file ($out, "$outfile.download");
        }
    }

    # decide which stages to run from what is present on disk
    my $have_archives = -e &util::filename_cat ($archivedir, "archives.inf");
    my $have_import = &has_content ($importdir);

    if (!$have_archives && !$have_import) {
        # no import or archives
        print $out "build: ERROR: The $collection collection has no import or archives data.\n";
        &final_out (1) if $use_out;
        die "\n";
    }

    if ($have_import) {
        if ($buildtype eq "build") {
            # building straight from archives is only possible if they exist;
            # otherwise say so and import first
            if (!$have_archives) {
                print $out "build: can't build directly from archives as no\n";
                print $out " imported archives exist (did you forget to\n";
                print $out " move the contents of $collection/import to\n";
                print $out " collection/archives?)\n";
                &gsdl_import();
            }
        } else {
            &gsdl_import();
        }
        &gsdl_build();
    } else {
        # there are archives but no import, build directly from archives
        print $out "build: no import material was found, building directly\n";
        print $out " from archives\n";
        &gsdl_build();
    }

    # install to gsdl/collect when built elsewhere, unless -dontinstall
    unless ($collectdir eq "" || $dontinstall) {
        my $install_collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");
        unless (&util::filenames_equal ($collectdir, $install_collectdir)) {

            print $out "installing the $collection collection\n";
            my $destination = &util::filename_cat ($install_collectdir, $collection);
            my $built_at = &util::filename_cat ($collectdir, $collection);

            if (-d $destination) {
                print $out "build: Could not install collection as $destination\n";
                print $out " already exists. Collection will remain at\n";
                print $out " $built_at\n";
                &final_out (4) if $use_out;
                die "\n";
            }

            unless (&File::Copy::move ($built_at, $destination)) {
                print $out "build: Failed to install collection to $destination\n";
                print $out " Collection will remain at $built_at\n";
                &final_out (5) if $use_out;
                die "\n";
            }
        }
    }

    &final_out (0) if $use_out;
}
223
# Runs import.pl for the collection and checks that it produced
# archives.inf in the archives directory.  On failure the code 2 is recorded
# through final_out() (when an output file is in use) and the script dies.
# On success the import directory is removed if -remove_import was given.
sub gsdl_import {

    print $out "importing the $collection collection\n\n";

    # assemble the command line piece by piece
    my $script = &util::filename_cat ($bindir, "script", "import.pl");
    my @pieces = ("perl ", $script);
    push (@pieces, " -out \"$outfile.import\"") if $use_out;
    push (@pieces, " -removeold") unless $append;
    push (@pieces, " -collectdir \"$collectdir\"") if $collectdir =~ /\w/;
    push (@pieces, " $maxdocs $collection");
    system (join ("", @pieces));

    # if using output directory append the import output to it
    &append_file ($out, "$outfile.import");

    my $archives_inf = &util::filename_cat ($archivedir, "archives.inf");
    unless (-e $archives_inf) {
        &final_out (2) if $use_out;
        print $out "\nimport.pl failed\n";
        die "\n";
    }

    print $out "$collection collection imported successfully\n\n";
    if ($remove_import) {
        print $out "removing import directory ($importdir)\n";
        &util::rm_r ($importdir);
    }
}
249
# Runs buildcol.pl for the collection, checks that a text database
# ($collection.ldb or $collection.bdb) appeared in the building directory,
# and then replaces the index directory with the freshly built one.
# On failure of buildcol.pl the code 3 is recorded through final_out() (when
# an output file is in use) and the script dies.
sub gsdl_build {

    print $out "building the $collection collection\n\n";

    my $build_cmd = "perl " . &util::filename_cat ($bindir, "script", "buildcol.pl");
    $build_cmd .= " -out \"$outfile.build\"" if $use_out;
    $build_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
    $build_cmd .= " $maxdocs $collection";
    system ($build_cmd);
    # if using output directory append the buildcol output to it
    &append_file ($out, "$outfile.build");

    if (-e &util::filename_cat ($buildingdir, "text", "$collection.ldb") ||
        -e &util::filename_cat ($buildingdir, "text", "$collection.bdb")) {
        print $out "$collection collection built successfully\n\n";
        if ($remove_archives) {
            print $out "removing archives directory ($archivedir)\n";
            &util::rm_r ($archivedir);
        }
    } else {
        &final_out (3) if $use_out;
        print $out "\nbuildcol.pl failed\n";
        die "\n";
    }

    # replace old indexes with new ones
    if (&has_content ($indexdir)) {
        print $out "removing old indexes\n";
        &util::rm_r ($indexdir);
    }
    rmdir ($indexdir) if -d $indexdir;
    # Report a failed move instead of silently ignoring it.  The run is not
    # aborted, so the codes written by final_out() keep their meaning.
    if (!&File::Copy::move ($buildingdir, $indexdir)) {
        print $out "build: WARNING: failed to move $buildingdir to $indexdir: $!\n";
    }

    # remove the cached archives
    if ($save_archives && -d "${archivedir}.org") {
        &util::rm_r ("${archivedir}.org");
    }
}
288
# Returns 1 if $dir is a directory containing at least one entry other than
# "." and "..", and 0 otherwise (including when $dir is not a directory or
# cannot be opened).
sub has_content {
    my ($dir) = @_;

    return 0 unless defined $dir && -d $dir;

    opendir (my $dh, $dir) or return 0;

    # stop at the first real entry instead of reading the whole directory
    my $found = 0;
    while (defined (my $entry = readdir ($dh))) {
        next if $entry eq '.' || $entry eq '..';
        $found = 1;
        last;
    }

    # A handle opened with opendir must be released with closedir; close()
    # would leave the directory open.
    closedir ($dh);

    return $found;
}
305
# Copies the whole content of $file to $handle and then deletes $file with
# util::rm.  Does nothing at all if $file cannot be opened.
#   $handle - handle (or handle name) to print to
#   $file   - path of the file to append and remove
sub append_file {
    my ($handle, $file) = @_;

    # lexical handle and three-argument open: the name is never interpreted
    # as a mode, and no package-wide FILE handle is clobbered
    open (my $in, '<', $file) or return;
    {
        # slurp mode for this read only; the caller's record separator is
        # restored automatically when the block is left
        local $/;
        print {$handle} <$in>;
    }
    close ($in);
    &util::rm ($file);
}
316
# Writes the given output code to the file "$outfile.final".  Callers only
# invoke this when -out names a real file (not STDERR or STDOUT).  A code of
# 0 means the run finished without error.  If the file cannot be created
# nothing is written and no error is raised.
sub final_out {
    my ($status) = @_;

    my $final_file = "$outfile.final";
    if (open (FINAL, ">$final_file")) {
        print FINAL $status;
        close FINAL;
    }
}
328
# Hands the argument list referenced by $args to parsargv::parse together
# with the option specifications of this script; the parsed values land in
# the package variables named below.  Prints the usage text and dies if
# parsing fails.
sub parse_args {
    my ($args) = @_;

    # option specification => variable receiving the value
    my @spec = (
        'optionfile/.*/'                    => \$optionfile,
        'append'                            => \$append,
        'remove_archives'                   => \$remove_archives,
        'remove_import'                     => \$remove_import,
        'buildtype/^(build|import)$/import' => \$buildtype,
        'maxdocs/^\-?\d+/-1'                => \$maxdocs,
        'download/.+'                       => \@download,
        'collectdir/.*/'                    => \$collectdir,
        'dontinstall'                       => \$dontinstall,
        'save_archives'                     => \$save_archives,
        'out/.*/STDERR'                     => \$out,
    );

    my $parsed = parsargv::parse ($args, @spec);
    if (!$parsed) {
        &print_usage();
        die "\n";
    }
}
Note: See TracBrowser for help on using the repository browser.