source: trunk/gsdl/bin/script/build@ 1485

Last change on this file since 1485 was 1485, checked in by sjboddie, 24 years ago

More improvements to collector

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 11.3 KB
Line 
1#!/usr/bin/perl
2
3# This perl script may be called directly or by running build.bat on
4# windows (build.bat is in bin\windows)
5
6package build;
7
8use FileHandle;
9use File::Copy;
10
# Compile-time setup: abort unless the GSDL environment variables are
# present, put $GSDLHOME/perllib at the front of the module search path
# and switch both standard output streams to unbuffered mode.
BEGIN {

    # checked in this order; the first missing variable aborts the script
    foreach my $required ('GSDLHOME', 'GSDLOS') {
        next if defined $ENV{$required};
        die "$required not set - did you remember to source setup.bash (unix) or " .
            "run setup.bat (windows)?\n";
    }

    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");

    $_->autoflush(1) foreach (\*STDOUT, \*STDERR);
}
22
23use parsargv;
24use util;
25use cfgread;
26
# ---------------------------------------------------------------------
# Script body: read the options, work out the directories used by the
# subroutines below, set up the output handle and run main().
#
# The "my" variables declared here ($collection, $cdir, $importdir,
# $archivedir, $buildingdir, $indexdir, $bindir, $use_out, $outfile) are
# file-scoped and are read by the subroutines defined further down.
# ---------------------------------------------------------------------

&parse_args (\@ARGV);

# the first remaining argument is taken to be the collection name
# (presumably parsargv removes the options it recognised - confirm)
my ($collection) = @ARGV;

if (!defined $collection || $collection !~ /\w/) {
    print STDERR "You must specify a collection to build\n";
    &print_usage();
    die "\n";
}

# options may also come from a file; its contents are handed to
# parse_args in a second call
if ($optionfile =~ /\w/) {
    # three-argument open: the file name is never interpreted as a mode
    open (OPTIONS, '<', $optionfile) || die "Couldn't open $optionfile\n";
    my @file_options = ();
    my $fields;
    # read_cfg_line is given the handle by its package-qualified name, so
    # OPTIONS has to remain a bareword handle in package build
    while (defined ($fields = &cfgread::read_cfg_line ('build::OPTIONS'))) {
        push (@file_options, @$fields);
    }
    close OPTIONS;
    &parse_args (\@file_options);
}

# from here on $maxdocs is a ready-made command line fragment
# (empty when no limit was requested)
if ($maxdocs == -1) {
    $maxdocs = "";
} else {
    $maxdocs = "-maxdocs $maxdocs";
}

# collection directory: -collectdir if given, otherwise $GSDLHOME/collect
my $cdir = $collectdir;
$cdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect") unless $collectdir =~ /\w/;
my $importdir = &util::filename_cat ($cdir, $collection, "import");
my $archivedir = &util::filename_cat ($cdir, $collection, "archives");
my $buildingdir = &util::filename_cat ($cdir, $collection, "building");
my $indexdir = &util::filename_cat ($cdir, $collection, "index");
my $bindir = &util::filename_cat ($ENV{'GSDLHOME'}, "bin");

# $out ends up holding the NAME of the handle all status output goes to;
# $outfile keeps the value given on the command line and $use_out records
# whether that value was a file rather than a standard handle
my $use_out = 0;
my $outfile = $out;
if ($out =~ /^(STDERR|STDOUT)$/i) {
    # the test is case insensitive but handle names are not, so normalise
    # (previously "-out stderr" named a handle that does not exist)
    $out = uc ($1);
} else {
    open (OUT, '>', $out) || die "Couldn't open output file $out\n";
    $out = "OUT";

    # delete any existing .final file
    &util::rm ("$outfile.final") if -e "$outfile.final";

    $use_out = 1;
}
$out->autoflush(1);

# delete any .kill file left lying around from a previously aborted build
if (-e &util::filename_cat ($cdir, $collection, ".kill")) {
    &util::rm (&util::filename_cat ($cdir, $collection, ".kill"));
}

&main();

close OUT if $use_out;
83
# Writes the command line synopsis and the description of every option
# to STDERR.  Takes no arguments.
sub print_usage {
    # default shown for -collectdir
    my $default_collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");

    my @usage_text = (
        "\n usage: $0 [options] collection-name\n\n",
        " options:\n",
        " -optionfile file Get options from file, useful on systems where\n",
        " long command lines may cause problems\n",
        " -append Add new files to existing collection\n",
        " -remove_archives Remove archives directory after successfully\n",
        " building the collection.\n",
        " -remove_import Remove import directory after successfully\n",
        " importing the collection.\n",
        " -buildtype build|import If 'build' attempt to build directly\n",
        " from archives directory (bypassing import\n",
        " stage). Defaults to 'import'\n",
        " -maxdocs number Maximum number of documents to build\n",
        " -download directory Directory (or file) to get import documents from.\n",
        " There may be multiple download directories and they\n",
        " may be of type http://, ftp://, or file://\n",
        " Note that any existing import directory will be\n",
        " deleted to make way for the downloaded data if\n",
        " a -download option is supplied\n",
        " -collectdir directory Collection directory (defaults to " .
            $default_collectdir . ")\n",
        " -dontinstall Only applicable if -collectdir is set to something\n",
        " other than the default. -dontinstall will suppress the\n",
        " default behaviour which is to install the collection to\n",
        " the gsdl/collect directory once it has been built.\n",
        " -save_archives Create a copy of the existing archives directory called\n",
        " archives.org\n",
        " -out Filename or handle to print output status to.\n",
        " The default is STDERR\n\n",
    );

    print STDERR @usage_text;
}
115
# Runs one complete build of $collection:
#   1. optionally caches the archives directory (-save_archives),
#   2. copies any -download sources into the import directory,
#   3. imports and/or builds depending on what data exists and -buildtype,
#   4. installs the collection into $GSDLHOME/collect when it was built
#      elsewhere (-collectdir) and -dontinstall was not given.
#
# Takes no arguments; works on the file-scoped variables and the option
# variables set by parse_args.  Status text goes to the handle named by
# $out.  When output goes to a file ($use_out) a result code is written
# through final_out: 0 success, 1 no import or archives data, 4 install
# target already exists, 5 install move failed.  Every failure ends in
# die "\n".
sub main {

    if ($save_archives && -d $archivedir) {
        print $out "caching original archives to ${archivedir}.org\n";
        &util::cp_r ($archivedir, "${archivedir}.org");
    }

    # do the download thing if we have any -download options
    if (scalar (@download)) {
        # remove any existing import data
        if (&has_content ($importdir)) {
            print $out "build: WARNING: removing contents of $importdir\n";
            &util::rm_r ($importdir);
        }

        # lexical loop variable; it still aliases the @download elements,
        # so the file: prefix is stripped in place as before
        foreach my $download_dir (@download) {

            if ($download_dir =~ /^http:\/\//) {
                # http download (nothing is done for these sources here)

            } elsif ($download_dir =~ /^ftp:\/\//) {
                # ftp download (nothing is done for these sources here)

            } else {
                # we assume anything not beginning with http:// or ftp://
                # is a file or directory on the local file system.
                $download_dir =~ s/^file:(\/\/)?//;

                if (-e $download_dir) {
                    # copy download_dir and all it contains to the import directory
                    my $download_cmd = "perl " . &util::filename_cat ($bindir, "script", "filecopy.pl");
                    $download_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
                    $download_cmd .= " -out \"$outfile.download\"" if $use_out;
                    $download_cmd .= " \"" . $download_dir . "\" " . $collection;
                    system ($download_cmd);
                    # filecopy.pl is only given an output file when we are
                    # writing to one ourselves; merge it only in that case so
                    # an unrelated "<handle>.download" file is never touched
                    &append_file ($out, "$outfile.download") if $use_out;
                } else {
                    print $out "WARNING: $download_dir does not exist\n";
                }
            }
        }
    }

    if (-e &util::filename_cat ($archivedir, "archives.inf")) {
        if (&has_content ($importdir)) {
            if ($buildtype eq "build") {
                &gsdl_build();
            } else {
                &gsdl_import();
                &gsdl_build();
            }
        } else {
            # there are archives but no import, build directly from archives
            print $out "build: no import material was found, building directly\n";
            print $out " from archives\n";
            &gsdl_build();
        }
    } else {
        if (&has_content ($importdir)) {
            if ($buildtype eq "build") {
                # only a notice: the import stage is run regardless
                print $out "build: can't build directly from archives as no\n";
                print $out " imported archives exist (did you forget to\n";
                print $out " move the contents of $collection/import to\n";
                print $out " collection/archives?)\n";
            }
            &gsdl_import();
            &gsdl_build();
        } else {
            # no import or archives
            print $out "build: ERROR: The $collection collection has no import\n";
            print $out " or archives data. Try downloading an unbuilt version\n";
            print $out " of the collection from www.nzdl.org\n";
            &final_out (1) if $use_out;
            die "\n";
        }
    }

    if ($collectdir ne "" && !$dontinstall) {
        my $install_collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");
        if (!&util::filenames_equal ($collectdir, $install_collectdir)) {

            # install collection to gsdl/collect
            print $out "installing the $collection collection\n";
            my $newdir = &util::filename_cat ($install_collectdir, $collection);
            my $olddir = &util::filename_cat ($collectdir, $collection);
            if (-d $newdir) {
                print $out "build: Could not install collection as $newdir\n";
                print $out " already exists. Collection will remain at\n";
                print $out " $olddir\n";
                &final_out (4) if $use_out;
                die "\n";
            }
            if (!&File::Copy::move ($olddir, $newdir)) {
                print $out "build: Failed to install collection to $newdir\n";
                print $out " Collection will remain at $olddir\n";
                &final_out (5) if $use_out;
                die "\n";
            }
        }
    }

    &final_out (0) if $use_out;
}
220
# Runs import.pl for $collection as a separate perl process and checks
# the outcome by looking for archives.inf in the archives directory.
#
# Takes no arguments.  On success the import directory is removed when
# -remove_import was given.  On failure the message is printed, result
# code 2 is written through final_out (file output only) and the script
# dies.
sub gsdl_import {

    print $out "importing the $collection collection\n\n";

    my $import_cmd = "perl " . &util::filename_cat ($bindir, "script", "import.pl");
    $import_cmd .= " -out \"$outfile.import\"" if $use_out;
    $import_cmd .= " -removeold" unless $append;
    $import_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
    $import_cmd .= " $maxdocs $collection";
    system ($import_cmd);
    # import.pl only writes "$outfile.import" when it was passed -out, so
    # only merge (and delete) that file in the same situation
    &append_file ($out, "$outfile.import") if $use_out;

    if (-e &util::filename_cat ($archivedir, "archives.inf")) {
        print $out "$collection collection imported successfully\n\n";
        if ($remove_import) {
            print $out "removing import directory ($importdir)\n";
            &util::rm_r ($importdir);
        }
    } else {
        # message first, result code second (the order main uses), so the
        # log is complete by the time the .final file appears
        print $out "\nimport.pl failed\n";
        &final_out (2) if $use_out;
        die "\n";
    }
}
246
# Runs buildcol.pl for $collection as a separate perl process and checks
# the outcome by looking for text/$collection.ldb or text/$collection.bdb
# in the building directory.
#
# Takes no arguments.  On success the archives directory is removed when
# -remove_archives was given, the old index directory is replaced by the
# building directory, and the archives copy made for -save_archives is
# deleted.  On failure the message is printed, result code 3 is written
# through final_out (file output only) and the script dies.
sub gsdl_build {

    print $out "building the $collection collection\n\n";

    my $build_cmd = "perl " . &util::filename_cat ($bindir, "script", "buildcol.pl");
    $build_cmd .= " -out \"$outfile.build\"" if $use_out;
    $build_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
    $build_cmd .= " $maxdocs $collection";
    system ($build_cmd);
    # buildcol.pl only writes "$outfile.build" when it was passed -out, so
    # only merge (and delete) that file in the same situation
    &append_file ($out, "$outfile.build") if $use_out;

    if (-e &util::filename_cat ($buildingdir, "text", "$collection.ldb") ||
        -e &util::filename_cat ($buildingdir, "text", "$collection.bdb")) {
        print $out "$collection collection built successfully\n\n";
        if ($remove_archives) {
            print $out "removing archives directory ($archivedir)\n";
            &util::rm_r ($archivedir);
        }
    } else {
        # message first, result code second (the order main uses), so the
        # log is complete by the time the .final file appears
        print $out "\nbuildcol.pl failed\n";
        &final_out (3) if $use_out;
        die "\n";
    }

    # replace old indexes with new ones
    if (&has_content ($indexdir)) {
        print $out "removing old indexes\n";
        &util::rm_r ($indexdir);
    }
    rmdir ($indexdir) if -d $indexdir;
    if (!&File::Copy::move ($buildingdir, $indexdir)) {
        # previously ignored; report it but carry on as before
        print $out "build: WARNING: could not move $buildingdir to $indexdir: $!\n";
    }

    # remove the cached archives
    if ($save_archives && -d "${archivedir}.org") {
        &util::rm_r ("${archivedir}.org");
    }
}
285
# Returns 1 if $dir is a directory holding at least one entry other than
# "." and "..", and 0 otherwise (including when $dir is not a directory
# or cannot be opened).
sub has_content {
    my ($dir) = @_;

    return 0 unless -d $dir;

    # lexical handle: nothing is shared between calls, and it is released
    # with closedir (close does not act on directory handles)
    opendir (my $dirhandle, $dir) or return 0;

    my $found = 0;
    # stop at the first real entry instead of reading the whole directory
    while (defined (my $entry = readdir ($dirhandle))) {
        next if $entry eq '.' || $entry eq '..';
        $found = 1;
        last;
    }
    closedir ($dirhandle);

    return $found;
}
302
# Copies the whole of $file to $handle and then deletes $file with
# util::rm.  Does nothing if $file cannot be opened.
#
#   $handle - handle to print to; callers in this script pass the handle's
#             name as a string, so the print must stay usable without
#             "strict refs"
#   $file   - name of the file to append and remove
sub append_file {
    my ($handle, $file) = @_;

    open (my $in, '<', $file) or return;
    {
        # slurp mode for this block only; the caller's $/ comes back
        # automatically instead of being forced to "\n"
        local $/ = undef;
        print {$handle} <$in>;
    }
    close ($in);
    &util::rm ($file);
}
313
# Creates a file called $outfile.final and writes an output code to it
# (should only be called if the -out option is used and isn't STDERR or
# STDOUT).  An output code of 0 specifies that there was no error.
#
#   $exit_code - the code to write; no newline is added
#
# If the file cannot be created a warning goes to STDERR and the script
# carries on.
sub final_out {
    my ($exit_code) = @_;

    # three-argument open: the name is used exactly as given, matching the
    # -e test and util::rm call made on the same name at start-up
    if (open (my $final, '>', "$outfile.final")) {
        print $final $exit_code;
        close ($final);
    } else {
        warn "build: could not write $outfile.final: $!\n";
    }
}
325
# Hands the argument list to parsargv::parse, which fills in the option
# variables used throughout this script.  If parse reports failure the
# usage text is printed and the script dies.
#
#   $argref - reference to the list of arguments (\@ARGV, or the words
#             read from an -optionfile)
#
# Each specification appears to be "name/pattern/default" - confirm
# against parsargv.
sub parse_args {
    my ($argref) = @_;

    my $parsed_ok = parsargv::parse($argref,
        'optionfile/.*/', \$optionfile,
        'append', \$append,
        'remove_archives', \$remove_archives,
        'remove_import', \$remove_import,
        'buildtype/^(build|import)$/import', \$buildtype,
        # anchored at both ends: the value is later pasted into the
        # import.pl and buildcol.pl command lines, so nothing may follow
        # the digits
        'maxdocs/^\-?\d+$/-1', \$maxdocs,
        'download/.+', \@download,
        'collectdir/.*/', \$collectdir,
        'dontinstall', \$dontinstall,
        'save_archives', \$save_archives,
        'out/.*/STDERR', \$out);

    if (!$parsed_ok) {
        &print_usage();
        die "\n";
    }
}
Note: See TracBrowser for help on using the repository browser.