[1184] | 1 | #!/usr/bin/perl
|
---|
| 2 |
|
---|
[1198] | 3 | # This perl script may be called directly or by running build.bat on
|
---|
| 4 | # windows (build.bat is in bin\windows)
|
---|
[1184] | 5 |
|
---|
[1454] | 6 | package build;
|
---|
| 7 |
|
---|
[1438] | 8 | use FileHandle;
|
---|
[1454] | 9 | use File::Copy;
|
---|
[1438] | 10 |
|
---|
BEGIN {
    # Both variables are expected to be provided by the setup script named
    # in the message below; nothing else can be located without them.
    foreach my $required ('GSDLHOME', 'GSDLOS') {
        next if defined $ENV{$required};
        die "$required not set - did you remember to source setup.bash (unix) or "
            . "run setup.bat (windows)?\n";
    }

    # put the GSDL perl library directory at the front of the search path
    # so that the "use" lines following this block can be resolved
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");

    # flush both standard streams after every write
    STDOUT->autoflush(1);
    STDERR->autoflush(1);
}
|
---|
| 22 |
|
---|
| 23 | use parsargv;
|
---|
| 24 | use util;
|
---|
[1454] | 25 | use cfgread;
|
---|
[1184] | 26 |
|
---|
# ---------------------------------------------------------------------------
# Script body: parse the options, work out the directories involved, set up
# the status output handle and then hand over to main().
#
# The "my" variables declared here are file-scoped and are read by the
# subroutines defined further down in this file.
# ---------------------------------------------------------------------------

&parse_args (\@ARGV);

# the first remaining argument names the collection (assumes parsargv::parse
# strips the options it recognises from @ARGV -- TODO confirm)
my ($collection) = @ARGV;

if (!defined $collection || $collection !~ /\w/) {
    print STDERR "You must specify a collection to build\n";
    &print_usage();
    die "\n";
}

# options may also be read from a file; everything found there is fed
# through parse_args a second time
if ($optionfile =~ /\w/) {
    open (OPTIONS, $optionfile) || die "Couldn't open $optionfile\n";
    my $line = [];
    my $options = [];
    # the handle is passed by name, so it must stay the bareword
    # OPTIONS in package build
    while (defined ($line = &cfgread::read_cfg_line ('build::OPTIONS'))) {
        push (@$options, @$line);
    }
    close OPTIONS;
    &parse_args ($options);
}

# from here on $maxdocs is the literal command line fragment appended to the
# import.pl and buildcol.pl command lines (empty when no limit was given)
if ($maxdocs == -1) {
    $maxdocs = "";
} else {
    $maxdocs = "-maxdocs $maxdocs";
}

# directories making up the collection
my $cdir = $collectdir;
$cdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect") unless $collectdir =~ /\w/;
my $importdir = &util::filename_cat ($cdir, $collection, "import");
my $archivedir = &util::filename_cat ($cdir, $collection, "archives");
my $buildingdir = &util::filename_cat ($cdir, $collection, "building");
my $indexdir = &util::filename_cat ($cdir, $collection, "index");
my $bindir = &util::filename_cat ($ENV{'GSDLHOME'}, "bin");

# $out ends up holding the NAME of the handle status messages are printed
# to ("STDERR", "STDOUT" or "OUT"); $outfile keeps the value given to -out
# and $use_out records whether that value was a real file.
my $use_out = 0;
my $outfile = $out;
if ($out =~ /^(STDERR|STDOUT)$/i) {
    # The option is matched case-insensitively but handle names are case
    # sensitive, so normalise it; otherwise "-out stderr" would leave $out
    # naming a handle that does not exist.
    $out = uc ($out);
} else {
    open (OUT, ">$out") || die "Couldn't open output file $out\n";
    $out = "OUT";

    # delete any existing .final file
    &util::rm ("$outfile.final") if -e "$outfile.final";

    $use_out = 1;
}
$out->autoflush(1);

# delete any .kill file left lying around from a previously aborted build
if (-e &util::filename_cat ($cdir, $collection, ".kill")) {
    &util::rm (&util::filename_cat ($cdir, $collection, ".kill"));
}

&main();

close OUT if $use_out;
|
---|
[1424] | 83 |
|
---|
# Prints the command line synopsis and the description of every option to
# STDERR.  Takes no arguments.
sub print_usage {
    # the default collect directory is quoted in the -collectdir description
    my $default_collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");

    my @usage = (
        "\n usage: $0 [options] collection-name\n\n",
        " options:\n",
        " -optionfile file Get options from file, useful on systems where\n",
        " long command lines may cause problems\n",
        " -append Add new files to existing collection\n",
        " -remove_archives Remove archives directory after successfully\n",
        " building the collection.\n",
        " -remove_import Remove import directory after successfully\n",
        " importing the collection.\n",
        " -buildtype build|import If 'build' attempt to build directly\n",
        " from archives directory (bypassing import\n",
        " stage). Defaults to 'import'\n",
        " -maxdocs number Maximum number of documents to build\n",
        " -download directory Directory (or file) to get import documents from.\n",
        " There may be multiple download directories and they\n",
        " may be of type http://, ftp://, or file://\n",
        " Note that any existing import directory will be\n",
        " deleted to make way for the downloaded data if\n",
        " a -download option is supplied\n",
        " -collectdir directory Collection directory (defaults to " .
            $default_collectdir . ")\n",
        " -dontinstall Only applicable if -collectdir is set to something\n",
        " other than the default. -dontinstall will suppress the\n",
        " default behaviour which is to install the collection to\n",
        " the gsdl/collect directory once it has been built.\n",
        " -save_archives Create a copy of the existing archives directory called\n",
        " archives.org\n",
        " -out Filename or handle to print output status to.\n",
        " The default is STDERR\n\n",
    );

    print STDERR join ("", @usage);
}
|
---|
| 115 |
|
---|
# Drives one complete run for $collection:
#   1. with -save_archives, copies the archives directory to archives.org
#   2. with -download, empties the import directory and copies every local
#      download source into it using filecopy.pl
#   3. runs the import and/or build stage depending on what data exists
#   4. unless -dontinstall was given, moves a collection built in a
#      non-default -collectdir into GSDLHOME/collect
# When -out names a file, a status code is written through final_out():
# 0 on success, 1 when there is nothing to import or build, 4 when the
# install target already exists and 5 when the install move fails.  Every
# failure ends in die "\n".
sub main {

    if ($save_archives && -d $archivedir) {
        print $out "caching original archives to ${archivedir}.org\n";
        &util::cp_r ($archivedir, "${archivedir}.org");
    }

    # do the download thing if we have any -download options
    if (scalar (@download)) {
        # remove any existing import data
        if (&has_content ($importdir)) {
            print $out "build: WARNING: removing contents of $importdir\n";
            &util::rm_r ($importdir);
        }

        # lexical loop variable; it still aliases the elements of @download
        foreach my $download_dir (@download) {

            # remove any leading or trailing whitespace from filenames (just in case)
            $download_dir =~ s/^\s+//;
            $download_dir =~ s/\s+$//;

            if ($download_dir =~ /^(http|ftp):\/\//) {
                # http and ftp downloads are not implemented here; say so
                # rather than dropping the source without a word
                print $out "build: WARNING: $1 download is not implemented, " .
                    "ignoring '$download_dir'\n";

            } else {
                # we assume anything not beginning with http:// or ftp://
                # is a file or directory on the local file system.
                $download_dir =~ s/^file:(\/\/)?//;
                $download_dir =~ s/^\s+//; # may be whitespace between "file://" and the rest

                if (-e $download_dir) {
                    # copy download_dir and all it contains to the import directory
                    my $download_cmd = "perl " . &util::filename_cat ($bindir, "script", "filecopy.pl");
                    $download_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
                    $download_cmd .= " -out \"$outfile.download\"" if $use_out;
                    $download_cmd .= " \"" . $download_dir . "\" " . $collection;
                    system ($download_cmd);
                    # if using output directory append the file download output to it
                    &append_file ($out, "$outfile.download");
                } else {
                    print $out "WARNING: '$download_dir' does not exist\n";
                }
            }
        }
    }

    if (-e &util::filename_cat ($archivedir, "archives.inf")) {
        if (&has_content ($importdir)) {
            if ($buildtype eq "build") {
                &gsdl_build();
            } else {
                &gsdl_import();
                &gsdl_build();
            }
        } else {
            # there are archives but no import, build directly from archives
            print $out "build: no import material was found, building directly\n";
            print $out " from archives\n";
            &gsdl_build();
        }
    } else {
        if (&has_content ($importdir)) {
            if ($buildtype eq "build") {
                # a direct build was requested but is impossible; report it
                # and fall through to a normal import followed by a build
                print $out "build: can't build directly from archives as no\n";
                print $out " imported archives exist (did you forget to\n";
                print $out " move the contents of $collection/import to\n";
                print $out " $collection/archives?)\n";
            }
            &gsdl_import();
            &gsdl_build();
        } else {
            # no import or archives
            print $out "build: ERROR: The $collection collection has no import or archives data.\n";
            &final_out (1) if $use_out;
            die "\n";
        }
    }

    if ($collectdir ne "" && !$dontinstall) {
        my $install_collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");
        if (!&util::filenames_equal ($collectdir, $install_collectdir)) {

            # install collection to gsdl/collect
            print $out "installing the $collection collection\n";
            my $newdir = &util::filename_cat ($install_collectdir, $collection);
            my $olddir = &util::filename_cat ($collectdir, $collection);
            if (-d $newdir) {
                print $out "build: Could not install collection as $newdir\n";
                print $out " already exists. Collection will remain at\n";
                print $out " $olddir\n";
                &final_out (4) if $use_out;
                die "\n";
            }
            if (!&File::Copy::move ($olddir, $newdir)) {
                print $out "build: Failed to install collection to $newdir\n";
                print $out " Collection will remain at $olddir\n";
                &final_out (5) if $use_out;
                die "\n";
            }
        }
    }

    &final_out (0) if $use_out;
}
|
---|
| 223 |
|
---|
# Runs import.pl on $collection and folds its output into $out.
#
# The import counts as successful when archives.inf exists in the archives
# directory afterwards; the import directory is then removed if
# -remove_import was given.  Otherwise the failure is reported, status
# code 2 is written through final_out() (only when -out names a file) and
# the script dies.
sub gsdl_import {

    print $out "importing the $collection collection\n\n";

    # $maxdocs is either empty or a complete "-maxdocs N" fragment
    my $import_cmd = "perl " . &util::filename_cat ($bindir, "script", "import.pl");
    $import_cmd .= " -out \"$outfile.import\"" if $use_out;
    $import_cmd .= " -removeold" unless $append;
    $import_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
    $import_cmd .= " $maxdocs $collection";
    system ($import_cmd);
    # if using output directory append the import output to it
    &append_file ($out, "$outfile.import");

    if (!-e &util::filename_cat ($archivedir, "archives.inf")) {
        # Print the message before writing the .final marker, as main()
        # does, so the explanation is already in the output when the
        # marker appears.
        print $out "\nimport.pl failed\n";
        &final_out (2) if $use_out;
        die "\n";
    }

    print $out "$collection collection imported successfully\n\n";
    if ($remove_import) {
        print $out "removing import directory ($importdir)\n";
        &util::rm_r ($importdir);
    }
}
|
---|
| 249 |
|
---|
# Runs buildcol.pl on $collection, folds its output into $out and, when the
# build worked, replaces the index directory with the freshly built
# building directory.
#
# The build counts as successful when text/$collection.ldb or
# text/$collection.bdb exists below the building directory.  On failure
# (including failure to move the new indexes into place) the problem is
# reported, status code 3 is written through final_out() (only when -out
# names a file) and the script dies.
sub gsdl_build {

    print $out "building the $collection collection\n\n";

    # $maxdocs is either empty or a complete "-maxdocs N" fragment
    my $build_cmd = "perl " . &util::filename_cat ($bindir, "script", "buildcol.pl");
    $build_cmd .= " -out \"$outfile.build\"" if $use_out;
    $build_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
    $build_cmd .= " $maxdocs $collection";
    system ($build_cmd);
    # if using output directory append the buildcol output to it
    &append_file ($out, "$outfile.build");

    if (-e &util::filename_cat ($buildingdir, "text", "$collection.ldb") ||
        -e &util::filename_cat ($buildingdir, "text", "$collection.bdb")) {
        print $out "$collection collection built successfully\n\n";
        if ($remove_archives) {
            print $out "removing archives directory ($archivedir)\n";
            &util::rm_r ($archivedir);
        }
    } else {
        # message first, .final marker second (same order as main())
        print $out "\nbuildcol.pl failed\n";
        &final_out (3) if $use_out;
        die "\n";
    }

    # replace old indexes with new ones
    if (&has_content ($indexdir)) {
        print $out "removing old indexes\n";
        &util::rm_r ($indexdir);
    }
    rmdir ($indexdir) if -d $indexdir;
    if (!&File::Copy::move ($buildingdir, $indexdir)) {
        # The old indexes are already gone, so carrying on would report
        # success for a collection that has no index directory.  Treated as
        # a failure of the build stage, hence status code 3.
        print $out "build: Failed to move $buildingdir to $indexdir\n";
        &final_out (3) if $use_out;
        die "\n";
    }

    # remove the cached archives
    if ($save_archives && -d "${archivedir}.org") {
        &util::rm_r ("${archivedir}.org");
    }
}
|
---|
| 288 |
|
---|
# Returns 1 if $dir is a directory holding at least one entry other than
# "." and "..", and 0 otherwise (including when $dir is not a directory or
# cannot be opened).
sub has_content {
    my ($dir) = @_;

    return 0 unless -d $dir;

    # a lexical handle cannot collide with a DIR handle used elsewhere
    opendir (my $dirhandle, $dir) || return 0;
    my @entries = grep { $_ !~ /^\.{1,2}$/ } readdir ($dirhandle);
    # directory handles are released with closedir; close() does not
    # close a handle opened with opendir
    closedir ($dirhandle);

    return scalar (@entries) ? 1 : 0;
}
|
---|
[1431] | 305 |
|
---|
# Copies the whole of $file to $handle and then deletes $file.
#
#   $handle - the output handle, either a real handle or the name of one
#             (callers in this file pass the name held in $out)
#   $file   - name of the file whose contents are to be appended
#
# Returns without doing anything if $file cannot be opened.
sub append_file {
    my ($handle, $file) = @_;

    open (my $in, '<', $file) || return;

    my $contents;
    {
        # slurp mode for this read only; the caller's record separator is
        # restored automatically when the block is left
        local $/;
        $contents = <$in>;
    }
    close ($in);

    # block form so that $handle is always taken as the handle, never as
    # the left operand of an expression
    print {$handle} $contents if defined $contents;

    &util::rm ($file);
}
|
---|
[1438] | 316 |
|
---|
# Records the overall result of the run by writing $exit_code to the file
# "$outfile.final".  Only meant to be called when the -out option names a
# real file (i.e. not STDERR or STDOUT).  A code of 0 means no error
# occurred.  Nothing is written, and nothing is reported, if the file
# cannot be created.
sub final_out {
    my ($exit_code) = @_;

    open (FINAL, ">$outfile.final") || return;
    print FINAL $exit_code;
    close FINAL;
}
|
---|
[1454] | 328 |
|
---|
# Parses the option list referenced by $argref into the package variables
# used throughout this script ($optionfile, $append, $remove_archives,
# $remove_import, $buildtype, $maxdocs, @download, $collectdir,
# $dontinstall, $save_archives and $out).  Prints the usage message and
# dies if parsargv::parse rejects the options.
#
# Each specification appears to have the form "name/pattern/default"
# (see the parsargv module to confirm the exact rules).
sub parse_args {
    my ($argref) = @_;

    if (!parsargv::parse($argref,
                         'optionfile/.*/', \$optionfile,
                         'append', \$append,
                         'remove_archives', \$remove_archives,
                         'remove_import', \$remove_import,
                         'buildtype/^(build|import)$/import', \$buildtype,
                         # anchored at both ends (like buildtype): the value
                         # is compared numerically and later interpolated
                         # into shell command lines, so trailing text must
                         # not be accepted
                         'maxdocs/^\-?\d+$/-1', \$maxdocs,
                         'download/.+', \@download,
                         'collectdir/.*/', \$collectdir,
                         'dontinstall', \$dontinstall,
                         'save_archives', \$save_archives,
                         'out/.*/STDERR', \$out)) {

        &print_usage();
        die "\n";
    }
}
|
---|