#!/usr/bin/perl

# This perl script may be called directly or by running build.bat on
# windows (build.bat is in bin\windows)
#
# Overview of what the script does:
#   1. Optionally copies material named by -download options into the
#      collection's import directory (via filecopy.pl).
#   2. Runs import.pl and/or buildcol.pl for the collection, depending on
#      which of the import/archives directories have content and on the
#      -buildtype option.
#   3. Replaces the collection's index directory with the freshly created
#      building directory.
#
# Status messages go to the handle or file selected with -out.  When -out
# names a file, a companion "<file>.final" is written holding a result
# code: 0 = success, 1 = no import or archives data, 2 = a build step
# failed.

use strict;
use warnings;

use FileHandle;

BEGIN {
    die "GSDLHOME not set - did you remember to source setup.bash (unix) or " .
        "run setup.bat (windows)?\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set - did you remember to source setup.bash (unix) or " .
        "run setup.bat (windows)?\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    STDOUT->autoflush(1);
    STDERR->autoflush(1);
}

use parsargv;
use util;

# Command line options (filled in by parsargv::parse).
my ($append, $remove_archives, $remove_import, $buildtype, $maxdocs, $out);
my @download;

if (!parsargv::parse(\@ARGV,
                     'append', \$append,
                     'remove_archives', \$remove_archives,
                     'remove_import', \$remove_import,
                     'buildtype/^(build|import)$/import', \$buildtype,
                     'maxdocs/^\-?\d+/-1', \$maxdocs,
                     'download/.+', \@download,
                     'out/.*/STDERR', \$out)) {
    print_usage();
    die "\n";
}

my ($collection) = @ARGV;
if (!defined $collection || $collection !~ /\w/) {
    print STDERR "You must specify a collection to build\n";
    print_usage();
    die "\n";
}

# Extra arguments handed on to import.pl and buildcol.pl; empty when no
# document limit (-1) was requested.
my @maxdocs_args = ($maxdocs == -1) ? () : ("-maxdocs", $maxdocs);

my $collectdir  = util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection);
my $importdir   = util::filename_cat($collectdir, "import");
my $archivedir  = util::filename_cat($collectdir, "archives");
my $buildingdir = util::filename_cat($collectdir, "building");
my $indexdir    = util::filename_cat($collectdir, "index");
my $bindir      = util::filename_cat($ENV{'GSDLHOME'}, "bin");

# $outfile keeps the original -out value (used to derive the names of the
# per-stage output files); $out becomes the handle that status is printed to.
my $use_out = 0;
my $outfile = $out;
if ($out =~ /^STDERR$/i) {
    # map to the real handle so that e.g. "-out stderr" works too
    $out = \*STDERR;
}
elsif ($out =~ /^STDOUT$/i) {
    $out = \*STDOUT;
}
else {
    open(my $out_fh, '>', $outfile) || die "Couldn't open output file $outfile\n";
    $out = $out_fh;

    # delete any existing .final file
    util::rm("$outfile.final") if -e "$outfile.final";
    $use_out = 1;
}
$out->autoflush(1);

main();

close $out if $use_out;


# Prints the command line synopsis and option descriptions to STDERR.
sub print_usage {
    print STDERR "\n usage: $0 [options] collection-name\n\n";
    print STDERR " options:\n";
    print STDERR " -append Add new files to existing collection\n";
    print STDERR " -remove_archives Remove archives directory after successfully\n";
    print STDERR " building the collection.\n";
    print STDERR " -remove_import Remove import directory after successfully\n";
    print STDERR " importing the collection.\n";
    print STDERR " -buildtype build|import If 'build' attempt to build directly\n";
    print STDERR " from archives directory (bypassing import\n";
    print STDERR " stage). Defaults to 'import'\n";
    print STDERR " -maxdocs number Maximum number of documents to build\n";
    print STDERR " -download directory Directory (or file) to get import documents from.\n";
    print STDERR " There may be multiple download directories and they\n";
    print STDERR " may be of type http://, ftp://, or file://.\n";
    print STDERR " Note that any existing import directory will be\n";
    print STDERR " deleted to make way for the downloaded data if\n";
    print STDERR " a -download option is supplied\n";
    print STDERR " -out Filename or handle to print output status to.\n";
    print STDERR " The default is STDERR\n\n";
}

# Drives the whole build: handles any -download options, then decides from
# the state of the import and archives directories (and -buildtype) whether
# to import, build, or both.  Writes result code 0 to the .final file on
# success; writes 1 and dies when there is nothing to import or build.
sub main {

    # do the download thing if we have any -download options
    if (@download) {

        # remove any existing import data
        if (has_content($importdir)) {
            print $out "build: WARNING: removing contents of $importdir\n";
            util::rm_r($importdir);
        }

        foreach my $download_dir (@download) {

            if ($download_dir =~ m{^(http|ftp)://}) {
                # http and ftp downloads have no implementation here; say so
                # rather than silently ignoring the option
                print $out "build: WARNING: $1 download is not implemented; " .
                    "skipping $download_dir\n";
                next;
            }

            # we assume anything not beginning with http:// or ftp://
            # is a file or directory on the local file system.
            my $source = $download_dir;
            $source =~ s{^file://}{};

            if (-e $source) {
                # copy the source and all it contains to the import directory.
                # List-form system() keeps the shell away from user-supplied
                # paths and collection names.
                my @download_cmd = ("perl",
                                    util::filename_cat($bindir, "script", "filecopy.pl"));
                push @download_cmd, "-out", "$outfile.download" if $use_out;
                push @download_cmd, $source, $collection;
                system(@download_cmd);

                # if using output directory append the file download output to it
                append_file($out, "$outfile.download");
            }
            else {
                print $out "WARNING: $source does not exist\n";
            }
        }
    }

    my $have_archives = -e util::filename_cat($archivedir, "archives.inf");
    my $have_import   = has_content($importdir);

    if ($have_archives) {
        if ($have_import) {
            # both exist: -buildtype decides whether to re-import first
            gsdl_import() unless $buildtype eq "build";
            gsdl_build();
        }
        else {
            # there are archives but no import, build directly from archives
            print $out "build: no import material was found, building directly\n";
            print $out " from archives\n";
            gsdl_build();
        }
    }
    elsif ($have_import) {
        if ($buildtype eq "build") {
            # a direct build was requested but is impossible; report it and
            # fall through to a normal import + build
            print $out "build: can't build directly from archives as no\n";
            print $out " imported archives exist (did you forget to\n";
            print $out " move the contents of $collection/import to\n";
            print $out " collection/archives?)\n";
        }
        gsdl_import();
        gsdl_build();
    }
    else {
        # no import or archives
        print $out "build: ERROR: The $collection collection has no import\n";
        print $out " or archives data. Try downloading an unbuilt version\n";
        print $out " of the collection from www.nzdl.org\n";
        final_out(1) if $use_out;
        die "\n";
    }

    final_out(0) if $use_out;
}

# Runs import.pl for the collection.  Success is judged by the presence of
# archives.inf in the archives directory afterwards; on failure result code
# 2 is written to the .final file (if any) and the script dies.  Removes the
# import directory afterwards when -remove_import was given.
sub gsdl_import {

    print $out "importing the $collection collection\n\n";

    my @import_cmd = ("perl", util::filename_cat($bindir, "script", "import.pl"));
    push @import_cmd, "-out", "$outfile.import" if $use_out;
    push @import_cmd, "-removeold" unless $append;
    push @import_cmd, @maxdocs_args, $collection;
    system(@import_cmd);

    # if using output directory append the import output to it
    append_file($out, "$outfile.import");

    if (!-e util::filename_cat($archivedir, "archives.inf")) {
        final_out(2) if $use_out;
        die "\nimport.pl failed\n";
    }

    print $out "$collection collection imported successfully\n\n";
    if ($remove_import) {
        print $out "removing import directory ($importdir)\n";
        util::rm_r($importdir);
    }
}

# Runs buildcol.pl for the collection and then installs the result by
# replacing the index directory with the building directory.  Success of
# buildcol.pl is judged by the presence of text/<collection>.ldb or
# text/<collection>.bdb in the building directory; on failure result code 2
# is written to the .final file (if any) and the script dies.  Removes the
# archives directory afterwards when -remove_archives was given.
sub gsdl_build {

    print $out "building the $collection collection\n\n";

    my @build_cmd = ("perl", util::filename_cat($bindir, "script", "buildcol.pl"));
    push @build_cmd, "-out", "$outfile.build" if $use_out;
    push @build_cmd, @maxdocs_args, $collection;
    system(@build_cmd);

    # if using output directory append the buildcol output to it
    append_file($out, "$outfile.build");

    my $built = -e util::filename_cat($buildingdir, "text", "$collection.ldb")
        || -e util::filename_cat($buildingdir, "text", "$collection.bdb");
    if (!$built) {
        final_out(2) if $use_out;
        die "\nbuildcol.pl failed\n";
    }

    print $out "$collection collection built successfully\n\n";
    if ($remove_archives) {
        print $out "removing archives directory ($archivedir)\n";
        util::rm_r($archivedir);
    }

    # replace old indexes with new ones
    if (has_content($indexdir)) {
        print $out "removing old indexes\n";
        util::rm_r($indexdir);
    }
    rmdir($indexdir) if -d $indexdir;

    # a failed rename means the new indexes were never installed, so it must
    # not be reported as a successful build
    if (!rename($buildingdir, $indexdir)) {
        print $out "build: ERROR: couldn't move $buildingdir to $indexdir: $!\n";
        final_out(2) if $use_out;
        die "\n";
    }
}

# Returns 1 if $dir is a readable directory containing at least one entry
# other than "." and "..", otherwise 0.
sub has_content {
    my ($dir) = @_;

    return 0 unless -d $dir;

    opendir(my $dh, $dir) || return 0;
    my @entries = grep { $_ !~ /^\.{1,2}$/ } readdir $dh;
    closedir $dh;

    return @entries ? 1 : 0;
}

# Copies the whole of $file onto $handle and then deletes $file.  Does
# nothing if $file cannot be opened (e.g. the stage wrote no output file).
sub append_file {
    my ($handle, $file) = @_;

    open(my $in, '<', $file) || return;
    {
        # slurp mode for this read only; the previous record separator is
        # restored when the enclosing block ends
        local $/;
        print {$handle} <$in>;
    }
    close $in;

    util::rm($file);
}

# creates a file called $outfile.final (should only be called if -out option
# is used and isn't STDERR or STDOUT) and writes an output code to it.
# An output code of 0 specifies that there was no error
sub final_out {
    my ($exit_code) = @_;

    # failure to create the file is deliberately ignored
    if (open(my $final, '>', "$outfile.final")) {
        print {$final} $exit_code;
        close $final;
    }
}