#!/usr/bin/perl -w

###########################################################################
#
# mirror.pl
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# This program uses w3mirror or GNU wget to mirror a web site. It looks
# for the mirror program's configuration files in the collection's etc
# directory, and if it finds them it runs the mirroring software using
# those configuration files, placing the mirror in the import directory.
#
# mirror.pl can use the w3mirror program or the wget program if they are
# installed.
#
# To use w3mirror, the configuration file must be in etc/w3mir.cfg.
# To use GNU wget, the configuration file (i.e. a wgetrc file) must
# be in etc/wget.cfg and a file of the URLs to read in etc/wget.url

BEGIN {
    # Both variables are required before the Greenstone perl libraries
    # can be located and loaded.
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
}

use strict;

use arcinfo;
use colcfg;
use util;
use parsargv;

# Print the command-line synopsis to STDERR.
sub print_usage {
    print STDERR "\n";
    print STDERR "mirror.pl: Uses w3mir or wget to sync a collections import data\n";
    print STDERR "           with a website.\n\n";
    print STDERR "  usage: $0 [options] collection-name\n\n";
    print STDERR "  options:\n";
    print STDERR "   -verbosity number      0=none, 3=lots\n";
    print STDERR "   -importdir directory   Where to place the mirrored material\n";
}

# Report how a child process finished.
#   $program - name used in the diagnostic
#   $status  - the value of $? after the child was run
#   $oserror - the value of $! captured immediately after the child was run
# Dies if the child could not be started at all. A child that ran but
# finished abnormally only produces a warning, so this script's own exit
# status is unchanged in that case.
sub report_child_status {
    my ($program, $status, $oserror) = @_;

    if ($status == -1) {
        die "Couldn't run $program: $oserror\n";
    }
    elsif ($status & 127) {
        warn "$program was killed by signal " . ($status & 127) . "\n";
    }
    elsif ($status != 0) {
        warn "$program exited with status " . ($status >> 8) . "\n";
    }
}

&main ();

# Entry point: parse the command line, locate the collection's etc and
# import directories, then run whichever mirroring tool has configuration
# files present in etc. Dies if the arguments are invalid, if collect.cfg
# is missing, or if no mirror configuration files are found.
sub main {
    my ($verbosity, $importdir, $etcdir, $collection, $configfilename, $collectcfg);

    # NOTE(review): -verbosity is parsed so the documented option keeps
    # being accepted, but nothing below reads $verbosity.
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'importdir/.*/', \$importdir )) {
        &print_usage();
        die "\n";
    }

    # get and check the collection name
    if (($collection = &util::use_collection(@ARGV)) eq "") {
        &print_usage();
        die "\n";
    }

    # get the etc directory
    $etcdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc");

    # check the collection configuration file for options
    $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg");
    if (-e $configfilename) {
        $collectcfg = &colcfg::read_collect_cfg ($configfilename);
        # a -importdir given on the command line wins over collect.cfg
        if (defined $collectcfg->{'importdir'} && $importdir eq "") {
            $importdir = $collectcfg->{'importdir'};
        }
    } else {
        die "Couldn't find the configuration file $configfilename\n";
    }

    # fill in the default import directories if none
    # were supplied, turn all \ into / and remove trailing /
    $importdir = "$ENV{'GSDLCOLLECTDIR'}/import" if $importdir eq "";
    $importdir =~ s{[\\/]+}{/}g;
    $importdir =~ s{/$}{};

    # make sure there is an import directory
    if (! -e "$importdir") {
        &util::mk_dir($importdir);
    }

    # if w3mir.cfg exists,
    # then we are using w3mirror to mirror the remote site
    if (-e "$etcdir/w3mir.cfg") {
        # run the mirror program from the import directory. Changing
        # directory here rather than with a shell "cd ...;" prefix avoids
        # shell-specific command separators and quoting, and lets us stop
        # instead of mirroring into the wrong directory when it fails.
        chdir ($importdir)
            or die "Couldn't change to the import directory $importdir: $!\n";

        # need to ensure that the path to perl and the path to the
        # configuration file are quoted (in case there's spaces in them)
        my $cmd = "\"" . &util::get_perl_exec() . "\" -S gsw3mir.pl"
                . " -cfgfile \"$etcdir/w3mir.cfg\"";
        # print "\n$cmd\n";

        # backticks capture (and here discard) the child's standard output
        `$cmd`;
        &report_child_status ("gsw3mir.pl", $?, "$!");
    }

    # if wget.cfg and wget.url both exist,
    # then we are using GNU wget to mirror the remote site
    elsif ((-e "$etcdir/wget.cfg") && (-e "$etcdir/wget.url")) {
        $ENV{WGETRC} = "$etcdir/wget.cfg";

        # list form: every argument reaches the child as one word without
        # shell quoting, so spaces in any of the paths are harmless
        system (&util::get_perl_exec(), "-S", "gsWget.pl",
                "--input-file=$etcdir/wget.url",
                "--directory-prefix=$importdir");
        &report_child_status ("gsWget.pl", $?, "$!");
    }

    # otherwise, there are no mirror configuration files
    else {
        die "Couldn't find the mirror configuration files in $etcdir\n";
    }
}