Changeset 1533


Ignore:
Timestamp:
2000-09-11T10:59:28+12:00 (24 years ago)
Author:
paynter
Message:

Added support for GNU Wget to web site mirroring tools (previously, you had to use w3mirror, which is very hard to install).

Location:
trunk/gsdl/bin/script
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/mirror.pl

    r1375 r1533  
    2929
    3030# This program uses w3mirror to mirror a web site.  It looks for a
    31 # w3mirror configuration file in etc/w3mir.cfg, and if it finds one then
    32 # it runs w3mirror (using this configuration file) in the import
    33 # directory.  Afterwards, a mirror of the web site (as dictated by the
    34 # configuration file) will be in the import directory.
     31# mirror program configuration files in etc, and if it finds them then
     32# it runs the mirroring software using this configuration file, and placing
     33# the mirror in the import directory.
     34#
     35# mirror.pl can use the w3mirror program or the wget program if they are
     36# installed.
     37#
     38# To use w3mirror, the configuration file must be in etc/w3mir.cfg.
     39# To use GNU wget, the configuration file (i.e. a wgetrc file) must
     40# be in etc/wget.cfg and a file of the URLs to read in etc/wget.url
    3541
    3642BEGIN {
     
    7783    }
    7884
    79     # check the configuration file for options
     85    # get the etc directory
     86    $etcdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc");
     87   
     88    # check the collection configuration file for options
    8089    my $interval = 0;
    81     $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc/collect.cfg");
     90    $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'},
     91                       "etc", "collect.cfg");
    8292    if (-e $configfilename) {
    8393    $collectcfg = &colcfg::read_collect_cfg ($configfilename);
     
    95105    $importdir =~ s/\/$//;
    96106
    97     $etcdir = "$ENV{'GSDLCOLLECTDIR'}/etc";
    98 
    99     print "import directory: $importdir\n";
    100     print "   w3config file: $etcdir/w3mir.cfg\n\n";
    101 
    102107    # make sure there is an import directory
    103108    if (! -e "$importdir") {
     
    105110    }
    106111
    107     # make sure there is a w3mir file
    108     if (! -e "$etcdir/w3mir.cfg") {
    109     die "Couldn't find the w3mir configuration file $etcdir/w3mir.cfg\n";
     112    # if w3mir.cfg exists,
     113    # then we are using w3mirror to mirror the remote site
     114    if (-e "$etcdir/w3mir.cfg") {
     115
     116    # run the mirror program from the import directory
     117    my  $cmd = "cd $importdir; ";
     118    $cmd .= "gsw3mir.pl -cfgfile $etcdir/w3mir.cfg";
     119    # print "\n$cmd\n";
     120    `$cmd`;
     121
     122    }
     123
     124    # if wget.cfg and wget.url both exist,
     125    # then we are using GNU wget to mirror the remote site
     126    elsif ((-e "$etcdir/wget.cfg") && (-e "$etcdir/wget.url")) {
     127    $ENV{WGETRC} = "$etcdir/wget.cfg";
     128    my $cmd = "gsWget.pl --input-file=$etcdir/wget.url --directory-prefix=$importdir";
     129    system($cmd);
    110130    }
    111131
    112     # run the mirror program from the import directory
    113     my  $command = "cd $importdir; gsw3mir.pl -cfgfile $etcdir/w3mir.cfg\n";
    114     # print "\n$command\n";
    115     `$command`;
     132    # otherwise, there are no mirror configuration files
     133    else {
     134    die "Couldn't find the mirror configuration files in $etcdir\n";
     135    }
     136
    116137
    117138}
     
    121142
    122143
     144
  • trunk/gsdl/bin/script/update.pl

    r1376 r1533  
    117117    # if there is no mirror information, we're all done
    118118    if (!defined($mirror)) {
    119     print "No mirror information\n";
     119    print "No mirror command in $configfilename\n";
    120120    exit;
    121121    }
     
    129129    }
    130130
    131     # make sure there is a w3mir file
    132     if (! -e "$etcdir/w3mir.cfg") {
    133     die "Couldn't find the w3mir configuration file $etcdir/w3mir.cfg\n";
    134     }
    135 
    136131    # make sure there is an import directory
    137132    if (! -e "$importdir") {
     
    139134    }
    140135
    141     # make sure there is an empty archives directory
    142     if (-e "$archivedir") {
    143     &util::rm_r ($archivedir);
    144     }
    145     &util::mk_dir($archivedir);
    146    
    147136    print "archives directory: $archivedir\n";
    148137    print "  import directory: $importdir\n";
    149138    print "     etc directory: $etcdir\n";
    150     print "     w3config file: $etcdir/w3mir.cfg\n";
    151 
    152139    print "          interval: $interval days\n";
    153140
     
    179166    # Import the collection
    180167    print WLOG "\n\nStarting import at " . time . "\n\n";
    181     $command = "import.pl $collection";
     168    $command = "import.pl -removeold $collection";
    182169    print WLOG "Executing: $command\n";
    183170    print WLOG `$command`;
Note: See TracChangeset for help on using the changeset viewer.