source: trunk/gsdl/bin/script/mirror.pl@ 1533

Last change on this file since 1533 was 1533, checked in by paynter, 24 years ago

Added support for GNU Wget to web site mirroring tools (perviously, you had to use w3mirror, which is very hard to install).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.3 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# mirror.pl
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 1999 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29
# This program mirrors a web site using either w3mirror or GNU wget. It
# looks for a mirror program configuration file in the collection's etc
# directory, and if it finds one then it runs the mirroring software using
# this configuration file, placing the mirror in the import directory.
#
# mirror.pl can use the w3mirror program or the wget program if they are
# installed.
#
# To use w3mirror, the configuration file must be in etc/w3mir.cfg.
# To use GNU wget, the configuration file (i.e. a wgetrc file) must
# be in etc/wget.cfg and a file of the URLs to read in etc/wget.url
41
# Compile-time setup: refuse to run without the Greenstone environment and
# put the Greenstone Perl library directories at the front of @INC so the
# "use" statements that follow can be resolved.
BEGIN {
    defined $ENV{'GSDLHOME'} or die "GSDLHOME not set\n";
    defined $ENV{'GSDLOS'}   or die "GSDLOS not set\n";

    # A single unshift with the directories listed most-specific first gives
    # the same search order as unshifting perllib, plugins and classify in
    # turn: classify, then plugins, then perllib.
    my $perllib = "$ENV{'GSDLHOME'}/perllib";
    unshift @INC, "$perllib/classify", "$perllib/plugins", $perllib;
}
49
50use strict;
51use arcinfo;
52use colcfg;
53use plugin;
54use docprint;
55use util;
56use parsargv;
57
# Write the command-line synopsis and the list of supported options to
# STDERR. Takes no arguments.
sub print_usage {
    my @usage_lines = (
        "\n usage: $0 [options] collection-name\n\n",
        " options:\n",
        " -verbosity number 0=none, 3=lots\n",
        " -importdir directory Where to place the mirrored material\n",
    );

    # Emit the whole message with one print call.
    print STDERR join('', @usage_lines);
}
64
65
# Script entry point: all of the work is done in main(), defined below.
main();
67
# Mirror a remote web site into a collection's import directory.
#
# Command line (read from @ARGV):
#   -verbosity number     accepted for compatibility; not currently used here
#   -importdir directory  where to place the mirrored material
#   collection-name       the collection to mirror into
#
# The import directory is taken from, in order of preference: the -importdir
# option, the "importdir" entry of the collection's etc/collect.cfg, or
# $GSDLCOLLECTDIR/import.
#
# The mirroring tool is chosen by which configuration files exist in the
# collection's etc directory:
#   etc/w3mir.cfg                   -> gsw3mir.pl, run from the import directory
#   etc/wget.cfg and etc/wget.url   -> gsWget.pl, with WGETRC set to wget.cfg
#
# Dies if the arguments are invalid, if collect.cfg is missing, if the import
# directory cannot be created or entered, or if no mirror configuration is
# found. A mirroring tool that fails to start or exits non-zero is reported
# on STDERR but is not fatal, because the tool may have fetched part of the
# site before reporting an error.
sub main {
    my ($verbosity, $importdir);

    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'importdir/.*/', \$importdir)) {
        &print_usage();
        die "\n";
    }
    # Guard the string comparisons below in case the option parser left
    # the value undefined.
    $importdir = "" unless defined $importdir;

    # get and check the collection name
    my $collection = &util::use_collection(@ARGV);
    if ($collection eq "") {
        &print_usage();
        die "\n";
    }

    # get the etc directory
    my $etcdir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc");

    # check the collection configuration file for options
    my $configfilename = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},
                                             "etc", "collect.cfg");
    if (! -e $configfilename) {
        die "Couldn't find the configuration file $configfilename\n";
    }
    my $collectcfg = &colcfg::read_collect_cfg($configfilename);
    if (defined $collectcfg->{'importdir'} && $importdir eq "") {
        $importdir = $collectcfg->{'importdir'};
    }

    # fill in the default import directory if none was supplied,
    # turn all \ into / and remove the trailing /
    $importdir = "$ENV{'GSDLCOLLECTDIR'}/import" if $importdir eq "";
    $importdir =~ s/[\\\/]+/\//g;
    $importdir =~ s/\/$//;

    # make sure there is an import directory
    if (! -e $importdir) {
        &util::mk_dir($importdir);
    }
    # Without a real directory the mirror would be written somewhere
    # unintended, so stop here rather than continue.
    if (! -d $importdir) {
        die "Couldn't create the import directory $importdir\n";
    }

    my $w3mir_cfg = "$etcdir/w3mir.cfg";
    my $wget_cfg  = "$etcdir/wget.cfg";
    my $wget_url  = "$etcdir/wget.url";

    my ($program, $status);

    # if w3mir.cfg exists,
    # then we are using w3mirror to mirror the remote site
    if (-e $w3mir_cfg) {
        $program = "gsw3mir.pl";

        # Run the mirror program from the import directory. Changing
        # directory in this process (instead of a "cd ...;" prefix in a
        # shell command) means a failure to enter the directory stops the
        # run, and the list form of system() passes the paths to the
        # program without shell interpretation. The program's output is
        # no longer captured and thrown away.
        chdir($importdir)
            or die "Couldn't change to the import directory $importdir: $!\n";
        $status = system($program, "-cfgfile", $w3mir_cfg);
    }

    # if wget.cfg and wget.url both exist,
    # then we are using GNU wget to mirror the remote site
    elsif ((-e $wget_cfg) && (-e $wget_url)) {
        $program = "gsWget.pl";
        $ENV{WGETRC} = $wget_cfg;
        $status = system($program,
                         "--input-file=$wget_url",
                         "--directory-prefix=$importdir");
    }

    # otherwise, there are no mirror configuration files
    else {
        die "Couldn't find the mirror configuration files in $etcdir\n";
    }

    # Report (but do not die on) a mirroring tool that failed.
    if ($status == -1) {
        print STDERR "mirror.pl: warning: couldn't run $program: $!\n";
    } elsif ($status != 0) {
        print STDERR "mirror.pl: warning: $program exited with status ",
                     ($status >> 8), "\n";
    }
}
139
140
141
142
143
144
Note: See TracBrowser for help on using the repository browser.