source: trunk/gsdl/bin/script/mirror.pl@ 2785

Last change on this file since 2785 was 2785, checked in by sjboddie, 23 years ago

The build process now creates a summary of how many files were included,
which were rejected, etc. A link to a page containing this summary is
provided from the final page of the collector (once the collection is built
successfully) and from the default "about this collection" text for
collections built by the collector.

Also did a little bit of tidying in a couple of places

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.4 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# mirror.pl
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 1999 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29
30# This program uses w3mirror to mirror a web site. It looks for the
31# mirror program's configuration files in etc and, if it finds them,
32# runs the mirroring software with that configuration, placing
33# the mirror in the import directory.
34#
35# mirror.pl can use the w3mirror program or the wget program if they are
36# installed.
37#
38# To use w3mirror, the configuration file must be in etc/w3mir.cfg.
39# To use GNU wget, the configuration file (i.e. a wgetrc file) must
40# be in etc/wget.cfg and a file of the URLs to read in etc/wget.url
41
# Compile-time setup: refuse to run without the Greenstone environment
# variables, then put the Greenstone Perl library directories at the front
# of the module search path so the "use" lines that follow can be resolved.
BEGIN {
    defined $ENV{'GSDLHOME'} or die "GSDLHOME not set\n";
    defined $ENV{'GSDLOS'}   or die "GSDLOS not set\n";

    # A single unshift keeps its arguments in the order given, so the
    # resulting search order is classify, then plugins, then perllib.
    my $perllib = "$ENV{'GSDLHOME'}/perllib";
    unshift(@INC, "$perllib/classify", "$perllib/plugins", $perllib);
}
49
50use arcinfo;
51use colcfg;
52use plugin;
53use docprint;
54use util;
55use parsargv;
56
# print_usage
#
# Writes the command-line help text for mirror.pl to STDERR: a one-line
# description, the usage line (which includes the script name from $0)
# and the list of supported options.  Takes no arguments.
sub print_usage {
    my @usage_text = (
        "\n",
        "mirror.pl: Uses w3mir or wget to sync a collections import data\n",
        " with a website.\n\n",
        " usage: $0 [options] collection-name\n\n",
        " options:\n",
        " -verbosity number 0=none, 3=lots\n",
        " -importdir directory Where to place the mirrored material\n",
    );

    print STDERR join('', @usage_text);
}
66
67
# Script entry point: run the mirroring logic with an empty argument list
# (main reads the command line from @ARGV itself).
main();
69
# main
#
# Entry point for mirror.pl.  Parses the command line, works out where the
# collection keeps its import data, and then runs whichever mirroring tool
# has configuration files in the collection's etc directory.
#
# Command line (read from @ARGV):
#   -verbosity number      parsed via parsargv but not otherwise used here
#   -importdir directory   where to place the mirrored material
#   collection-name        handed to util::use_collection
#
# The import directory is chosen in this order: the -importdir option, the
# 'importdir' entry of etc/collect.cfg, then $GSDLCOLLECTDIR/import.
# NOTE(review): GSDLCOLLECTDIR is presumably exported by
# util::use_collection -- confirm against util.pm.
#
# Dies (after printing the usage text) when argument parsing fails or
# util::use_collection returns an empty name.  Dies with a message when
# collect.cfg is missing, when the import directory cannot be entered for
# the w3mir run, or when no mirror configuration files are found.
#
# The external tools are started with the list form of system(), so paths
# containing spaces or shell metacharacters reach the tool as single,
# unmodified arguments, and the tool's output goes to this script's own
# STDOUT/STDERR.  A tool that cannot be started, or that finishes with a
# non-zero status, is reported with a warning on STDERR; the script's own
# exit status is not changed by that.
sub main {
    my ($verbosity, $importdir);

    my $parsed_ok = parsargv::parse(\@ARGV,
                                    'verbosity/\d+/2', \$verbosity,
                                    'importdir/.*/',   \$importdir);
    if (!$parsed_ok) {
        print_usage();
        die "\n";
    }

    # get and check the collection name
    my $collection = util::use_collection(@ARGV);
    if ($collection eq "") {
        print_usage();
        die "\n";
    }

    # get the etc directory
    my $etcdir = util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc");

    # check the collection configuration file for options
    my $configfilename = util::filename_cat($ENV{'GSDLCOLLECTDIR'},
                                            "etc", "collect.cfg");
    if (!-e $configfilename) {
        die "Couldn't find the configuration file $configfilename\n";
    }
    my $collectcfg = colcfg::read_collect_cfg($configfilename);
    if (defined $collectcfg->{'importdir'} && $importdir eq "") {
        # the command line takes precedence over collect.cfg
        $importdir = $collectcfg->{'importdir'};
    }

    # fill in the default import directory if none was supplied,
    # turn all \ into / and remove a trailing /
    $importdir = "$ENV{'GSDLCOLLECTDIR'}/import" if $importdir eq "";
    $importdir =~ s/[\\\/]+/\//g;
    $importdir =~ s/\/$//;

    # make sure there is an import directory
    if (!-e $importdir) {
        util::mk_dir($importdir);
    }

    # if w3mir.cfg exists,
    # then we are using w3mirror to mirror the remote site
    if (-e "$etcdir/w3mir.cfg") {
        # gsw3mir.pl is run from inside the import directory.  The change
        # of directory is done here rather than with a shell "cd ...;"
        # prefix so that a failure stops the run instead of letting the
        # mirror be written into whatever directory we were started from.
        # The working directory stays changed afterwards; nothing else
        # runs in this routine after the tool finishes.
        chdir($importdir)
            or die "Couldn't change to the import directory $importdir: $!\n";
        _run_mirror_tool('perl', '-S', 'gsw3mir.pl',
                         '-cfgfile', "$etcdir/w3mir.cfg");
    }

    # if wget.cfg and wget.url both exist,
    # then we are using GNU wget to mirror the remote site
    elsif ((-e "$etcdir/wget.cfg") && (-e "$etcdir/wget.url")) {
        # the wgetrc file is handed over through the environment
        $ENV{WGETRC} = "$etcdir/wget.cfg";
        _run_mirror_tool('perl', '-S', 'gsWget.pl',
                         "--input-file=$etcdir/wget.url",
                         "--directory-prefix=$importdir");
    }

    # otherwise, there are no mirror configuration files
    else {
        die "Couldn't find the mirror configuration files in $etcdir\n";
    }

    return;
}

# _run_mirror_tool
#
# Runs an external command given as a list (program name followed by its
# arguments) using the list form of system(), and warns on STDERR if the
# command could not be started or finished with a non-zero status.
# Returns the value that system() returned.
sub _run_mirror_tool {
    my (@command) = @_;

    my $status = system(@command);
    if ($status == -1) {
        warn "mirror.pl: couldn't run @command: $!\n";
    }
    elsif ($status != 0) {
        warn "mirror.pl: @command finished with wait status $status\n";
    }

    return $status;
}
141
142
143
144
145
146
Note: See TracBrowser for help on using the repository browser.