1 | #!/usr/bin/perl -w
|
---|
2 |
|
---|
3 | ###########################################################################
|
---|
4 | #
|
---|
5 | # mirror.pl
|
---|
6 | #
|
---|
7 | # A component of the Greenstone digital library software
|
---|
8 | # from the New Zealand Digital Library Project at the
|
---|
9 | # University of Waikato, New Zealand.
|
---|
10 | #
|
---|
11 | # Copyright (C) 1999 New Zealand Digital Library Project
|
---|
12 | #
|
---|
13 | # This program is free software; you can redistribute it and/or modify
|
---|
14 | # it under the terms of the GNU General Public License as published by
|
---|
15 | # the Free Software Foundation; either version 2 of the License, or
|
---|
16 | # (at your option) any later version.
|
---|
17 | #
|
---|
18 | # This program is distributed in the hope that it will be useful,
|
---|
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
21 | # GNU General Public License for more details.
|
---|
22 | #
|
---|
23 | # You should have received a copy of the GNU General Public License
|
---|
24 | # along with this program; if not, write to the Free Software
|
---|
25 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
26 | #
|
---|
27 | ###########################################################################
|
---|
28 |
|
---|
29 |
|
---|
30 | # This program uses w3mirror to mirror a web site. It looks for
|
---|
31 | # mirror program configuration files in etc, and if it finds them then
|
---|
32 | # it runs the mirroring software using that configuration, placing
|
---|
33 | # the mirror in the import directory.
|
---|
34 | #
|
---|
35 | # mirror.pl can use the w3mirror program or the wget program if they are
|
---|
36 | # installed.
|
---|
37 | #
|
---|
38 | # To use w3mirror, the configuration file must be in etc/w3mir.cfg.
|
---|
39 | # To use GNU wget, the configuration file (i.e. a wgetrc file) must
|
---|
40 | # be in etc/wget.cfg and a file of the URLs to read in etc/wget.url
|
---|
41 |
|
---|
BEGIN {
    # Refuse to start unless both Greenstone environment variables are
    # present; the library paths below are derived from GSDLHOME.
    foreach my $required (qw(GSDLHOME GSDLOS)) {
        die "$required not set\n" unless defined $ENV{$required};
    }

    # Put the Greenstone library directories at the front of the module
    # search path. The resulting search order is classify, plugins, perllib.
    my $perllib = "$ENV{'GSDLHOME'}/perllib";
    unshift (@INC, "$perllib/classify", "$perllib/plugins", $perllib);
}
|
---|
49 |
|
---|
50 | use arcinfo;
|
---|
51 | use colcfg;
|
---|
52 | use plugin;
|
---|
53 | use docprint;
|
---|
54 | use util;
|
---|
55 | use parsargv;
|
---|
56 |
|
---|
# print_usage
#
# Writes the command-line help text for this script to STDERR.
# Takes no arguments; callers use it for its output only.
sub print_usage {
    # One entry per chunk of output, emitted in order.
    my @help_text = (
        "\n",
        "mirror.pl: Uses w3mir or wget to sync a collections import data\n",
        " with a website.\n\n",
        " usage: $0 [options] collection-name\n\n",
        " options:\n",
        " -verbosity number 0=none, 3=lots\n",
        " -importdir directory Where to place the mirrored material\n",
    );

    foreach my $chunk (@help_text) {
        print STDERR $chunk;
    }
}
|
---|
66 |
|
---|
67 |
|
---|
# Script entry point: run the mirroring logic defined in main() below.
main();
|
---|
69 |
|
---|
# main
#
# Mirrors a remote web site into a collection's import directory.
#
#   1. Parses -verbosity and -importdir from @ARGV, then passes what is
#      left of @ARGV to util::use_collection to obtain the collection name.
#      (Presumably that call is also what sets GSDLCOLLECTDIR, which is
#      read below -- confirm in util.pm.)
#   2. Reads etc/collect.cfg, which must exist. Its 'importdir' entry is
#      used when -importdir was not given on the command line.
#   3. Falls back to $GSDLCOLLECTDIR/import, normalises the directory
#      separators and creates the directory if it does not exist.
#   4. Runs gsw3mir.pl when etc/w3mir.cfg exists; otherwise runs gsWget.pl
#      when both etc/wget.cfg and etc/wget.url exist.
#
# Dies (after printing the usage text) on bad options or a bad collection
# name. Also dies when collect.cfg or the mirror configuration files are
# missing, when the import directory cannot be entered, or when the mirror
# program cannot be started. A mirror program that starts but exits with a
# non-zero status only produces a warning.
sub main {
    # $verbosity is accepted on the command line but is not consulted
    # anywhere in this script.
    my ($verbosity, $importdir);

    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'importdir/.*/', \$importdir )) {
        print_usage();
        die "\n";
    }

    # get and check the collection name
    my $collection = &util::use_collection(@ARGV);
    if ($collection eq "") {
        print_usage();
        die "\n";
    }

    # get the etc directory
    my $etcdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc");

    # check the collection configuration file for options
    my $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'},
                                              "etc", "collect.cfg");
    if (! -e $configfilename) {
        die "Couldn't find the configuration file $configfilename\n";
    }
    my $collectcfg = &colcfg::read_collect_cfg ($configfilename);
    if (defined $collectcfg->{'importdir'} && $importdir eq "") {
        $importdir = $collectcfg->{'importdir'};
    }

    # fill in the default import directory if none was supplied,
    # turn all \ into / and remove a trailing /
    $importdir = "$ENV{'GSDLCOLLECTDIR'}/import" if $importdir eq "";
    $importdir =~ s/[\\\/]+/\//g;
    $importdir =~ s/\/$//;

    # make sure there is an import directory
    if (! -e $importdir) {
        &util::mk_dir($importdir);
    }

    # if w3mir.cfg exists,
    # then we are using w3mirror to mirror the remote site
    if (-e "$etcdir/w3mir.cfg") {
        # Run the mirror program from the import directory. Changing
        # directory here (instead of "cd ...;" in a shell string) lets a
        # failure stop the run, so the mirror is never written into
        # whatever directory the script happened to start in.
        chdir ($importdir)
            or die "Couldn't change to the import directory $importdir: $!\n";

        # NOTE: the program's standard output is no longer discarded; it
        # now goes to this script's STDOUT, as it does for wget below.
        _run_mirror_command ("perl", "-S", "gsw3mir.pl",
                             "-cfgfile", "$etcdir/w3mir.cfg");
    }

    # if wget.cfg and wget.url both exist,
    # then we are using GNU wget to mirror the remote site
    elsif ((-e "$etcdir/wget.cfg") && (-e "$etcdir/wget.url")) {
        $ENV{WGETRC} = "$etcdir/wget.cfg";
        _run_mirror_command ("perl", "-S", "gsWget.pl",
                             "--input-file=$etcdir/wget.url",
                             "--directory-prefix=$importdir");
    }

    # otherwise, there are no mirror configuration files
    else {
        die "Couldn't find the mirror configuration files in $etcdir\n";
    }
}

# _run_mirror_command
#
# Runs an external command given as a list (program followed by its
# arguments). The list form of system() bypasses the shell, so paths that
# contain spaces or shell metacharacters are passed through unchanged.
#
# Dies if the command could not be started. Warns on STDERR if it was
# killed by a signal or exited with a non-zero status.
sub _run_mirror_command {
    my (@command) = @_;

    my $wait_status = system (@command);
    if ($wait_status == -1) {
        die "Couldn't run '@command': $!\n";
    }

    if ($wait_status & 127) {
        warn "mirror.pl: '@command' was terminated by signal "
            . ($wait_status & 127) . "\n";
    }
    elsif ($wait_status != 0) {
        warn "mirror.pl: '@command' exited with status "
            . ($wait_status >> 8) . "\n";
    }
}
|
---|
141 |
|
---|
142 |
|
---|
143 |
|
---|
144 |
|
---|
145 |
|
---|
146 |
|
---|