source: trunk/gsdl/bin/script/update.pl@ 1533

Last change on this file since 1533 was 1533, checked in by paynter, 24 years ago

Added support for GNU Wget to web site mirroring tools (previously, you had to use w3mirror, which is very hard to install).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.0 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# update.pl
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 1999 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29
30# This program updates any collections that are based on mirrors of
31# web sites after a certain interval. It first checks that the
32# collection is a mirrored collection and whether it is time to
33# update the collection. If so, it updates the mirror with
34# mirror.pl; then imports the collection with import.pl; then
35# builds the collection with buildcol.pl; then replaces the
36# existing index directory with the new building directory.
37# The etc/mirror.log file stores the STDOUT output.
38
39
# Refuse to start unless the Greenstone environment variables are set, then
# put the Greenstone Perl library directories at the front of @INC so the
# "use" statements that follow can be resolved.
BEGIN {
    foreach my $required ('GSDLHOME', 'GSDLOS') {
        die "$required not set\n" unless defined $ENV{$required};
    }

    my $perllib = "$ENV{'GSDLHOME'}/perllib";

    # One unshift with the directories listed in their final order gives
    # the same @INC as unshifting perllib, plugins and classify in turn.
    unshift (@INC, "$perllib/classify", "$perllib/plugins", $perllib);
}
47
48use strict;
49use arcinfo;
50use colcfg;
51use plugin;
52use docprint;
53use util;
54use parsargv;
55
# Write the command-line synopsis and the supported options to STDERR.
sub print_usage {
    my @usage_lines = (
        "\n usage: $0 [options] collection-name\n\n",
        " options:\n",
        " -verbosity number 0=none, 3=lots\n",
        " -importdir directory Where to place the mirrored material\n",
        " -archivedir directory Where the converted material ends up\n",
    );

    # Joined explicitly so the output does not depend on the $, separator.
    print STDERR join('', @usage_lines);
}
63
64
# Script entry point: all of the work happens in main().
main();
66
# Update a mirrored collection if its mirror interval has elapsed.
#
# Takes no arguments; options and the collection name are read from @ARGV.
# Steps:
#   1. parse the options and read etc/collect.cfg for the collection;
#   2. stop (exit) if the configuration has no "mirror" entry, or if the
#      log file etc/mirror.log is younger than the configured interval;
#   3. run mirror.pl, import.pl and buildcol.pl, writing their STDOUT to
#      etc/mirror.log (re-creating the log also records the time of this
#      mirror, because its modification time is what step 2 checks);
#   4. replace the index directory with the freshly built building
#      directory.
#
# Dies if the arguments or the configuration are unusable, if the log file
# cannot be opened, or if the import/build stage fails -- in the last case
# the existing index directory is left untouched.
sub main {
    my ($verbosity, $importdir, $archivedir);

    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'importdir/.*/', \$importdir,
                         'archivedir/.*/', \$archivedir)) {
        &print_usage();
        die "\n";
    }

    # Guard the string comparisons below in case an option was left unset.
    $importdir  = "" unless defined $importdir;
    $archivedir = "" unless defined $archivedir;

    # get and check the collection name
    my $collection = &util::use_collection(@ARGV);
    if (!defined $collection || $collection eq "") {
        &print_usage();
        die "\n";
    }

    # check the configuration file for options
    my $configfilename =
        &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc/collect.cfg");
    die "Couldn't find the configuration file $configfilename\n"
        unless -e $configfilename;

    my $collectcfg = &colcfg::read_collect_cfg($configfilename);
    # Command-line options take precedence over the configuration file.
    if (defined $collectcfg->{'importdir'} && $importdir eq "") {
        $importdir = $collectcfg->{'importdir'};
    }
    if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
        $archivedir = $collectcfg->{'archivedir'};
    }
    my $mirror = $collectcfg->{'mirror'};

    # fill in the default import and archives directories if none
    # were supplied, turn all \ into / and remove trailing /
    $importdir = "$ENV{'GSDLCOLLECTDIR'}/import" if $importdir eq "";
    $importdir =~ s/[\\\/]+/\//g;
    $importdir =~ s/\/$//;
    $archivedir = "$ENV{'GSDLCOLLECTDIR'}/archives" if $archivedir eq "";
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;

    my $indexdir = "$ENV{'GSDLCOLLECTDIR'}/index";
    my $builddir = "$ENV{'GSDLCOLLECTDIR'}/building";
    my $etcdir   = "$ENV{'GSDLCOLLECTDIR'}/etc";
    my $logfile  = "$etcdir/mirror.log";

    # if there is no mirror information, we're all done
    if (!defined($mirror)) {
        print "No mirror command in $configfilename\n";
        exit;
    }

    # read the mirror interval; it must be a plain non-negative number,
    # because it is compared numerically with the elapsed days below
    my $malformed = "Malformed mirror information: use \"mirror interval N\"\n" .
                    "where N is the number of days between mirrors.\n";
    die $malformed
        unless (scalar(@$mirror) == 2) && ($mirror->[0] =~ /interval/);
    my $interval = $mirror->[1];
    die $malformed
        unless defined $interval && $interval =~ /^(?:\d+\.?\d*|\.\d+)$/;

    # make sure there is an import directory
    if (! -e $importdir) {
        &util::mk_dir($importdir);
    }

    print "archives directory: $archivedir\n";
    print " import directory: $importdir\n";
    print " etc directory: $etcdir\n";
    print " interval: $interval days\n";

    # how many days is it since the last mirror (the age of the log file)
    my $seconds = 0;
    if (-e $logfile) {
        my $now = time;
        my @stats = stat($logfile);
        my $then = $stats[9];    # modification time
        $seconds = $now - $then;
    }
    my $days = (($seconds / 3600) / 24);

    # Is it too soon to start mirroring?
    if (($seconds > 0) && ($interval > $days)) {
        printf "Mirror not started: only %.1f days have passed\n", $days;
        exit;
    }

    # Re-create the log.  Without a usable log there is no record of this
    # run, so failing to open it is fatal rather than silently ignored.
    open(my $wlog, '>', $logfile)
        or die "Couldn't open log file $logfile for writing: $!\n";

    # Mirror the remote site.  A failure here is logged by _run_logged but
    # is not fatal: the collection is still rebuilt from whatever material
    # is in the import directory.
    print $wlog "Starting mirror at " . time . "\n\n";
    _run_logged($wlog, "mirror.pl $collection");

    # Import the collection
    print $wlog "\n\nStarting import at " . time . "\n\n";
    if (_run_logged($wlog, "import.pl -removeold $collection") != 0) {
        _abort_update($wlog, "import.pl failed; the existing index was left in place");
    }

    # Build the collection
    print $wlog "\n\nStarting buildcol.pl at " . time . "\n\n";
    if (_run_logged($wlog, "buildcol.pl $collection") != 0) {
        _abort_update($wlog, "buildcol.pl failed; the existing index was left in place");
    }

    # Renaming the building directory to index
    print $wlog "\n\nRenaming building directory at " . time . "\n\n";
    if (! -d $builddir) {
        # Never move the live index away when there is nothing to replace it.
        _abort_update($wlog, "no building directory $builddir; the existing index was left in place");
    }
    if (-e $indexdir) {
        # Clear out a leftover from an earlier interrupted run first.
        &util::rm_r ("$indexdir.old") if -e "$indexdir.old";
        &util::mv($indexdir, "$indexdir.old");
        &util::mv($builddir, $indexdir);
        &util::rm_r ("$indexdir.old");
    } else {
        &util::mv($builddir, $indexdir);
    }

    close($wlog) or warn "Couldn't close log file $logfile: $!\n";
}

# Run $command through the shell, copy its STDOUT into the log handle $fh
# and return the raw wait status ($?): 0 on success, -1 if the command
# could not be started.  A non-zero status is also noted in the log.
sub _run_logged {
    my ($fh, $command) = @_;

    print $fh "Executing: $command\n";
    my $output = `$command`;
    my ($status, $errno) = ($?, "$!");    # save both before any further I/O
    print $fh $output if defined $output;

    if ($status == -1) {
        print $fh "\nERROR: could not run \"$command\": $errno\n";
    } elsif ($status != 0) {
        print $fh "\nERROR: \"$command\" exited with status " . ($status >> 8) .
                  " (signal " . ($status & 127) . ")\n";
    }
    return $status;
}

# Record $message in the log handle $fh, close the log and die with the
# same message.
sub _abort_update {
    my ($fh, $message) = @_;

    print $fh "\n\nUpdate aborted: $message\n";
    close($fh);
    die "Update aborted: $message\n";
}
192
193
194
195
196
Note: See TracBrowser for help on using the repository browser.