#!/usr/bin/perl -w

###########################################################################
#
# urlcopy.pl --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# This program will download the specified urls (http:, ftp: and file:)
#
# usage: urlcopy.pl [urls] collection-name
#
# http: urls are fetched with w3mir.pl and ftp: urls with ncftp, both run
# from inside $GSDLHOME/collect/<collection-name>/import.  file: urls are
# handed to filedownload.pl together with the collection name.

BEGIN {
    # GSDLHOME locates both the Greenstone perl libraries and the bundled
    # CPAN tools, so nothing below can work without it.
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/lib");
    $ENV{'PATH'} = "$ENV{'GSDLHOME'}/perllib/cpan/bin:$ENV{'PATH'}";
}

use strict;
use warnings;

use util;
use File::Basename;
use Cwd ();

# Print a one-line usage summary to STDERR.
sub print_usage {
    print STDERR "\n  usage: $0 [urls] collection-name\n\n";
}

# Run an external command, optionally from inside another directory, and
# terminate the script if the command does not succeed.
#
#   $workdir  - directory to run the command in, or undef to stay put
#   @command  - program name followed by its arguments
#
# The command is started without a shell, so characters such as quotes,
# '$', ';' or backticks inside a url are passed through literally instead
# of being interpreted.  Returns only when the command exits with status 0;
# otherwise reports the failure on STDERR and exits non-zero.
sub _run_or_exit {
    my ($workdir, @command) = @_;

    # Human-readable form of what is being run, used in error messages only.
    my $shown = join(' ', @command);
    $shown = "cd $workdir; $shown" if defined $workdir;

    my $previous_dir;
    if (defined $workdir) {
        $previous_dir = Cwd::getcwd();
        # Refuse to continue in the wrong directory: the downloaders write
        # relative to the current directory.
        chdir($workdir)
            or die "Unable to change directory to $workdir: $!\n";
    }

    # The block form guarantees that no shell is involved, whatever the
    # number of arguments.
    my $status = system { $command[0] } @command;
    my $exec_error = "$!";

    # Restore the caller's directory so that later relative paths still
    # resolve as they did when the script started.
    if (defined $previous_dir) {
        chdir($previous_dir)
            or die "Unable to return to directory $previous_dir: $!\n";
    }

    return if $status == 0;

    print STDERR "An error was encountered executing: $shown\n";

    if ($status == -1) {
        # system() could not start the program at all.
        print STDERR "The command could not be started: $exec_error\n";
        exit(1);
    }
    if ($status & 127) {
        # Low bits carry the signal number; there is no exit code to forward.
        printf STDERR "The command was terminated by signal %d\n", $status & 127;
        exit(1);
    }

    # High byte carries the child's own exit code; pass it on unchanged.
    exit($status >> 8);
}

# Entry point: sort the url arguments by scheme and run the matching
# download tool for each group.  Exits non-zero as soon as one tool fails;
# returns 0 when every requested download command succeeded.
sub main {
    if (scalar(@ARGV) < 2) {
        print_usage();
        exit(1);
    }

    # The last argument is the collection name, everything before it is a url.
    my $dirname = pop(@ARGV);
    my $full_importname
        = util::filename_cat($ENV{'GSDLHOME'}, "collect", $dirname, "import");

    unless (-d $full_importname) {
        mkdir($full_importname, 0777)
            or die "Unable to create directory $full_importname: $!\n";
    }

    # split argv into 3 lists: http, ftp and file
    my (@http, @ftp, @file);
    foreach my $arg (@ARGV) {
        # Work on a copy so @ARGV itself is left untouched.
        my $url = $arg;

        # Drop one surrounding pair of double quotes, if present.
        $url =~ s/^\"//;
        $url =~ s/\"$//;

        if ($url =~ m/^http:/i) {
            push(@http, $url);
        }
        elsif ($url =~ m/^ftp:/i) {
            push(@ftp, $url);
        }
        elsif ($url =~ m/^file:/i) {
            push(@file, $url);
        }
        else {
            print STDERR "URL argument not supported: $url\n";
            print STDERR "Ignoring argument.\n";
        }
    }

    if (@http) {
        my $w3mircfg_filename
            = util::filename_cat($ENV{'GSDLHOME'}, "tmp", "$dirname.w3mir");

        # create cfg file
        open(my $cfg_out, '>', $w3mircfg_filename)
            or die "Unable to open $w3mircfg_filename: $!";

        print $cfg_out "Options: recurse\n\n";

        my $first = 1;
        foreach my $src_url (@http) {
            # Destination mirrors the url (minus its scheme) under import/.
            my $dst_dir = $src_url;
            $dst_dir =~ s/^http://i;
            $dst_dir = util::filename_cat($full_importname, $dst_dir);

            if ($src_url !~ m/\/$/) {
                # last name is a file => strip off filename
                $dst_dir = File::Basename::dirname($dst_dir);
            }

            # The first url is written as "URL:", every later one as
            # "Also-queue:".
            my $directive = $first ? "URL" : "Also-queue";
            print $cfg_out "$directive: $src_url $dst_dir\n";
            $first = 0;
        }

        print $cfg_out "\nFixup: run\n";

        # Buffered write errors only surface at close time.
        close($cfg_out)
            or die "Unable to write $w3mircfg_filename: $!";

        _run_or_exit($full_importname,
                     "w3mir.pl", "-cfgfile", $w3mircfg_filename);
    }

    if (@ftp) {
        my @patterns;
        foreach my $url (@ftp) {
            # Ensure exactly one slash between the url and the wildcard
            # (the url is not modified in place).
            my $pattern = $url;
            $pattern .= "/" unless $pattern =~ m/\/$/;
            push(@patterns, "$pattern*");
        }

        _run_or_exit($full_importname, "ncftp", @patterns);
    }

    if (@file) {
        # filedownload.pl takes the urls followed by the collection name and
        # is run from the current directory.
        _run_or_exit(undef, "filedownload.pl", @file, $dirname);
    }

    return 0;
}

main();