source: gsdl/trunk/bin/script/filecopy.pl@ 14119

Last change on this file since 14119 was 2495, checked in by sjboddie, 23 years ago

Fixed a bug (again) that prevented the collector from being able to build
a collection where the input was type file:// and pointed to a single
document rather than a directory. This bug was fixed some time ago but
the fix got lost when I made some changes to prevent possible infinite
recursion problems.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.4 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# filecopy.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will "download" the specified files/directories
30
31BEGIN {
32 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
33 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
34}
35
36use util;
37use parsargv;
38use File::stat;
39use File::Basename;
40use FileHandle;
41
42sub print_usage {
43 print STDERR "\n";
44 print STDERR "filecopy.pl: Recursively copies files into a collections import directory.\n\n";
45 print STDERR "\n usage: $0 [options] [directories] collection-name\n\n";
46
47 print STDERR " options:\n";
48 print STDERR " -follow_links Follow symbolic links when recursing directory\n";
49 print STDERR " structure\n";
50 print STDERR " -collectdir directory Collection directory (defaults to " .
51 &util::filename_cat ($ENV{'GSDLHOME'}, "collect") . ")\n";
52 print STDERR " -out Filename or handle to print output status to.\n";
53 print STDERR " The default is STDERR\n\n";
54}
55
56sub get_file_list {
57 my $dirhash = shift @_;
58 my $filehash = shift @_;
59 my $dirname = shift @_;
60
61 my $full_importname
62 = &util::filename_cat($collectdir, $dirname, "import");
63
64 # check for .kill file
65 if (-e &util::filename_cat($collectdir, $dirname, ".kill")) {
66 print $out "filecopy.pl killed by .kill file\n";
67 die "\n";
68 }
69
70 foreach my $file (@_) {
71
72 $file =~ s/^\"//;
73 $file =~ s/\"$//;
74
75 if (!$follow_links && -l $file) {
76 # do nothing as we don't want to follow symbolic links
77
78 } elsif (-d $file) {
79 my $dst_dir = &get_dst_dir ($full_importname, $file);
80 # add this directory to the list to be created
81 $dirhash->{$dst_dir} = 1;
82
83 # read in dir
84 if (!opendir (DIR, $file)) {
85 print $out "Error: Could not open directory $file\n";
86 } else {
87 my @sub_files = grep (!/^\.\.?$/, readdir (DIR));
88 closedir DIR;
89 map { $_ = &util::filename_cat($file, $_); } @sub_files;
90 &get_file_list($dirhash, $filehash, $dirname, @sub_files);
91 }
92
93 } else {
94 my $dst_file = &get_dst_dir ($full_importname, $file);
95
96 # make sure files directory is included in dirhash
97 $dirhash->{File::Basename::dirname($dst_file)} = 1;
98
99 if (-e $dst_file) {
100 # if destination file exists already we'll only copy it if
101 # the source file is newer
102 my $src_stat = stat($file);
103 my $dst_stat = stat($dst_file);
104 $filehash->{$file} = $dst_file if ($src_stat->mtime > $dst_stat->mtime);
105 } else {
106 $filehash->{$file} = $dst_file;
107 }
108 }
109 }
110}
111
112
113sub main {
114
115 if (!parsargv::parse(\@ARGV,
116 'follow_links', \$follow_links,
117 'collectdir/.*/', \$collectdir,
118 'out/.*/STDERR', \$out)) {
119 &print_usage();
120 die "\n";
121 }
122
123 if ($collectdir !~ /\w/) {
124 $collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");
125 }
126
127 my $collection = pop @ARGV;
128
129 my $close_out = 0;
130 if ($out !~ /^(STDERR|STDOUT)$/i) {
131 open (OUT, ">$out") || die "Couldn't open output file $out\n";
132 $out = OUT;
133 $close_out = 1;
134 }
135 $out->autoflush(1);
136
137 # first compile a list of all the files we want to copy (we do it this
138 # way rather than simply copying the files as we recurse the directory
139 # structure to avoid nasty infinite recursion if the directory we're
140 # copying to happens to be a subdirectory of one of the directories
141 # we're copying from)
142 my $dirhash = {};
143 my $filehash = {};
144 &get_file_list($dirhash, $filehash, $collection, @ARGV);
145
146 # create all the required destination directories
147 my $count = 0;
148 foreach my $dir (keys %$dirhash) {
149 # check for .kill file
150 if (($count ++ % 20 == 0) &&
151 (-e &util::filename_cat($collectdir, $collection, ".kill"))) {
152 print $out "filecopy.pl killed by .kill file\n";
153 die "\n";
154 }
155 &util::mk_all_dir($dir);
156 }
157
158 # copy all the files
159 foreach my $file (keys %$filehash) {
160 # check for .kill file
161 if (($count ++ % 20 == 0) &&
162 (-e &util::filename_cat($collectdir, $collection, ".kill"))) {
163 print $out "filecopy.pl killed by .kill file\n";
164 die "\n";
165 }
166 print $out "copying $file --> $filehash->{$file}\n";
167 &util::cp($file, $filehash->{$file});
168 }
169
170 close OUT if $close_out;
171 return 0;
172}
173
174sub get_dst_dir {
175 my ($full_importname, $dir) = @_;
176
177 if ($ENV{'GSDLOS'} eq "windows") {
178 # don't want windows filenames like c:\gsdl\...\import\c:\dir
179 $dir =~ s/^[a-z]:[\\\/]//i;
180 }
181 return &util::filename_cat($full_importname, $dir);
182}
183
184&main();
Note: See TracBrowser for help on using the repository browser.