Changeset 1709
- Timestamp:
- 2000-11-28T15:59:09+13:00 (23 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/build
r1678 r1709 144 144 $download_dir =~ s/\s+$//; 145 145 146 if ($download_dir =~ /^http:\/\//) { 147 # http download 148 149 } elsif ($download_dir =~ /^ftp:\/\//) { 150 # ftp download 151 146 if ($download_dir =~ /^(http|ftp):\/\//) { 147 # use wget to mirror http or ftp urls 148 # options used are: 149 # -P = the directory to download documents to 150 # -np = don't ascend to parent directories. this means that only documents 151 # that live in the same directory or below on the same server as 152 # the given url will be downloaded 153 # -nv = not too verbose 154 # -r = recursively mirror 155 # -N = use time-stamping to see if an up-to-date local copy of each 156 # file already exists. this may be useful if wget fails and 157 # is restarted 158 # -l inf = infinite recursion depth 159 # -R "*\?*" = don't download cgi based urls 160 # -o = the output file to write download status to (only used if the -out 161 # option was given to build) 162 my $download_cmd = "perl -S gsWget.pl -P \"$importdir\" -np -nv"; 163 $download_cmd .= " -r -N -l inf -R \"*\?*\""; 164 $download_cmd .= " -o \"$outfile.download\"" if $use_out; 165 $download_cmd .= " \"$download_dir\""; 166 system ($download_cmd); 167 168 # note that wget obeys the robot rules. this means that it will have 169 # downloaded a robots.txt file if one was present. since it's unlikely 170 # anyone really wants to include it in a collection we'll delete it. 171 # robots.txt shouldn't be more than two directories deep (I think it will 172 # always be exactly two deep but will look for it in the top directory too) 173 # so that's as far as we'll go looking for it. 
174 if (opendir (DIR, $importdir)) { 175 my @files = readdir DIR; 176 closedir DIR; 177 foreach my $file (@files) { 178 next if $file =~ /^\.\.?$/; 179 if ($file =~ /^robots\.txt$/i) { 180 &util::rm (&util::filename_cat ($importdir, $file)); 181 last; 182 } else { 183 $file = &util::filename_cat ($importdir, $file); 184 if (-d $file) { 185 if (opendir (DIR, $file)) { 186 my @files2 = readdir DIR; 187 closedir DIR; 188 foreach my $file2 (@files2) { 189 if ($file2 =~ /^robots\.txt$/i) { 190 &util::rm (&util::filename_cat ($file, $file2)); 191 last; 192 } 193 } 194 } 195 } 196 } 197 } 198 } 199 200 # if using output directory append the file download output to it 201 &append_file ($out, "$outfile.download"); 202 152 203 } else { 153 204 # we assume anything not beginning with http:// or ftp://
Note:
See TracChangeset
for help on using the changeset viewer.