Changeset 2671
- Timestamp: 2001-07-27T09:17:04+12:00 (23 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/grab_collection.pl
r2668 r2671
   33    33
   34    34   # This is where we start our mirroring
   35       - $address = 'http://ginkgo.cisti.nrc.ca:8080/cgi-bin/library?a=p&p=about&c=envl&u=1';
         35 + $address = 'http://nowhere.com/cgi-bin/library?a=p&p=about&c=demo&u=1';
   36    36
   37    37
    …     …
  125   125   $linknumber = 0; # used to name/number the dl-ed html files
  126   126
  127       - #my $failed = 0;
  128       - #while ($failed == 0)
  129       - #{
  130       - #if ($linknumber % $dir_entries == 0)
  131       - #{
  132       - #if (!((-e $outputdir.$linknumber)&&(-d $outputdir.$linknumber)))
  133       - #{
  134       - #$failed++;
  135       - #mkdir($outputdir.$linknumber, 0777) or print " ** Cannot create ",$outputdir.$linknumber, "!: $!\n";
  136       - #}
  137       - #$numberdir = $linknumber;
  138       - #}
  139       -
  140       - #$check_file = $outputdir.$numberdir."/".$linknumber.".html";
  141       - #if ((-e $check_file)&&($failed == 0))
  142       - #{
  143       - #$linknumber++;
  144       - #}
  145       - #else
  146       - #{
  147       - #$failed++;
        127 + my $failed = 0;
        128 + while ($failed == 0)
        129 + {
        130 + if ($linknumber % $dir_entries == 0)
        131 + {
        132 + if (!((-e $outputdir.$linknumber)&&(-d $outputdir.$linknumber)))
        133 + {
        134 + $failed++;
        135 + mkdir($outputdir.$linknumber, 0777) or print " ** Cannot create ",$outputdir.$linknumber, "!: $!\n";
        136 + }
        137 + $numberdir = $linknumber;
        138 + }
        139 +
        140 + $check_file = $outputdir.$numberdir."/".$linknumber.".html";
        141 + if ((-e $check_file)&&($failed == 0))
        142 + {
        143 + $linknumber++;
        144 + }
        145 + else
        146 + {
        147 + $failed++;
  148   148   # I'm subtracting 1 from the starting link,
  149   149   # just in case it only loaded half the page ;^)
  150       - #if($linknumber>0)
  151       - #{
  152       - #$linknumber--;
  153       - #}
  154       - #print " Will start downloading at number $linknumber \n";
  155       - #}
  156       - #}
        150 + if($linknumber>0)
        151 + {
        152 + $linknumber--;
        153 + }
        154 + print " Will start downloading at number $linknumber \n";
        155 + }
        156 + }
  157   157
  158   158   # if we're starting from scratch, then we might as well nuke the links file
    …     …
  179   179   # if we're NOT starting from scratch, then read in old links from links text file
  180   180   # and grab the old image-links as well...
  181       - #if ($linknumber != 0)
  182       - #{
        181 + if ($linknumber != 0)
        182 + {
  183   183   # load the old links from links.txt, if it doesn't exist, then give up :(
  184       - #my $this = "";
  185       - #my $that = "";
  186       - #open (CHECK, "links.txt") or die " ** Cannot find/open links.txt file!: $! **\n";
  187       - #while(eof CHECK == 0)
  188       - #{
  189       - #while($this ne "\n")
  190       - #{
  191       - #read CHECK, $this ,1;
  192       - #$that = $that.$this;
  193       - #}
  194       - #$linkz_list[$linkz_pointer] = $that;
        184 + my $this = "";
        185 + my $that = "";
        186 + open (CHECK, "links.txt") or die " ** Cannot find/open links.txt file!: $! **\n";
        187 + while(eof CHECK == 0)
        188 + {
        189 + while($this ne "\n")
        190 + {
        191 + read CHECK, $this ,1;
        192 + $that = $that.$this;
        193 + }
        194 + $linkz_list[$linkz_pointer] = $that;
  195   195
  196       - #for my $search(0 .. (length($that) - 3))
  197       - #{
  198       - #if((substr($that, $search, 3) eq '?a=')||(substr($that, $search, 3) eq '&a='))
  199       - #{
  200       - #$short_linkz_list[$linkz_pointer] = substr($that, $search);
  201       - #last;
  202       - #}
  203       - #}
  204       - #$linkz_pointer++;
  205       - #$that = ""; $this = "";
  206       - #}
  207       - #close(CHECK);
  208       - #print "- I found ",($#linkz_list + 1)," links in links.txt -\n";
        196 + for my $search(0 .. (length($that) - 3))
        197 + {
        198 + if((substr($that, $search, 3) eq '?a=')||(substr($that, $search, 3) eq '&a='))
        199 + {
        200 + $short_linkz_list[$linkz_pointer] = substr($that, $search);
        201 + last;
        202 + }
        203 + }
        204 + $linkz_pointer++;
        205 + $that = ""; $this = "";
        206 + }
        207 + close(CHECK);
        208 + print "- I found ",($#linkz_list + 1)," links in links.txt -\n";
  209   209
  210   210   #make sure that we start dl-ing the correct first page
  211       - #$address = $linkz_list[$linknumber];
        211 + $address = $linkz_list[$linknumber];
  212   212
  213   213   # load the old image links from image.txt (if it doesn't exist, no big deal ;)
  214       - #my $im_this = "";
  215       - #my $im_that = "";
  216       - #open (IMAGES, "images.txt") || print " ** Cannot find/open images.txt file! : $! **\n";
  217       - #while(eof IMAGES == 0)
  218       - #{
  219       - #while($im_this ne "\n")
  220       - #{
  221       - #read IMAGES, $im_this ,1;
  222       - #$im_that = $im_that.$im_this;
  223       - #}
  224       - #$image_list[$image_pointer] = $im_that;
  225       - #$image_pointer++;
  226       - #$im_that = ""; $im_this = "";
  227       - #}
  228       - #close(IMAGES);
  229       - #print "- I found ",($#image_list + 1)," picture-links in images.txt -\n";
        214 + my $im_this = "";
        215 + my $im_that = "";
        216 + open (IMAGES, "images.txt") || print " ** Cannot find/open images.txt file! : $! **\n";
        217 + while(eof IMAGES == 0)
        218 + {
        219 + while($im_this ne "\n")
        220 + {
        221 + read IMAGES, $im_this ,1;
        222 + $im_that = $im_that.$im_this;
        223 + }
        224 + $image_list[$image_pointer] = $im_that;
        225 + $image_pointer++;
        226 + $im_that = ""; $im_this = "";
        227 + }
        228 + close(IMAGES);
        229 + print "- I found ",($#image_list + 1)," picture-links in images.txt -\n";
  230   230
  231   231   #..and last but not least, load any image_dirs from image_dirs.txt
  232   232   # again, if its not there, no big deal :)
  233       - #my $imd_this = "";
  234       - #my $imd_that = "";
  235       - #open (IMAGE_DIR, "image_dirs.txt") || print " ** Cannot find/open image_dirs.txt file!: $! **\n";
  236       - #while(eof IMAGE_DIR == 0)
  237       - #{
  238       - #while($imd_this ne "\n")
  239       - #{
  240       - #read IMAGE_DIR, $imd_this ,1;
  241       - #$imd_that = $imd_that.$imd_this;
  242       - #}
  243       - #$image_dirs_list[$image_dirs_pointer] = $imd_that;
  244       - #$image_dirs_pointer++;
  245       - #$imd_that = ""; $imd_this = "";
  246       - #}
  247       - #close(IMAGE_DIR);
  248       - #print "- I found ",($#image_dirs_list + 1)," picture directories in image_dirs.txt -\n";
  249       - #}
        233 + my $imd_this = "";
        234 + my $imd_that = "";
        235 + open (IMAGE_DIR, "image_dirs.txt") || print " ** Cannot find/open image_dirs.txt file!: $! **\n";
        236 + while(eof IMAGE_DIR == 0)
        237 + {
        238 + while($imd_this ne "\n")
        239 + {
        240 + read IMAGE_DIR, $imd_this ,1;
        241 + $imd_that = $imd_that.$imd_this;
        242 + }
        243 + $image_dirs_list[$image_dirs_pointer] = $imd_that;
        244 + $image_dirs_pointer++;
        245 + $imd_that = ""; $imd_this = "";
        246 + }
        247 + close(IMAGE_DIR);
        248 + print "- I found ",($#image_dirs_list + 1)," picture directories in image_dirs.txt -\n";
        249 + }
  250   250
  251   251   # Just keep going till we can find no more new links
    …     …
  291   291   }
  292   292
        293 + # ignore mailto urls
        294 + if ($data[$i] !~ /mailto:/i) {
  293   295
  294       - #----------- the link is NOT an image ----------------
  295       - if ($its_an_image == 0)
  296       - {
  297       - # &its_a_link($temp[1], $outputdir);
  298       - &its_a_link($data[$i], $outputdir);
  299       - }
  300       -
  301       - #----------- the link IS an image ----------------
  302       - if ($its_an_image != 0)
  303       - {
  304       - # &its_an_image($temp[1], $finaldir);
  305       - &its_an_image($data[$i], $finaldir);
        296 + #----------- the link is NOT an image ----------------
        297 + if ($its_an_image == 0)
        298 + {
        299 + &its_a_link($data[$i], $outputdir);
        300 + }
        301 +
        302 + #----------- the link IS an image ----------------
        303 + if ($its_an_image != 0)
        304 + {
        305 + &its_an_image($data[$i], $finaldir);
        306 + }
  306   307   }
  307   308   }
Note: See TracChangeset for help on using the changeset viewer.