Changeset 2671


Ignore:
Timestamp:
2001-07-27T09:17:04+12:00 (23 years ago)
Author:
sjboddie
Message:

*** empty log message ***

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/grab_collection.pl

    r2668 r2671  
    3333
    3434# This is where we start our mirroring
    35 $address = 'http://ginkgo.cisti.nrc.ca:8080/cgi-bin/library?a=p&p=about&c=envl&u=1';
     35$address = 'http://nowhere.com/cgi-bin/library?a=p&p=about&c=demo&u=1';
    3636
    3737
     
    125125$linknumber    = 0;                    # used to name/number the dl-ed html files
    126126
    127 #my $failed     = 0;
    128 #while ($failed == 0)
    129 #{
    130 #    if ($linknumber % $dir_entries == 0)
    131 #   {
    132 #       if (!((-e $outputdir.$linknumber)&&(-d $outputdir.$linknumber)))
    133 #       {
    134 #       $failed++;
    135 #       mkdir($outputdir.$linknumber, 0777) or print " ** Cannot create ",$outputdir.$linknumber, "!: $!\n";
    136 #       }
    137 #       $numberdir = $linknumber;
    138 #   }
    139 
    140 #    $check_file = $outputdir.$numberdir."/".$linknumber.".html";
    141 #    if ((-e $check_file)&&($failed == 0))
    142 #    {
    143 #   $linknumber++;
    144 #    }
    145 #    else
    146 #    {
    147 #   $failed++;
     127my $failed     = 0;
     128while ($failed == 0)
     129{
     130    if ($linknumber % $dir_entries == 0)
     131    {
     132        if (!((-e $outputdir.$linknumber)&&(-d $outputdir.$linknumber)))
     133        {
     134        $failed++;
     135        mkdir($outputdir.$linknumber, 0777) or print " ** Cannot create ",$outputdir.$linknumber, "!: $!\n";
     136        }
     137        $numberdir = $linknumber;
     138    }
     139
     140    $check_file = $outputdir.$numberdir."/".$linknumber.".html";
     141    if ((-e $check_file)&&($failed == 0))
     142    {
     143    $linknumber++;
     144    }
     145    else
     146    {
     147    $failed++;
    148148    # I'm subtracting 1 from the starting link,
    149149    # just in case it only loaded half the page ;^)
    150 #   if($linknumber>0)
    151 #   {
    152 #       $linknumber--;
    153 #   }
    154 #   print " Will start downloading at number $linknumber \n";
    155 #    }
    156 #}
     150    if($linknumber>0)
     151    {
     152        $linknumber--;
     153    }
     154    print " Will start downloading at number $linknumber \n";
     155    }
     156}
    157157
    158158# if we're starting from scratch, then we might as well nuke the links file
     
    179179# if we're NOT starting from scratch, then read in old links from links text file
    180180# and grab the old image-links as well...
    181 #if ($linknumber != 0)
    182 #{
     181if ($linknumber != 0)
     182{
    183183    # load the old links from links.txt, if it doesn't exist, then give up :(
    184 #    my $this = "";
    185 #    my $that = "";
    186 #    open (CHECK, "links.txt") or die " ** Cannot find/open links.txt file!: $! **\n";
    187 #    while(eof CHECK == 0)
    188 #    {
    189 #   while($this ne "\n")
    190 #   {
    191 #       read CHECK, $this ,1;
    192 #       $that = $that.$this;   
    193 #   }
    194 #   $linkz_list[$linkz_pointer] = $that;
     184    my $this = "";
     185    my $that = "";
     186    open (CHECK, "links.txt") or die " ** Cannot find/open links.txt file!: $! **\n";
     187    while(eof CHECK == 0)
     188    {
     189    while($this ne "\n")
     190    {
     191        read CHECK, $this ,1;
     192        $that = $that.$this;   
     193    }
     194    $linkz_list[$linkz_pointer] = $that;
    195195   
    196 #   for my $search(0 .. (length($that) - 3))
    197 #   {
    198 #       if((substr($that, $search, 3) eq '?a=')||(substr($that, $search, 3) eq '&a='))
    199 #       {
    200 #       $short_linkz_list[$linkz_pointer] = substr($that, $search);
    201 #       last;
    202 #       }
    203 #   }
    204 #   $linkz_pointer++;
    205 #   $that = ""; $this = "";
    206 #    }
    207 #    close(CHECK);
    208 #    print "- I found ",($#linkz_list + 1)," links in links.txt -\n";
     196    for my $search(0 .. (length($that) - 3))
     197    {
     198        if((substr($that, $search, 3) eq '?a=')||(substr($that, $search, 3) eq '&a='))
     199        {
     200        $short_linkz_list[$linkz_pointer] = substr($that, $search);
     201        last;
     202        }
     203    }
     204    $linkz_pointer++;
     205    $that = ""; $this = "";
     206    }
     207    close(CHECK);
     208    print "- I found ",($#linkz_list + 1)," links in links.txt -\n";
    209209   
    210210    #make sure that we start dl-ing the correct first page
    211 #    $address = $linkz_list[$linknumber];
     211    $address = $linkz_list[$linknumber];
    212212
    213213    # load the old image links from image.txt (if it doesn't exist, no big deal ;)
    214 #    my $im_this = "";
    215 #    my $im_that = "";
    216 #    open (IMAGES, "images.txt") || print " ** Cannot find/open images.txt file! : $! **\n";
    217 #    while(eof IMAGES == 0)
    218 #    {
    219 #   while($im_this ne "\n")
    220 #   {
    221 #       read IMAGES, $im_this ,1;
    222 #       $im_that = $im_that.$im_this;   
    223 #   }
    224 #   $image_list[$image_pointer] = $im_that;
    225 #   $image_pointer++;
    226 #   $im_that = ""; $im_this = "";
    227 #    }
    228 #    close(IMAGES);
    229 #    print "- I found ",($#image_list + 1)," picture-links in images.txt -\n";
     214    my $im_this = "";
     215    my $im_that = "";
     216    open (IMAGES, "images.txt") || print " ** Cannot find/open images.txt file! : $! **\n";
     217    while(eof IMAGES == 0)
     218    {
     219    while($im_this ne "\n")
     220    {
     221        read IMAGES, $im_this ,1;
     222        $im_that = $im_that.$im_this;   
     223    }
     224    $image_list[$image_pointer] = $im_that;
     225    $image_pointer++;
     226    $im_that = ""; $im_this = "";
     227    }
     228    close(IMAGES);
     229    print "- I found ",($#image_list + 1)," picture-links in images.txt -\n";
    230230
    231231    #..and last but not least, load any image_dirs from image_dirs.txt
    232232    # again, if its not there, no big deal :)
    233 #    my $imd_this = "";
    234 #    my $imd_that = "";
    235 #    open (IMAGE_DIR, "image_dirs.txt") || print " ** Cannot find/open image_dirs.txt file!: $! **\n";
    236 #    while(eof IMAGE_DIR == 0)
    237 #    {
    238 #   while($imd_this ne "\n")
    239 #   {
    240 #       read IMAGE_DIR, $imd_this ,1;
    241 #       $imd_that = $imd_that.$imd_this;   
    242 #   }
    243 #   $image_dirs_list[$image_dirs_pointer] = $imd_that;
    244 #   $image_dirs_pointer++;
    245 #   $imd_that = ""; $imd_this = "";
    246 #    }
    247 #    close(IMAGE_DIR);
    248 #    print "- I found ",($#image_dirs_list + 1)," picture directories in image_dirs.txt -\n";
    249 #}
     233    my $imd_this = "";
     234    my $imd_that = "";
     235    open (IMAGE_DIR, "image_dirs.txt") || print " ** Cannot find/open image_dirs.txt file!: $! **\n";
     236    while(eof IMAGE_DIR == 0)
     237    {
     238    while($imd_this ne "\n")
     239    {
     240        read IMAGE_DIR, $imd_this ,1;
     241        $imd_that = $imd_that.$imd_this;   
     242    }
     243    $image_dirs_list[$image_dirs_pointer] = $imd_that;
     244    $image_dirs_pointer++;
     245    $imd_that = ""; $imd_this = "";
     246    }
     247    close(IMAGE_DIR);
     248    print "- I found ",($#image_dirs_list + 1)," picture directories in image_dirs.txt -\n";
     249}
    250250
    251251#  Just keep going till we can find no more new links
     
    291291    }
    292292
     293    # ignore mailto urls
     294    if ($data[$i] !~ /mailto:/i) {
    293295   
    294         #----------- the link is NOT an image ----------------     
    295     if ($its_an_image == 0)
    296     {
    297 #       &its_a_link($temp[1], $outputdir);
    298         &its_a_link($data[$i], $outputdir);
    299     }
    300 
    301         #----------- the link IS an image ----------------
    302     if ($its_an_image != 0)
    303     {
    304 #       &its_an_image($temp[1], $finaldir);
    305         &its_an_image($data[$i], $finaldir);
     296        #----------- the link is NOT an image ----------------     
     297        if ($its_an_image == 0)
     298        {
     299        &its_a_link($data[$i], $outputdir);
     300        }
     301
     302        #----------- the link IS an image ----------------
     303        if ($its_an_image != 0)
     304        {
     305        &its_an_image($data[$i], $finaldir);
     306        }
    306307    }
    307308    }   
Note: See TracChangeset for help on using the changeset viewer.