Changeset 28250

Show
Ignore:
Timestamp:
10.09.2013 15:37:45 (6 years ago)
Author:
ak19
Message:

1. Jenny and I fixed an oversight in OAIDownload, thanks to Kathy's suggestion: an HTML page's contents used to be inspected for the file types specified for download with GetDocument ONLY if the file extension was htm(l). Now the page is also inspected when the response header specifies Content-Type text/html. 2. Deprecated util functions replaced with their FileUtils equivalents.

Location:
main/trunk/greenstone2/perllib
Files:
6 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/download.pm

    r17527 r28250  
    4444    if ($ENV{'GSDLCOLLECTDIR'}){ 
    4545    
    46     $coldownloadname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},  
     46    $coldownloadname = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},  
    4747                          "perllib","downloaders",  
    4848                          "${download_name}.pm"); 
     
    5050    } 
    5151    
    52     my $maindownloadname = &util::filename_cat($ENV{'GSDLHOME'}, 
     52    my $maindownloadname = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, 
    5353                           "perllib","downloaders",  
    5454                           "${download_name}.pm"); 
  • main/trunk/greenstone2/perllib/downloaders/MediaWikiDownload.pm

    r17207 r28250  
    128128    # check css files for HTML pages are downloaded as well 
    129129    $self->{'url'} =~ /http:\/\/([^\/]*)\//;         
    130     my $base_url = &util::filename_cat($hashGeneralOptions->{"cache_dir"}, $1);     
     130    my $base_url = &FileUtils::filenameConcatenate($hashGeneralOptions->{"cache_dir"}, $1);     
    131131    &check_file($base_url, $self); 
    132132         
  • main/trunk/greenstone2/perllib/downloaders/OAIDownload.pm

    r27012 r28250  
    112112    my $tmp_dir = "$ENV{GSDLHOME}/tmp"; 
    113113    if (! -e $tmp_dir) { 
    114     &util::mk_dir($tmp_dir); 
     114    &FileUtils::makeDirectory($tmp_dir); 
    115115    } 
    116116 
     
    171171    $self->getOAIRecords($aryIDs, $strOutputDir, $strBasURL, $self->{'max_records'}, $blnDownloadDoc); 
    172172 
    173 #    my $tmp_file = &util::filename_cat($ENV{'GSDLHOME'},"tmp","oai.tmp"); 
    174 #    &util::rm($tmp_file);  
     173#    my $tmp_file = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"tmp","oai.tmp"); 
     174#    &FileUtils::removeFiles($tmp_file);  
    175175 
    176176    return 1; 
     
    318318        } 
    319319 
     320 
     321        my $is_page_html = 0; 
     322        if($id_file_ext =~ m/^html?$/i) { 
     323            $is_page_html = 1; 
     324        } elsif ($doc_id_url) { # important: if no doc id has currently been typed into the url field, skip this block 
    320325         
    321         if ((!$primary_doc_match) && ($id_file_ext =~ m/^html?$/i)) { 
     326            # get the page and check the header's content-type and see if this is text/html 
     327            # if so, $is_page_html is true 
     328            # See http://superuser.com/questions/197009/wget-head-request 
     329             
     330            my $wget_opts3 = $self->getWgetOptions(); 
     331            my $wget_header_cmd = "$wget_opts3 -S --spider \"$doc_id_url\""; 
     332            my $page_content = $self->useWget($wget_header_cmd);             
     333             
     334            if($page_content && $page_content =~ m@Content-Type:\s*text/html@i) {                
     335                $is_page_html = 1; 
     336            } 
     337        } 
     338         
     339        if (!$primary_doc_match && $is_page_html) {      
     340         
    322341        # Download this doc if HTML, scan through it looking for a link 
    323342        # that does match get_doc_exts 
     
    387406 
    388407        if (-e $tmp_filename) { 
    389             &util::rm($tmp_filename);  
     408            &FileUtils::removeFiles($tmp_filename);  
    390409        } 
    391410        } 
     
    474493        ($strSubDirPath,$unused) = $self->dirFileSplit($strFileURL); 
    475494    
    476     &util::mk_all_dir($strSubDirPath); 
     495    &FileUtils::makeAllDirectories($strSubDirPath); 
    477496 
    478497    my $ds = &util::get_dirsep(); 
     
    559578 
    560579    #Open a temporary file to store OAI information, and store the information to the temp file 
    561     my $name = &util::filename_cat($ENV{GSDLHOME},"tmp","oai.tmp");  
     580    my $name = &FileUtils::filenameConcatenate($ENV{GSDLHOME},"tmp","oai.tmp");  
    562581 
    563582    open(*OAIOUT,"> $name"); 
  • main/trunk/greenstone2/perllib/downloaders/SRWDownload.pm

    r17230 r28250  
    127127    $host =~ s/http:\/\///; 
    128128    $strOutputDir  =~ s/"//g; #" 
    129     my $strOutputFile = &util::filename_cat($strOutputDir,$host,"$strFileName.xml"); 
     129    my $strOutputFile = &FileUtils::filenameConcatenate($strOutputDir,$host,"$strFileName.xml"); 
    130130  
    131131    # prepare subdirectory for record (if needed) 
    132132 
    133133    my ($strSubDirPath,$unused) = $self->dirFileSplit($strOutputFile); 
    134     &util::mk_all_dir($strSubDirPath); 
     134    &FileUtils::makeAllDirectories($strSubDirPath); 
    135135 
    136136    print STDERR "Saving records to \"$strOutputFile\"\n"; 
  • main/trunk/greenstone2/perllib/downloaders/WgetDownload.pm

    r17840 r28250  
    238238    } 
    239239 
    240     my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget"); 
     240    my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget"); 
    241241    $command = "\"$wget_file_path\" $cmdWget"; 
    242242    #print STDOUT "Command is: $command\n"; 
     
    387387    } 
    388388 
    389     my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget"); 
     389    my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget"); 
    390390    my $command = "\"$wget_file_path\" $cmdWget"; 
    391391    #print STDOUT "Command is: $command\n"; 
     
    538538 
    5395391; 
     540 
  • main/trunk/greenstone2/perllib/downloaders/Z3950Download.pm

    r24386 r28250  
    9797    $self->{'url'} = $self->{'host'}.":".$self->{'port'}; 
    9898 
    99     $self->{'yaz'} = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "yaz-client"); 
     99    $self->{'yaz'} = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "yaz-client"); 
    100100     
    101101    return bless $self, $class; 
     
    278278    $strOutputDir  =~ s/"//g; #" 
    279279 
    280     my $strOutputFile = &util::filename_cat($strOutputDir,$self->{'host'},"$strFileName.marc"); 
     280    my $strOutputFile = &FileUtils::filenameConcatenate($strOutputDir,$self->{'host'},"$strFileName.marc"); 
    281281     # prepare subdirectory for record (if needed) 
    282282    my ($strSubDirPath,$unused) = $self->dirFileSplit($strOutputFile); 
    283283  
    284     &util::mk_all_dir($strSubDirPath); 
     284    &FileUtils::makeAllDirectories($strSubDirPath); 
    285285   
    286286    print STDERR "Saving records to \"$strOutputFile\"\n";