Changeset 28250


Ignore:
Timestamp:
09/10/13 15:37:45 (8 years ago)
Author:
ak19
Message:
  1. Thanks to Kathy's suggestion, Jenny and I fixed an oversight in OAIDownload: an HTML page's contents used to be scanned for links to the file types specified for download with GetDocument ONLY if the URL's file extension was htm(l). Now a page is also treated as HTML when its extension is not htm(l) but the response header specifies Content-Type text/html. 2. Replaced deprecated util functions with their FileUtils equivalents.
Location:
main/trunk/greenstone2/perllib
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/download.pm

    r17527 r28250  
    4444    if ($ENV{'GSDLCOLLECTDIR'}){
    4545   
    46     $coldownloadname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},
     46    $coldownloadname = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},
    4747                          "perllib","downloaders",
    4848                          "${download_name}.pm");
     
    5050    }
    5151   
    52     my $maindownloadname = &util::filename_cat($ENV{'GSDLHOME'},
     52    my $maindownloadname = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},
    5353                           "perllib","downloaders",
    5454                           "${download_name}.pm");
  • main/trunk/greenstone2/perllib/downloaders/MediaWikiDownload.pm

    r17207 r28250  
    128128    # check css files for HTML pages are downloaded as well
    129129    $self->{'url'} =~ /http:\/\/([^\/]*)\//;       
    130     my $base_url = &util::filename_cat($hashGeneralOptions->{"cache_dir"}, $1);   
     130    my $base_url = &FileUtils::filenameConcatenate($hashGeneralOptions->{"cache_dir"}, $1);   
    131131    &check_file($base_url, $self);
    132132       
  • main/trunk/greenstone2/perllib/downloaders/OAIDownload.pm

    r27012 r28250  
    112112    my $tmp_dir = "$ENV{GSDLHOME}/tmp";
    113113    if (! -e $tmp_dir) {
    114     &util::mk_dir($tmp_dir);
     114    &FileUtils::makeDirectory($tmp_dir);
    115115    }
    116116
     
    171171    $self->getOAIRecords($aryIDs, $strOutputDir, $strBasURL, $self->{'max_records'}, $blnDownloadDoc);
    172172
    173 #    my $tmp_file = &util::filename_cat($ENV{'GSDLHOME'},"tmp","oai.tmp");
    174 #    &util::rm($tmp_file);
     173#    my $tmp_file = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'},"tmp","oai.tmp");
     174#    &FileUtils::removeFiles($tmp_file);
    175175
    176176    return 1;
     
    318318        }
    319319
     320
     321        my $is_page_html = 0;
     322        if($id_file_ext =~ m/^html?$/i) {
     323            $is_page_html = 1;
     324        } elsif ($doc_id_url) { # important: if no doc id has currently been typed into the url field, skip this block
    320325       
    321         if ((!$primary_doc_match) && ($id_file_ext =~ m/^html?$/i)) {
     326            # get the page and check the header's content-type and see if this is text/html
     327            # if so, $is_page_html is true
     328            # See http://superuser.com/questions/197009/wget-head-request
     329           
     330            my $wget_opts3 = $self->getWgetOptions();
     331            my $wget_header_cmd = "$wget_opts3 -S --spider \"$doc_id_url\"";
     332            my $page_content = $self->useWget($wget_header_cmd);           
     333           
     334            if($page_content && $page_content =~ m@Content-Type:\s*text/html@i) {               
     335                $is_page_html = 1;
     336            }
     337        }
     338       
     339        if (!$primary_doc_match && $is_page_html) {     
     340       
    322341        # Download this doc if HTML, scan through it looking for a link
    323342        # that does match get_doc_exts
     
    387406
    388407        if (-e $tmp_filename) {
    389             &util::rm($tmp_filename);
     408            &FileUtils::removeFiles($tmp_filename);
    390409        }
    391410        }
     
    474493        ($strSubDirPath,$unused) = $self->dirFileSplit($strFileURL);
    475494   
    476     &util::mk_all_dir($strSubDirPath);
     495    &FileUtils::makeAllDirectories($strSubDirPath);
    477496
    478497    my $ds = &util::get_dirsep();
     
    559578
    560579    #Open a temporary file to store OAI information, and store the information to the temp file
    561     my $name = &util::filename_cat($ENV{GSDLHOME},"tmp","oai.tmp");
     580    my $name = &FileUtils::filenameConcatenate($ENV{GSDLHOME},"tmp","oai.tmp");
    562581
    563582    open(*OAIOUT,"> $name");
  • main/trunk/greenstone2/perllib/downloaders/SRWDownload.pm

    r17230 r28250  
    127127    $host =~ s/http:\/\///;
    128128    $strOutputDir  =~ s/"//g; #"
    129     my $strOutputFile = &util::filename_cat($strOutputDir,$host,"$strFileName.xml");
     129    my $strOutputFile = &FileUtils::filenameConcatenate($strOutputDir,$host,"$strFileName.xml");
    130130 
    131131    # prepare subdirectory for record (if needed)
    132132
    133133    my ($strSubDirPath,$unused) = $self->dirFileSplit($strOutputFile);
    134     &util::mk_all_dir($strSubDirPath);
     134    &FileUtils::makeAllDirectories($strSubDirPath);
    135135
    136136    print STDERR "Saving records to \"$strOutputFile\"\n";
  • main/trunk/greenstone2/perllib/downloaders/WgetDownload.pm

    r17840 r28250  
    238238    }
    239239
    240     my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
     240    my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
    241241    $command = "\"$wget_file_path\" $cmdWget";
    242242    #print STDOUT "Command is: $command\n";
     
    387387    }
    388388
    389     my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
     389    my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
    390390    my $command = "\"$wget_file_path\" $cmdWget";
    391391    #print STDOUT "Command is: $command\n";
     
    538538
    539539 1;
     540
  • main/trunk/greenstone2/perllib/downloaders/Z3950Download.pm

    r24386 r28250  
    9797    $self->{'url'} = $self->{'host'}.":".$self->{'port'};
    9898
    99     $self->{'yaz'} = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "yaz-client");
     99    $self->{'yaz'} = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "yaz-client");
    100100   
    101101    return bless $self, $class;
     
    278278    $strOutputDir  =~ s/"//g; #"
    279279
    280     my $strOutputFile = &util::filename_cat($strOutputDir,$self->{'host'},"$strFileName.marc");
     280    my $strOutputFile = &FileUtils::filenameConcatenate($strOutputDir,$self->{'host'},"$strFileName.marc");
    281281     # prepare subdirectory for record (if needed)
    282282    my ($strSubDirPath,$unused) = $self->dirFileSplit($strOutputFile);
    283283 
    284     &util::mk_all_dir($strSubDirPath);
     284    &FileUtils::makeAllDirectories($strSubDirPath);
    285285 
    286286    print STDERR "Saving records to \"$strOutputFile\"\n";
Note: See TracChangeset for help on using the changeset viewer.