Changeset 25224

Show
Ignore:
Timestamp:
15.03.2012 12:26:14 (8 years ago)
Author:
kjdon
Message:

Removed the default value for the max_records option. If it is not specified, all records are now downloaded. Previously, the only way to download all records was to set max_records to a number larger than the number of records available. Also fixed a bug where, if you didn't specify a place to download into, it would try to download into /path-based-on-url.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/downloaders/OAIDownload.pm

    r25199 r25224  
    7676    'desc' => "{OAIDownload.max_records}", 
    7777    'type' => "int", 
    78     'deft' => "500", 
    7978    'range' => "1,", 
    8079    'reqd' => "no"} ]; 
     
    8584        'inherits' => "yes", 
    8685        'args'     => $arguments }; 
    87  
    88 ##my $self; 
    89  
    90 #### my $strWgetOptions=""; 
    9186 
    9287sub new  
     
    119114    &util::mk_dir($tmp_dir); 
    120115    } 
    121      
     116 
     117    # if max_records not specified, parsing will have set it to "" 
     118    undef $self->{'max_records'} if $self->{'max_records'} eq ""; 
    122119 
    123120    # set up hashmap for individual items in get_doc_exts 
     
    126123    $self->{'lookup_exts'} = {}; 
    127124    my $get_doc_exts = $self->{'get_doc_exts'}; 
    128  
     125     
    129126    if ((defined $get_doc_exts) && ($get_doc_exts ne "")) { 
    130127    my @exts = split(/,\s*/,$get_doc_exts); 
     
    148145    $strOutputDir = $hashGeneralOptions->{"cache_dir"}; 
    149146    my $strBasURL = $self->{'url'}; 
    150     my $intMaxRecords = $self->{'max_records'}; 
    151147    my $blnDownloadDoc = $self->{'get_doc'}; 
    152148 
     
    163159    my $aryIDs = $self->parseOAIIDs($strIDs); 
    164160    my $intIDs = 0; 
    165     if($self->{'max_records'} < scalar(@$aryIDs)) 
     161    if(defined $self->{'max_records'} && $self->{'max_records'} < scalar(@$aryIDs)) 
    166162    { 
    167163    $intIDs = $self->{'max_records'}; 
     
    173169    print STDERR "<<Total number of record(s):$intIDs>>\n"; 
    174170 
    175     $self->getOAIRecords($aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc); 
     171    $self->getOAIRecords($aryIDs, $strOutputDir, $strBasURL, $self->{'max_records'}, $blnDownloadDoc); 
    176172 
    177173#    my $tmp_file = &util::filename_cat($ENV{'GSDLHOME'},"tmp","oai.tmp"); 
     
    191187   
    192188    print STDERR  "Gathering OAI identifiers.....\n"; 
    193  
     189     
    194190    my $metadata_prefix = $self->{'metadata_prefix'}; 
    195191    $cmdWget .= " -q -O - \"$strBasURL?verb=ListIdentifiers&metadataPrefix=$metadata_prefix"; 
     
    218214 
    219215    $accumulated_strIDs = $strIDs; 
    220  
     216    my $max_recs = $self->{'max_records'}; 
    221217    while ($strIDs =~ m/<resumptionToken.*?>\s*(.*?)\s*<\/resumptionToken>/s) {  
    222218    # top up list with further requests for IDs 
     
    241237 
    242238    my $num_acc_identifiers = scalar(@accumulated_identifiers); 
    243     if ($num_acc_identifiers > $self->{'max_records'}) { 
     239    if (defined  $max_recs && $num_acc_identifiers > $max_recs ) { 
    244240        last; 
    245241    } 
     
    440436{ 
    441437    my ($self,$aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc) = @_; 
    442  
    443438    my $intDocCounter = 0; 
    444439 
     
    463458        my $host =$self->{'url'};  
    464459   
    465         $host =~ s/https?:\/\///g; 
     460        $host =~ s@https?:\/\/@@g; 
    466461 
    467462        $host =~ s/:.*//g;  
    468463 
    469     my $strFileURL = "$strOutputDir/$host/$local_id.oai"; 
    470  
     464    my $strFileURL = ""; 
     465    if ($strOutputDir ne "") { 
     466        $strFileURL = "$strOutputDir/"; 
     467    } 
     468    $strFileURL .= "$host/$local_id.oai"; 
    471469 
    472470    # prepare subdirectory for record (if needed) 
     
    490488    close(OAIOUT); 
    491489 
    492         print STDERR "Saving records to $strFileURL\n"; 
     490        print STDERR "Saving record to $strFileURL\n"; 
    493491        print STDERR "<<Done>>\n"; 
    494492    $intDocCounter ++;   
    495     last if ($intDocCounter >= $intMaxRecords); 
    496     } 
    497  
    498     ($intDocCounter >= $intMaxRecords) ?  
     493    last if (defined $intMaxRecords && $intDocCounter >= $intMaxRecords); 
     494    } 
     495 
     496    (defined $intMaxRecords && $intDocCounter >= $intMaxRecords) ?  
    499497    print  STDERR "Reached maximum download records, use -max_records to set the maximum.\n":  
    500498    print  STDERR "Complete download meta record from $strBasURL\n"; 
     
    575573##  $self->{'parser'}->parse($xml_text); 
    576574    }; 
    577      
     575 
    578576    if ($@) { 
    579577    die "OAI: Parsed file $name is not a well formed XML file ($@)\n";