Changeset 25224


Ignore:
Timestamp:
2012-03-15T12:26:14+13:00 (12 years ago)
Author:
kjdon
Message:

Removed the default value for the max_records option. If max_records is not specified, all records are now downloaded. Previously the only way to download all records was to set max_records to a number larger than the number of records available. Also fixed a bug where, if you didn't specify a directory to download into, the empty output directory was still joined to the URL-based path with a "/", so it would try to download into /path-based-on-url.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/downloaders/OAIDownload.pm

    r25199 r25224  
    7676    'desc' => "{OAIDownload.max_records}",
    7777    'type' => "int",
    78     'deft' => "500",
    7978    'range' => "1,",
    8079    'reqd' => "no"} ];
     
    8584        'inherits' => "yes",
    8685        'args'     => $arguments };
    87 
    88 ##my $self;
    89 
    90 #### my $strWgetOptions="";
    9186
    9287sub new
     
    119114    &util::mk_dir($tmp_dir);
    120115    }
    121    
     116
     117    # if max_records not specified, parsing will have set it to ""
     118    undef $self->{'max_records'} if $self->{'max_records'} eq "";
    122119
    123120    # set up hashmap for individual items in get_doc_exts
     
    126123    $self->{'lookup_exts'} = {};
    127124    my $get_doc_exts = $self->{'get_doc_exts'};
    128 
     125   
    129126    if ((defined $get_doc_exts) && ($get_doc_exts ne "")) {
    130127    my @exts = split(/,\s*/,$get_doc_exts);
     
    148145    $strOutputDir = $hashGeneralOptions->{"cache_dir"};
    149146    my $strBasURL = $self->{'url'};
    150     my $intMaxRecords = $self->{'max_records'};
    151147    my $blnDownloadDoc = $self->{'get_doc'};
    152148
     
    163159    my $aryIDs = $self->parseOAIIDs($strIDs);
    164160    my $intIDs = 0;
    165     if($self->{'max_records'} < scalar(@$aryIDs))
     161    if(defined $self->{'max_records'} && $self->{'max_records'} < scalar(@$aryIDs))
    166162    {
    167163    $intIDs = $self->{'max_records'};
     
    173169    print STDERR "<<Total number of record(s):$intIDs>>\n";
    174170
    175     $self->getOAIRecords($aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc);
     171    $self->getOAIRecords($aryIDs, $strOutputDir, $strBasURL, $self->{'max_records'}, $blnDownloadDoc);
    176172
    177173#    my $tmp_file = &util::filename_cat($ENV{'GSDLHOME'},"tmp","oai.tmp");
     
    191187 
    192188    print STDERR  "Gathering OAI identifiers.....\n";
    193 
     189   
    194190    my $metadata_prefix = $self->{'metadata_prefix'};
    195191    $cmdWget .= " -q -O - \"$strBasURL?verb=ListIdentifiers&metadataPrefix=$metadata_prefix";
     
    218214
    219215    $accumulated_strIDs = $strIDs;
    220 
     216    my $max_recs = $self->{'max_records'};
    221217    while ($strIDs =~ m/<resumptionToken.*?>\s*(.*?)\s*<\/resumptionToken>/s) {
    222218    # top up list with further requests for IDs
     
    241237
    242238    my $num_acc_identifiers = scalar(@accumulated_identifiers);
    243     if ($num_acc_identifiers > $self->{'max_records'}) {
     239    if (defined  $max_recs && $num_acc_identifiers > $max_recs ) {
    244240        last;
    245241    }
     
    440436{
    441437    my ($self,$aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc) = @_;
    442 
    443438    my $intDocCounter = 0;
    444439
     
    463458        my $host =$self->{'url'};
    464459 
    465         $host =~ s/https?:\/\///g;
     460        $host =~ s@https?:\/\/@@g;
    466461
    467462        $host =~ s/:.*//g;
    468463
    469     my $strFileURL = "$strOutputDir/$host/$local_id.oai";
    470 
     464    my $strFileURL = "";
     465    if ($strOutputDir ne "") {
     466        $strFileURL = "$strOutputDir/";
     467    }
     468    $strFileURL .= "$host/$local_id.oai";
    471469
    472470    # prepare subdirectory for record (if needed)
     
    490488    close(OAIOUT);
    491489
    492         print STDERR "Saving records to $strFileURL\n";
     490        print STDERR "Saving record to $strFileURL\n";
    493491        print STDERR "<<Done>>\n";
    494492    $intDocCounter ++; 
    495     last if ($intDocCounter >= $intMaxRecords);
    496     }
    497 
    498     ($intDocCounter >= $intMaxRecords) ?
     493    last if (defined $intMaxRecords && $intDocCounter >= $intMaxRecords);
     494    }
     495
     496    (defined $intMaxRecords && $intDocCounter >= $intMaxRecords) ?
    499497    print  STDERR "Reached maximum download records, use -max_records to set the maximum.\n":
    500498    print  STDERR "Complete download meta record from $strBasURL\n";
     
    575573##  $self->{'parser'}->parse($xml_text);
    576574    };
    577    
     575
    578576    if ($@) {
    579577    die "OAI: Parsed file $name is not a well formed XML file ($@)\n";
Note: See TracChangeset for help on using the changeset viewer.