Changeset 25224
- Timestamp: 2012-03-15T12:26:14+13:00 (12 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/downloaders/OAIDownload.pm
r25199 r25224 76 76 'desc' => "{OAIDownload.max_records}", 77 77 'type' => "int", 78 'deft' => "500",79 78 'range' => "1,", 80 79 'reqd' => "no"} ]; … … 85 84 'inherits' => "yes", 86 85 'args' => $arguments }; 87 88 ##my $self;89 90 #### my $strWgetOptions="";91 86 92 87 sub new … … 119 114 &util::mk_dir($tmp_dir); 120 115 } 121 116 117 # if max_records not specified, parsing will have set it to "" 118 undef $self->{'max_records'} if $self->{'max_records'} eq ""; 122 119 123 120 # set up hashmap for individual items in get_doc_exts … … 126 123 $self->{'lookup_exts'} = {}; 127 124 my $get_doc_exts = $self->{'get_doc_exts'}; 128 125 129 126 if ((defined $get_doc_exts) && ($get_doc_exts ne "")) { 130 127 my @exts = split(/,\s*/,$get_doc_exts); … … 148 145 $strOutputDir = $hashGeneralOptions->{"cache_dir"}; 149 146 my $strBasURL = $self->{'url'}; 150 my $intMaxRecords = $self->{'max_records'};151 147 my $blnDownloadDoc = $self->{'get_doc'}; 152 148 … … 163 159 my $aryIDs = $self->parseOAIIDs($strIDs); 164 160 my $intIDs = 0; 165 if( $self->{'max_records'} < scalar(@$aryIDs))161 if(defined $self->{'max_records'} && $self->{'max_records'} < scalar(@$aryIDs)) 166 162 { 167 163 $intIDs = $self->{'max_records'}; … … 173 169 print STDERR "<<Total number of record(s):$intIDs>>\n"; 174 170 175 $self->getOAIRecords($aryIDs, $strOutputDir, $strBasURL, $ intMaxRecords, $blnDownloadDoc);171 $self->getOAIRecords($aryIDs, $strOutputDir, $strBasURL, $self->{'max_records'}, $blnDownloadDoc); 176 172 177 173 # my $tmp_file = &util::filename_cat($ENV{'GSDLHOME'},"tmp","oai.tmp"); … … 191 187 192 188 print STDERR "Gathering OAI identifiers.....\n"; 193 189 194 190 my $metadata_prefix = $self->{'metadata_prefix'}; 195 191 $cmdWget .= " -q -O - \"$strBasURL?verb=ListIdentifiers&metadataPrefix=$metadata_prefix"; … … 218 214 219 215 $accumulated_strIDs = $strIDs; 220 216 my $max_recs = $self->{'max_records'}; 221 217 while ($strIDs =~ m/<resumptionToken.*?>\s*(.*?)\s*<\/resumptionToken>/s) { 
222 218 # top up list with further requests for IDs … … 241 237 242 238 my $num_acc_identifiers = scalar(@accumulated_identifiers); 243 if ( $num_acc_identifiers > $self->{'max_records'}) {239 if (defined $max_recs && $num_acc_identifiers > $max_recs ) { 244 240 last; 245 241 } … … 440 436 { 441 437 my ($self,$aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc) = @_; 442 443 438 my $intDocCounter = 0; 444 439 … … 463 458 my $host =$self->{'url'}; 464 459 465 $host =~ s /https?:\/\///g;460 $host =~ s@https?:\/\/@@g; 466 461 467 462 $host =~ s/:.*//g; 468 463 469 my $strFileURL = "$strOutputDir/$host/$local_id.oai"; 470 464 my $strFileURL = ""; 465 if ($strOutputDir ne "") { 466 $strFileURL = "$strOutputDir/"; 467 } 468 $strFileURL .= "$host/$local_id.oai"; 471 469 472 470 # prepare subdirectory for record (if needed) … … 490 488 close(OAIOUT); 491 489 492 print STDERR "Saving record sto $strFileURL\n";490 print STDERR "Saving record to $strFileURL\n"; 493 491 print STDERR "<<Done>>\n"; 494 492 $intDocCounter ++; 495 last if ( $intDocCounter >= $intMaxRecords);496 } 497 498 ( $intDocCounter >= $intMaxRecords) ?493 last if (defined $intMaxRecords && $intDocCounter >= $intMaxRecords); 494 } 495 496 (defined $intMaxRecords && $intDocCounter >= $intMaxRecords) ? 499 497 print STDERR "Reached maximum download records, use -max_records to set the maximum.\n": 500 498 print STDERR "Complete download meta record from $strBasURL\n"; … … 575 573 ## $self->{'parser'}->parse($xml_text); 576 574 }; 577 575 578 576 if ($@) { 579 577 die "OAI: Parsed file $name is not a well formed XML file ($@)\n";
Note: See TracChangeset for help on using the changeset viewer.
for help on using the changeset viewer.