Changeset 20926
- Timestamp: 2009-11-11T11:28:33+13:00 (14 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/downloaders/OAIDownload.pm
r17668  r20926
   295     295    {
   296     296    my $doc_id_url = $2;
   297     297    print STDERR "Found doc url: $doc_id_url\n";
   298     298    next if ($doc_id_url !~ m/^(https?|ftp):\/\//);
   299     299
     …       …
   334     334    my $wget_opts2 = $self->getWgetOptions();
   335     335    my $wget_cmd2 = "$wget_opts2 --convert-links -O \"$tmp_filename\" \"$doc_id_url\"";
   336          -
   337     336    my ($stdout_and_err2,$error2,$follow2) = $self->useWgetMonitored($wget_cmd2);
   338     337    return $strRecord if $self->{'forced_quit'};
     …       …
   340     339    if($error2 ne "")
   341     340    {
   342          - print STDERR "Error occured while retrieving OAI source documents: $error2\n";
   343          - exit(-1);
           341  + print STDERR "Error occured while retrieving OAI source documents (1): $error2\n";
           342  + #exit(-1);
           343  + next;
   344     344    }
   345     345
     …       …
   386     386    }
   387     387    else {
   388          - print STDERR "Error occurred while retrieving OAI source documents :\n";
           388  + print STDERR "Error occurred while retrieving OAI source documents (2):\n";
   389     389    print STDERR "$!\n";
   390     390    }
     …       …
   402     402    my ($unused,$download_doc_file) = $self->dirFileSplit($download_doc_filename);
   403     403
           404  + # may have ' in url - others??
           405  + my $safe_doc_id_url = $doc_id_url;
           406  + $safe_doc_id_url =~ s/'/\'/g;
           407  +
   404     408    my $wget_opts = $self->getWgetOptions();
   405          - my $wget_cmd = "$wget_opts --convert-links -O \"$download_doc_filename\" \"$doc_id_url\"";
   406          -
           409  + my $wget_cmd = "$wget_opts --convert-links -O \"$download_doc_filename\" \"$safe_doc_id_url\"";
           410  +
   407     411    my ($stdout_and_err,$errors,$follow) = $self->useWgetMonitored($wget_cmd);
   408     412    return $strRecord if $self->{'forced_quit'};
     …       …
   410     414    if($errors ne "")
   411     415    {
   412          - print STDERR "Error occured while retriving OAI souce documents :\n";
           416  + print STDERR "Error occured while retriving OAI souce documents (3):\n";
   413     417    print STDERR "$errors\n";
   414          - exit(-1);
           418  + #exit(-1);
           419  + next;
   415     420    }
   416     421
   417     422
   418          - $strRecord =~ s/<metadata>(.*?)<(dc:)?identifier>$orig_doc_id_url<\/(dc:)?identifier>(.*?)<\/metadata>/<metadata>$1<${2}identifier>$orig_doc_id_url<\/${2}identifier>\n <gi.Sourcedoc>$download_doc_file<\/gi.Sourcedoc>$4<\/metadata>/s;
           423  + $strRecord =~ s/<metadata>(.*?)<((?:dc:)?identifier)>$orig_doc_id_url<\/((?:dc:)?identifier)>(.*?)<\/metadata>/<metadata>$1<${2}>$orig_doc_id_url<\/${2}>\n <gi.Sourcedoc>$download_doc_file<\/gi.Sourcedoc>$4<\/metadata>/s;
   419     424    }
   420     425
     …       …
   446     451    my $wget_opts = $self->getWgetOptions();
   447     452    my $cmdWget= "$wget_opts -q -O - \"$strBasURL?verb=GetRecord&metadataPrefix=$metadata_prefix&identifier=$strID\"";
   448          -
           453  +
   449     454    my $strRecord = $self->useWget($cmdWget);
   450     455
Note: See TracChangeset for help on using the changeset viewer.