Ignore:
Timestamp:
2011-08-09T22:21:36+12:00 (13 years ago)
Author:
ak19
Message:

Fixed several bugs in Z3950 download. The main bug had to do with the Library of Congress returning records in XML format, rather than the format that other sources have been returning and that the code had so far been set up to handle. The code now works with the XML returned; however, the MARC record returned (which is in XML) does not explode properly. The other bugs were the Server Information button in GLI failing for Z3950, and an ugly 2-level nested folder structure being created to store the files containing the returned records: the http:// prefix of the hostname was ending up in the output path, and we don't want a folder called http.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/downloaders/Z3950Download.pm

    r17284 r24379  
    251251        $strShow = "show $intStartNumber+$intRecordsLeft";
    252252        $intRecordsLeft = 0;
    253        
    254            }
     253        
     254    }
    255255   
    256256    $strResponse .= $self->get($strShow,$numRecords);
    257          
     257
    258258    if ($strResponse eq ""){
    259259        print STDERR "<<ERROR: failed to get $numRecords records>>\n";
     
    276276    my $strFileName = $self->generateFileName($intMaxRecords);
    277277
    278     $strOutputDir  =~ s/"//g; #"
    279 
    280     my $strOutputFile = &util::filename_cat($strOutputDir,$self->{'host'},"$strFileName.marc");
     278    $strOutputDir =~ s/"//g; #"
     279
     280    # remove any http:// prefix from the hostname to generate folder-name to store records in
     281    my $foldername = $self->{'host'};
     282    $foldername =~ s@^http:\/\/(.*)@$1@;
     283
     284    my $strOutputFile = &util::filename_cat($strOutputDir,$foldername,"$strFileName.marc");
    281285     # prepare subdirectory for record (if needed)
    282286    my ($strSubDirPath,$unused) = $self->dirFileSplit($strOutputFile);
     
    316320   while (my $strLine = <$output>)
    317321   {
    318    
    319322       if ($strLine =~ m/Records: ([\d]*)/i ){
    320323       $readRecord = 1;
    321324       next; 
     325       } elsif ($strLine =~ m/<record/i){ # XML output such as from Library of Congress
     326       $strFullOutput .= $strLine;
     327       $readRecord = 1;
     328       next;
    322329       }
    323330     
    324       return $strFullOutput if ($strLine =~ m/nextResultSetPosition|Not connected/i);
     331       if ($strLine =~ m/\<\/record\>/i) { # end of XML, include the closing tag and then terminate below
     332       $strFullOutput .= $strLine;
     333       }
     334
     335      return $strFullOutput if ($strLine =~ m/nextResultSetPosition|Not connected|\<\/record\>/i);
    325336       
    326337      next if(!$readRecord);
     
    389400   $url = $self->{'url'} unless defined $url;
    390401
    391    my $strOpen = $self->start_yaz();
    392 
    393    $strOpen = $self->run_command_with_output("open $url","^Options"); 
     402   my $strOpen = $self->start_yaz($url);
     403
     404   $strOpen = $self->run_command_with_output("open $url","^Options|Connecting...OK."); 
    394405
    395406   $strOpen =~ s/Z> //g;
Note: See TracChangeset for help on using the changeset viewer.