Changeset 12465


Ignore:
Timestamp:
2006-08-18T09:24:29+12:00 (18 years ago)
Author:
shaoqun
Message:

fixed the bugs on Windows

Location:
trunk/gsdl/perllib
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/download.pm

    r11784 r12465  
    3636sub load_download {
    3737    my ($download_name,$download_options) = @_;
     38
    3839    my ($download_obj);
    3940
    40     my $coldownloadname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},
     41    my $coldownloadname ="";
     42
     43    if ($ENV{'GSDLCOLLECTDIR'}){
     44   
     45    $coldownloadname = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},
    4146                          "perllib/downloaders",
    4247                          "${download_name}.pm");
     48
     49   }
     50   
    4351    my $maindownloadname = &util::filename_cat($ENV{'GSDLHOME'},
    4452                           "perllib/downloaders",
    4553                           "${download_name}.pm");
     54
    4655    if (-e $coldownloadname) { require $coldownloadname;}
    4756    elsif (-e $maindownloadname ) { require $maindownloadname; }
  • trunk/gsdl/perllib/downloaders/OAIDownload.pm

    r11783 r12465  
    3737use XMLParser;
    3838
    39 use IO::File;
    4039use POSIX qw(tmpnam);
     40use util;
    4141
    4242sub BEGIN {
     
    5656    'reqd' => "no"},
    5757      { 'name' => "get_doc",
    58     'disp' => "{OAIDownload.qet_doc_disp}",
     58    'disp' => "{OAIDownload.get_doc_disp}",
    5959    'desc' => "{OAIDownload.get_doc}",
    6060    'type' => "flag",
     
    7676my $self;
    7777
     78my $strWgetOptions="";
     79
    7880sub new
    7981{
     
    107109    my ($hashGeneralOptions) = @_;
    108110
    109     # Checking if the wget has been well setup
    110     # &WgetDownload::checkWgetSetup($self,$hashGeneralOptions->{'gli_call'});
    111    
    112     my $strOutputDir = $hashGeneralOptions->{"cache_dir"};
     111    print STDERR "here2";
     112   
     113    $strWgetOptions = $self->getWgetOptions();
     114    my $cmdWget = $strWgetOptions;
     115 
     116    my $strOutputDir ="";
     117    $strOutputDir = $hashGeneralOptions->{"cache_dir"};
    113118    my $strBasURL = $self->{'url'};
    114119    my $intMaxRecords = $self->{'max_records'};
     
    116121
    117122    print STDERR "<<Defined Maximum>>\n";
    118     my $strIDs = &getOAIIDs($self,$strBasURL);
    119     if($strIDs eq "")
     123
     124    my $strIDs = $self->getOAIIDs($strBasURL);
     125 
     126   if($strIDs eq "")
    120127    {
    121128    print STDERR "Error: No ID being found\n";
    122129    return 0;
    123130    }
    124     my $aryIDs = &parseOAIIDs($strIDs);
     131    my $aryIDs = $self->parseOAIIDs($strIDs);
    125132    my $intIDs = 0;
    126133    if($self->{'max_records'} < scalar(@$aryIDs))
     
    134141    print STDERR "<<Total number of record(s):$intIDs>>\n";
    135142
    136     &getOAIRecords($aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc);
     143    $self->getOAIRecords($aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc);
     144
     145    my $tmp_file = "$ENV{GSDLHOME}/tmp/oai.tmp";
     146    &util::rm($tmp_file);
    137147
    138148    return 1;
     
    143153    my ($self,$strBasURL) = @_;
    144154    my ($cmdWget);
     155     
     156    my $wgetOptions = $self->getWgetOptions();
     157
     158    $cmdWget = $wgetOptions;
     159 
    145160    print STDERR  "Gathering OAI identifiers.....\n";
     161
    146162    if($self->{'set'} ne "")
    147163    {
    148     $cmdWget = "-q -O - \"$strBasURL?verb=ListIdentifiers&metadataPrefix=oai_dc&set=$self->{'set'}\" ";
     164    $cmdWget .= " -q -O - \"$strBasURL?verb=ListIdentifiers&metadataPrefix=oai_dc&set=$self->{'set'}\" ";
    149165    }
    150166    else
    151167    {
    152     $cmdWget = "-q -O - \"$strBasURL?verb=ListIdentifiers&metadataPrefix=oai_dc\" ";
    153     }
    154     my $strIDs =  &WgetDownload::useWget($cmdWget);
     168    $cmdWget .= " -q -O - \"$strBasURL?verb=ListIdentifiers&metadataPrefix=oai_dc\" ";
     169    }
     170
     171 
     172    my $strIDs =  $self->useWget($cmdWget);
     173
     174    if (!defined $strIDs or $strIDs eq ""  ){
     175    print STDERR "Server information is unavailable.\n";
     176    print STDERR "<<Finished>>\n";
     177        return; 
     178    }
     179
     180    print STDERR "<<Download Information>>\n";
     181
     182    $self->parse_xml($strIDs);
     183
    155184    return $strIDs;
    156185}
     
    158187sub parseOAIIDs
    159188{   
    160     my ($strIDs) = @_;
     189    my ($self,$strIDs) = @_;
    161190
    162191    print STDERR "Parsing OAI identifiers.....\n";
     
    177206sub dirFileSplit
    178207{
    179     my ($strFile) = @_;
    180 
    181     my @aryDirs = split("/",$strFile);
     208    my ($self,$strFile) = @_;
     209
     210    my @aryDirs = split("[/\]",$strFile);
     211   
    182212    my $strLocalFile = pop(@aryDirs);
    183213    my $strSubDirs = join("/",@aryDirs);
     
    188218sub getOAIDoc
    189219{
    190     my ($strRecord, $strSubDirPath) = @_;
    191     
     220    my ($self,$strRecord, $strSubDirPath) = @_;
     221 
    192222    print  STDERR "Gathering source documents.....\n";
    193223    # look out for identifier tag in metadata section
     224   
    194225    if ($strRecord =~ m/<metadata>(.*)<\/metadata>/s)
    195226    {
     
    200231        my $strDocURL = $2;
    201232
    202         my ($unused,$strDocFile) = dirFileSplit($strDocURL);
    203 
    204         my $strSoureDirPath = &util::filename_cat($strSubDirPath,"srcdocs");
     233        my ($unused,$strDocFile) = $self->dirFileSplit($strDocURL);
     234
     235            my $strSoureDirPath ="";
     236
     237        $strSoureDirPath = &util::filename_cat($strSubDirPath,"srcdocs");
     238
    205239        &util::mk_dir($strSoureDirPath)  if (!-e "$strSoureDirPath");
    206240       
    207241        my $strFullDocFilePath = &util::filename_cat($strSoureDirPath,$strDocFile);
    208242       
    209         my $wget_cmd = "-q -O $strFullDocFilePath \"$strDocURL\"";
    210 
    211         my $strResponse =  &WgetDownload::useWget($wget_cmd,1);
     243        my $wget_cmd = $strWgetOptions." -q -O $strFullDocFilePath \"$strDocURL\"";
     244
     245        my $strResponse =  $self->useWget($wget_cmd,1);
    212246
    213247        if($strResponse ne "")
     
    233267sub getOAIRecords
    234268{
    235     my ($aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc) = @_;
     269    my ($self,$aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc) = @_;
    236270
    237271    my $intDocCounter = 0;
     
    240274    {
    241275    print  STDERR "Gathering OAI record with ID:$strID.....\n";
    242     # wget it;
    243     my $cmdWget= "-q -O - \"$strBasURL?verb=GetRecord&metadataPrefix=oai_dc&identifier=$strID\"";
    244     my $strRecord =  &WgetDownload::useWget($cmdWget);
     276       
     277    my $cmdWget= $strWgetOptions." -q -O - \"$strBasURL?verb=GetRecord&metadataPrefix=oai_dc&identifier=$strID\"";
     278
     279    my $strRecord =  $self->useWget($cmdWget);
     280
     281       
     282        my @fileDirs = split(":",$strID); 
    245283
    246284    # setup directories
    247     my $strFileURL = "$strOutputDir/$strID.oai";
    248     $strFileURL =~ s/:/\//g;
    249 
     285
     286        $strOutputDir  =~ s/"//g;
     287 
     288    my $strFileURL = "$strOutputDir/$fileDirs[0]/$fileDirs[1].oai";
     289   
    250290    # prepare subdirectory for record (if needed)
    251     my ($strSubDirPath,$unused) = dirFileSplit($strFileURL);
     291    my ($strSubDirPath,$unused) = ("", "");
     292
     293        ($strSubDirPath,$unused) = $self->dirFileSplit($strFileURL);
     294   
    252295    &util::mk_all_dir($strSubDirPath);
    253296
    254297    my $ds = &util::get_dirsep();
    255     my $strOutputFile = &util::filename_cat($strOutputDir,"$strID.oai");
    256     $strOutputFile =~ s/:/$ds/g;
    257 
     298   
    258299    if($blnDownloadDoc)
    259300    {
    260         &getOAIDoc($strRecord,$strSubDirPath);
     301        $self->getOAIDoc($strRecord,$strSubDirPath);
    261302    }
    262303
    263304    # save record
    264     open (OAIOUT,">$strOutputFile")
     305    open (OAIOUT,">$strFileURL")
    265306        || die "Unable to save oai metadata record: $!\n";
    266307    print OAIOUT $strRecord;
    267308    close(OAIOUT);
    268309
    269     $intDocCounter ++;
    270     print STDERR "<<Done>>\n";
     310        print STDERR "Saving records to $strFileURL\n";
     311        print STDERR "<<Done>>\n";
     312    $intDocCounter ++; 
    271313    last if ($intDocCounter >= $intMaxRecords);
    272314    }
     315
    273316    ($intDocCounter >= $intMaxRecords) ?
    274317    print  STDERR "Reach maximum download records, use -max_records to set the maximum.\n":
    275318    print  STDERR "Complete download meta record from $strBasURL\n";
    276319
     320       print STDERR "<<Finished>>\n";
    277321}
    278322
     
    282326    if(!defined $self){ die "System Error: No \$self defined for url_information in OAIDownload\n";}
    283327   
    284     my $strBaseCMD = "-q -O - \"$self->{'url'}?_OPTS_\"";
     328    my $wgetOptions = $self->getWgetOptions();
     329    my $strBaseCMD = $wgetOptions." -q -O - \"$self->{'url'}?_OPTS_\"";
    285330 
    286331    my $strIdentify = "verb=Identify";
     
    290335    $strIdentifyCMD =~ s/_OPTS_/$strIdentify/; 
    291336
    292     my $strIdentifyText = &WgetDownload::useWget($strIdentifyCMD);
     337    my $strIdentifyText = $self->useWget($strIdentifyCMD);
     338
     339     if (!defined $strIdentifyText or $strIdentifyText eq ""  ){
     340    print STDERR "Server information is unavailable.\n";
     341    print STDERR "<<Finished>>\n";
     342        return; 
     343    }
    293344
    294345    print STDERR "General information:\n";
     
    297348    my $strListSetCMD = $strBaseCMD;
    298349    $strListSetCMD =~ s/_OPTS_/$strListSets/;   
    299     my $strListSetsText = &WgetDownload::useWget($strListSetCMD);
     350    my $strListSetsText = $self->useWget($strListSetCMD);
     351
     352
    300353    print STDERR "List Information:\n";
    301354    $self->parse_xml($strListSetsText);
     
    306359    my ($self) = shift (@_);
    307360    my ($strOutputText) = @_;
    308     my ($name,$fh);
    309    
     361   
    310362    #Open a temporary file to store OAI information, and store the information to the temp file
    311     do {$name = tmpnam()}
    312     until $fh = IO::File->new($name, O_RDWR|O_CREAT|O_EXCL);
    313     print $fh $strOutputText;
    314     close($fh);
     363    my $name = "$ENV{GSDLHOME}/tmp/oai.tmp";
     364
     365    open(*OAIOUT,"> $name");
     366   
     367    print OAIOUT $strOutputText;
     368    close(OAIOUT);
    315369
    316370    $self->{'temp_file_name'} = $name;
     
    340394    if ((defined $self->{'subfield'} && ($self->{'subfield'} ne ""))) {
    341395    $self->{'text'} .= $_[1];
    342     $self->{'text'} =~ s/[\n]|[" "]//g;
     396    $self->{'text'} =~ s/[\n]|([ ]{2,})//g;
    343397    if($self->{'text'} ne "")
    344398    {       
     
    352406{
    353407    my ($expat, $element, %attr) = @_;
     408
    354409    $self->{'subfield'} = $element;
     410   
    355411}
    356412
     
    364420sub error
    365421{
    366     my ($strFunctionName,$strError) = @_;
     422    my ($self,$strFunctionName,$strError) = @_;
    367423    {
    368424    print "Error occoured in OAIDownload.pm\n".
  • trunk/gsdl/perllib/downloaders/SRWDownload.pm

    r11783 r12465  
    3434use strict;
    3535
    36 use BasDownload;
     36use Z3950Download;
    3737use IPC::Open2;
    3838
    3939sub BEGIN {
    40     @SRWDownload::ISA = ('BasDownload');
    41 }
    42 
    43 local (*YAZOUT, *YAZIN);
    44 
    45 # args same as Z3950Download at the moment - should it be based on that??
    46 my $arguments =
    47     [  { 'name' => "host",
    48     'disp' => "{Z3950Download.host_disp}",
    49     'desc' => "{Z3950Download.host}",
    50     'type' => "string",
    51     'reqd' => "yes"},
    52       { 'name' => "port",
    53     'disp' => "{Z3950Download.port_disp}",
    54     'desc' => "{Z3950Download.port}",
    55     'type' => "string",
    56     'reqd' => "yes"},
    57       { 'name' => "database",
    58     'disp' => "{Z3950Download.database_disp}",
    59     'desc' => "{Z3950Download.database}",
    60         'type' => "string",
    61     'reqd' => "yes"},
    62       { 'name' => "find",
    63     'disp' => "{Z3950Download.find_disp}",
    64     'desc' => "{Z3950Download.find}",
    65     'type' => "string",
    66     'deft' => "",
    67     'reqd' => "yes"},
    68       { 'name' => "max_records",
    69     'disp' => "{Z3950Download.max_records_disp}",
    70     'desc' => "{Z3950Download.max_records}",
    71     'type' => "int",
    72     'deft' => "500",
    73     'reqd' => "no"}];
     40    @SRWDownload::ISA = ('Z3950Download');
     41}
     42
     43my $arguments;
    7444
    7545my $options = { 'name'     => "SRWDownload",
    7646        'desc'     => "{SRWDownload.desc}",
    7747        'abstract' => "no",
    78         'inherits' => "yes",
    79         'args'     => $arguments };
     48        'inherits' => "yes"
     49        };
    8050
    8151
     
    8959    if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    9060
    91     my $self = (defined $hashArgOptLists)? new BasDownload($getlist,$inputargs,$hashArgOptLists): new BasDownload($getlist,$inputargs);
     61    my $self = (defined $hashArgOptLists)? new Z3950Download($getlist,$inputargs,$hashArgOptLists): new Z3950Download($getlist,$inputargs);
    9262
    9363    if ($self->{'info_only'}) {
     
    10676    my ($hashGeneralOptions) = @_;
    10777    my ($strOpen,$strBase,$strFind,$strResponse,$intAmount,$intMaxRecords,$strRecords);
     78
     79    my $url = $self->{'url'};
     80
    10881    print STDERR "<<Defined Maximum>>\n";
    10982
    110     my $url = $self->{'url'};
    111 
    112     open2(*YAZOUT, *YAZIN, "yaz-client $url")
    113     or die "can't open pipe to yaz-client: $!";
     83    my  $yaz =  $self->{'yaz'};
     84 
     85    my $childpid = open2(*YAZOUT, *YAZIN, $yaz)
     86    or (print STDERR "<<Finished>>\n" and die "can't open pipe to yaz-client: $!");
     87
     88    $self->{'YAZOUT'} = *YAZOUT;
     89    $self->{'YAZIN'} = *YAZIN;
     90
     91    $strOpen = $self->open_connection("open $url");
     92
     93    if (!$strOpen) {
     94        print STDERR "Cannot connect to $url\n";
     95        print STDERR "<<Finished>>\n"; 
     96    return 0;
     97    }
    11498
    11599    print STDERR "Opening connection to \"$self->{'url'}\"\n";
    116     #$strOpen = &run_command_with_output("open $self->{'url'}");
    117100    print STDERR "Access database: \"$self->{'database'}\"\n";
    118     &run_command_without_output("base $self->{'database'}");
    119     &run_command_without_output("querytype prefix");
     101    $self->run_command_without_output("base $self->{'database'}");
     102    $self->run_command_without_output("querytype prefix");
    120103    print STDERR "Searching for keyword: \"$self->{'find'}\"\n";
    121     $intAmount = &findAmount($self->{'find'});
     104    $intAmount =$self->findAmount($self->{'find'});
    122105
    123106    if($intAmount <= 0)
    124107    {
    125108    ($intAmount == -1)?
    126         print STDERR "Unexpected format, Parsing operation can not be performed\n" :
     109        print STDERR "Something wrong with the arguments,downloading can not be performed\n" :
    127110        print STDERR "No Record is found\n";
     111        print STDERR "<<Finished>>\n";
    128112    return 0;
    129113    }
    130114    $intMaxRecords = ($self->{'max_records'} > $intAmount)? $intAmount : $self->{'max_records'};
    131115    print STDERR "<<Total number of record(s):$intMaxRecords>>\n";
    132     $strRecords = &getRecords($intMaxRecords);
    133     print STDERR $strRecords;
    134     &saveRecords($self,$strRecords,$hashGeneralOptions->{'cache_dir'},$intMaxRecords);
    135     print STDERR "Closing connection\n";
     116 
     117    $strRecords = $self->getRecords($intMaxRecords);
     118 
     119    $self->saveRecords($strRecords,$hashGeneralOptions->{'cache_dir'},$intMaxRecords);
     120    print STDERR "Closing connection...\n";
     121    print STDERR "<<Finished>>\n";
    136122    close(YAZOUT);
    137123    close(YAZIN);
    138     return 1;
    139 }
    140 
    141 sub findAmount
    142 {
    143     my($strFindTarget) = @_;
    144     my $strResponse = &run_command_with_output("find $strFindTarget");
    145 
    146     return ($strResponse =~ m/^Number of hits: (\d+)/m)? $1:-1; 
    147 }
    148 
    149 sub getRecords
    150 {
    151     my ($intMaxRecords) = @_;
    152     my ($strShow,$intStartNumber,$strResponse,$strRecords,$intRecordsLeft);
    153 
    154     $intStartNumber = 1;
    155     $intRecordsLeft = $intMaxRecords;
    156     while ($intRecordsLeft > 0)
    157     {
    158     if($intRecordsLeft > 50)
    159     {
    160         print STDERR "<<Done:50>>\n";
    161         print STDERR "Yaz is Gathering records: $intStartNumber - ".($intStartNumber+49)."\n";
    162        
    163         $strShow = "show $intStartNumber+50";
    164         $intStartNumber = $intStartNumber + 50;
    165         $intRecordsLeft = $intRecordsLeft - 50;
    166     }
    167     else
    168     {
    169         print STDERR "<<Done:".($intRecordsLeft).">>\n";
    170         print STDERR "Yaz is Gathering records: $intStartNumber - ".($intStartNumber+$intRecordsLeft-1)."\n";
    171         $strShow = "show $intStartNumber+$intRecordsLeft";
    172         $intRecordsLeft = 0;
    173     }
    174    
    175     $strResponse = &run_command_with_output($strShow);
    176 
    177         ## need to change this
    178                
    179         print STDERR $strResponse;
    180    
    181         if($strResponse =~  m/pos=[\d]*(.*)>\n/s)
    182     {
    183            $strRecords .= "$1>\n";
    184            
    185            $strRecords =~ s/pos=[\d]*(.*)?\n//g;           
    186     }
    187     }
    188     return $strRecords;
    189 }
     124   return 1;
     125}
     126
    190127
    191128sub saveRecords
     
    195132    # setup directories
    196133    # Currently only gather the MARC format
    197     my $strFileName = &generateFileName($self,$intMaxRecords);
    198     my $strFileURL = "$strOutputDir/$self->{'host'}/$strFileName.marc";
    199     $strFileURL =~ s/:/\//g;
    200 
     134    $strRecords ="<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<collection>$strRecords</collection>"; 
     135    my $strFileName =  $self->generateFileName($intMaxRecords);
     136    my $host = $self->{'host'};
     137    $host =~ s/http:\/\//srw\//;
     138    $strOutputDir  =~ s/"//g;
     139    my $strFileURL = "$strOutputDir/$host/$strFileName.xml";
     140 
    201141    # prepare subdirectory for record (if needed)
    202     my ($strSubDirPath,$unused) = dirFileSplit($strFileURL);
     142
     143    my ($strSubDirPath,$unused) = $self->dirFileSplit($strFileURL);
    203144    &util::mk_all_dir($strSubDirPath);
    204145
    205146    my $ds = &util::get_dirsep();
    206     my $strOutputFile = &util::filename_cat($strOutputDir,$self->{'host'},"$strFileName.marc");
    207     $strOutputFile =~ s/:/$ds/g;
    208 
    209     print STDERR "Saving records to \"$strOutputFile\"\n";
     147 
     148    print STDERR "Saving records to \"$strFileURL\"\n";
    210149
    211150    # save record
    212     open (ZOUT,">$strOutputFile")
     151    open (ZOUT,">$strFileURL")
    213152    || die "Unable to save oai metadata record: $!\n";
    214153    print ZOUT $strRecords;
     
    216155}
    217156
    218 sub run_command_with_output
    219 {
    220     my ($strCMD) = @_;
    221     return &run_command($strCMD,"^Elapsed:.*\$");
    222 }
    223 
    224 sub run_command_without_output
    225 {
    226     my ($strCMD) = @_;
    227     &run_command($strCMD);
    228 }
    229 
    230 sub run_command
    231 {
    232     my ($strCMD,$strStopRE) = @_;
     157sub get{
     158   my ($self,$strShow,$numRecord) = @_; 
     159
     160   $self->run_command($strShow); 
     161   
     162   my $strFullOutput="";
     163   my $count=0;
     164   my $readRecord = 0;
     165   
     166   while (my $strLine = <YAZOUT>)
     167   {
    233168   
    234     print YAZIN "$strCMD\n";
    235     if (!defined $strStopRE){return "";}
    236     else
    237     {
    238     my $strFullOutput;
    239         while (my $strLine = <YAZOUT>)
    240     {
    241             $strFullOutput .= $strLine;
    242         if($strLine =~ m/$strStopRE/){return $strFullOutput;}
    243     }
    244     }
    245 }
    246 
    247 sub url_information
    248 {
    249    my ($self) = shift (@_);
     169       return $strFullOutput if ($count >= $numRecord);
     170
     171       return $strFullOutput if($strLine =~ m/^HTTP ERROR/i);
     172
     173       if ($strLine =~ m/pos=[\d]*/i ){
     174           $count++;
     175       $readRecord = 1;
     176       next; 
     177       }
     178     
     179       next if(!$readRecord);
     180
     181       $strFullOutput .= $strLine;     
     182   }
     183 
     184}
     185
     186sub url_information{
     187   my ($self) = @_;
    250188
    251189   my $url = $self->{'url'};
    252190
    253   open2(*YAZOUT, *YAZIN, "yaz-client $url") or die "can't open pipe to yaz-client: $!";
    254 
    255    my $strFullOutput="";
    256 
    257    while (my $strLine = <YAZOUT>)
    258     {
    259           $strFullOutput .= $strLine; 
    260       }
    261 
    262    return  $strFullOutput;
    263 }
    264 
    265 sub generateFileName
    266 {
    267     my ($self,$intMaxRecords) = @_;
    268     my $strFileName = ($self->{'database'})."_".($self->{'find'})."_".($intMaxRecords);
    269 }
    270 
    271 sub dirFileSplit
    272 {
    273     my ($strFile) = @_;
    274 
    275     my @aryDirs = split("/",$strFile);
    276     my $strLocalFile = pop(@aryDirs);
    277     my $strSubDirs = join("/",@aryDirs);
    278 
    279     return ($strSubDirs,$strLocalFile);
     191   $url =~ s#http://##;
     192
     193  return $self->SUPER::url_information($url); 
     194
    280195}
    281196
    282197sub error
    283198{
    284     my ($strFunctionName,$strError) = @_;
     199    my ($self, $strFunctionName,$strError) = @_;
    285200    {
    286201    print STDERR "Error occoured in SRWDownload.pm\n".
  • trunk/gsdl/perllib/downloaders/WebDownload.pm

    r11783 r12465  
    9999    my ($hashGeneralOptions) = @_;
    100100
    101     # TODO: the checking for Wget is still not complete, we need to
    102     #       check if the proxy has been set or not, and whether the
    103     #       connection has been established.
    104     # Checking if the wget has been well setup
    105     # &WgetDownload::checkWgetSetup($self,$hashGeneralOptions->{'gli_call'});
    106 
     101 
    107102    # Download options
    108     my $strOptions = &generateOptionsString($self);
    109     my $strWgetOptions = &WgetDownload::getWgetOptions($self);
    110    
    111    
     103    my $strOptions = $self->generateOptionsString();
     104    my $strWgetOptions = $self->getWgetOptions();
     105     
    112106    # Setup the command for using wget
    113107    my $cmdWget = "-N -k -x -t 2 -P ".$hashGeneralOptions->{"cache_dir"}." $strWgetOptions $strOptions ".$self->{'url'};
    114108   
    115     print "**************".$cmdWget."\n";
    116 
    117109    # Download the web pages
    118110    # print "Strat download from $self->{'url'}...\n";
    119    
    120111    print STDERR "<<Undefined Maximum>>\n";
    121     my $strResponse = &WgetDownload::useWget($cmdWget,1);
    122     #if ($strResponse ne ""){print "$strResponse\n";}
    123    
    124     # print "Finish download from $self->{'url'}...\n";
    125 
     112
     113    my $strResponse = $self->useWget($cmdWget,1);
     114 
     115    # if ($strResponse ne ""){print "$strResponse\n";}
     116   
     117    print STDERR "Finish download from $self->{'url'}\n";
     118
     119    print STDERR "<<Finished>>\n";
     120 
    126121    return 1;
    127122}
     
    146141    else
    147142    {
    148     &error("setupOptions","Incorrect Depth is defined!!\n");
     143    $self->error("setupOptions","Incorrect Depth is defined!!\n");
    149144    }
    150145
     
    174169{
    175170    my ($self) = shift (@_);
    176     if(!defined $self){ die "System Error: No \$self defined for url_information in WebDownload\n";}
    177    
    178     my $strBaseCMD = "-q -O - \"$self->{'url'}\"";
    179 
    180     my $strIdentifyText = &WgetDownload::useWget($strBaseCMD);
    181    
     171
     172    my $strOptions = $self->getWgetOptions();
     173
     174    my $strBaseCMD = $strOptions." -q -O - \"$self->{'url'}\"";
     175
     176 
     177    my $strIdentifyText = $self->useWget($strBaseCMD);
     178   
     179    if (!defined $strIdentifyText or $strIdentifyText eq ""  ){
     180    print STDERR "Server information is unavailable.\n";
     181    print STDERR "<<Finished>>\n";
     182         return; 
     183    }
     184
    182185    while ($strIdentifyText =~ m/^(.*)<title>(.*?)<\/title>(.*)$/s)
    183186    {
     
    185188    print STDERR "Page Title: $2\n";
    186189    }
    187 
     190 
    188191    while ($strIdentifyText =~ m/^(.*)<meta (.*?)>(.*)$/s)
    189192    {
    190193    $strIdentifyText = $1.$3;
    191194    my $strTempString = $2;
    192     print STDERR "Meta Information:\n";
     195    print STDERR "\n";
     196
    193197    while($strTempString =~ m/(.*?)=[\"|\'](.*?)[\"|\'](.*?)$/s)
    194198    {
     
    202206        $strMetaName =~ s/^([" "])+//m;
    203207        $strMetaContain =~ s/^([" "])+//m;
    204         print STDERR "\t$strMetaName: $strMetaContain\n";
    205        
     208             
     209        print STDERR "$strMetaName: $strMetaContain\n\n";
     210           
    206211    }
    207     print STDERR "\n";
    208     }
     212
     213    }
     214
     215    print STDERR "<<Finished>>\n";
     216
    209217}
    210218
     
    222230
    2232311;
     232
  • trunk/gsdl/perllib/downloaders/WgetDownload.pm

    r11783 r12465  
    3434use BasDownload;
    3535use strict;
     36use IPC::Open2;
    3637
    3738sub BEGIN {
     
    4041
    4142my $arguments =
    42 #    [ { 'name' => "url",
    43 #   'desc' => "{WgetDownload.url}",         
    44 #   'type' => "string",
    45 #   'deft' => "",
    46 #   'reqd' => "yes"},
    4743     [ { 'name' => "proxy_on",
    4844    'desc' => "{WgetDownload.proxy_on}",
     
    125121    # Setup .wgetrc by using $self->{'proxy_host'} and $self->{'proxy_port'}
    126122    # Test if the connection is succeful. If the connection wasn't succeful then ask user to supply username and password.
    127    
    128123
    129     # TODO: How to test run if the proxy setup is working correctly??
    130     # Use -spider to test whether the connection is working correctly.
    131     # TODO: Ask user to supply username and password.
    132     # Try to use the .wgetrc to setup the user name and password
    133    
    134124}
    135125
    136126sub useWget
    137127{
    138     my ($cmdWget,$blnShow) = @_;
     128    my ($self, $cmdWget,$blnShow) = @_;
    139129
    140     my $strReadIn = "";
    141     my $strLine;
     130    my ($os,$strReadIn,$strLine,$command);
     131   
     132    $os = $ENV{'GSDLOS'};
     133 
     134 
     135    if ($os =~ /windows/i){
     136    $command = "\"$ENV{'GSDLHOME'}\\bin\\windows\\wget\" $cmdWget |";
     137    }
     138    else{
     139        $command = "$ENV{'GSDLHOME'}/packages/wget/wget-1.9/src/wget $cmdWget |"; 
     140    }
    142141
    143     open (WIN,"$ENV{'GSDLHOME'}/packages/wget/wget-1.9/src/wget $cmdWget|") || die "wget request failed: $!\n";
     142   
     143
     144   open(*WIN,$command) || die "wget request failed: $!\n";
     145
     146   
     147
    144148    while (defined($strLine=<WIN>))
    145149    {
     150
     151
    146152    if($blnShow)
    147153    {
    148         print "$strReadIn\n";
     154        print STDERR "$strReadIn\n";
    149155    }
     156
    150157    $strReadIn .= $strLine;
    151158    }
     159
    152160    close(WIN);
    153161
  • trunk/gsdl/perllib/downloaders/Z3950Download.pm

    r11783 r12465  
    4040    @Z3950Download::ISA = ('BasDownload');
    4141}
    42 
    43 local (*YAZOUT, *YAZIN);
    4442
    4543my $arguments =
     
    9795    # Must set $self->{'url'}, since GLI use $self->{'url'} to calculate the log file name!
    9896    $self->{'url'} = $self->{'host'}.":".$self->{'port'};
     97
     98     my $os = $ENV{'GSDLOS'};
     99
     100    if ($os !~ /windows/) {
     101    $self->{'yaz'} = "$ENV{'GSDLHOME'}/packages/yaz/yaz-2.1.4/client/yaz-client";
     102    }
     103    else{
     104    $self->{'yaz'} = "$ENV{'GSDLHOME'}/bin/windows/yaz-client";
     105    }
     106   
    99107    return bless $self, $class;
    100108
     
    106114    my ($hashGeneralOptions) = @_;
    107115    my ($strOpen,$strBase,$strFind,$strResponse,$intAmount,$intMaxRecords,$strRecords);
     116
     117    my $url = $self->{'url'};
     118 
    108119    print STDERR "<<Defined Maximum>>\n";
    109120 
    110     my $url = $self->{'url'};
    111121    print STDERR "Opening connection to $url\n";
    112122     
    113     my $childpid = open2(*YAZOUT, *YAZIN, "yaz-client")
    114     or die "can't open pipe to yaz-client: $!";
    115  
    116     $strOpen = &run_command_with_output("open $url"); 
     123    my  $yaz = $self->{'yaz'};
     124 
     125    my $childpid = open2(*YAZOUT, *YAZIN, $yaz)
     126    or (print STDERR "<<Finished>>\n" and die "can't open pipe to yaz-client: $!");
     127    $self->{'YAZOUT'} = *YAZOUT;
     128    $self->{'YAZIN'} = *YAZIN;
     129
     130    $strOpen = $self->open_connection("open $url"); 
     131
     132    if (!$strOpen) {
     133        print STDERR "Cannot connect to $url\n";
     134        print STDERR "<<Finished>>\n"; 
     135    return 0;
     136    }
     137
    117138    print STDERR "Access database: \"$self->{'database'}\"\n";
    118     &run_command_without_output("base $self->{'database'}");
     139    $self->run_command_without_output("base $self->{'database'}");
    119140    print STDERR "Searching for keyword: \"$self->{'find'}\"\n";
    120     $intAmount = &findAmount($self->{'find'});
     141    $intAmount = $self->findAmount($self->{'find'});
    121142
    122143    if($intAmount <= 0)
    123144    {
    124145    ($intAmount == -1)?
    125         print STDERR "Unexpected format, Parsing operation can not be performed\n" :
    126         print STDERR "No Record is found\n";
     146        print STDERR "Something wrong with the arguments,downloading can not be performed\n":
     147        print STDERR "No Record is found\n";
     148    print STDERR "<<Finished>>\n";
    127149    return 0;
    128150    }
    129151    $intMaxRecords = ($self->{'max_records'} > $intAmount)? $intAmount : $self->{'max_records'};
    130152    print STDERR "<<Total number of record(s):$intMaxRecords>>\n";
    131     $strRecords = &getRecords($intMaxRecords);
    132     print STDERR $strRecords;
    133     &saveRecords($self,$strRecords,$hashGeneralOptions->{'cache_dir'},$intMaxRecords);
     153    $strRecords = "Records: $intMaxRecords\n".$self->getRecords($intMaxRecords);
     154   
     155    $self->saveRecords($strRecords,$hashGeneralOptions->{'cache_dir'},$intMaxRecords);
    134156    print STDERR "Closing connection...\n";
     157    print STDERR "<<Finished>>\n";
     158
    135159    close(YAZOUT);
    136160    close(YAZIN);
    137     waitpid($childpid, 0);
    138161    return 1;
    139162}
    140163
     164sub open_connection{
     165  my ($self,$strCommand) =  (@_);
     166 
     167  $self->run_command($strCommand); 
     168
     169  my $out = $self->{'YAZOUT'};
     170
     171  $_ = <$out>;
     172 
     173  return (/Connecting...OK/i)? 1: 0;
     174 
     175}
     176
    141177sub findAmount
    142178{
     179    my ($self) = shift (@_);
    143180    my($strFindTarget) = @_;
    144     my $strResponse = &run_command_with_output("find $strFindTarget");
    145     return ($strResponse =~ m/^Number of hits: (\d+)/m)? $1:-1;   
     181    my $strResponse = $self->run_command_with_output("find $strFindTarget","^Number of hits:");
     182   return ($strResponse =~ m/^Number of hits: (\d+)/m)? $1:-1;   
    146183}
    147184
    148185sub getRecords
    149186{
     187    my ($self) = shift (@_);
    150188    my ($intMaxRecords) = @_;
    151     my ($strShow,$intStartNumber,$strResponse,$strRecords,$intRecordsLeft);
     189    my ($strShow,$intStartNumber,$numRecords,$strResponse,$strRecords,$intRecordsLeft);
    152190
    153191    $intStartNumber = 1;
    154192    $intRecordsLeft = $intMaxRecords;
     193    $numRecords = 0;
     194    $strResponse ="";
     195
    155196    while ($intRecordsLeft > 0)
    156197    {
    157198    if($intRecordsLeft > 50)
    158199    {
    159         print STDERR "<<Done:50>>\n";
     200       
    160201        print STDERR "Yaz is Gathering records: $intStartNumber - ".($intStartNumber+49)."\n";
    161        
     202        $numRecords = 50;
    162203        $strShow = "show $intStartNumber+50";
    163204        $intStartNumber = $intStartNumber + 50;
    164205        $intRecordsLeft = $intRecordsLeft - 50;
     206             
    165207    }
    166208    else
    167209    {
    168         print STDERR "<<Done:".($intRecordsLeft).">>\n";
     210        $numRecords = $intRecordsLeft;
    169211        print STDERR "Yaz is Gathering records: $intStartNumber - ".($intStartNumber+$intRecordsLeft-1)."\n";
    170212        $strShow = "show $intStartNumber+$intRecordsLeft";
    171213        $intRecordsLeft = 0;
     214       
     215           }
     216   
     217    $strResponse .= $self->get($strShow,$numRecords);
     218         
     219    if ($strResponse eq ""){
     220        print STDERR "<<ERROR: failed to get $numRecords records>>\n";
    172221    }
     222    else{
     223        print STDERR "<<Done:$numRecords>>\n";
     224    }
     225    }
     226
     227    return  "$strResponse\n";
    173228   
    174     $strResponse = &run_command_with_output($strShow);
    175        
    176     if($strResponse =~ m/Records: (\d*?)\n(.*?)nextResultSetPosition = (\d*?)\n/s)
    177     {
    178         $strRecords .= $2;
    179     }
    180     }
    181     return $strRecords;
    182229}
    183230
     
    188235    # setup directories
    189236    # Currently only gather the MARC format
    190     my $strFileName = &generateFileName($self,$intMaxRecords);
     237    my $strFileName = $self->generateFileName($intMaxRecords);
     238
     239    $strOutputDir  =~ s/"//g;
     240
    191241    my $strFileURL = "$strOutputDir/$self->{'host'}/$strFileName.marc";
    192     $strFileURL =~ s/:/\//g;
    193 
     242 
    194243    # prepare subdirectory for record (if needed)
    195     my ($strSubDirPath,$unused) = dirFileSplit($strFileURL);
     244    my ($strSubDirPath,$unused) = $self->dirFileSplit($strFileURL);
     245 
    196246    &util::mk_all_dir($strSubDirPath);
    197247
    198248    my $ds = &util::get_dirsep();
    199249    my $strOutputFile = &util::filename_cat($strOutputDir,$self->{'host'},"$strFileName.marc");
    200     $strOutputFile =~ s/:/$ds/g;
    201 
     250   
    202251    print STDERR "Saving records to \"$strOutputFile\"\n";
    203252
    204253    # save record
    205254    open (ZOUT,">$strOutputFile")
    206     || die "Unable to save oai metadata record: $!\n";
     255    || die "Unable to save Z3950 record: $!\n";
    207256    print ZOUT $strRecords;
    208257    close(ZOUT);
    209258}
    210259
     260
    211261sub run_command_with_output
    212262{
     263    my ($self,$strCMD,$strStopRE) =@_;
     264   
     265    $self->run_command($strCMD);
     266   
     267    return $self->get_output($strStopRE);
     268 
     269}
     270
     271sub get{
     272   my ($self,$strShow,$numRecord) = @_; 
     273
     274   $self->run_command($strShow); 
     275   
     276   my $strFullOutput="";
     277   my $count=0;
     278   my $readRecord = 0;
     279   
     280   while (my $strLine = <YAZOUT>)
     281   {
     282   
     283       if ($strLine =~ m/Records: ([\d]*)/i ){
     284       $readRecord = 1;
     285       next; 
     286       }
     287     
     288      return $strFullOutput if ($strLine =~ m/nextResultSetPosition|Not connected/i);
     289       
     290      next if(!$readRecord);
     291     
     292      $strFullOutput .= $strLine;     
     293  }
     294   
     295}
     296
     297sub run_command_without_output
     298{
     299     my ($self) = shift (@_);
    213300    my ($strCMD) = @_;
    214    
    215     return &run_command($strCMD,"^Elapsed:.*\$");
    216 }
    217 
    218 sub run_command_without_output
    219 {
    220     my ($strCMD) = @_;
    221 
    222     &run_command($strCMD);
     301
     302    $self->run_command($strCMD);
    223303}
    224304
    225305sub run_command
    226306{
    227     my ($strCMD,$strStopRE) = @_;
    228    
    229  
    230     print YAZIN "$strCMD\n";
     307    my ($self,$strCMD) = @_;
     308 
     309    my $input = $self->{'YAZIN'};
     310
     311    print $input "$strCMD\n"; 
     312}
     313
     314sub get_output{
     315    my ($self,$strStopRE) = @_; 
     316
    231317    if (!defined $strStopRE){return "";}
    232318    else
    233319    {
    234320    my $strFullOutput;
    235     while (my $strLine = <YAZOUT>)
     321        my $output = $self->{'YAZOUT'};
     322    while (my $strLine = <$output>)
    236323    {
    237         $strFullOutput .= $strLine;
    238         if($strLine =~ m/$strStopRE/){return $strFullOutput;}
     324           $strFullOutput .= $strLine;   
     325       if($strLine =~ m/^$strStopRE|Not connected/i){return $strFullOutput;}
    239326    }
    240327    }
     
    245332    my ($self,$intMaxRecords) = @_;
    246333    my $strFileName = ($self->{'database'})."_".($self->{'find'})."_".($intMaxRecords);
     334 
    247335}
    248336
    249337sub dirFileSplit
    250338{
    251     my ($strFile) = @_;
    252 
    253     my @aryDirs = split("/",$strFile);
     339    my ($self,$strFile) = @_;
     340
     341    my @aryDirs = split("[/\]",$strFile);
     342   
    254343    my $strLocalFile = pop(@aryDirs);
    255344    my $strSubDirs = join("/",@aryDirs);
     
    258347}
    259348
     349sub url_information
     350{
     351   my ($self,$url) = @_;
     352
     353   $url = $self->{'url'} unless defined $url;
     354
     355   my  $yaz =  $self->{'yaz'};
     356   
     357   my $childpid = open2(*YAZOUT, *YAZIN, $yaz)
     358       or die "can't open pipe to yaz-client: $!";
     359 
     360   $self->{'YAZOUT'} = *YAZOUT;
     361   $self->{'YAZIN'} = *YAZIN;
     362
     363   my $strOpen = $self->open_connection("open $url");
     364   
     365    if (!$strOpen) {
     366        print STDERR "Cannot connect to $url\n";
     367        print STDERR "<<Finished>>\n"; 
     368    return 0;
     369    }
     370
     371   
     372   $strOpen = $self->run_command_with_output("open $url","^Options"); 
     373
     374
     375   $strOpen =~ s/Z> //g;
     376   $strOpen =~ s/Elapsed:.*//g;
     377
     378   print STDERR $strOpen;
     379
     380   print STDERR "<<Finished>>\n";
     381
     382   close(YAZOUT);
     383   close(YAZIN);
     384
     385   return 0;
     386
     387}
     388
    260389sub error
    261390{
    262     my ($strFunctionName,$strError) = @_;
     391    my ($self,$strFunctionName,$strError) = @_;
    263392    {
    264393    print STDERR "Error occoured in Z3950Download.pm\n".
Note: See TracChangeset for help on using the changeset viewer.