Ignore:
Timestamp:
2006-08-18T09:24:29+12:00 (18 years ago)
Author:
shaoqun
Message:

fixed the bugs on windows

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/downloaders/OAIDownload.pm

    r11783 r12465  
    3737use XMLParser;
    3838
    39 use IO::File;
    4039use POSIX qw(tmpnam);
     40use util;
    4141
    4242sub BEGIN {
     
    5656    'reqd' => "no"},
    5757      { 'name' => "get_doc",
    58     'disp' => "{OAIDownload.qet_doc_disp}",
     58    'disp' => "{OAIDownload.get_doc_disp}",
    5959    'desc' => "{OAIDownload.get_doc}",
    6060    'type' => "flag",
     
    7676my $self;
    7777
     78my $strWgetOptions="";
     79
    7880sub new
    7981{
     
    107109    my ($hashGeneralOptions) = @_;
    108110
    109     # Checking if the wget has been well setup
    110     # &WgetDownload::checkWgetSetup($self,$hashGeneralOptions->{'gli_call'});
    111    
    112     my $strOutputDir = $hashGeneralOptions->{"cache_dir"};
     111    print STDERR "here2";
     112   
     113    $strWgetOptions = $self->getWgetOptions();
     114    my $cmdWget = $strWgetOptions;
     115 
     116    my $strOutputDir ="";
     117    $strOutputDir = $hashGeneralOptions->{"cache_dir"};
    113118    my $strBasURL = $self->{'url'};
    114119    my $intMaxRecords = $self->{'max_records'};
     
    116121
    117122    print STDERR "<<Defined Maximum>>\n";
    118     my $strIDs = &getOAIIDs($self,$strBasURL);
    119     if($strIDs eq "")
     123
     124    my $strIDs = $self->getOAIIDs($strBasURL);
     125 
     126   if($strIDs eq "")
    120127    {
    121128    print STDERR "Error: No ID being found\n";
    122129    return 0;
    123130    }
    124     my $aryIDs = &parseOAIIDs($strIDs);
     131    my $aryIDs = $self->parseOAIIDs($strIDs);
    125132    my $intIDs = 0;
    126133    if($self->{'max_records'} < scalar(@$aryIDs))
     
    134141    print STDERR "<<Total number of record(s):$intIDs>>\n";
    135142
    136     &getOAIRecords($aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc);
     143    $self->getOAIRecords($aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc);
     144
     145    my $tmp_file = "$ENV{GSDLHOME}/tmp/oai.tmp";
     146    &util::rm($tmp_file);
    137147
    138148    return 1;
     
    143153    my ($self,$strBasURL) = @_;
    144154    my ($cmdWget);
     155     
     156    my $wgetOptions = $self->getWgetOptions();
     157
     158    $cmdWget = $wgetOptions;
     159 
    145160    print STDERR  "Gathering OAI identifiers.....\n";
     161
    146162    if($self->{'set'} ne "")
    147163    {
    148     $cmdWget = "-q -O - \"$strBasURL?verb=ListIdentifiers&metadataPrefix=oai_dc&set=$self->{'set'}\" ";
     164    $cmdWget .= " -q -O - \"$strBasURL?verb=ListIdentifiers&metadataPrefix=oai_dc&set=$self->{'set'}\" ";
    149165    }
    150166    else
    151167    {
    152     $cmdWget = "-q -O - \"$strBasURL?verb=ListIdentifiers&metadataPrefix=oai_dc\" ";
    153     }
    154     my $strIDs =  &WgetDownload::useWget($cmdWget);
     168    $cmdWget .= " -q -O - \"$strBasURL?verb=ListIdentifiers&metadataPrefix=oai_dc\" ";
     169    }
     170
     171 
     172    my $strIDs =  $self->useWget($cmdWget);
     173
     174    if (!defined $strIDs or $strIDs eq ""  ){
     175    print STDERR "Server information is unavailable.\n";
     176    print STDERR "<<Finished>>\n";
     177        return; 
     178    }
     179
     180    print STDERR "<<Download Information>>\n";
     181
     182    $self->parse_xml($strIDs);
     183
    155184    return $strIDs;
    156185}
     
    158187sub parseOAIIDs
    159188{   
    160     my ($strIDs) = @_;
     189    my ($self,$strIDs) = @_;
    161190
    162191    print STDERR "Parsing OAI identifiers.....\n";
     
    177206sub dirFileSplit
    178207{
    179     my ($strFile) = @_;
    180 
    181     my @aryDirs = split("/",$strFile);
     208    my ($self,$strFile) = @_;
     209
     210    my @aryDirs = split("[/\]",$strFile);
     211   
    182212    my $strLocalFile = pop(@aryDirs);
    183213    my $strSubDirs = join("/",@aryDirs);
     
    188218sub getOAIDoc
    189219{
    190     my ($strRecord, $strSubDirPath) = @_;
    191     
     220    my ($self,$strRecord, $strSubDirPath) = @_;
     221 
    192222    print  STDERR "Gathering source documents.....\n";
    193223    # look out for identifier tag in metadata section
     224   
    194225    if ($strRecord =~ m/<metadata>(.*)<\/metadata>/s)
    195226    {
     
    200231        my $strDocURL = $2;
    201232
    202         my ($unused,$strDocFile) = dirFileSplit($strDocURL);
    203 
    204         my $strSoureDirPath = &util::filename_cat($strSubDirPath,"srcdocs");
     233        my ($unused,$strDocFile) = $self->dirFileSplit($strDocURL);
     234
     235            my $strSoureDirPath ="";
     236
     237        $strSoureDirPath = &util::filename_cat($strSubDirPath,"srcdocs");
     238
    205239        &util::mk_dir($strSoureDirPath)  if (!-e "$strSoureDirPath");
    206240       
    207241        my $strFullDocFilePath = &util::filename_cat($strSoureDirPath,$strDocFile);
    208242       
    209         my $wget_cmd = "-q -O $strFullDocFilePath \"$strDocURL\"";
    210 
    211         my $strResponse =  &WgetDownload::useWget($wget_cmd,1);
     243        my $wget_cmd = $strWgetOptions." -q -O $strFullDocFilePath \"$strDocURL\"";
     244
     245        my $strResponse =  $self->useWget($wget_cmd,1);
    212246
    213247        if($strResponse ne "")
     
    233267sub getOAIRecords
    234268{
    235     my ($aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc) = @_;
     269    my ($self,$aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc) = @_;
    236270
    237271    my $intDocCounter = 0;
     
    240274    {
    241275    print  STDERR "Gathering OAI record with ID:$strID.....\n";
    242     # wget it;
    243     my $cmdWget= "-q -O - \"$strBasURL?verb=GetRecord&metadataPrefix=oai_dc&identifier=$strID\"";
    244     my $strRecord =  &WgetDownload::useWget($cmdWget);
     276       
     277    my $cmdWget= $strWgetOptions." -q -O - \"$strBasURL?verb=GetRecord&metadataPrefix=oai_dc&identifier=$strID\"";
     278
     279    my $strRecord =  $self->useWget($cmdWget);
     280
     281       
     282        my @fileDirs = split(":",$strID); 
    245283
    246284    # setup directories
    247     my $strFileURL = "$strOutputDir/$strID.oai";
    248     $strFileURL =~ s/:/\//g;
    249 
     285
     286        $strOutputDir  =~ s/"//g;
     287 
     288    my $strFileURL = "$strOutputDir/$fileDirs[0]/$fileDirs[1].oai";
     289   
    250290    # prepare subdirectory for record (if needed)
    251     my ($strSubDirPath,$unused) = dirFileSplit($strFileURL);
     291    my ($strSubDirPath,$unused) = ("", "");
     292
     293        ($strSubDirPath,$unused) = $self->dirFileSplit($strFileURL);
     294   
    252295    &util::mk_all_dir($strSubDirPath);
    253296
    254297    my $ds = &util::get_dirsep();
    255     my $strOutputFile = &util::filename_cat($strOutputDir,"$strID.oai");
    256     $strOutputFile =~ s/:/$ds/g;
    257 
     298   
    258299    if($blnDownloadDoc)
    259300    {
    260         &getOAIDoc($strRecord,$strSubDirPath);
     301        $self->getOAIDoc($strRecord,$strSubDirPath);
    261302    }
    262303
    263304    # save record
    264     open (OAIOUT,">$strOutputFile")
     305    open (OAIOUT,">$strFileURL")
    265306        || die "Unable to save oai metadata record: $!\n";
    266307    print OAIOUT $strRecord;
    267308    close(OAIOUT);
    268309
    269     $intDocCounter ++;
    270     print STDERR "<<Done>>\n";
     310        print STDERR "Saving records to $strFileURL\n";
     311        print STDERR "<<Done>>\n";
     312    $intDocCounter ++; 
    271313    last if ($intDocCounter >= $intMaxRecords);
    272314    }
     315
    273316    ($intDocCounter >= $intMaxRecords) ?
    274317    print  STDERR "Reach maximum download records, use -max_records to set the maximum.\n":
    275318    print  STDERR "Complete download meta record from $strBasURL\n";
    276319
     320       print STDERR "<<Finished>>\n";
    277321}
    278322
     
    282326    if(!defined $self){ die "System Error: No \$self defined for url_information in OAIDownload\n";}
    283327   
    284     my $strBaseCMD = "-q -O - \"$self->{'url'}?_OPTS_\"";
     328    my $wgetOptions = $self->getWgetOptions();
     329    my $strBaseCMD = $wgetOptions." -q -O - \"$self->{'url'}?_OPTS_\"";
    285330 
    286331    my $strIdentify = "verb=Identify";
     
    290335    $strIdentifyCMD =~ s/_OPTS_/$strIdentify/; 
    291336
    292     my $strIdentifyText = &WgetDownload::useWget($strIdentifyCMD);
     337    my $strIdentifyText = $self->useWget($strIdentifyCMD);
     338
     339     if (!defined $strIdentifyText or $strIdentifyText eq ""  ){
     340    print STDERR "Server information is unavailable.\n";
     341    print STDERR "<<Finished>>\n";
     342        return; 
     343    }
    293344
    294345    print STDERR "General information:\n";
     
    297348    my $strListSetCMD = $strBaseCMD;
    298349    $strListSetCMD =~ s/_OPTS_/$strListSets/;   
    299     my $strListSetsText = &WgetDownload::useWget($strListSetCMD);
     350    my $strListSetsText = $self->useWget($strListSetCMD);
     351
     352
    300353    print STDERR "List Information:\n";
    301354    $self->parse_xml($strListSetsText);
     
    306359    my ($self) = shift (@_);
    307360    my ($strOutputText) = @_;
    308     my ($name,$fh);
    309    
     361   
    310362    #Open a temporary file to store OAI information, and store the information to the temp file
    311     do {$name = tmpnam()}
    312     until $fh = IO::File->new($name, O_RDWR|O_CREAT|O_EXCL);
    313     print $fh $strOutputText;
    314     close($fh);
     363    my $name = "$ENV{GSDLHOME}/tmp/oai.tmp";
     364
     365    open(*OAIOUT,"> $name");
     366   
     367    print OAIOUT $strOutputText;
     368    close(OAIOUT);
    315369
    316370    $self->{'temp_file_name'} = $name;
     
    340394    if ((defined $self->{'subfield'} && ($self->{'subfield'} ne ""))) {
    341395    $self->{'text'} .= $_[1];
    342     $self->{'text'} =~ s/[\n]|[" "]//g;
     396    $self->{'text'} =~ s/[\n]|([ ]{2,})//g;
    343397    if($self->{'text'} ne "")
    344398    {       
     
    352406{
    353407    my ($expat, $element, %attr) = @_;
     408
    354409    $self->{'subfield'} = $element;
     410   
    355411}
    356412
     
    364420sub error
    365421{
    366     my ($strFunctionName,$strError) = @_;
     422    my ($self,$strFunctionName,$strError) = @_;
    367423    {
    368424    print "Error occoured in OAIDownload.pm\n".
Note: See TracChangeset for help on using the changeset viewer.