Changeset 12465 for trunk/gsdl/perllib/downloaders/OAIDownload.pm
- Timestamp: 2006-08-18T09:24:29+12:00 (18 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/perllib/downloaders/OAIDownload.pm
r11783 r12465 37 37 use XMLParser; 38 38 39 use IO::File;40 39 use POSIX qw(tmpnam); 40 use util; 41 41 42 42 sub BEGIN { … … 56 56 'reqd' => "no"}, 57 57 { 'name' => "get_doc", 58 'disp' => "{OAIDownload. qet_doc_disp}",58 'disp' => "{OAIDownload.get_doc_disp}", 59 59 'desc' => "{OAIDownload.get_doc}", 60 60 'type' => "flag", … … 76 76 my $self; 77 77 78 my $strWgetOptions=""; 79 78 80 sub new 79 81 { … … 107 109 my ($hashGeneralOptions) = @_; 108 110 109 # Checking if the wget has been well setup 110 # &WgetDownload::checkWgetSetup($self,$hashGeneralOptions->{'gli_call'}); 111 112 my $strOutputDir = $hashGeneralOptions->{"cache_dir"}; 111 print STDERR "here2"; 112 113 $strWgetOptions = $self->getWgetOptions(); 114 my $cmdWget = $strWgetOptions; 115 116 my $strOutputDir =""; 117 $strOutputDir = $hashGeneralOptions->{"cache_dir"}; 113 118 my $strBasURL = $self->{'url'}; 114 119 my $intMaxRecords = $self->{'max_records'}; … … 116 121 117 122 print STDERR "<<Defined Maximum>>\n"; 118 my $strIDs = &getOAIIDs($self,$strBasURL); 119 if($strIDs eq "") 123 124 my $strIDs = $self->getOAIIDs($strBasURL); 125 126 if($strIDs eq "") 120 127 { 121 128 print STDERR "Error: No ID being found\n"; 122 129 return 0; 123 130 } 124 my $aryIDs = &parseOAIIDs($strIDs);131 my $aryIDs = $self->parseOAIIDs($strIDs); 125 132 my $intIDs = 0; 126 133 if($self->{'max_records'} < scalar(@$aryIDs)) … … 134 141 print STDERR "<<Total number of record(s):$intIDs>>\n"; 135 142 136 &getOAIRecords($aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc); 143 $self->getOAIRecords($aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc); 144 145 my $tmp_file = "$ENV{GSDLHOME}/tmp/oai.tmp"; 146 &util::rm($tmp_file); 137 147 138 148 return 1; … … 143 153 my ($self,$strBasURL) = @_; 144 154 my ($cmdWget); 155 156 my $wgetOptions = $self->getWgetOptions(); 157 158 $cmdWget = $wgetOptions; 159 145 160 print STDERR "Gathering OAI identifiers.....\n"; 161 146 162 if($self->{'set'} ne 
"") 147 163 { 148 $cmdWget = "-q -O - \"$strBasURL?verb=ListIdentifiers&metadataPrefix=oai_dc&set=$self->{'set'}\" ";164 $cmdWget .= " -q -O - \"$strBasURL?verb=ListIdentifiers&metadataPrefix=oai_dc&set=$self->{'set'}\" "; 149 165 } 150 166 else 151 167 { 152 $cmdWget = "-q -O - \"$strBasURL?verb=ListIdentifiers&metadataPrefix=oai_dc\" "; 153 } 154 my $strIDs = &WgetDownload::useWget($cmdWget); 168 $cmdWget .= " -q -O - \"$strBasURL?verb=ListIdentifiers&metadataPrefix=oai_dc\" "; 169 } 170 171 172 my $strIDs = $self->useWget($cmdWget); 173 174 if (!defined $strIDs or $strIDs eq "" ){ 175 print STDERR "Server information is unavailable.\n"; 176 print STDERR "<<Finished>>\n"; 177 return; 178 } 179 180 print STDERR "<<Download Information>>\n"; 181 182 $self->parse_xml($strIDs); 183 155 184 return $strIDs; 156 185 } … … 158 187 sub parseOAIIDs 159 188 { 160 my ($s trIDs) = @_;189 my ($self,$strIDs) = @_; 161 190 162 191 print STDERR "Parsing OAI identifiers.....\n"; … … 177 206 sub dirFileSplit 178 207 { 179 my ($strFile) = @_; 180 181 my @aryDirs = split("/",$strFile); 208 my ($self,$strFile) = @_; 209 210 my @aryDirs = split("[/\]",$strFile); 211 182 212 my $strLocalFile = pop(@aryDirs); 183 213 my $strSubDirs = join("/",@aryDirs); … … 188 218 sub getOAIDoc 189 219 { 190 my ($s trRecord, $strSubDirPath) = @_;191 220 my ($self,$strRecord, $strSubDirPath) = @_; 221 192 222 print STDERR "Gathering source documents.....\n"; 193 223 # look out for identifier tag in metadata section 224 194 225 if ($strRecord =~ m/<metadata>(.*)<\/metadata>/s) 195 226 { … … 200 231 my $strDocURL = $2; 201 232 202 my ($unused,$strDocFile) = dirFileSplit($strDocURL); 203 204 my $strSoureDirPath = &util::filename_cat($strSubDirPath,"srcdocs"); 233 my ($unused,$strDocFile) = $self->dirFileSplit($strDocURL); 234 235 my $strSoureDirPath =""; 236 237 $strSoureDirPath = &util::filename_cat($strSubDirPath,"srcdocs"); 238 205 239 &util::mk_dir($strSoureDirPath) if (!-e "$strSoureDirPath"); 206 240 
207 241 my $strFullDocFilePath = &util::filename_cat($strSoureDirPath,$strDocFile); 208 242 209 my $wget_cmd = "-q -O $strFullDocFilePath \"$strDocURL\"";210 211 my $strResponse = &WgetDownload::useWget($wget_cmd,1);243 my $wget_cmd = $strWgetOptions." -q -O $strFullDocFilePath \"$strDocURL\""; 244 245 my $strResponse = $self->useWget($wget_cmd,1); 212 246 213 247 if($strResponse ne "") … … 233 267 sub getOAIRecords 234 268 { 235 my ($ aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc) = @_;269 my ($self,$aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc) = @_; 236 270 237 271 my $intDocCounter = 0; … … 240 274 { 241 275 print STDERR "Gathering OAI record with ID:$strID.....\n"; 242 # wget it; 243 my $cmdWget= "-q -O - \"$strBasURL?verb=GetRecord&metadataPrefix=oai_dc&identifier=$strID\""; 244 my $strRecord = &WgetDownload::useWget($cmdWget); 276 277 my $cmdWget= $strWgetOptions." -q -O - \"$strBasURL?verb=GetRecord&metadataPrefix=oai_dc&identifier=$strID\""; 278 279 my $strRecord = $self->useWget($cmdWget); 280 281 282 my @fileDirs = split(":",$strID); 245 283 246 284 # setup directories 247 my $strFileURL = "$strOutputDir/$strID.oai"; 248 $strFileURL =~ s/:/\//g; 249 285 286 $strOutputDir =~ s/"//g; 287 288 my $strFileURL = "$strOutputDir/$fileDirs[0]/$fileDirs[1].oai"; 289 250 290 # prepare subdirectory for record (if needed) 251 my ($strSubDirPath,$unused) = dirFileSplit($strFileURL); 291 my ($strSubDirPath,$unused) = ("", ""); 292 293 ($strSubDirPath,$unused) = $self->dirFileSplit($strFileURL); 294 252 295 &util::mk_all_dir($strSubDirPath); 253 296 254 297 my $ds = &util::get_dirsep(); 255 my $strOutputFile = &util::filename_cat($strOutputDir,"$strID.oai"); 256 $strOutputFile =~ s/:/$ds/g; 257 298 258 299 if($blnDownloadDoc) 259 300 { 260 &getOAIDoc($strRecord,$strSubDirPath);301 $self->getOAIDoc($strRecord,$strSubDirPath); 261 302 } 262 303 263 304 # save record 264 open (OAIOUT,">$str OutputFile")305 open 
(OAIOUT,">$strFileURL") 265 306 || die "Unable to save oai metadata record: $!\n"; 266 307 print OAIOUT $strRecord; 267 308 close(OAIOUT); 268 309 269 $intDocCounter ++; 270 print STDERR "<<Done>>\n"; 310 print STDERR "Saving records to $strFileURL\n"; 311 print STDERR "<<Done>>\n"; 312 $intDocCounter ++; 271 313 last if ($intDocCounter >= $intMaxRecords); 272 314 } 315 273 316 ($intDocCounter >= $intMaxRecords) ? 274 317 print STDERR "Reach maximum download records, use -max_records to set the maximum.\n": 275 318 print STDERR "Complete download meta record from $strBasURL\n"; 276 319 320 print STDERR "<<Finished>>\n"; 277 321 } 278 322 … … 282 326 if(!defined $self){ die "System Error: No \$self defined for url_information in OAIDownload\n";} 283 327 284 my $strBaseCMD = "-q -O - \"$self->{'url'}?_OPTS_\""; 328 my $wgetOptions = $self->getWgetOptions(); 329 my $strBaseCMD = $wgetOptions." -q -O - \"$self->{'url'}?_OPTS_\""; 285 330 286 331 my $strIdentify = "verb=Identify"; … … 290 335 $strIdentifyCMD =~ s/_OPTS_/$strIdentify/; 291 336 292 my $strIdentifyText = &WgetDownload::useWget($strIdentifyCMD); 337 my $strIdentifyText = $self->useWget($strIdentifyCMD); 338 339 if (!defined $strIdentifyText or $strIdentifyText eq "" ){ 340 print STDERR "Server information is unavailable.\n"; 341 print STDERR "<<Finished>>\n"; 342 return; 343 } 293 344 294 345 print STDERR "General information:\n"; … … 297 348 my $strListSetCMD = $strBaseCMD; 298 349 $strListSetCMD =~ s/_OPTS_/$strListSets/; 299 my $strListSetsText = &WgetDownload::useWget($strListSetCMD); 350 my $strListSetsText = $self->useWget($strListSetCMD); 351 352 300 353 print STDERR "List Information:\n"; 301 354 $self->parse_xml($strListSetsText); … … 306 359 my ($self) = shift (@_); 307 360 my ($strOutputText) = @_; 308 my ($name,$fh); 309 361 310 362 #Open a temporary file to store OAI information, and store the information to the temp file 311 do {$name = tmpnam()} 312 until $fh = IO::File->new($name, 
O_RDWR|O_CREAT|O_EXCL); 313 print $fh $strOutputText; 314 close($fh); 363 my $name = "$ENV{GSDLHOME}/tmp/oai.tmp"; 364 365 open(*OAIOUT,"> $name"); 366 367 print OAIOUT $strOutputText; 368 close(OAIOUT); 315 369 316 370 $self->{'temp_file_name'} = $name; … … 340 394 if ((defined $self->{'subfield'} && ($self->{'subfield'} ne ""))) { 341 395 $self->{'text'} .= $_[1]; 342 $self->{'text'} =~ s/[\n]| [" "]//g;396 $self->{'text'} =~ s/[\n]|([ ]{2,})//g; 343 397 if($self->{'text'} ne "") 344 398 { … … 352 406 { 353 407 my ($expat, $element, %attr) = @_; 408 354 409 $self->{'subfield'} = $element; 410 355 411 } 356 412 … … 364 420 sub error 365 421 { 366 my ($s trFunctionName,$strError) = @_;422 my ($self,$strFunctionName,$strError) = @_; 367 423 { 368 424 print "Error occoured in OAIDownload.pm\n".
Note: See TracChangeset for help on using the changeset viewer.