[11783] | 1 | ###########################################################################
|
---|
| 2 | #
|
---|
| 3 | # OAIDownload.pm -- download module that harvests records (and, optionally, their source documents) from an OAI repository using wget
|
---|
| 4 | # A component of the Greenstone digital library software
|
---|
| 5 | # from the New Zealand Digital Library Project at the
|
---|
| 6 | # University of Waikato, New Zealand.
|
---|
| 7 | #
|
---|
| 8 | # Copyright (C) 1999 New Zealand Digital Library Project
|
---|
| 9 | #
|
---|
| 10 | # This program is free software; you can redistribute it and/or modify
|
---|
| 11 | # it under the terms of the GNU General Public License as published by
|
---|
| 12 | # the Free Software Foundation; either version 2 of the License, or
|
---|
| 13 | # (at your option) any later version.
|
---|
| 14 | #
|
---|
| 15 | # This program is distributed in the hope that it will be useful,
|
---|
| 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 18 | # GNU General Public License for more details.
|
---|
| 19 | #
|
---|
| 20 | # You should have received a copy of the GNU General Public License
|
---|
| 21 | # along with this program; if not, write to the Free Software
|
---|
| 22 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 23 | #
|
---|
| 24 | ###########################################################################
|
---|
| 25 |
|
---|
| 26 | package OAIDownload;
|
---|
| 27 |
|
---|
| 28 | eval {require bytes};
|
---|
| 29 |
|
---|
| 30 | # suppress the annoying "subroutine redefined" warning that various
|
---|
| 31 | # plugins cause under perl 5.6
|
---|
| 32 | $SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
|
---|
| 33 |
|
---|
| 34 | use strict;
|
---|
| 35 |
|
---|
| 36 | use WgetDownload;
|
---|
| 37 | use XMLParser;
|
---|
| 38 |
|
---|
| 39 | use POSIX qw(tmpnam);
|
---|
[12465] | 40 | use util;
|
---|
[11783] | 41 |
|
---|
# Set up inheritance at compile time: OAIDownload derives from WgetDownload.
# Methods such as getWgetOptions() and useWget() are not defined in this file
# and are presumably inherited from that parent class.
sub BEGIN {
    @OAIDownload::ISA = qw(WgetDownload);
}
| 45 |
|
---|
# Descriptions of the arguments this download module accepts.  new() appends
# them to the "ArgList" of the hash handed to the WgetDownload constructor.
# The "{OAIDownload.*}" values look like lookup keys for display strings that
# are resolved elsewhere -- NOTE(review): confirm against the string bundle.
my $arguments = [
    {
        'name' => "url",
        'disp' => "{OAIDownload.url_disp}",
        'desc' => "{OAIDownload.url}",
        'type' => "string",
        'reqd' => "yes",
    },
    {
        'name' => "metadata_prefix",
        'disp' => "{OAIDownload.metadata_prefix_disp}",
        'desc' => "{OAIDownload.metadata_prefix}",
        'type' => "string",
        'deft' => "oai_dc",
        'reqd' => "no",
    },
    {
        'name' => "set",
        'disp' => "{OAIDownload.set_disp}",
        'desc' => "{OAIDownload.set}",
        'type' => "string",
        'reqd' => "no",
    },
    {
        'name' => "get_doc",
        'disp' => "{OAIDownload.get_doc_disp}",
        'desc' => "{OAIDownload.get_doc}",
        'type' => "flag",
        'reqd' => "no",
    },
    {
        'name'  => "max_records",
        'disp'  => "{OAIDownload.max_records_disp}",
        'desc'  => "{OAIDownload.max_records}",
        'type'  => "int",
        'deft'  => "500",
        'range' => "1,",
        'reqd'  => "no",
    },
];

# Module-level description; new() appends it to the "OptList".
my $options = {
    'name'     => "OAIDownload",
    'desc'     => "{OAIDownload.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};

# The most recently constructed object.  It is kept at file level because the
# XML parser callbacks (Char, OAI_StartTag, OAI_EndTag) and the END block are
# plain subs that read it directly instead of receiving it as an argument.
my $self;

# wget option string; filled in by download() and reused by getOAIDoc() and
# getOAIRecords().
my $strWgetOptions = "";
| 85 |
|
---|
# Constructor.
#
# Arguments (after the class name):
#   $getlist         - array ref; this class name is appended to it
#   $inputargs       - handed through unchanged to the WgetDownload constructor
#   $hashArgOptLists - hash ref; this module's argument and option
#                      descriptions are appended to its "ArgList" / "OptList"
#
# Returns the object built by WgetDownload, re-blessed into $class.  The object
# is also stored in the file-level $self, which the XML parser callbacks read.
sub new
{
    my ($class) = shift (@_);
    my ($getlist,$inputargs,$hashArgOptLists) = @_;
    push(@$getlist, $class);

    if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}); }
    if (defined $options)   { push(@{$hashArgOptLists->{"OptList"}}, $options); }

    # Explicit Class->new(...) calls: the indirect-object form
    # ("new WgetDownload(...)") relies on parser heuristics and is particularly
    # fragile here because this package defines its own sub named "new".
    $self = (defined $hashArgOptLists)
        ? WgetDownload->new($getlist, $inputargs, $hashArgOptLists)
        : WgetDownload->new($getlist, $inputargs);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    # Stream-style parser whose callbacks are the subs defined in this file.
    my $parser = XML::Parser->new('Style' => 'Stream',
                                  'Handlers' => {'Char' => \&Char,
                                                 'Start' => \&OAI_StartTag,
                                                 'End' => \&OAI_EndTag
                                  });
    $self->{'parser'} = $parser;

    # make sure the tmp directory that we will use later exists
    my $tmp_dir = "$ENV{GSDLHOME}/tmp";
    if (! -e $tmp_dir) {
        &util::mk_dir($tmp_dir);
    }

    return bless $self, $class;
}
| 117 |
|
---|
# Harvest records from the OAI repository at $self->{'url'}.
#
# Arguments:
#   $hashGeneralOptions - hash ref; its "cache_dir" entry is the directory the
#                         records are written beneath
#
# Gathers the identifier listing, extracts the identifiers, fetches up to
# $self->{'max_records'} records (plus source documents when
# $self->{'get_doc'} is set) and finally removes the temporary file written by
# parse_xml().
#
# Returns 1 on success, 0 when no identifier listing could be obtained.
sub download
{
    my ($self) = shift (@_);
    my ($hashGeneralOptions) = @_;

    # Stored in the file-level variable because getOAIDoc() and
    # getOAIRecords() build their wget commands from it.
    $strWgetOptions = $self->getWgetOptions();

    my $strOutputDir   = $hashGeneralOptions->{"cache_dir"};
    my $strBasURL      = $self->{'url'};
    my $intMaxRecords  = $self->{'max_records'};
    my $blnDownloadDoc = $self->{'get_doc'};

    print STDERR "<<Defined Maximum>>\n";

    my $strIDs = $self->getOAIIDs($strBasURL);

    # getOAIIDs() uses a bare "return" when the server gives no answer, so the
    # result may be undef as well as empty.
    if (!defined $strIDs || $strIDs eq "")
    {
        print STDERR "Error: No ID being found\n";
        return 0;
    }

    my $aryIDs = $self->parseOAIIDs($strIDs);

    # Report how many records will actually be fetched: the number of
    # identifiers found, capped at max_records.
    my $intFound = scalar(@$aryIDs);
    my $intIDs   = ($self->{'max_records'} < $intFound) ? $self->{'max_records'} : $intFound;
    print STDERR "<<Total number of record(s):$intIDs>>\n";

    $self->getOAIRecords($aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc);

    # Remove the scratch file that parse_xml() wrote.
    my $tmp_file = "$ENV{GSDLHOME}/tmp/oai.tmp";
    &util::rm($tmp_file);

    return 1;
}
| 162 |
|
---|
# Fetch the ListIdentifiers response(s) for the repository at $strBasURL.
#
# Arguments:
#   $strBasURL - base URL of the OAI repository
#
# The first request uses $self->{'metadata_prefix'} and, when set,
# $self->{'set'}.  While a response carries a non-empty resumptionToken and
# fewer than $self->{'max_records'} identifiers have been seen, further pages
# are requested.  Each response is also passed to parse_xml().
#
# Returns the concatenated raw XML of all responses, or nothing (undef in
# scalar context) when the first request yields no data.
sub getOAIIDs
{
    my ($self,$strBasURL) = @_;

    my $wgetOptions = $self->getWgetOptions();

    print STDERR "Gathering OAI identifiers.....\n";

    my $metadata_prefix = $self->{'metadata_prefix'};
    my $cmdWget = $wgetOptions
        . " -q -O - \"$strBasURL?verb=ListIdentifiers&metadataPrefix=$metadata_prefix";

    # if $set specified, add it in to URL
    my $set = $self->{'set'};
    $cmdWget .= "&set=$set" if (defined $set && $set ne "");

    $cmdWget .= "\" ";

    my $strIDs = $self->useWget($cmdWget);

    if (!defined $strIDs or $strIDs eq "" ){
        print STDERR "Server information is unavailable.\n";
        print STDERR "<<Finished>>\n";
        return;
    }

    print STDERR "<<Download Information>>\n";

    $self->parse_xml($strIDs);

    my $accumulated_strIDs = $strIDs;

    # Keep a running identifier count instead of re-scanning everything
    # gathered so far after every page.
    my @page_identifiers = ($strIDs =~ m/<identifier>(.*?)<\/identifier>/sg);
    my $num_acc_identifiers = scalar(@page_identifiers);

    # top up list with further requests for IDs, but only while more are
    # needed: download() never uses more than max_records identifiers.
    while ($num_acc_identifiers < $self->{'max_records'}
           && $strIDs =~ m/<resumptionToken.*?>(.*?)<\/resumptionToken>/s) {

        my $resumption_token = $1;
        $resumption_token =~ s/^\s+|\s+$//g;

        # An empty resumptionToken element marks the last page of the list;
        # requesting it would only produce an error response.
        last if ($resumption_token eq "");

        $cmdWget = $wgetOptions
            . " -q -O - \"$strBasURL?verb=ListIdentifiers&resumptionToken=$resumption_token\"";

        $strIDs = $self->useWget($cmdWget);

        # A failed follow-up request must not reach parse_xml(), which dies
        # on empty input; keep what has been gathered so far instead.
        if (!defined $strIDs || $strIDs eq "") {
            print STDERR "Warning: no response to resumptionToken request, using the identifiers gathered so far.\n";
            last;
        }

        $self->parse_xml($strIDs);

        $accumulated_strIDs .= $strIDs;

        @page_identifiers = ($strIDs =~ m/<identifier>(.*?)<\/identifier>/sg);
        $num_acc_identifiers += scalar(@page_identifiers);
    }

    return $accumulated_strIDs;
}
| 224 |
|
---|
# Extract the contents of every <identifier>...</identifier> element from the
# raw ListIdentifiers XML.
#
# Arguments:
#   $strIDs - XML text (possibly several responses concatenated)
#
# Returns an array ref of identifier strings in document order; the array is
# empty when there are none or when $strIDs is undefined.
sub parseOAIIDs
{
    my ($self,$strIDs) = @_;

    print STDERR "Parsing OAI identifiers.....\n";

    return [] unless (defined $strIDs);

    # One global match collects all identifiers in a single pass.  The earlier
    # approach re-copied the remainder of the text for every identifier, which
    # is quadratic on long listings.
    my @aryIDs = ($strIDs =~ m/<identifier>(.*?)<\/identifier>/sg);

    return \@aryIDs;
}
| 243 |
|
---|
# Split a path or URL into its directory part and its final component.
#
# Arguments:
#   $strFile - path or URL; both "/" and "\" are treated as separators
#
# Returns ($strSubDirs, $strLocalFile): the leading components re-joined with
# "/", and the last component.
sub dirFileSplit
{
    my ($self,$strFile) = @_;

    # A regex literal is used deliberately: written as the double-quoted
    # string "[/\]" the backslash is consumed by string escaping, leaving a
    # class that matches "/" only.
    my @aryDirs = split(/[\/\\]/, $strFile);

    my $strLocalFile = pop(@aryDirs);
    my $strSubDirs = join("/",@aryDirs);

    return ($strSubDirs,$strLocalFile);
}
| 255 |
|
---|
# Download the source document referenced by an OAI record.
#
# Arguments:
#   $strRecord     - XML text of one OAI record
#   $strSubDirPath - directory of the record; the document is saved in a
#                    "srcdocs" subdirectory beneath it
#
# The document URL is taken from the first <identifier> / <dc:identifier>
# element inside the record's <metadata> section.  After the download, the
# record text is rewritten so that the original URL is kept in <OrigURL> and
# <identifier> points at the local copy.
#
# Returns the (possibly rewritten) record text.  The caller's variable is not
# modified, so the rewrite only takes effect where the return value is used.
# Exits the process when wget reports an error.
sub getOAIDoc
{
    my ($self,$strRecord, $strSubDirPath) = @_;

    print STDERR "Gathering source documents.....\n";

    # look out for identifier tag in metadata section
    my ($strMetaTag) = ($strRecord =~ m/<metadata>(.*)<\/metadata>/s);
    if (!defined $strMetaTag)
    {
        print STDERR "\tNo souce document URL is specified in the OAI record (No metadata field is provided)\n";
        return $strRecord;
    }

    my ($strDocURL) = ($strMetaTag =~ m/<(?:dc:)?identifier>(.*?)<\/(?:dc:)?identifier>/s);
    if (!defined $strDocURL)
    {
        print STDERR "\tNo souce document URL is specified in the OAI record (No (dc:)?identifier is provided)\n";
        return $strRecord;
    }

    # The last component of the URL becomes the local file name.
    my ($unused,$strDocFile) = $self->dirFileSplit($strDocURL);

    my $strSoureDirPath = &util::filename_cat($strSubDirPath,"srcdocs");
    &util::mk_dir($strSoureDirPath) if (!-e "$strSoureDirPath");

    my $strFullDocFilePath = &util::filename_cat($strSoureDirPath,$strDocFile);

    my $wget_cmd = $strWgetOptions." -q -O \"$strFullDocFilePath\" \"$strDocURL\"";

    my $strResponse = $self->useWget($wget_cmd,1);

    if (defined $strResponse && $strResponse ne "")
    {
        print STDERR "Error occured while retriving OAI souce documents: $strResponse\n";
        exit(-1);
    }

    # \Q...\E: the URL is literal text, not a pattern -- characters such as
    # "?", "." and "+" are common in URLs and would otherwise be interpreted
    # as regex syntax, making the rewrite fail or match the wrong text.
    $strRecord =~ s/<metadata>(.*?)<(?:dc:)?identifier>\Q$strDocURL\E<\/(?:dc:)?identifier>(.*?)<\/metadata>/<metadata>$1<OrigURL>$strDocURL<\/OrigURL>\n <identifier>srcdocs\/$strDocFile<\/identifier>$2<\/metadata>/s;

    return $strRecord;
}
| 304 |
|
---|
# Fetch each identified record with a GetRecord request and save it to disk.
#
# Arguments:
#   $aryIDs         - array ref of OAI identifiers
#   $strOutputDir   - directory the records are written beneath (any double
#                     quotes in it are removed)
#   $strBasURL      - base URL of the OAI repository
#   $intMaxRecords  - stop after this many records
#   $blnDownloadDoc - when true, getOAIDoc() is called for every record
#
# A record with identifier "a:b:c" is written to
# "<output dir>/<host>/a/b/c.oai", where <host> is derived from
# $self->{'url'}.  Dies when a record file cannot be written.
#
# NOTE(review): identifiers come from the remote server and are placed both in
# the wget command line and in the output path without validation -- consider
# sanitising them.
sub getOAIRecords
{
    my ($self,$aryIDs, $strOutputDir, $strBasURL, $intMaxRecords, $blnDownloadDoc) = @_;

    my $intDocCounter = 0;

    my $metadata_prefix = $self->{'metadata_prefix'};

    # The output directory and host name are the same for every record, so
    # they are worked out once, before the loop.
    $strOutputDir =~ s/"//g; #"

    # Strip the scheme (http or https) and anything from the first remaining
    # colon onwards (e.g. a port number).  Without the https case the first
    # colon is the one in "https:", leaving just "https" as the host.
    my $host = $self->{'url'};
    $host =~ s/https?:\/\///gi;
    $host =~ s/:.*//g;

    foreach my $strID ( @$aryIDs)
    {
        print STDERR "Gathering OAI record with ID:$strID.....\n";

        my $cmdWget= $strWgetOptions." -q -O - \"$strBasURL?verb=GetRecord&metadataPrefix=$metadata_prefix&identifier=$strID\"";

        my $strRecord = $self->useWget($cmdWget);

        # setup directories: each colon-separated part of the identifier
        # becomes one path component
        my @fileDirs = split(":",$strID);
        my $midDir = join ("/",@fileDirs);
        my $strFileURL = "$strOutputDir/$host/".$midDir.".oai";

        # prepare subdirectory for record (if needed)
        my ($strSubDirPath,$unused) = $self->dirFileSplit($strFileURL);

        &util::mk_all_dir($strSubDirPath);

        if($blnDownloadDoc)
        {
            $self->getOAIDoc($strRecord,$strSubDirPath);
        }

        # save record -- three-argument open with a lexical handle, so that
        # unusual characters in the file name cannot be read as an open mode
        open (my $fhRecord, ">", $strFileURL)
            || die "Unable to save oai metadata record: $!\n";
        print $fhRecord (defined $strRecord ? $strRecord : "");
        # buffered write errors only surface when the handle is closed
        close($fhRecord)
            || die "Unable to save oai metadata record: $!\n";

        print STDERR "Saving records to $strFileURL\n";
        print STDERR "<<Done>>\n";
        $intDocCounter ++;
        last if ($intDocCounter >= $intMaxRecords);
    }

    if ($intDocCounter >= $intMaxRecords)
    {
        print STDERR "Reached maximum download records, use -max_records to set the maximum.\n";
    }
    else
    {
        print STDERR "Complete download meta record from $strBasURL\n";
    }

    print STDERR "<<Finished>>\n";
}
| 369 |
|
---|
# Print general information about the OAI repository at $self->{'url'}.
#
# Issues an Identify request followed by a ListSets request and passes each
# response to parse_xml(), whose parser callbacks print the element text to
# STDERR.  Returns early, after reporting the problem on STDERR, when either
# request yields no data.
sub url_information
{
    my ($self) = shift (@_);
    if(!defined $self){ die "System Error: No \$self defined for url_information in OAIDownload\n";}

    my $wgetOptions = $self->getWgetOptions();

    # Both commands are built directly.  Substituting a placeholder into a
    # template would go wrong if the URL itself contained the placeholder.
    my $strCmdPrefix   = $wgetOptions." -q -O - \"$self->{'url'}?";
    my $strIdentifyCMD = $strCmdPrefix."verb=Identify\"";
    my $strListSetCMD  = $strCmdPrefix."verb=ListSets\"";

    my $strIdentifyText = $self->useWget($strIdentifyCMD);

    if (!defined $strIdentifyText or $strIdentifyText eq "" ){
        print STDERR "Server information is unavailable.\n";
        print STDERR "<<Finished>>\n";
        return;
    }

    print STDERR "General information:\n";
    $self->parse_xml($strIdentifyText);

    my $strListSetsText = $self->useWget($strListSetCMD);

    # parse_xml() dies on empty input, so a failed ListSets request is
    # reported in the same way as a failed Identify request.
    if (!defined $strListSetsText or $strListSetsText eq "" ){
        print STDERR "List information is unavailable.\n";
        print STDERR "<<Finished>>\n";
        return;
    }

    print STDERR "List Information:\n";
    $self->parse_xml($strListSetsText);
}
| 403 |
|
---|
# Write XML text to the temporary file "$ENV{GSDLHOME}/tmp/oai.tmp" and run
# the object's XML parser over that file.
#
# Arguments:
#   $strOutputText - XML text to parse
#
# Records the file name in $self->{'temp_file_name'}.  Dies when the temporary
# file cannot be written or when the text is not well-formed XML.
sub parse_xml
{
    my ($self) = shift (@_);
    my ($strOutputText) = @_;

    #Open a temporary file to store OAI information, and store the information to the temp file
    my $name = "$ENV{GSDLHOME}/tmp/oai.tmp";

    # Every step is checked: if the file could not be (re)written, the parser
    # would otherwise read whatever an earlier call left behind.
    open(my $fhTmp, ">", $name)
        || die "OAI: unable to open temporary file $name for writing: $!\n";
    print $fhTmp $strOutputText;
    close($fhTmp)
        || die "OAI: unable to write temporary file $name: $!\n";

    $self->{'temp_file_name'} = $name;

    eval {
        $self->{'parser'}->parsefile("$name");
    };

    if ($@) {
        die "OAI: $name is not a well formed XML file ($@)\n";
    }
}
| 427 |
|
---|
# At program exit, remove the temporary file recorded by parse_xml() when the
# object was run with its 'info' setting on.
END{
    if(defined $self && $self->{'info'})
    {
        my $tmp_file = $self->{'temp_file_name'};

        # parse_xml() may never have run (no file name recorded), or the file
        # may already have been removed by download(); neither case is an
        # error worth dying for during exit.
        if (defined $tmp_file && -e $tmp_file)
        {
            unlink($tmp_file) or die "Could not unlink $tmp_file: $!";
        }
    }
}
| 434 |
|
---|
# Character-data callback for the XML parser.
#
# This Char function overrides the one in XML::Parser::Stream to overcome a
# problem where $expat->{Text} is treated as the return value, slowing
# things down significantly in some cases.
#
# Besides appending the data to $expat->{Text}, it accumulates the text of the
# element currently being read (tracked in the file-level $self by
# OAI_StartTag / OAI_EndTag), removes newlines and runs of two or more spaces,
# and prints "<element>:(<text>)" to STDERR whenever the result is non-empty.
sub Char {
    use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+

    my $expat = $_[0];
    $expat->{'Text'} .= $_[1];

    # Outside of any element there is nothing to report.
    my $current_element = $self->{'subfield'};
    return undef unless (defined $current_element && $current_element ne "");

    $self->{'text'} .= $_[1];
    $self->{'text'} =~ s/[\n]|([ ]{2,})//g;

    print STDERR " ${current_element}:($self->{'text'})\n" if ($self->{'text'} ne "");

    return undef;
}
| 451 |
|
---|
# Start-tag callback for the XML parser: remembers the name of the element
# that has just been opened so that Char() can label the text it prints.
# The parser object and the element's attributes are passed in as well but
# are not used.
sub OAI_StartTag
{
    my (undef, $tag_name) = @_;

    $self->{'subfield'} = $tag_name;
}
| 459 |
|
---|
# End-tag callback for the XML parser: clears the accumulated text and the
# current element name, so Char() stays silent until the next start tag.
# The arguments passed in (parser object and element name) are not used.
sub OAI_EndTag
{
    $self->{'text'} = "";
    return $self->{'subfield'} = "";
}
| 466 |
|
---|
# Report a fatal error and terminate the program.
#
# Arguments:
#   $strFunctionName - name of the function in which the error occurred
#   $strError        - description of the error
#
# Prints the report to standard output and exits with status -1; it never
# returns to the caller.
sub error
{
    my ($self,$strFunctionName,$strError) = @_;

    my $strReport = join("",
                         "Error occoured in OAIDownload.pm\n",
                         "In Function:", $strFunctionName, "\n",
                         "Error Message:", $strError, "\n");

    print $strReport;
    exit(-1);
}
| 477 |
|
---|
| 478 |
|
---|
| 479 |
|
---|
| 480 | 1;
|
---|