source: gs2-extensions/parallel-building/trunk/src/perllib/downloaders/SRWDownload.pm@ 24626

Last change on this file since 24626 was 24626, checked in by jmt12, 13 years ago

An (almost) complete copy of the perllib directory from a (circa SEP2011) head checkout from Greenstone 2 trunk - in order to try and make merging in this extension a little easier later on (as there have been some major changes to buildcol.pl commited in the main trunk but not in the x64 branch)

File size: 5.7 KB
Line 
1###########################################################################
2#
3# SRWDownload.pm -- base class for all the import plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package SRWDownload;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
34use strict;
35
36use Z3950Download;
37use IPC::Open2;
38
39sub BEGIN {
40 @SRWDownload::ISA = ('Z3950Download');
41}
42
43my $arguments = [
44 ];
45
46my $options = { 'name' => "SRWDownload",
47 'desc' => "{SRWDownload.desc}",
48 'abstract' => "no",
49 'inherits' => "yes"
50 };
51
52
53sub new
54{
55 my ($class) = shift (@_);
56 my ($getlist,$inputargs,$hashArgOptLists) = @_;
57 push(@$getlist, $class);
58
59 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
60 push(@{$hashArgOptLists->{"OptList"}},$options);
61
62 my $self = new Z3950Download($getlist,$inputargs,$hashArgOptLists);
63
64 if ($self->{'info_only'}) {
65 # don't worry about any options etc
66 return bless $self, $class;
67 }
68
69 # Must set $self->{'url'}, since GLI use $self->{'url'} to calculate the log file name!
70 $self->{'url'} = $self->{'host'}.":".$self->{'port'};
71 return bless $self, $class;
72}
73
74sub download
75{
76 my ($self) = shift (@_);
77 my ($hashGeneralOptions) = @_;
78 my ($strOpen,$strBase,$strFind,$strResponse,$intAmount,$intMaxRecords,$strRecords);
79
80 # If the url contains just the host and port (as it would for Z39.50), then prepend
81 # the http protocol. Otherwise the download is stuck in an infinite loop for SRW/SRU
82 $self->{'url'} = "http://$self->{'url'}" if $self->{'url'} !~ m/^http/;
83 my $url = $self->{'url'};
84
85 print STDERR "<<Defined Maximum>>\n";
86
87 $strOpen = $self->start_yaz($url);
88
89 print STDERR "Opening connection to \"$self->{'url'}\"\n";
90 print STDERR "Access database: \"$self->{'database'}\"\n";
91 $self->run_command_without_output("base $self->{'database'}");
92 $self->run_command_without_output("querytype prefix");
93 print STDERR "Searching for keyword: \"$self->{'find'}\"\n";
94
95 $intAmount =$self->findAmount($self->{'find'});
96
97 if($intAmount <= 0)
98 {
99 ($intAmount == -1)?
100 print STDERR "Something wrong with the arguments,downloading can not be performed\n" :
101 print STDERR "No Record is found\n";
102 print STDERR "<<Finished>>\n";
103 return 0;
104 }
105 $intMaxRecords = ($self->{'max_records'} > $intAmount)? $intAmount : $self->{'max_records'};
106 print STDERR "<<Total number of record(s):$intMaxRecords>>\n";
107
108 $strRecords = $self->getRecords($intMaxRecords);
109
110 $self->saveRecords($strRecords,$hashGeneralOptions->{'cache_dir'},$intMaxRecords);
111 print STDERR "Closing connection...\n";
112
113 $self->quit_yaz();
114 return 1;
115}
116
117
118sub saveRecords
119{
120 my ($self,$strRecords,$strOutputDir,$intMaxRecords) = @_;
121
122 # setup directories
123 # Currently only gather the MARC format
124 $strRecords ="<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<collection>$strRecords</collection>";
125 my $strFileName = $self->generateFileName($intMaxRecords);
126 my $host = $self->{'host'};
127 $host =~ s/http:\/\///;
128 $strOutputDir =~ s/"//g; #"
129 my $strOutputFile = &util::filename_cat($strOutputDir,$host,"$strFileName.xml");
130
131 # prepare subdirectory for record (if needed)
132
133 my ($strSubDirPath,$unused) = $self->dirFileSplit($strOutputFile);
134 &util::mk_all_dir($strSubDirPath);
135
136 print STDERR "Saving records to \"$strOutputFile\"\n";
137
138 # save record
139 open (ZOUT,">$strOutputFile")
140 || die "Unable to save oai metadata record: $!\n";
141 print ZOUT $strRecords;
142 close(ZOUT);
143}
144
145sub get {
146 my ($self,$strShow,$numRecord) = @_;
147
148 $self->run_command_without_output($strShow);
149
150 my $strFullOutput="";
151 my $count=0;
152 my $readRecord = 0;
153 my $endRecord = 0;
154
155 my $output = $self->{'YAZOUT'};
156 my $strLine;
157
158 while ($strLine = <$output>) #while (defined ($strLine = <$output>))
159 {
160 last if ($count >= $numRecord && $endRecord); # done, if we've reached the end of the last record
161
162 last if($strLine =~ m/^HTTP ERROR/i);
163
164 if ($strLine =~ m/pos=[\d]*/i ) {
165 $count++;
166 $readRecord = 1;
167 $endRecord = 0;
168 next;
169 }
170
171 if ($strLine =~ m/<\/record>/i ) { # end tag of record
172 $endRecord = 1;
173 }
174
175 next if(!$readRecord);
176
177 $strFullOutput .= $strLine;
178 }
179
180 return $strFullOutput;
181}
182
183sub url_information{
184 my ($self) = @_;
185
186 my $url = $self->{'url'};
187
188 $url =~ s#http://##;
189
190 return $self->SUPER::url_information($url);
191
192}
193
194sub error
195{
196 my ($self, $strFunctionName,$strError) = @_;
197 {
198 print STDERR "Error occoured in SRWDownload.pm\n".
199 "In Function:".$strFunctionName."\n".
200 "Error Message:".$strError."\n";
201 exit(-1);
202 }
203}
204
2051;
Note: See TracBrowser for help on using the repository browser.