source: gsdl/trunk/perllib/downloaders/SRWDownload.pm@ 17207

Last change on this file since 17207 was 17207, checked in by kjdon, 16 years ago

BasDownload renamed to BaseDownload, also tidied up the constructors

  • Property svn:keywords set to Author Date Id Revision
File size: 5.5 KB
Line 
1###########################################################################
2#
3# SRWDownload.pm -- base class for all the import plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package SRWDownload;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
34use strict;
35
36use Z3950Download;
37use IPC::Open2;
38
39sub BEGIN {
40 @SRWDownload::ISA = ('Z3950Download');
41}
42
43my $arguments = [
44 ];
45
46my $options = { 'name' => "SRWDownload",
47 'desc' => "{SRWDownload.desc}",
48 'abstract' => "no",
49 'inherits' => "yes"
50 };
51
52
53sub new
54{
55 my ($class) = shift (@_);
56 my ($getlist,$inputargs,$hashArgOptLists) = @_;
57 push(@$getlist, $class);
58
59 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
60 push(@{$hashArgOptLists->{"OptList"}},$options);
61
62 my $self = new Z3950Download($getlist,$inputargs,$hashArgOptLists);
63
64 if ($self->{'info_only'}) {
65 # don't worry about any options etc
66 return bless $self, $class;
67 }
68
69 # Must set $self->{'url'}, since GLI use $self->{'url'} to calculate the log file name!
70 $self->{'url'} = $self->{'host'}.":".$self->{'port'};
71 return bless $self, $class;
72}
73
74sub download
75{
76 my ($self) = shift (@_);
77 my ($hashGeneralOptions) = @_;
78 my ($strOpen,$strBase,$strFind,$strResponse,$intAmount,$intMaxRecords,$strRecords);
79
80 my $url = $self->{'url'};
81
82 print STDERR "<<Defined Maximum>>\n";
83
84 my $yaz = $self->{'yaz'};
85
86 my $childpid = open2(*YAZOUT, *YAZIN, $yaz)
87 or (print STDERR "<<Finished>>\n" and die "can't open pipe to yaz-client: $!");
88
89 $self->{'YAZOUT'} = *YAZOUT;
90 $self->{'YAZIN'} = *YAZIN;
91
92 $strOpen = $self->open_connection("open $url");
93
94 if (!$strOpen) {
95 print STDERR "Cannot connect to $url\n";
96 print STDERR "<<Finished>>\n";
97 return 0;
98 }
99
100 print STDERR "Opening connection to \"$self->{'url'}\"\n";
101 print STDERR "Access database: \"$self->{'database'}\"\n";
102 $self->run_command_without_output("base $self->{'database'}");
103 $self->run_command_without_output("querytype prefix");
104 print STDERR "Searching for keyword: \"$self->{'find'}\"\n";
105 $intAmount =$self->findAmount($self->{'find'});
106
107 if($intAmount <= 0)
108 {
109 ($intAmount == -1)?
110 print STDERR "Something wrong with the arguments,downloading can not be performed\n" :
111 print STDERR "No Record is found\n";
112 print STDERR "<<Finished>>\n";
113 return 0;
114 }
115 $intMaxRecords = ($self->{'max_records'} > $intAmount)? $intAmount : $self->{'max_records'};
116 print STDERR "<<Total number of record(s):$intMaxRecords>>\n";
117
118 $strRecords = $self->getRecords($intMaxRecords);
119
120 $self->saveRecords($strRecords,$hashGeneralOptions->{'cache_dir'},$intMaxRecords);
121 print STDERR "Closing connection...\n";
122 print STDERR "<<Finished>>\n";
123 close(YAZOUT);
124 close(YAZIN);
125 return 1;
126}
127
128
129sub saveRecords
130{
131 my ($self,$strRecords,$strOutputDir,$intMaxRecords) = @_;
132
133 # setup directories
134 # Currently only gather the MARC format
135 $strRecords ="<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<collection>$strRecords</collection>";
136 my $strFileName = $self->generateFileName($intMaxRecords);
137 my $host = $self->{'host'};
138 $host =~ s/http:\/\///;
139 $strOutputDir =~ s/"//g; #"
140 my $strOutputFile = &util::filename_cat($strOutputDir,$host,"$strFileName.xml");
141
142 # prepare subdirectory for record (if needed)
143
144 my ($strSubDirPath,$unused) = $self->dirFileSplit($strOutputFile);
145 &util::mk_all_dir($strSubDirPath);
146
147 print STDERR "Saving records to \"$strOutputFile\"\n";
148
149 # save record
150 open (ZOUT,">$strOutputFile")
151 || die "Unable to save oai metadata record: $!\n";
152 print ZOUT $strRecords;
153 close(ZOUT);
154}
155
156sub get{
157 my ($self,$strShow,$numRecord) = @_;
158
159 $self->run_command($strShow);
160
161 my $strFullOutput="";
162 my $count=0;
163 my $readRecord = 0;
164
165 while (my $strLine = <YAZOUT>)
166 {
167
168 return $strFullOutput if ($count >= $numRecord);
169
170 return $strFullOutput if($strLine =~ m/^HTTP ERROR/i);
171
172 if ($strLine =~ m/pos=[\d]*/i ){
173 $count++;
174 $readRecord = 1;
175 next;
176 }
177
178 next if(!$readRecord);
179
180 $strFullOutput .= $strLine;
181 }
182
183}
184
185sub url_information{
186 my ($self) = @_;
187
188 my $url = $self->{'url'};
189
190 $url =~ s#http://##;
191
192 return $self->SUPER::url_information($url);
193
194}
195
196sub error
197{
198 my ($self, $strFunctionName,$strError) = @_;
199 {
200 print STDERR "Error occoured in SRWDownload.pm\n".
201 "In Function:".$strFunctionName."\n".
202 "Error Message:".$strError."\n";
203 exit(-1);
204 }
205}
206
2071;
Note: See TracBrowser for help on using the repository browser.