source: trunk/gsdl/perllib/downloaders/Z3950Download.pm@ 11783

Last change on this file since 11783 was 11783, checked in by kjdon, 18 years ago

Jefferey's new download modules

  • Property svn:keywords set to Author Date Id Revision
File size: 7.4 KB
Line 
1###########################################################################
2#
3# WebDownload.pm -- base class for all the import plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package Z3950Download;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
34use strict;
35
36use BasDownload;
37use IPC::Open2;
38
39sub BEGIN {
40 @Z3950Download::ISA = ('BasDownload');
41}
42
43local (*YAZOUT, *YAZIN);
44
45my $arguments =
46 [ { 'name' => "host",
47 'disp' => "{Z3950Download.host_disp}",
48 'desc' => "{Z3950Download.host}",
49 'type' => "string",
50 'reqd' => "yes"},
51 { 'name' => "port",
52 'disp' => "{Z3950Download.port_disp}",
53 'desc' => "{Z3950Download.port}",
54 'type' => "string",
55 'reqd' => "yes"},
56 { 'name' => "database",
57 'disp' => "{Z3950Download.database_disp}",
58 'desc' => "{Z3950Download.database}",
59 'type' => "string",
60 'reqd' => "yes"},
61 { 'name' => "find",
62 'disp' => "{Z3950Download.find_disp}",
63 'desc' => "{Z3950Download.find}",
64 'type' => "string",
65 'deft' => "",
66 'reqd' => "yes"},
67 { 'name' => "max_records",
68 'disp' => "{Z3950Download.max_records_disp}",
69 'desc' => "{Z3950Download.max_records}",
70 'type' => "int",
71 'deft' => "500",
72 'reqd' => "no"}];
73
74my $options = { 'name' => "Z3950Download",
75 'desc' => "{Z3950Download.desc}",
76 'abstract' => "no",
77 'inherits' => "yes",
78 'args' => $arguments };
79
80
81sub new
82{
83 my ($class) = shift (@_);
84 my ($getlist,$inputargs,$hashArgOptLists) = @_;
85 push(@$getlist, $class);
86
87 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
88 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
89
90 my $self = (defined $hashArgOptLists)? new BasDownload($getlist,$inputargs,$hashArgOptLists): new BasDownload($getlist,$inputargs);
91
92 if ($self->{'info_only'}) {
93 # don't worry about any options etc
94 return bless $self, $class;
95 }
96
97 # Must set $self->{'url'}, since GLI use $self->{'url'} to calculate the log file name!
98 $self->{'url'} = $self->{'host'}.":".$self->{'port'};
99 return bless $self, $class;
100
101}
102
103sub download
104{
105 my ($self) = shift (@_);
106 my ($hashGeneralOptions) = @_;
107 my ($strOpen,$strBase,$strFind,$strResponse,$intAmount,$intMaxRecords,$strRecords);
108 print STDERR "<<Defined Maximum>>\n";
109
110 my $url = $self->{'url'};
111 print STDERR "Opening connection to $url\n";
112
113 my $childpid = open2(*YAZOUT, *YAZIN, "yaz-client")
114 or die "can't open pipe to yaz-client: $!";
115
116 $strOpen = &run_command_with_output("open $url");
117 print STDERR "Access database: \"$self->{'database'}\"\n";
118 &run_command_without_output("base $self->{'database'}");
119 print STDERR "Searching for keyword: \"$self->{'find'}\"\n";
120 $intAmount = &findAmount($self->{'find'});
121
122 if($intAmount <= 0)
123 {
124 ($intAmount == -1)?
125 print STDERR "Unexpected format, Parsing operation can not be performed\n" :
126 print STDERR "No Record is found\n";
127 return 0;
128 }
129 $intMaxRecords = ($self->{'max_records'} > $intAmount)? $intAmount : $self->{'max_records'};
130 print STDERR "<<Total number of record(s):$intMaxRecords>>\n";
131 $strRecords = &getRecords($intMaxRecords);
132 print STDERR $strRecords;
133 &saveRecords($self,$strRecords,$hashGeneralOptions->{'cache_dir'},$intMaxRecords);
134 print STDERR "Closing connection...\n";
135 close(YAZOUT);
136 close(YAZIN);
137 waitpid($childpid, 0);
138 return 1;
139}
140
141sub findAmount
142{
143 my($strFindTarget) = @_;
144 my $strResponse = &run_command_with_output("find $strFindTarget");
145 return ($strResponse =~ m/^Number of hits: (\d+)/m)? $1:-1;
146}
147
148sub getRecords
149{
150 my ($intMaxRecords) = @_;
151 my ($strShow,$intStartNumber,$strResponse,$strRecords,$intRecordsLeft);
152
153 $intStartNumber = 1;
154 $intRecordsLeft = $intMaxRecords;
155 while ($intRecordsLeft > 0)
156 {
157 if($intRecordsLeft > 50)
158 {
159 print STDERR "<<Done:50>>\n";
160 print STDERR "Yaz is Gathering records: $intStartNumber - ".($intStartNumber+49)."\n";
161
162 $strShow = "show $intStartNumber+50";
163 $intStartNumber = $intStartNumber + 50;
164 $intRecordsLeft = $intRecordsLeft - 50;
165 }
166 else
167 {
168 print STDERR "<<Done:".($intRecordsLeft).">>\n";
169 print STDERR "Yaz is Gathering records: $intStartNumber - ".($intStartNumber+$intRecordsLeft-1)."\n";
170 $strShow = "show $intStartNumber+$intRecordsLeft";
171 $intRecordsLeft = 0;
172 }
173
174 $strResponse = &run_command_with_output($strShow);
175
176 if($strResponse =~ m/Records: (\d*?)\n(.*?)nextResultSetPosition = (\d*?)\n/s)
177 {
178 $strRecords .= $2;
179 }
180 }
181 return $strRecords;
182}
183
184sub saveRecords
185{
186 my ($self,$strRecords,$strOutputDir,$intMaxRecords) = @_;
187
188 # setup directories
189 # Currently only gather the MARC format
190 my $strFileName = &generateFileName($self,$intMaxRecords);
191 my $strFileURL = "$strOutputDir/$self->{'host'}/$strFileName.marc";
192 $strFileURL =~ s/:/\//g;
193
194 # prepare subdirectory for record (if needed)
195 my ($strSubDirPath,$unused) = dirFileSplit($strFileURL);
196 &util::mk_all_dir($strSubDirPath);
197
198 my $ds = &util::get_dirsep();
199 my $strOutputFile = &util::filename_cat($strOutputDir,$self->{'host'},"$strFileName.marc");
200 $strOutputFile =~ s/:/$ds/g;
201
202 print STDERR "Saving records to \"$strOutputFile\"\n";
203
204 # save record
205 open (ZOUT,">$strOutputFile")
206 || die "Unable to save oai metadata record: $!\n";
207 print ZOUT $strRecords;
208 close(ZOUT);
209}
210
211sub run_command_with_output
212{
213 my ($strCMD) = @_;
214
215 return &run_command($strCMD,"^Elapsed:.*\$");
216}
217
218sub run_command_without_output
219{
220 my ($strCMD) = @_;
221
222 &run_command($strCMD);
223}
224
225sub run_command
226{
227 my ($strCMD,$strStopRE) = @_;
228
229
230 print YAZIN "$strCMD\n";
231 if (!defined $strStopRE){return "";}
232 else
233 {
234 my $strFullOutput;
235 while (my $strLine = <YAZOUT>)
236 {
237 $strFullOutput .= $strLine;
238 if($strLine =~ m/$strStopRE/){return $strFullOutput;}
239 }
240 }
241}
242
243sub generateFileName
244{
245 my ($self,$intMaxRecords) = @_;
246 my $strFileName = ($self->{'database'})."_".($self->{'find'})."_".($intMaxRecords);
247}
248
249sub dirFileSplit
250{
251 my ($strFile) = @_;
252
253 my @aryDirs = split("/",$strFile);
254 my $strLocalFile = pop(@aryDirs);
255 my $strSubDirs = join("/",@aryDirs);
256
257 return ($strSubDirs,$strLocalFile);
258}
259
260sub error
261{
262 my ($strFunctionName,$strError) = @_;
263 {
264 print STDERR "Error occoured in Z3950Download.pm\n".
265 "In Function:".$strFunctionName."\n".
266 "Error Message:".$strError."\n";
267 exit(-1);
268 }
269}
270
2711;
Note: See TracBrowser for help on using the repository browser.