source: trunk/gsdl/perllib/downloaders/SRWDownload.pm@ 12902

Last change on this file since 12902 was 12902, checked in by kjdon, 18 years ago

tidied up saveRecords a bit. SRWDownload now uses filename_cat instead of foreward slashes.

  • Property svn:keywords set to Author Date Id Revision
File size: 5.6 KB
Line 
1###########################################################################
2#
3# SRWDownload.pm -- base class for all the import plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package SRWDownload;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
34use strict;
35
36use Z3950Download;
37use IPC::Open2;
38
39sub BEGIN {
40 @SRWDownload::ISA = ('Z3950Download');
41}
42
43my $arguments;
44
45my $options = { 'name' => "SRWDownload",
46 'desc' => "{SRWDownload.desc}",
47 'abstract' => "no",
48 'inherits' => "yes"
49 };
50
51
52sub new
53{
54 my ($class) = shift (@_);
55 my ($getlist,$inputargs,$hashArgOptLists) = @_;
56 push(@$getlist, $class);
57
58 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
59 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
60
61 my $self = (defined $hashArgOptLists)? new Z3950Download($getlist,$inputargs,$hashArgOptLists): new Z3950Download($getlist,$inputargs);
62
63 if ($self->{'info_only'}) {
64 # don't worry about any options etc
65 return bless $self, $class;
66 }
67
68 # Must set $self->{'url'}, since GLI use $self->{'url'} to calculate the log file name!
69 $self->{'url'} = $self->{'host'}.":".$self->{'port'};
70 return bless $self, $class;
71}
72
73sub download
74{
75 my ($self) = shift (@_);
76 my ($hashGeneralOptions) = @_;
77 my ($strOpen,$strBase,$strFind,$strResponse,$intAmount,$intMaxRecords,$strRecords);
78
79 my $url = $self->{'url'};
80
81 print STDERR "<<Defined Maximum>>\n";
82
83 my $yaz = $self->{'yaz'};
84
85 my $childpid = open2(*YAZOUT, *YAZIN, $yaz)
86 or (print STDERR "<<Finished>>\n" and die "can't open pipe to yaz-client: $!");
87
88 $self->{'YAZOUT'} = *YAZOUT;
89 $self->{'YAZIN'} = *YAZIN;
90
91 $strOpen = $self->open_connection("open $url");
92
93 if (!$strOpen) {
94 print STDERR "Cannot connect to $url\n";
95 print STDERR "<<Finished>>\n";
96 return 0;
97 }
98
99 print STDERR "Opening connection to \"$self->{'url'}\"\n";
100 print STDERR "Access database: \"$self->{'database'}\"\n";
101 $self->run_command_without_output("base $self->{'database'}");
102 $self->run_command_without_output("querytype prefix");
103 print STDERR "Searching for keyword: \"$self->{'find'}\"\n";
104 $intAmount =$self->findAmount($self->{'find'});
105
106 if($intAmount <= 0)
107 {
108 ($intAmount == -1)?
109 print STDERR "Something wrong with the arguments,downloading can not be performed\n" :
110 print STDERR "No Record is found\n";
111 print STDERR "<<Finished>>\n";
112 return 0;
113 }
114 $intMaxRecords = ($self->{'max_records'} > $intAmount)? $intAmount : $self->{'max_records'};
115 print STDERR "<<Total number of record(s):$intMaxRecords>>\n";
116
117 $strRecords = $self->getRecords($intMaxRecords);
118
119 $self->saveRecords($strRecords,$hashGeneralOptions->{'cache_dir'},$intMaxRecords);
120 print STDERR "Closing connection...\n";
121 print STDERR "<<Finished>>\n";
122 close(YAZOUT);
123 close(YAZIN);
124 return 1;
125}
126
127
128sub saveRecords
129{
130 my ($self,$strRecords,$strOutputDir,$intMaxRecords) = @_;
131
132 # setup directories
133 # Currently only gather the MARC format
134 $strRecords ="<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<collection>$strRecords</collection>";
135 my $strFileName = $self->generateFileName($intMaxRecords);
136 my $host = $self->{'host'};
137 $host =~ s/http:\/\///;
138 $strOutputDir =~ s/"//g; #"
139 my $strOutputFile = &util::filename_cat($strOutputDir,$host,"$strFileName.xml");
140
141 # prepare subdirectory for record (if needed)
142
143 my ($strSubDirPath,$unused) = $self->dirFileSplit($strOutputFile);
144 &util::mk_all_dir($strSubDirPath);
145
146 print STDERR "Saving records to \"$strOutputFile\"\n";
147
148 # save record
149 open (ZOUT,">$strOutputFile")
150 || die "Unable to save oai metadata record: $!\n";
151 print ZOUT $strRecords;
152 close(ZOUT);
153}
154
155sub get{
156 my ($self,$strShow,$numRecord) = @_;
157
158 $self->run_command($strShow);
159
160 my $strFullOutput="";
161 my $count=0;
162 my $readRecord = 0;
163
164 while (my $strLine = <YAZOUT>)
165 {
166
167 return $strFullOutput if ($count >= $numRecord);
168
169 return $strFullOutput if($strLine =~ m/^HTTP ERROR/i);
170
171 if ($strLine =~ m/pos=[\d]*/i ){
172 $count++;
173 $readRecord = 1;
174 next;
175 }
176
177 next if(!$readRecord);
178
179 $strFullOutput .= $strLine;
180 }
181
182}
183
184sub url_information{
185 my ($self) = @_;
186
187 my $url = $self->{'url'};
188
189 $url =~ s#http://##;
190
191 return $self->SUPER::url_information($url);
192
193}
194
195sub error
196{
197 my ($self, $strFunctionName,$strError) = @_;
198 {
199 print STDERR "Error occoured in SRWDownload.pm\n".
200 "In Function:".$strFunctionName."\n".
201 "Error Message:".$strError."\n";
202 exit(-1);
203 }
204}
205
2061;
Note: See TracBrowser for help on using the repository browser.