source: trunk/gsdl/perllib/plugins/ZIPPlug.pm@ 10218

Last change on this file since 10218 was 10218, checked in by kjdon, 19 years ago

Jeffrey's new parsing modifications, committed approx 6 July, 15.16

  • Property svn:keywords set to Author Date Id Revision
File size: 4.9 KB
Line 
1###########################################################################
2#
3# ZIPPlug.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which handles compressed and/or archived input formats
27#
28# currently handled formats and file extensions are:
29#
30# gzip (.gz, .z, .tgz, .taz)
31# bzip (.bz)
32# bzip2 (.bz2)
33# zip (.zip .jar)
34# tar (.tar)
35#
36# this plugin relies on the following utilities being present
37# (if trying to process the corresponding formats)
38#
39# gunzip (for gzip)
40# bunzip (for bzip)
41# bunzip2
42# unzip (for zip)
43# tar (for tar)
44
45
46package ZIPPlug;
47
48use BasPlug;
49use plugin;
50use util;
51use Cwd;
52#$%^
53use parse2;
54
55
56BEGIN {
57 @ISA = ('BasPlug');
58}
59
60my $arguments =
61 [ { 'name' => "process_exp",
62 'desc' => "{BasPlug.process_exp}",
63 'type' => "string",
64 'deft' => &get_default_process_exp(),
65 'reqd' => "no" } ];
66
67my $options = { 'name' => "ZIPPlug",
68 'desc' => "{ZIPPlug.desc}",
69 'abstract' => "no",
70 'inherits' => "yes",
71 'args' => $arguments };
72
73sub new {
74
75 my ($class) = shift (@_);
76 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
77 push(@$pluginlist, $class);
78
79 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
80 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
81
82 my $self = (defined $hashArgOptLists)? new BasPlug($pluginlist,$inputargs,$hashArgOptLists): new BasPlug($pluginlist,$inputargs);
83
84 # BasPlug is explicitly set not to set process_exp if recursive plugin
85 # Not sure of this reasoning. Want it to be set in ZIPPlug, so explicitly
86 # pass it in as default value
87
88 ## do we need this???? --kjdon
89 if (!$self->{'process_exp'}) {
90 $self->{'process_exp'} = get_default_process_exp();
91 }
92
93 return bless $self, $class;
94}
95
96# this is a recursive plugin
97sub is_recursive {
98 my $self = shift (@_);
99
100 return 1;
101}
102
103sub get_default_process_exp {
104 return q^(?i)\.(gz|tgz|z|taz|bz|bz2|zip|jar|tar)$^;
105}
106
107# return number of files processed, undef if can't process
108# Note that $base_dir might be "" and that $file might
109# include directories
110sub read {
111 my $self = shift (@_);
112 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
113 my $outhandle = $self->{'outhandle'};
114
115 if ($file =~ /$self->{'process_exp'}/) {
116
117 my $filename = $file;
118 $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
119 if (!-e $filename) {
120 print $outhandle "ZIPPLug: WARNING: $filename does not exist\n";
121 return undef;
122 }
123
124 my ($file_only) = $file =~ /([^\\\/]*)$/;
125 my $tmpdir = &util::get_tmp_filename ();
126 &util::mk_all_dir ($tmpdir);
127
128 print $outhandle "ZIPPlug: extracting $file_only to $tmpdir\n"
129 if $self->{'verbosity'} > 1;
130
131 # save current working directory
132 my $cwd = cwd();
133 chdir ($tmpdir) || die "Unable to change to $tmpdir";
134 &util::cp ($filename, $tmpdir);
135
136 if ($file =~ /\.bz$/i) {
137 $self->bunzip ($file_only);
138 } elsif ($file =~ /\.bz2$/i) {
139 $self->bunzip2 ($file_only);
140 } elsif ($file =~ /\.(zip|jar)$/i) {
141 $self->unzip ($file_only);
142 } elsif ($file =~ /\.tar$/i) {
143 $self->untar ($file_only);
144 } else {
145 $self->gunzip ($file_only);
146 }
147
148 chdir ($cwd) || die "Unable to change back to $cwd";
149
150 my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs, $total_count, $gli);
151 &util::rm_r ($tmpdir);
152
153 $self->{'num_archives'} ++;
154
155 return $numdocs;
156
157 } else {
158 return undef;
159 }
160}
161
162sub bunzip {
163 my $self = shift (@_);
164 my ($file) = @_;
165
166 if (system ("bunzip $file")!=0)
167 {
168 &util::rm ($file);
169 }
170}
171
172sub bunzip2 {
173 my $self = shift (@_);
174 my ($file) = @_;
175
176 if (system ("bunzip2 $file")!=0)
177 {
178 &util::rm ($file);
179 }
180}
181
182sub unzip {
183 my $self = shift (@_);
184 my ($file) = @_;
185
186 system ("unzip $file");
187 &util::rm ($file) if -e $file;
188}
189
190sub untar {
191 my $self = shift (@_);
192 my ($file) = @_;
193
194 system ("tar xf $file");
195 &util::rm ($file) if -e $file;
196}
197
198sub gunzip {
199 my $self = shift (@_);
200 my ($file) = @_;
201
202 if (system ("gunzip $file")!=0)
203 {
204 &util::rm ($file);
205 };
206}
207
2081;
Note: See TracBrowser for help on using the repository browser.