source: trunk/gsdl/perllib/plugins/ZIPPlug.pm@ 9706

Last change on this file since 9706 was 9229, checked in by davidb, 19 years ago

Added default_process_exp to ZIPPlug, so GLI can "see" that it likes
.tar files, (.tgz, ...)

  • Property svn:keywords set to Author Date Id Revision
File size: 5.0 KB
Line 
1###########################################################################
2#
3# ZIPPlug.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which handles compressed and/or archived input formats
27#
28# currently handled formats and file extensions are:
29#
30# gzip (.gz, .z, .tgz, .taz)
31# bzip (.bz)
32# bzip2 (.bz2)
33# zip (.zip .jar)
34# tar (.tar)
35#
36# this plugin relies on the following utilities being present
37# (if trying to process the corresponding formats)
38#
39# gunzip (for gzip)
40# bunzip (for bzip)
41# bunzip2
42# unzip (for zip)
43# tar (for tar)
44
45# 12/05/02 Added usage datastructure - John Thompson
46
47package ZIPPlug;
48
49use BasPlug;
50use plugin;
51use util;
52use Cwd;
53
54
55BEGIN {
56 @ISA = ('BasPlug');
57}
58
59my $arguments =
60 [ { 'name' => "process_exp",
61 'desc' => "{BasPlug.process_exp}",
62 'type' => "string",
63 'deft' => &get_default_process_exp(),
64 'reqd' => "no" } ];
65
66my $options = { 'name' => "ZIPPlug",
67 'desc' => "{ZIPPlug.desc}",
68 'abstract' => "no",
69 'inherits' => "yes",
70 'args' => $arguments };
71
72sub new {
73 my ($class) = @_;
74 my $self = new BasPlug ("ZIPPlug", @_);
75 $self->{'plugin_type'} = "ZIPPlug";
76
77 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
78 my $option_list = $self->{'option_list'};
79 push( @{$option_list}, $options );
80
81 if (!parsargv::parse(\@_,
82 q^process_exp/.*/^, \$self->{'process_exp'},
83 "allow_extra_options")) {
84 print STDERR "\nIncorrect options passed to ZIPPlug, check your collect.cfg configuration file\n";
85 $self->print_txt_usage(""); # Use default resource bundle
86 die "\n";
87 }
88
89 # BasPlug is explicitly set not to set process_exp if recursive plugin
90 # Not sure of this reasoning. Want it to be set in ZIPPlug, so explicitly
91 # pass it in as default value
92 if (!$self->{'process_exp'}) {
93 $self->{'process_exp'} = get_default_process_exp();
94 }
95
96 return bless $self, $class;
97}
98
99# this is a recursive plugin
100sub is_recursive {
101 my $self = shift (@_);
102
103 return 1;
104}
105
106sub get_default_process_exp {
107 return q^(?i)\.(gz|tgz|z|taz|bz|bz2|zip|jar|tar)$^;
108}
109
110# return number of files processed, undef if can't process
111# Note that $base_dir might be "" and that $file might
112# include directories
113sub read {
114 my $self = shift (@_);
115 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
116 my $outhandle = $self->{'outhandle'};
117
118 if ($file =~ /$self->{'process_exp'}/) {
119
120 my $filename = $file;
121 $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
122 if (!-e $filename) {
123 print $outhandle "ZIPPLug: WARNING: $filename does not exist\n";
124 return undef;
125 }
126
127 my ($file_only) = $file =~ /([^\\\/]*)$/;
128 my $tmpdir = &util::get_tmp_filename ();
129 &util::mk_all_dir ($tmpdir);
130
131 print $outhandle "ZIPPlug: extracting $file_only to $tmpdir\n";
132
133 # save current working directory
134 my $cwd = cwd();
135 chdir ($tmpdir) || die "Unable to change to $tmpdir";
136 &util::cp ($filename, $tmpdir);
137
138 if ($file =~ /\.bz$/i) {
139 $self->bunzip ($file_only);
140 } elsif ($file =~ /\.bz2$/i) {
141 $self->bunzip2 ($file_only);
142 } elsif ($file =~ /\.(zip|jar)$/i) {
143 $self->unzip ($file_only);
144 } elsif ($file =~ /\.tar$/i) {
145 $self->untar ($file_only);
146 } else {
147 $self->gunzip ($file_only);
148 }
149
150 chdir ($cwd) || die "Unable to change back to $cwd";
151
152 my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs);
153 &util::rm_r ($tmpdir);
154
155 $self->{'num_archives'} ++;
156
157 return $numdocs;
158
159 } else {
160 return undef;
161 }
162}
163
164sub bunzip {
165 my $self = shift (@_);
166 my ($file) = @_;
167
168 if (system ("bunzip $file")!=0)
169 {
170 &util::rm ($file);
171 }
172}
173
174sub bunzip2 {
175 my $self = shift (@_);
176 my ($file) = @_;
177
178 if (system ("bunzip2 $file")!=0)
179 {
180 &util::rm ($file);
181 }
182}
183
184sub unzip {
185 my $self = shift (@_);
186 my ($file) = @_;
187
188 system ("unzip $file");
189 &util::rm ($file) if -e $file;
190}
191
192sub untar {
193 my $self = shift (@_);
194 my ($file) = @_;
195
196 system ("tar xf $file");
197 &util::rm ($file) if -e $file;
198}
199
200sub gunzip {
201 my $self = shift (@_);
202 my ($file) = @_;
203
204 if (system ("gunzip $file")!=0)
205 {
206 &util::rm ($file);
207 };
208}
209
2101;
Note: See TracBrowser for help on using the repository browser.