source: trunk/gsdl/perllib/plugins/ZIPPlug.pm@ 9960

Last change on this file since 9960 was 9960, checked in by davidb, 19 years ago

Minor refinement made to print statments the show how the plugin code is
progressing.

  • Property svn:keywords set to Author Date Id Revision
File size: 5.1 KB
Line 
1###########################################################################
2#
3# ZIPPlug.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which handles compressed and/or archived input formats
27#
28# currently handled formats and file extensions are:
29#
30# gzip (.gz, .z, .tgz, .taz)
31# bzip (.bz)
32# bzip2 (.bz2)
33# zip (.zip .jar)
34# tar (.tar)
35#
36# this plugin relies on the following utilities being present
37# (if trying to process the corresponding formats)
38#
39# gunzip (for gzip)
40# bunzip (for bzip)
41# bunzip2
42# unzip (for zip)
43# tar (for tar)
44
45# 12/05/02 Added usage datastructure - John Thompson
46
47package ZIPPlug;
48
49use BasPlug;
50use plugin;
51use util;
52use Cwd;
53
54
55BEGIN {
56 @ISA = ('BasPlug');
57}
58
59my $arguments =
60 [ { 'name' => "process_exp",
61 'desc' => "{BasPlug.process_exp}",
62 'type' => "string",
63 'deft' => &get_default_process_exp(),
64 'reqd' => "no" } ];
65
66my $options = { 'name' => "ZIPPlug",
67 'desc' => "{ZIPPlug.desc}",
68 'abstract' => "no",
69 'inherits' => "yes",
70 'args' => $arguments };
71
72sub new {
73 my ($class) = @_;
74 my $self = new BasPlug ("ZIPPlug", @_);
75 $self->{'plugin_type'} = "ZIPPlug";
76
77 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
78 my $option_list = $self->{'option_list'};
79 push( @{$option_list}, $options );
80
81 if (!parsargv::parse(\@_,
82 q^process_exp/.*/^, \$self->{'process_exp'},
83 "allow_extra_options")) {
84 print STDERR "\nIncorrect options passed to ZIPPlug, check your collect.cfg configuration file\n";
85 $self->print_txt_usage(""); # Use default resource bundle
86 die "\n";
87 }
88
89 # BasPlug is explicitly set not to set process_exp if recursive plugin
90 # Not sure of this reasoning. Want it to be set in ZIPPlug, so explicitly
91 # pass it in as default value
92 if (!$self->{'process_exp'}) {
93 $self->{'process_exp'} = get_default_process_exp();
94 }
95
96 return bless $self, $class;
97}
98
99# this is a recursive plugin
100sub is_recursive {
101 my $self = shift (@_);
102
103 return 1;
104}
105
106sub get_default_process_exp {
107 return q^(?i)\.(gz|tgz|z|taz|bz|bz2|zip|jar|tar)$^;
108}
109
110# return number of files processed, undef if can't process
111# Note that $base_dir might be "" and that $file might
112# include directories
113sub read {
114 my $self = shift (@_);
115 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
116 my $outhandle = $self->{'outhandle'};
117
118 if ($file =~ /$self->{'process_exp'}/) {
119
120 my $filename = $file;
121 $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
122 if (!-e $filename) {
123 print $outhandle "ZIPPLug: WARNING: $filename does not exist\n";
124 return undef;
125 }
126
127 my ($file_only) = $file =~ /([^\\\/]*)$/;
128 my $tmpdir = &util::get_tmp_filename ();
129 &util::mk_all_dir ($tmpdir);
130
131 print $outhandle "ZIPPlug: extracting $file_only to $tmpdir\n"
132 if $self->{'verbosity'} > 1;
133
134 # save current working directory
135 my $cwd = cwd();
136 chdir ($tmpdir) || die "Unable to change to $tmpdir";
137 &util::cp ($filename, $tmpdir);
138
139 if ($file =~ /\.bz$/i) {
140 $self->bunzip ($file_only);
141 } elsif ($file =~ /\.bz2$/i) {
142 $self->bunzip2 ($file_only);
143 } elsif ($file =~ /\.(zip|jar)$/i) {
144 $self->unzip ($file_only);
145 } elsif ($file =~ /\.tar$/i) {
146 $self->untar ($file_only);
147 } else {
148 $self->gunzip ($file_only);
149 }
150
151 chdir ($cwd) || die "Unable to change back to $cwd";
152
153 my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs, $total_count, $gli);
154 &util::rm_r ($tmpdir);
155
156 $self->{'num_archives'} ++;
157
158 return $numdocs;
159
160 } else {
161 return undef;
162 }
163}
164
165sub bunzip {
166 my $self = shift (@_);
167 my ($file) = @_;
168
169 if (system ("bunzip $file")!=0)
170 {
171 &util::rm ($file);
172 }
173}
174
175sub bunzip2 {
176 my $self = shift (@_);
177 my ($file) = @_;
178
179 if (system ("bunzip2 $file")!=0)
180 {
181 &util::rm ($file);
182 }
183}
184
185sub unzip {
186 my $self = shift (@_);
187 my ($file) = @_;
188
189 system ("unzip $file");
190 &util::rm ($file) if -e $file;
191}
192
193sub untar {
194 my $self = shift (@_);
195 my ($file) = @_;
196
197 system ("tar xf $file");
198 &util::rm ($file) if -e $file;
199}
200
201sub gunzip {
202 my $self = shift (@_);
203 my ($file) = @_;
204
205 if (system ("gunzip $file")!=0)
206 {
207 &util::rm ($file);
208 };
209}
210
2111;
Note: See TracBrowser for help on using the repository browser.