source: main/trunk/greenstone2/perllib/plugins/ZIPPlugin.pm@ 22597

Last change on this file since 22597 was 16824, checked in by davidb, 16 years ago

Filenames are now quoted in system calls, so they can have spaces in them.

  • Property svn:keywords set to Author Date Id Revision
File size: 4.6 KB
Line 
1###########################################################################
2#
3# ZIPPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which handles compressed and/or archived input formats
27#
28# currently handled formats and file extensions are:
29#
30# gzip (.gz, .z, .tgz, .taz)
31# bzip (.bz)
32# bzip2 (.bz2)
33# zip (.zip .jar)
34# tar (.tar)
35#
36# this plugin relies on the following utilities being present
37# (if trying to process the corresponding formats)
38#
39# gunzip (for gzip)
40# bunzip (for bzip)
41# bunzip2 (for bzip2)
42# unzip (for zip)
43# tar (for tar)
44
45
46package ZIPPlugin;
47
48use BasePlugin;
49use plugin;
50use util;
51use Cwd;
52
53use strict;
54no strict 'refs'; # allow filehandles to be variables and viceversa
55
# Establish inheritance at compile time: ZIPPlugin derives from BasePlugin.
BEGIN {
    @ZIPPlugin::ISA = qw(BasePlugin);
}
59
# Argument descriptions contributed by this plugin. Only process_exp is
# declared here; its default comes from get_default_process_exp().
my $arguments = [
    {
        'name' => 'process_exp',
        'desc' => '{BasePlugin.process_exp}',
        'type' => 'string',
        'deft' => get_default_process_exp(),
        'reqd' => 'no',
    },
];

# Option record describing this plugin; it is appended to the caller's
# "OptList" by the constructor.
my $options = {
    'name'     => 'ZIPPlugin',
    'desc'     => '{ZIPPlugin.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
72
# Constructor.
#
# Parameters:
#   $class           - class name the object is blessed into
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through unchanged to the BasePlugin constructor
#   $hashArgOptLists - hash ref; this plugin's argument descriptions are
#                      appended to its "ArgList" entry and its option record
#                      to its "OptList" entry
#
# Returns the object built by BasePlugin, re-blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push (@$pluginlist, $class);

    push (@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push (@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit class-method call. The indirect object form
    # ("new BasePlugin(...)") relies on parser heuristics, which is
    # particularly fragile inside a package that defines its own new().
    my $self = BasePlugin->new ($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
86
# Reports that this is a recursive plugin: always returns 1.
sub is_recursive {
    my ($self) = @_;

    return 1;
}
93
# Default expression selecting the files this plugin handles: a
# case-insensitive match on the gz, tgz, z, taz, bz, bz2, zip, jar and tar
# file extensions.
sub get_default_process_exp {
    my $default_exp = '(?i)\.(gz|tgz|z|taz|bz|bz2|zip|jar|tar)$';

    return $default_exp;
}
97
# Extract a compressed and/or archived file into a temporary directory and
# pass that directory on to plugin::read.
#
# Returns the value plugin::read gives for the extracted directory, or undef
# if this plugin can't process the file.
# Note that $base_dir might be "" and that $file might include directories.
sub read {
    my ($self, $pluginfo, $base_dir, $file, $block_hash, $metadata,
        $processor, $maxdocs, $total_count, $gli) = @_;
    my $out = $self->{'outhandle'};

    # can we process this file??
    my ($full_path, $leaf_name) = &util::get_full_filenames ($base_dir, $file);
    if (!$self->can_process_this_file ($full_path)) {
        return undef;
    }

    my $scratch_dir = &util::get_tmp_filename ();
    &util::mk_all_dir ($scratch_dir);

    if ($self->{'verbosity'} > 1) {
        print $out "ZIPPlugin: extracting $leaf_name to $scratch_dir\n";
    }

    # The extraction helpers are given a bare file name, so work inside the
    # temporary directory and restore the previous directory afterwards.
    my $start_dir = cwd();
    chdir ($scratch_dir) || die "Unable to change to $scratch_dir";
    &util::cp ($full_path, $scratch_dir);

    # Ordered table of (extension pattern, extraction method); the first
    # matching pattern wins and anything unmatched is handed to gunzip.
    my @extractors = (
        [ qr/\.bz$/i,        'bunzip'  ],
        [ qr/\.bz2$/i,       'bunzip2' ],
        [ qr/\.(zip|jar)$/i, 'unzip'   ],
        [ qr/\.tar$/i,       'untar'   ],
    );
    my $extract = 'gunzip';
    foreach my $entry (@extractors) {
        my ($pattern, $method) = @$entry;
        if ($file =~ $pattern) {
            $extract = $method;
            last;
        }
    }
    $self->$extract($leaf_name);

    chdir ($start_dir) || die "Unable to change back to $start_dir";

    # Process whatever was extracted, then discard the temporary directory.
    my $numdocs = &plugin::read ($pluginfo, "", $scratch_dir, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli);
    &util::rm_r ($scratch_dir);

    $self->{'num_archives'}++;

    return $numdocs;
}
143
# Decompress a bzip-compressed file with the external "bunzip" utility.
#
# Parameters:
#   $file - name of the file to decompress, passed to bunzip as-is
#
# The command is run with list-form system(), so no shell is involved and
# $file reaches bunzip as one literal argument even if it contains quotes,
# "$", backticks, ";" or other shell metacharacters. If bunzip fails or
# cannot be run, $file is removed with util::rm (presumably so the archive
# that could not be extracted is not processed again - confirm with callers).
sub bunzip {
    my $self = shift (@_);
    my ($file) = @_;

    if (system ('bunzip', $file) != 0) {
        &util::rm ($file);
    }
}
153
# Decompress a bzip2-compressed file with the external "bunzip2" utility.
#
# Parameters:
#   $file - name of the file to decompress, passed to bunzip2 as-is
#
# The command is run with list-form system(), so no shell is involved and
# $file reaches bunzip2 as one literal argument even if it contains quotes,
# "$", backticks, ";" or other shell metacharacters. If bunzip2 fails or
# cannot be run, $file is removed with util::rm (presumably so the archive
# that could not be extracted is not processed again - confirm with callers).
sub bunzip2 {
    my $self = shift (@_);
    my ($file) = @_;

    if (system ('bunzip2', $file) != 0) {
        &util::rm ($file);
    }
}
163
# Extract a zip (or jar) archive with the external "unzip" utility.
#
# Parameters:
#   $file - name of the archive to extract, passed to unzip as-is
#
# The command is run with list-form system(), so no shell is involved and
# $file reaches unzip as one literal argument even if it contains quotes,
# "$", backticks, ";" or other shell metacharacters. The exit status of
# unzip is not examined; afterwards the archive itself is removed with
# util::rm if it still exists, whether or not extraction succeeded.
sub unzip {
    my $self = shift (@_);
    my ($file) = @_;

    system ('unzip', $file);
    &util::rm ($file) if -e $file;
}
171
# Extract a tar archive with the external "tar" utility ("tar xf").
#
# Parameters:
#   $file - name of the archive to extract, passed to tar as-is
#
# The command is run with list-form system(), so no shell is involved and
# $file reaches tar as one literal argument even if it contains quotes,
# "$", backticks, ";" or other shell metacharacters. The exit status of
# tar is not examined; afterwards the archive itself is removed with
# util::rm if it still exists, whether or not extraction succeeded.
sub untar {
    my $self = shift (@_);
    my ($file) = @_;

    system ('tar', 'xf', $file);
    &util::rm ($file) if -e $file;
}
179
# Decompress a gzip-compressed file with the external "gunzip" utility.
#
# Parameters:
#   $file - name of the file to decompress, passed to gunzip as-is
#
# The command is run with list-form system(), so no shell is involved and
# $file reaches gunzip as one literal argument even if it contains quotes,
# "$", backticks, ";" or other shell metacharacters. If gunzip fails or
# cannot be run, $file is removed with util::rm (presumably so the archive
# that could not be extracted is not processed again - confirm with callers).
sub gunzip {
    my $self = shift (@_);
    my ($file) = @_;

    if (system ('gunzip', $file) != 0) {
        &util::rm ($file);
    }
}
189
1901;
Note: See TracBrowser for help on using the repository browser.