source: gsdl/trunk/perllib/plugins/ZIPPlugin.pm@ 15872

Last change on this file since 15872 was 15872, checked in by kjdon, 16 years ago

plugin overhaul: plugins renamed to xxPlugin, and in some cases the names are made more sensible. They now use the new base plugins. Hopefully we have better code reuse. Some of the plugins still need work done as I didn't want to spend another month doing this before committing it. Also, I haven't really tested anything yet...

  • Property svn:keywords set to Author Date Id Revision
File size: 4.6 KB
Line 
1###########################################################################
2#
3# ZIPPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which handles compressed and/or archived input formats
27#
28# currently handled formats and file extensions are:
29#
30# gzip (.gz, .z, .tgz, .taz)
31# bzip (.bz)
32# bzip2 (.bz2)
33# zip (.zip .jar)
34# tar (.tar)
35#
36# this plugin relies on the following utilities being present
37# (if trying to process the corresponding formats)
38#
39# gunzip (for gzip)
40# bunzip (for bzip)
41# bunzip2 (for bzip2)
42# unzip (for zip)
43# tar (for tar)
44
45
46package ZIPPlugin;
47
48use AbstractPlugin;
49use plugin;
50use util;
51use Cwd;
52
53use strict;
54no strict 'refs'; # allow filehandles to be variables and viceversa
55
# Set up inheritance at compile time: ZIPPlugin derives from AbstractPlugin.
BEGIN {
    @ZIPPlugin::ISA = qw(AbstractPlugin);
}
59
# Argument specifications contributed by this plugin. The only argument is
# process_exp, whose default is the archive-extension pattern returned by
# get_default_process_exp().
my $arguments = [
    {
        'name' => 'process_exp',
        'desc' => '{BasePlugin.process_exp}',
        'type' => 'string',
        'deft' => get_default_process_exp(),
        'reqd' => 'no',
    },
];

# Option record describing this plugin; new() appends it to the caller's
# "OptList" and appends $arguments to the caller's "ArgList".
my $options = {
    'name'     => 'ZIPPlugin',
    'desc'     => '{ZIPPlugin.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
72
# Constructor.
#
# Parameters:
#   $class           - class name the new object is blessed into
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through unchanged to AbstractPlugin's constructor
#   $hashArgOptLists - hash ref; this plugin's argument specs are appended to
#                      its "ArgList" entry and its option record to "OptList"
#
# Returns the object built by AbstractPlugin's constructor, re-blessed into
# $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Use an explicit class-method call rather than indirect object syntax
    # ("new AbstractPlugin(...)"): this package defines its own new(), which
    # makes the indirect form ambiguous to the parser.
    my $self = AbstractPlugin->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
86
# Identifies this plugin as a recursive one: always returns 1.
# (read() hands the extracted directory back to plugin::read.)
sub is_recursive {
    my ($self) = @_;
    return 1;
}
93
# Returns the default process_exp: a case-insensitive pattern (as a string)
# matching file names that end in one of the supported compressed/archive
# extensions.
sub get_default_process_exp {
    my $archive_suffix_pattern = '(?i)\.(gz|tgz|z|taz|bz|bz2|zip|jar|tar)$';
    return $archive_suffix_pattern;
}
97
# Extract a compressed and/or archived file into a fresh temporary directory
# and pass that directory to plugin::read so the extracted files are
# processed by the plugin list.
#
# Parameters (after $self): $pluginfo, $base_dir, $file, $metadata,
# $processor, $maxdocs, $total_count, $gli. Note that $base_dir might be ""
# and that $file might include directories.
#
# Returns the number of files processed (the result of plugin::read), or the
# status from read_block when it is undef (can't process) or 0.
# Dies if the working directory cannot be changed to the temporary directory
# or back again.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    # check process_exp, block_exp, associate_ext etc
    my ($block_status, $filename) = $self->read_block(@_);
    return $block_status if ((!defined $block_status) || ($block_status==0));

    # file name without any leading directory components
    my ($file_only) = $file =~ /([^\\\/]*)$/;
    my $tmpdir = &util::get_tmp_filename ();
    &util::mk_all_dir ($tmpdir);

    print $outhandle "ZIPPlugin: extracting $file_only to $tmpdir\n"
        if $self->{'verbosity'} > 1;

    # Copy the archive while still in the original working directory, so
    # that a relative $filename or $tmpdir still resolves correctly.
    &util::cp ($filename, $tmpdir);

    # save current working directory; the extraction helpers operate on
    # $file_only relative to the temporary directory
    my $cwd = cwd();
    chdir ($tmpdir) || die "Unable to change to $tmpdir";

    # pick the extraction tool from the file extension; anything that is
    # not bz, bz2, zip, jar or tar is handed to gunzip
    if ($file =~ /\.bz$/i) {
        $self->bunzip ($file_only);
    } elsif ($file =~ /\.bz2$/i) {
        $self->bunzip2 ($file_only);
    } elsif ($file =~ /\.(zip|jar)$/i) {
        $self->unzip ($file_only);
    } elsif ($file =~ /\.tar$/i) {
        $self->untar ($file_only);
    } else {
        $self->gunzip ($file_only);
    }

    chdir ($cwd) || die "Unable to change back to $cwd";

    # process the extracted contents, then discard the temporary directory
    my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs, $total_count, $gli);
    &util::rm_r ($tmpdir);

    $self->{'num_archives'} ++;

    return $numdocs;
}
144
# Decompress a bzip file by running the external "bunzip" utility on it.
#
# $file is the name of the file to decompress; read() calls this after
# changing into the temporary directory, passing the bare file name.
#
# The command is run with the list form of system(), so the file name is
# handed to the program as a single argument without going through a shell.
# Names containing spaces or shell metacharacters are therefore handled
# safely. If the command does not finish with status 0 the file is removed
# via util::rm (presumably so the unreadable archive is not processed again).
sub bunzip {
    my $self = shift (@_);
    my ($file) = @_;

    if (system ("bunzip", $file) != 0)
    {
        &util::rm ($file);
    }
}
154
# Decompress a bzip2 file by running the external "bunzip2" utility on it.
#
# $file is the name of the file to decompress; read() calls this after
# changing into the temporary directory, passing the bare file name.
#
# The command is run with the list form of system(), so the file name is
# handed to the program as a single argument without going through a shell.
# Names containing spaces or shell metacharacters are therefore handled
# safely. If the command does not finish with status 0 the file is removed
# via util::rm (presumably so the unreadable archive is not processed again).
sub bunzip2 {
    my $self = shift (@_);
    my ($file) = @_;

    if (system ("bunzip2", $file) != 0)
    {
        &util::rm ($file);
    }
}
164
# Unpack a zip (or jar) archive by running the external "unzip" utility.
#
# $file is the name of the archive; read() calls this after changing into
# the temporary directory, passing the bare file name.
#
# The command is run with the list form of system(), so the file name is
# handed to the program as a single argument without going through a shell.
# Names containing spaces or shell metacharacters are therefore handled
# safely. The exit status is not checked. Afterwards the archive itself is
# removed via util::rm if it still exists, leaving only the extracted files.
sub unzip {
    my $self = shift (@_);
    my ($file) = @_;

    system ("unzip", $file);
    &util::rm ($file) if -e $file;
}
172
# Unpack a tar archive by running the external "tar" utility ("tar xf").
#
# $file is the name of the archive; read() calls this after changing into
# the temporary directory, passing the bare file name.
#
# The command is run with the list form of system(), so the file name is
# handed to the program as a single argument without going through a shell.
# Names containing spaces or shell metacharacters are therefore handled
# safely. The exit status is not checked. Afterwards the archive itself is
# removed via util::rm if it still exists, leaving only the extracted files.
sub untar {
    my $self = shift (@_);
    my ($file) = @_;

    system ("tar", "xf", $file);
    &util::rm ($file) if -e $file;
}
180
# Decompress a gzip file by running the external "gunzip" utility on it.
# read() uses this for every extension it does not route to another tool.
#
# $file is the name of the file to decompress; read() calls this after
# changing into the temporary directory, passing the bare file name.
#
# The command is run with the list form of system(), so the file name is
# handed to the program as a single argument without going through a shell.
# Names containing spaces or shell metacharacters are therefore handled
# safely. If the command does not finish with status 0 the file is removed
# via util::rm (presumably so the unreadable archive is not processed again).
sub gunzip {
    my $self = shift (@_);
    my ($file) = @_;

    if (system ("gunzip", $file) != 0)
    {
        &util::rm ($file);
    }
}
190
1911;
Note: See TracBrowser for help on using the repository browser.