source: trunk/gsdl/perllib/plugins/ZIPPlug.pm@ 3540

Last change on this file since 3540 was 3540, checked in by kjdon, 22 years ago

added John T's changes into CVS - added info to enable retrieval of usage info in xml

  • Property svn:keywords set to Author Date Id Revision
File size: 4.5 KB
Line 
1###########################################################################
2#
3# ZIPPlug.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which handles compressed and/or archived input formats
27#
28# currently handled formats and file extensions are:
29#
30# gzip (.gz, .z, .tgz, .taz)
31# bzip (.bz)
32# bzip2 (.bz2)
33# zip (.zip .jar)
34# tar (.tar)
35#
36# this plugin relies on the following utilities being present
37# (if trying to process the corresponding formats)
38#
39# gunzip (for gzip)
40# bunzip (for bzip)
41# bunzip2
42# unzip (for zip)
43# tar (for tar)
44
45# 12/05/02 Added usage datastructure - John Thompson
46
47package ZIPPlug;
48
49use BasPlug;
50use plugin;
51use util;
52use Cwd;
53
54
55BEGIN {
56 @ISA = ('BasPlug');
57}
58
59my $options = { 'name' => "ZIPPlug",
60 'desc' => "Plugin which handles compressed and/or archived input formats currently handled formats and file extensions are:\ngzip (.gz, .z, .tgz, .taz)\nbzip (.bz)\nbzip2 (.bz2)\nzip (.zip .jar)\ntar (.tar)\n\nThis plugin relies on the following utilities being present (if trying to process the corresponding formats):\ngunzip (for gzip)\nbunzip (for bzip)\nbunzip2 \nunzip (for zip)\ntar (for tar)",
61 'inherits' => "yes" };
62
63sub new {
64 my ($class) = @_;
65 my $self = new BasPlug ("ZIPPlug", @_);
66
67 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
68 my $option_list = $self->{'option_list'};
69 push( @{$option_list}, $options );
70
71 return bless $self, $class;
72}
73
74# this is a recursive plugin
75sub is_recursive {
76 my $self = shift (@_);
77
78 return 1;
79}
80
81# return number of files processed, undef if can't process
82# Note that $base_dir might be "" and that $file might
83# include directories
84sub read {
85 my $self = shift (@_);
86 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
87 my $outhandle = $self->{'outhandle'};
88
89 if ($file =~ /\.(gz|tgz|z|taz|bz|bz2|zip|jar|tar)$/i) {
90
91 my $filename = $file;
92 $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
93 if (!-e $filename) {
94 print $outhandle "ZIPPLug: WARNING: $filename does not exist\n";
95 return undef;
96 }
97
98 my ($file_only) = $file =~ /([^\\\/]*)$/;
99 my $tmpdir = &util::get_tmp_filename ();
100 &util::mk_all_dir ($tmpdir);
101
102 print $outhandle "ZIPPlug: extracting $file_only to $tmpdir\n";
103
104 # save current working directory
105 my $cwd = cwd();
106 chdir ($tmpdir) || die "Unable to change to $tmpdir";
107 &util::cp ($filename, $tmpdir);
108
109 if ($file =~ /\.bz$/i) {
110 $self->bunzip ($file_only);
111 } elsif ($file =~ /\.bz2$/i) {
112 $self->bunzip2 ($file_only);
113 } elsif ($file =~ /\.(zip|jar)$/i) {
114 $self->unzip ($file_only);
115 } elsif ($file =~ /\.tar$/i) {
116 $self->untar ($file_only);
117 } else {
118 $self->gunzip ($file_only);
119 }
120
121 chdir ($cwd) || die "Unable to change back to $cwd";
122
123 my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs);
124 &util::rm_r ($tmpdir);
125
126 $self->{'num_archives'} ++;
127
128 return $numdocs;
129
130 } else {
131 return undef;
132 }
133}
134
135sub bunzip {
136 my $self = shift (@_);
137 my ($file) = @_;
138
139 if (system ("bunzip $file")!=0)
140 {
141 &util::rm ($file);
142 }
143}
144
145sub bunzip2 {
146 my $self = shift (@_);
147 my ($file) = @_;
148
149 if (system ("bunzip2 $file")!=0)
150 {
151 &util::rm ($file);
152 }
153}
154
155sub unzip {
156 my $self = shift (@_);
157 my ($file) = @_;
158
159 system ("unzip $file");
160 &util::rm ($file) if -e $file;
161}
162
163sub untar {
164 my $self = shift (@_);
165 my ($file) = @_;
166
167 system ("tar xf $file");
168 &util::rm ($file) if -e $file;
169}
170
171sub gunzip {
172 my $self = shift (@_);
173 my ($file) = @_;
174
175 if (system ("gunzip $file")!=0)
176 {
177 &util::rm ($file);
178 };
179}
180
1811;
Note: See TracBrowser for help on using the repository browser.