source: trunk/gsdl/perllib/plugins/ZIPPlug.pm@ 2785

Last change on this file since 2785 was 2785, checked in by sjboddie, 23 years ago

The build process now creates a summary of how many files were included,
which were rejected, etc. A link to a page containing this summary is
provided from the final page of the collector (once the collection is built
successfully) and from the default "about this collection" text for
collections built by the collector.

Also did a little bit of tidying in a couple of places

  • Property svn:keywords set to Author Date Id Revision
File size: 4.0 KB
Line 
1###########################################################################
2#
3# ZIPPlug.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which handles compressed and/or archived input formats
27#
28# currently handled formats and file extensions are:
29#
30# gzip (.gz, .z, .tgz, .taz)
31# bzip (.bz)
32# bzip2 (.bz2)
33# zip (.zip .jar)
34# tar (.tar)
35#
36# this plugin relies on the following utilities being present
37# (if trying to process the corresponding formats)
38#
39# gunzip (for gzip)
40# bunzip (for bzip)
41# bunzip2
42# unzip (for zip)
43# tar (for tar)
44
45# ZIPPlug is currently disabled on windows as we can't expect any of the
46# above utilities to be present on that OS. We should probably provide
47# binaries with Greenstone some day.
48
49package ZIPPlug;
50
51use BasPlug;
52use plugin;
53use util;
54use Cwd;
55
56
57BEGIN {
58 @ISA = ('BasPlug');
59}
60
61sub new {
62 my ($class) = @_;
63 my $self = new BasPlug ("ZIPPlug", @_);
64
65 return bless $self, $class;
66}
67
68# this is a recursive plugin
69sub is_recursive {
70 my $self = shift (@_);
71
72 return 1;
73}
74
75# return number of files processed, undef if can't process
76# Note that $base_dir might be "" and that $file might
77# include directories
78sub read {
79 my $self = shift (@_);
80 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
81 my $outhandle = $self->{'outhandle'};
82
83 # disabled on windows
84 return undef if ($ENV{'GSDLOS'} =~ /^windows$/i);
85
86 if ($file =~ /\.(gz|tgz|z|taz|bz|bz2|zip|jar|tar)$/i) {
87
88 my $filename = &util::filename_cat ($base_dir, $file);
89 if (!-e $filename) {
90 print $outhandle "ZIPPLug: WARNING: $filename does not exist\n";
91 return undef;
92 }
93
94 my ($file_only) = $file =~ /([^\\\/]*)$/;
95 my $tmpdir = &util::get_tmp_filename ();
96 &util::mk_all_dir ($tmpdir);
97
98 print $outhandle "ZIPPlug: extracting $file_only to $tmpdir\n";
99
100 # save current working directory
101 my $cwd = cwd();
102 chdir ($tmpdir) || die "Unable to change to $tmpdir";
103 &util::cp ($filename, $tmpdir);
104
105 if ($file =~ /\.bz$/i) {
106 $self->bunzip ($file_only);
107 } elsif ($file =~ /\.bz2$/i) {
108 $self->bunzip2 ($file_only);
109 } elsif ($file =~ /\.(zip|jar)$/i) {
110 $self->unzip ($file_only);
111 } elsif ($file =~ /\.tar$/i) {
112 $self->untar ($file_only);
113 } else {
114 $self->gunzip ($file_only);
115 }
116
117 chdir ($cwd) || die "Unable to change back to $cwd";
118
119 my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs);
120 &util::rm_r ($tmpdir);
121
122 $self->{'num_archives'} ++;
123
124 return $numdocs;
125
126 } else {
127 return undef;
128 }
129}
130
131sub bunzip {
132 my $self = shift (@_);
133 my ($file) = @_;
134 if (system ("bunzip $file")!=0)
135 {
136 &util::rm ($file);
137 }
138}
139
140sub bunzip2 {
141 my $self = shift (@_);
142 my ($file) = @_;
143 if (system ("bunzip2 $file")!=0)
144 {
145 &util::rm ($file);
146 }
147}
148
149sub unzip {
150 my $self = shift (@_);
151 my ($file) = @_;
152 system ("unzip $file");
153 &util::rm ($file) if -e $file;
154}
155
156sub untar {
157 my $self = shift (@_);
158 my ($file) = @_;
159 system ("tar xf $file");
160 &util::rm ($file) if -e $file;
161}
162
163sub gunzip {
164 my $self = shift (@_);
165 my ($file) = @_;
166 if (system ("gunzip $file")!=0)
167 {
168 &util::rm ($file);
169 };
170}
171
1721;
Note: See TracBrowser for help on using the repository browser.