source: trunk/gsdl/perllib/plugins/ZIPPlug.pm@ 1269

Last change on this file since 1269 was 1269, checked in by sjboddie, 24 years ago

Added ZIPPlug plugin for handling input documents that have been compressed
and/or archived with zip, gzip, bzip or tar. Currently uses unix utilities
but is simple enough to extend (maybe use pkunzip for zip files on windows?)

  • Property svn:keywords set to Author Date Id Revision
File size: 3.3 KB
Line 
1###########################################################################
2#
3# ZIPPlug.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which handles compressed and/or archived input formats
27#
28# currently handled formats and file extensions are:
29#
30# gzip (.gz, .z, .tgz, .taz)
31# bzip (.bz)
32# zip (.zip)
33# tar (.tar)
34#
35# this plugin relies on the following utilities being present
36# (if trying to process the corresponding formats)
37#
38# gunzip (for gzip)
39# bunzip (for bzip)
40# unzip (for zip)
41# tar (for tar)
42
43package ZIPPlug;
44
45use BasPlug;
46use plugin;
47use util;
48use Cwd;
49
50
# Establish inheritance from BasPlug at compile time.
BEGIN {
    our @ISA = qw(BasPlug);
}
54
55use strict;
56
# Constructor: builds the underlying BasPlug object and blesses it into
# the class this constructor was invoked on.
sub new {
    my ($class) = @_;

    # @_ is intentionally not shifted, so the invoking class name is
    # forwarded to the BasPlug constructor together with any other arguments.
    my $self = BasPlug->new("ZIPPlug", @_);

    return bless($self, $class);
}
63
# ZIPPlug is a recursive plugin: always answers 1.
sub is_recursive {
    my ($self) = @_;
    return 1;
}
70
# Unpack a compressed and/or archived file into a temporary directory and
# pass that directory to plugin::read for processing.
#
# Arguments: ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs)
#   Note that $base_dir might be "" and that $file might include directories.
#
# Returns the number of files processed (the value returned by plugin::read),
# or undef if the file can't be processed (unrecognised extension, or the
# file does not exist).  Dies if the working directory cannot be changed.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;

    # only files with a recognised extension are handled by this plugin
    return undef unless $file =~ /\.(gz|tgz|z|taz|bz|zip|tar)$/i;

    my $filename = &util::filename_cat ($base_dir, $file);
    if (!-e $filename) {
        print STDERR "ZIPPlug: WARNING: $filename does not exist\n";
        return undef;
    }

    # strip any leading directories (either slash style) from $file
    my ($file_only) = $file =~ /([^\\\/]*)$/;
    my $tmpdir = &util::get_tmp_filename ();
    &util::mk_all_dir ($tmpdir);

    print STDERR "ZIPPlug: extracting $file_only to $tmpdir\n";

    # Copy the archive while still in the original working directory:
    # $filename (and possibly $tmpdir) may be relative paths, which would
    # stop resolving once we chdir into the temporary directory.
    &util::cp ($filename, $tmpdir);

    # save current working directory; the extraction helpers operate on
    # $file_only relative to the current directory
    my $cwd = cwd();
    chdir ($tmpdir) || die "ZIPPlug: cannot chdir to $tmpdir: $!";

    if ($file =~ /\.bz$/i) {
        $self->bunzip ($file_only);
    } elsif ($file =~ /\.zip$/i) {
        $self->unzip ($file_only);
    } elsif ($file =~ /\.tar$/i) {
        $self->untar ($file_only);
    } else {
        # .gz, .tgz, .z and .taz all go through gunzip
        $self->gunzip ($file_only);
    }

    chdir ($cwd) || die "ZIPPlug: cannot chdir back to $cwd: $!";

    # process whatever was extracted, then discard the temporary directory
    my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs);
    &util::rm_r ($tmpdir);
    return $numdocs;
}
117
# Run the external "bunzip" utility on $file.
# Returns the status value from system() (0 on success).
sub bunzip {
    my $self = shift (@_);
    my ($file) = @_;

    # List-form system() bypasses the shell, so spaces or shell
    # metacharacters in the file name cannot alter the command.
    # NOTE(review): many systems ship only "bunzip2" -- confirm that a
    # "bunzip" command is really what is expected here.
    my $status = system ("bunzip", $file);
    if ($status != 0) {
        print STDERR "ZIPPlug: WARNING: bunzip failed on $file (status $status)\n";
    }
    return $status;
}
123
# Run the external "unzip" utility on $file, then remove the archive
# itself if it is still present so that only the extracted files remain.
sub unzip {
    my $self = shift (@_);
    my ($file) = @_;

    # List-form system() bypasses the shell, so spaces or shell
    # metacharacters in the file name cannot alter the command.
    my $status = system ("unzip", $file);
    if ($status != 0) {
        print STDERR "ZIPPlug: WARNING: unzip failed on $file (status $status)\n";
    }

    &util::rm ($file) if -e $file;
}
130
# Run the external "tar" utility ("tar xf") on $file, then remove the
# archive itself if it is still present so that only the extracted
# files remain.
sub untar {
    my $self = shift (@_);
    my ($file) = @_;

    # List-form system() bypasses the shell, so spaces or shell
    # metacharacters in the file name cannot alter the command.
    my $status = system ("tar", "xf", $file);
    if ($status != 0) {
        print STDERR "ZIPPlug: WARNING: tar failed on $file (status $status)\n";
    }

    &util::rm ($file) if -e $file;
}
137
# Run the external "gunzip" utility on $file.
# Returns the status value from system() (0 on success).
sub gunzip {
    my $self = shift (@_);
    my ($file) = @_;

    # List-form system() bypasses the shell, so spaces or shell
    # metacharacters in the file name cannot alter the command.
    my $status = system ("gunzip", $file);
    if ($status != 0) {
        print STDERR "ZIPPlug: WARNING: gunzip failed on $file (status $status)\n";
    }
    return $status;
}
143
1441;
Note: See TracBrowser for help on using the repository browser.