########################################################################### # # ZIPPlug.pm -- # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright (C) 1999 New Zealand Digital Library Project # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ########################################################################### # plugin which handles compressed and/or archived input formats # # currently handled formats and file extensions are: # # gzip (.gz, .z, .tgz, .taz) # bzip (.bz) # bzip2 (.bz2) # zip (.zip .jar) # tar (.tar) # # this plugin relies on the following utilities being present # (if trying to process the corresponding formats) # # gunzip (for gzip) # bunzip (for bzip) # bunzip2 # unzip (for zip) # tar (for tar) package ZIPPlug; use BasPlug; use plugin; use util; use Cwd; BEGIN { @ISA = ('BasPlug'); } sub new { my ($class) = @_; my $self = new BasPlug ("ZIPPlug", @_); return bless $self, $class; } # this is a recursive plugin sub is_recursive { my $self = shift (@_); return 1; } # return number of files processed, undef if can't process # Note that $base_dir might be "" and that $file might # include directories sub read { my $self = shift (@_); my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_; my $outhandle = $self->{'outhandle'}; if ($file =~ /\.(gz|tgz|z|taz|bz|bz2|zip|jar|tar)$/i) { my $filename = $file; $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/; if (!-e $filename) { print $outhandle "ZIPPLug: WARNING: $filename does not exist\n"; return undef; } my ($file_only) = $file =~ /([^\\\/]*)$/; my $tmpdir = &util::get_tmp_filename (); &util::mk_all_dir ($tmpdir); print $outhandle "ZIPPlug: extracting $file_only to $tmpdir\n"; # save current working directory my $cwd = cwd(); chdir ($tmpdir) || die "Unable to change to $tmpdir"; &util::cp ($filename, $tmpdir); if ($file =~ /\.bz$/i) { $self->bunzip ($file_only); } elsif ($file =~ /\.bz2$/i) { $self->bunzip2 ($file_only); } elsif ($file =~ /\.(zip|jar)$/i) { $self->unzip ($file_only); } elsif ($file =~ /\.tar$/i) { $self->untar ($file_only); } else { $self->gunzip ($file_only); } chdir ($cwd) || die "Unable to change back to $cwd"; my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs); &util::rm_r ($tmpdir); $self->{'num_archives'} ++; return $numdocs; } else { return undef; } } sub bunzip { my $self = shift (@_); my ($file) = @_; if (system ("bunzip $file")!=0) { &util::rm ($file); } } sub bunzip2 { my $self = shift (@_); my ($file) = @_; if (system ("bunzip2 $file")!=0) { &util::rm ($file); } } sub unzip { my $self = shift (@_); my ($file) = @_; system ("unzip $file"); &util::rm ($file) if -e $file; } sub untar { my $self = shift (@_); my ($file) = @_; system ("tar xf $file"); &util::rm ($file) if -e $file; } sub gunzip { my $self = shift (@_); my ($file) = @_; if (system ("gunzip $file")!=0) { &util::rm ($file); }; } 1;