########################################################################### # # ZIPPlugin.pm -- # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright (C) 1999 New Zealand Digital Library Project # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ########################################################################### # plugin which handles compressed and/or archived input formats # # currently handled formats and file extensions are: # # gzip (.gz, .z, .tgz, .taz) # bzip (.bz) # bzip2 (.bz2) # zip (.zip .jar) # tar (.tar) # # this plugin relies on the following utilities being present # (if trying to process the corresponding formats) # # gunzip (for gzip) # bunzip (for bzip) # bunzip2 # unzip (for zip) # tar (for tar) package ZIPPlugin; use BasePlugin; use plugin; use util; use Cwd; use strict; no strict 'refs'; # allow filehandles to be variables and viceversa BEGIN { @ZIPPlugin::ISA = ('BasePlugin'); } my $arguments = [ { 'name' => "process_exp", 'desc' => "{BasePlugin.process_exp}", 'type' => "string", 'deft' => &get_default_process_exp(), 'reqd' => "no" } ]; my $options = { 'name' => "ZIPPlugin", 'desc' => "{ZIPPlugin.desc}", 'abstract' => "no", 'inherits' => "yes", 'args' => $arguments }; sub new { my ($class) = shift (@_); my ($pluginlist,$inputargs,$hashArgOptLists) = @_; push(@$pluginlist, $class); push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); push(@{$hashArgOptLists->{"OptList"}},$options); my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); return bless $self, $class; } # this is a recursive plugin sub is_recursive { my $self = shift (@_); return 1; } sub get_default_process_exp { return q^(?i)\.(gz|tgz|z|taz|bz|bz2|zip|jar|tar)$^; } # return number of files processed, undef if can't process # Note that $base_dir might be "" and that $file might # include directories sub read { my $self = shift (@_); my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; my $outhandle = $self->{'outhandle'}; # can we process this file?? my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); return undef unless $self->can_process_this_file($filename_full_path); my $tmpdir = &util::get_tmp_filename (); &util::mk_all_dir ($tmpdir); print $outhandle "ZIPPlugin: extracting $filename_no_path to $tmpdir\n" if $self->{'verbosity'} > 1; # save current working directory my $cwd = cwd(); chdir ($tmpdir) || die "Unable to change to $tmpdir"; &util::cp ($filename_full_path, $tmpdir); if ($file =~ /\.bz$/i) { $self->bunzip ($filename_no_path); } elsif ($file =~ /\.bz2$/i) { $self->bunzip2 ($filename_no_path); } elsif ($file =~ /\.(zip|jar)$/i) { $self->unzip ($filename_no_path); } elsif ($file =~ /\.tar$/i) { $self->untar ($filename_no_path); } else { $self->gunzip ($filename_no_path); } chdir ($cwd) || die "Unable to change back to $cwd"; my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli); &util::rm_r ($tmpdir); $self->{'num_archives'} ++; return $numdocs; } sub bunzip { my $self = shift (@_); my ($file) = @_; if (system ("bunzip \"$file\"")!=0) { &util::rm ($file); } } sub bunzip2 { my $self = shift (@_); my ($file) = @_; if (system ("bunzip2 \"$file\"")!=0) { &util::rm ($file); } } sub unzip { my $self = shift (@_); my ($file) = @_; system ("unzip \"$file\""); &util::rm ($file) if -e $file; } sub untar { my $self = shift (@_); my ($file) = @_; system ("tar xf \"$file\""); &util::rm ($file) if -e $file; } sub gunzip { my $self = shift (@_); my ($file) = @_; if (system ("gunzip \"$file\"")!=0) { &util::rm ($file); }; } 1;