########################################################################### # # ZIPPlug.pm -- # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright (C) 1999 New Zealand Digital Library Project # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ########################################################################### # plugin which handles compressed and/or archived input formats # # currently handled formats and file extensions are: # # gzip (.gz, .z, .tgz, .taz) # bzip (.bz) # bzip2 (.bz2) # zip (.zip .jar) # tar (.tar) # # this plugin relies on the following utilities being present # (if trying to process the corresponding formats) # # gunzip (for gzip) # bunzip (for bzip) # bunzip2 # unzip (for zip) # tar (for tar) package ZIPPlug; use BasPlug; use plugin; use util; use Cwd; use strict; no strict 'refs'; # allow filehandles to be variables and viceversa BEGIN { @ZIPPlug::ISA = ('BasPlug'); } my $arguments = [ { 'name' => "process_exp", 'desc' => "{BasPlug.process_exp}", 'type' => "string", 'deft' => &get_default_process_exp(), 'reqd' => "no" } ]; my $options = { 'name' => "ZIPPlug", 'desc' => "{ZIPPlug.desc}", 'abstract' => "no", 'inherits' => "yes", 'args' => $arguments }; sub new { my ($class) = shift (@_); my ($pluginlist,$inputargs,$hashArgOptLists) = @_; push(@$pluginlist, $class); if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); return bless $self, $class; } # this is a recursive plugin sub is_recursive { my $self = shift (@_); return 1; } sub get_default_process_exp { return q^(?i)\.(gz|tgz|z|taz|bz|bz2|zip|jar|tar)$^; } # return number of files processed, undef if can't process # Note that $base_dir might be "" and that $file might # include directories sub read { my $self = shift (@_); my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; my $outhandle = $self->{'outhandle'}; # check process_exp, block_exp, associate_ext etc my ($block_status,$filename) = $self->read_block(@_); return $block_status if ((!defined $block_status) || ($block_status==0)); my ($file_only) = $file =~ /([^\\\/]*)$/; my $tmpdir = &util::get_tmp_filename (); &util::mk_all_dir ($tmpdir); print $outhandle "ZIPPlug: extracting $file_only to $tmpdir\n" if $self->{'verbosity'} > 1; # save current working directory my $cwd = cwd(); chdir ($tmpdir) || die "Unable to change to $tmpdir"; &util::cp ($filename, $tmpdir); if ($file =~ /\.bz$/i) { $self->bunzip ($file_only); } elsif ($file =~ /\.bz2$/i) { $self->bunzip2 ($file_only); } elsif ($file =~ /\.(zip|jar)$/i) { $self->unzip ($file_only); } elsif ($file =~ /\.tar$/i) { $self->untar ($file_only); } else { $self->gunzip ($file_only); } chdir ($cwd) || die "Unable to change back to $cwd"; my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs, $total_count, $gli); &util::rm_r ($tmpdir); $self->{'num_archives'} ++; return $numdocs; } sub bunzip { my $self = shift (@_); my ($file) = @_; if (system ("bunzip $file")!=0) { &util::rm ($file); } } sub bunzip2 { my $self = shift (@_); my ($file) = @_; if (system ("bunzip2 $file")!=0) { &util::rm ($file); } } sub unzip { my $self = shift (@_); my ($file) = @_; system ("unzip $file"); &util::rm ($file) if -e $file; } sub untar { my $self = shift (@_); my ($file) = @_; system ("tar xf $file"); &util::rm ($file) if -e $file; } sub gunzip { my $self = shift (@_); my ($file) = @_; if (system ("gunzip $file")!=0) { &util::rm ($file); }; } 1;