###########################################################################
#
# OpenOfficePlugin.pm -- for processing office documents (word processor,
# rich text, presentation and spreadsheet files) via OpenOffice conversion
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package OpenOfficePlugin;

use ConvertBinaryFile;
use OpenOfficeConverter;

use strict;
# 'refs' is relaxed so filehandles can be variables and vice versa;
# 'subs' is relaxed so barewords are tolerated.
no strict qw(refs subs);

use gsprintf 'gsprintf';

# Establish the inheritance chain at compile time.
BEGIN {
    our @ISA = ('ConvertBinaryFile', 'OpenOfficeConverter');
}

# Regex alternations of the file extensions handled for each document family.
my $word_pe = "doc|dot|docx|odt|wpd";
my $rtf_pe  = "rtf";
my $ppt_pe  = "ppt|pptx|odp";
my $xls_pe  = "xls|xlsx|ods";

# Plugin arguments.  The default for process_exp is computed when this
# statement runs, so the extension fragments above must already be set.
my $arguments = [
    {
        'name' => 'process_exp',
        'desc' => '{BasePlugin.process_exp}',
        'type' => 'regexp',
        'deft' => get_default_process_exp(),
        'reqd' => 'no',
    },
];

# This is not quite fully functional, eg doesn't do proper PowerPoint
# processing. So I have made it hidden in gli for now.
# Plugin description registered with the argument/option lists in new().
# 'hiddengli' => "yes" marks the plugin as hidden in GLI.
my $options = { 'name'      => "OpenOfficePlugin",
                'desc'      => "{OpenOfficePlugin.desc}",
                'abstract'  => "no",
                'inherits'  => "yes",
                'hiddengli' => "yes",
                'args'      => $arguments };

# Constructor.
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - handed unchanged to both parent constructors
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" arrays receive
#                      this plugin's arguments and options
# Returns the blessed plugin object.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);
    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Build both parents and merge them into a single object.
    my $ooc_self = OpenOfficeConverter->new($pluginlist, $inputargs, $hashArgOptLists);
    my $cbf_self = ConvertBinaryFile->new($pluginlist, $inputargs, $hashArgOptLists);
    my $self = BasePlugin::merge_inheritance($ooc_self, $cbf_self);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    # Remember whether OpenOfficeConverter reported a usable conversion.
    $self->{'openoffice_ext_working'} =
        $OpenOfficeConverter::openoffice_conversion_available ? 1 : 0;

    $self->{'convert_to'} = "structuredhtml";
    $self = bless $self, $class;

    # set convert_to_plugin and convert_to_ext
    $self->set_standard_convert_settings();

    # set up appropriate secondary plugin options here!!!
    # Make sure an (empty) option list exists for the secondary plugin
    # before the secondary plugins are loaded.
    my $secondary_plugin_name = $self->{'convert_to_plugin'};
    my $secondary_plugin_options = $self->{'secondary_plugin_options'};
    if (!defined $secondary_plugin_options->{$secondary_plugin_name}) {
        $secondary_plugin_options->{$secondary_plugin_name} = [];
    }

    $self->load_secondary_plugins($class, $secondary_plugin_options, $hashArgOptLists);
    return $self;
}

# Initialise the plugin: the superclass init gets the caller's arguments
# (verbosity, outhandle, failhandle); OpenOfficeConverter::init gets none.
sub init {
    my $self = shift (@_);

    $self->SUPER::init(@_);
    $self->OpenOfficeConverter::init();
}

# Start-of-run hook: forwards the arguments (pluginfo, base_dir, processor,
# maxdocs) to the superclass and then to OpenOfficeConverter.
sub begin {
    my $self = shift (@_);

    $self->SUPER::begin(@_);
    $self->OpenOfficeConverter::begin(@_);
}

# Returns the default process_exp: a case-insensitive pattern matching any
# of the supported extensions at the end of the filename.
sub get_default_process_exp {
    my $self = shift (@_);

    # The backslash is doubled on purpose: inside a double-quoted string
    # "\." collapses to ".", which would match any character rather than
    # a literal dot.
    return "(?i)\\.($word_pe|$ppt_pe|$rtf_pe|$xls_pe)\$";
}

# Builds the doc obj for a file.  Arguments (pluginfo, base_dir, file,
# block_hash, metadata, processor, maxdocs, total_count, gli) are passed
# through untouched to whichever parent implementation is selected.
sub read_into_doc_obj {
    my $self = shift (@_);

    if (!$self->{'openoffice_ext_working'}) {
        # use BasePlugin version to set up the doc obj, then we will do a
        # little bit more in process
        return $self->BasePlugin::read_into_doc_obj(@_);
    }

    # use convertbinaryfile version, will call tmp_area_convert_file and use
    # secondary plugins
    return $self->ConvertBinaryFile::read_into_doc_obj(@_);
}

# override this to use our own convert method
#   $output_ext     - extension of the format to convert to
#   $input_filename - file to convert
# (a third argument, a text reference, is accepted by callers but unused)
# Returns the converted filename, or "" after logging the converter's
# message when the converter's first return value is 0.
sub tmp_area_convert_file {
    my $self = shift (@_);
    my ($output_ext, $input_filename) = @_;

    my ($result, $result_str, $new_filename)
        = $self->OpenOfficeConverter::convert($input_filename, $output_ext);
    return $new_filename if ($result != 0);

    my $outhandle = $self->{'outhandle'};
    print $outhandle "Open Office Conversion error\n";
    print $outhandle $result_str;
    return "";
}

# first return value is used in _iconxx_ to give a srcicon,
# second return value is used for FileFormat metadata
sub get_file_type_from_extension {
    my $self = shift (@_);
    my ($file) = @_;

    # Check the final extension against each family of process_exp.
    # The match is anchored and case-insensitive so that a name such as
    # "doctor_notes.xls" is not reported as Word and "REPORT.XLS" is not
    # reported as unknown.
    return ("doc", "Word")  if ($file =~ /\.(?:$word_pe)$/i);
    return ("ppt", "PPT")   if ($file =~ /\.(?:$ppt_pe)$/i);
    return ("xls", "Excel") if ($file =~ /\.(?:$xls_pe)$/i);
    return ("rtf", "RTF")   if ($file =~ /\.(?:$rtf_pe)$/i);
    return ("unknown", "Unknown");
}

# Plugin-specific processing of an already created doc obj: associates the
# source file and records FileFormat, srclink_file and srcicon metadata on
# the top section.  Returns 1.
sub process {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    my $cursection = $doc_obj->get_top_section();

    # store original file as associated file
    my $filename = &util::filename_cat($base_dir, $file);
    my $assocfilename = $doc_obj->get_assocfile_from_sourcefile();
    $doc_obj->associate_file($filename, $assocfilename, undef, $cursection);

    my ($ext, $format) = $self->get_file_type_from_extension($file);

    # overwrite the one set by secondary plugin
    $doc_obj->set_utf8_metadata_element($cursection, "FileFormat", $format);
    $doc_obj->add_utf8_metadata ($cursection, "srclink_file", "[SourceFile]");
    $doc_obj->add_utf8_metadata ($cursection, "srcicon", "_icon".$ext."_");

    # if oo conversion not available, we have no text, so add some
    if (!$self->{'openoffice_ext_working'}) {
        if ($gli) {
            # NOTE(review): the format string is empty as found; presumably
            # a GLI warning message was intended here -- TODO confirm.
            &gsprintf(STDERR, "");
        }
        print STDERR "OpenOfficePlugin: no conversion available, just adding $file as is\n";

        # we have no text - adds dummy text and NoText metadata
        $self->add_dummy_text($doc_obj, $doc_obj->get_top_section());
    }

    # Explicit result, matching process_old, instead of leaking the value
    # of whichever expression happened to be evaluated last.
    return 1;
}

# Older variant of process, kept for reference.  Uses the plugin's
# 'filename_extension' and 'file_type' settings instead of inspecting the
# file name.  Returns 1.
sub process_old {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    # old from here
    # need to check that not empty
    my $doc_ext = $self->{'filename_extension'};
    my $file_type = "unknown";
    $file_type = $self->{'file_type'} if defined $self->{'file_type'};

    my $keep_original = ($self->{'keep_original_filename'} == 1);

    # associate original file with doc object
    my $cursection = $doc_obj->get_top_section();
    my $filename = &util::filename_cat($base_dir, $file);
    my $assocfilename = "doc.$doc_ext";
    if ($keep_original) {
        # this should be the same filename that was used for the Source and
        # SourceFile metadata, as we will use [SourceFile] in the srclink
        $assocfilename = $doc_obj->get_assocfile_from_sourcefile();
    }
    $doc_obj->associate_file($filename, $assocfilename, undef, $cursection);

    # We use set instead of add here because we only want one value
    $doc_obj->set_utf8_metadata_element($cursection, "FileFormat", $file_type);

    my $srclink_filename = $keep_original ? "[SourceFile]" : "doc.$doc_ext";

    # The srclink and /srclink metadata that used to be added around the
    # icon are left disabled.
    $doc_obj->add_utf8_metadata ($cursection, "srcicon", "_icon".$doc_ext."_");
    $doc_obj->add_utf8_metadata ($cursection, "srclink_file", $srclink_filename);

    return 1;
}