Changeset 22428 for main/trunk


Ignore:
Timestamp:
2010-07-19T13:27:02+12:00 (14 years ago)
Author:
davidb
Message:

Restructured WordPlugin to choose its parent class dynamically: it inherits from OpenOfficeConverter when that module can be detected, and from ConvertBinaryFile otherwise. OpenOfficeConverter itself inherits from ConvertBinaryFile.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/WordPlugin.pm

    r21801 r22428  
    2626
    2727use ConvertBinaryFile;
     28
     29
    2830use strict;
    2931no strict 'refs'; # allow filehandles to be variables and viceversa
    3032
    31 sub BEGIN {
    32     @WordPlugin::ISA = ('ConvertBinaryFile');
    33 }
     33
     34# @ISA dynamically configured to be either OpenOfficeConverter or ConvertBinaryFile
    3435
    3536my $arguments =
     
    4445      ];
    4546
     47
     48my $opt_windows_args = [ { 'name' => "windows_scripting",
     49               'desc' => "{WordPlugin.windows_scripting}",
     50               'type' => "flag",
     51               'reqd' => "no" } ];
     52
     53my $opt_office_args = [ { 'name' => "metadata_fields",
     54              'desc' => "{WordPlugin.metadata_fields}",
     55              'type' => "string",
     56              'deft' => "Title" },
     57            { 'name' => "level1_header",
     58              'desc' => "{StructuredHTMLPlugin.level1_header}",
     59              'type' => "regexp",
     60              'reqd' => "no",
     61              'deft' => "" },
     62            { 'name' => "level2_header",
     63              'desc' => "{StructuredHTMLPlugin.level2_header}",
     64              'type' => "regexp",
     65              'reqd' => "no",
     66              'deft' => "" },
     67            { 'name' => "level3_header",
     68              'desc' => "{StructuredHTMLPlugin.level3_header}",
     69              'type' => "regexp",
     70              'reqd' => "no",
     71              'deft' => "" },
     72            { 'name' => "title_header",
     73              'desc' => "{StructuredHTMLPlugin.title_header}",
     74              'type' => "regexp",
     75              'reqd' => "no",
     76              'deft' => "" },
     77            { 'name' => "delete_toc",
     78              'desc' => "{StructuredHTMLPlugin.delete_toc}",
     79              'type' => "flag",
     80              'reqd' => "no" },
     81            { 'name' => "toc_header",
     82              'desc' => "{StructuredHTMLPlugin.toc_header}",
     83              'type' => "regexp",
     84              'reqd' => "no",
     85              'deft' => "" } ];
     86
     87
    4688my $options = { 'name'     => "WordPlugin",
    4789        'desc'     => "{WordPlugin.desc}",
     
    5698    push(@$pluginlist, $class);
    5799
     100    my $openoffice_ext_installed;
     101    eval("require OpenOfficeConverter");
     102    if ($@) {
     103    # Useful debugging statement if there is a syntax error in OpenOfficeConverter 
     104    print STDERR "$@\n";
     105
     106    push(@WordPlugin::ISA,"ConvertBinaryFile");
     107    $openoffice_ext_installed = 0;
     108    }
     109    else {
     110    # Successfully found
     111    print STDERR "WordPlugin: OpenOffice Extension to Greenstone detected\n";
     112    push(@WordPlugin::ISA,"OpenOfficeConverter");
     113    $openoffice_ext_installed = 1;
     114    }
     115
    58116    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
    59     my $ws_arg = [ { 'name' => "windows_scripting",
    60              'desc' => "{WordPlugin.windows_scripting}",
    61              'type' => "flag",
    62                  'reqd' => "no" },
    63                { 'name' => "metadata_fields",
    64              'desc' => "{WordPlugin.metadata_fields}",
    65              'type' => "string",
    66              'deft' => "Title" },
    67                { 'name' => "metadata_field_separator",
    68              'desc' => "{HTMLPlugin.metadata_field_separator}",
    69              'type' => "string",
    70              'deft' => "" },
    71                { 'name' => "level1_header",
    72              'desc' => "{StructuredHTMLPlugin.level1_header}",
    73              'type' => "regexp",
    74              'reqd' => "no",
    75              'deft' => "" },
    76                { 'name' => "level2_header",
    77              'desc' => "{StructuredHTMLPlugin.level2_header}",
    78              'type' => "regexp",
    79              'reqd' => "no",
    80              'deft' => "" },
    81                { 'name' => "level3_header",
    82              'desc' => "{StructuredHTMLPlugin.level3_header}",
    83              'type' => "regexp",
    84              'reqd' => "no",
    85              'deft' => "" },
    86                { 'name' => "title_header",
    87              'desc' => "{StructuredHTMLPlugin.title_header}",
    88              'type' => "regexp",
    89              'reqd' => "no",
    90              'deft' => "" },
    91                { 'name' => "delete_toc",
    92              'desc' => "{StructuredHTMLPlugin.delete_toc}",
    93              'type' => "flag",
    94              'reqd' => "no" },
    95                { 'name' => "toc_header",
    96              'desc' => "{StructuredHTMLPlugin.toc_header}",
    97              'type' => "regexp",
    98              'reqd' => "no",
    99              'deft' => "" }
    100                ];
    101    
    102     push(@$arguments,@$ws_arg);
     117    push(@$arguments,@$opt_windows_args);
     118    push(@$arguments,@$opt_office_args);
     119    }
     120    elsif ($openoffice_ext_installed) {
     121    push(@$arguments,@$opt_office_args);
    103122    }
    104123
     
    106125    push(@{$hashArgOptLists->{"OptList"}},$options);
    107126
    108     my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
     127    my $self = {};
     128    my $outhandle;
     129
     130    if ($openoffice_ext_installed) {
     131    $self = new OpenOfficeConverter($pluginlist, $inputargs, $hashArgOptLists);
     132    $outhandle = $self->{'outhandle'};
     133
     134    if ($self->{'openoffice_conversion_available'}) {
     135        print $outhandle "WordPlugin: OpenOffice scripting functionality available\n";
     136
     137        # Override default process expression
     138        $self->{'process_exp'} = q^(?i)\.(doc|dot|docx|odt)$^;
     139
     140        if ($self->{'openoffice_scripting'}) {
     141        print $outhandle "WordPlugin: Activating OpenOffice scripting functionality\n";
     142        }
     143    }
     144    else {
     145        print $outhandle "WordPlugin: Unable to run 'soffice'\n";
     146        print $outhandle "WordPlugin: Defaulting to ConvertBinaryFile inheritence\n";
     147    }
     148    }
     149    else {
     150    $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
     151    $outhandle = $self->{'outhandle'};
     152    }
    109153
    110154    if ($self->{'info_only'}) {
     
    116160    $self->{'file_type'} = "Word";
    117161
    118     #this is passed through to gsConvert.pl by ConvertBinaryFile.pm
    119     $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'};
     162    if ($self->{'windows_scripting'}) {
     163    $self->{'convert_options'} = "-windows_scripting";
     164    $self->{'office_scripting'} = 1;
     165    }   
     166    if ($self->{'openoffice_scripting'}) {
     167    if ($self->{'windows_scripting'}) {
     168        print $outhandle "Warning: Cannot have -windows_scripting and -openoffice_scripting\n";
     169        print $outhandle "         on at the same time.  Defaulting to -windows_scripting\n";
     170    }
     171    else {
     172        $self->{'convert_options'} = "-openoffice_scripting";
     173        $self->{'office_scripting'} = 1;
     174    }
     175    }
    120176
    121177    # we always save as utf-8
     
    125181
    126182    my $secondary_plugin_options = $self->{'secondary_plugin_options'};
    127     if (defined $self->{'windows_scripting'}) {
     183    if (defined $self->{'office_scripting'}) {
    128184    if (!defined $secondary_plugin_options->{'StructuredHTMLPlugin'}){
    129185        $secondary_plugin_options->{'StructuredHTMLPlugin'} = [];
     
    134190        push (@$structhtml_options, "-metadata_fields","Title,GENERATOR,date,author<Creator>");
    135191        push (@$structhtml_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    136         push (@$structhtml_options, "-description_tags") if $self->{'windows_scripting'};
     192        push (@$structhtml_options, "-description_tags") if $self->{'windows_scripting'}; # What about openoffice???
    137193        push (@$structhtml_options, "-extract_language") if $self->{'extract_language'};
    138194        push (@$structhtml_options, "-delete_toc") if $self->{'delete_toc'};
     
    187243}
    188244
     245sub init {
     246    my $self = shift (@_);
     247    my ($verbosity, $outhandle, $failhandle) = @_;
     248   
     249    $self->SUPER::init($verbosity,$outhandle,$failhandle);
     250}
     251
     252sub deinit {
     253    # called only once, after all plugin passes have been done
     254    my ($self) = @_;
     255   
     256    $self->SUPER::deinit();
     257}
     258
    189259sub get_default_process_exp {
    190260    my $self = shift (@_);
Note: See TracChangeset for help on using the changeset viewer.