Ignore:
Timestamp:
2008-06-05T09:29:32+12:00 (16 years ago)
Author:
kjdon
Message:

plugin overhaul: plugins renamed to xxPlugin, and in some cases the names are made more sensible. They now use the new base plugins. Hopefully we have better code reuse. Some of the plugins still need work done as I didn't want to spend another month doing this before committing it. Also, I haven't really tested anything yet...

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/WordPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # WordPlug.pm -- plugin for importing Microsoft Word documents
     3# WordPlugin.pm -- plugin for importing Microsoft Word documents
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2525# 12/05/02 Added usage datastructure - John Thompson
    2626
    27 package WordPlug;
    28 
    29 use ConvertToPlug;
     27package WordPlugin;
     28
     29use ConvertBinaryFile;
    3030use strict;
    3131no strict 'refs'; # allow filehandles to be variables and viceversa
    3232
    3333sub BEGIN {
    34     @WordPlug::ISA = ('ConvertToPlug');
     34    @WordPlugin::ISA = ('ConvertBinaryFile');
    3535}
    3636
    3737my $arguments =
    3838    [ { 'name' => "process_exp",
    39     'desc' => "{BasPlug.process_exp}",
     39    'desc' => "{BasePlugin.process_exp}",
    4040    'type' => "regexp",
    4141    'deft' => &get_default_process_exp(),
    4242    'reqd' => "no" },
    4343      { 'name' => "description_tags",
    44     'desc' => "{HTMLPlug.description_tags}",
     44    'desc' => "{HTMLPlugin.description_tags}",
    4545    'type' => "flag" }
    4646      ];
    4747
    48 my $options = { 'name'     => "WordPlug",
    49         'desc'     => "{WordPlug.desc}",
     48my $options = { 'name'     => "WordPlugin",
     49        'desc'     => "{WordPlugin.desc}",
    5050        'abstract' => "no",
    5151        'inherits' => "yes",
     
    6060    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
    6161    my $ws_arg = [ { 'name' => "windows_scripting",
    62              'desc' => "{WordPlug.windows_scripting}",
     62             'desc' => "{WordPlugin.windows_scripting}",
    6363             'type' => "flag",
    6464                 'reqd' => "no" },
     
    6767             'deft' => "Title" },
    6868               { 'name' => "level1_header",
    69              'desc' => "{StructuredHTMLPlug.level1_header}",
     69             'desc' => "{StructuredHTMLPlugin.level1_header}",
    7070             'type' => "regexp",
    7171             'reqd' => "no",
    7272             'deft' => "" },
    7373               { 'name' => "level2_header",
    74              'desc' => "{StructuredHTMLPlug.level2_header}",
     74             'desc' => "{StructuredHTMLPlugin.level2_header}",
    7575             'type' => "regexp",
    7676             'reqd' => "no",
    7777             'deft' => "" },
    7878               { 'name' => "level3_header",
    79              'desc' => "{StructuredHTMLPlug.level3_header}",
     79             'desc' => "{StructuredHTMLPlugin.level3_header}",
    8080             'type' => "regexp",
    8181             'reqd' => "no",
    8282             'deft' => "" },
    8383               { 'name' => "title_header",
    84              'desc' => "{StructuredHTMLPlug.title_header}",
     84             'desc' => "{StructuredHTMLPlugin.title_header}",
    8585             'type' => "regexp",
    8686             'reqd' => "no",
    8787             'deft' => "" },
    8888               { 'name' => "delete_toc",
    89              'desc' => "{StructuredHTMLPlug.delete_toc}",
     89             'desc' => "{StructuredHTMLPlugin.delete_toc}",
    9090             'type' => "flag",
    9191             'reqd' => "no",
     
    9494             'modegli' => "3"},
    9595               { 'name' => "toc_header",
    96              'desc' => "{StructuredHTMLPlug.toc_header}",
     96             'desc' => "{StructuredHTMLPlugin.toc_header}",
    9797             'type' => "regexp",
    9898             'reqd' => "no",
     
    103103    }
    104104
    105     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    106     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    107 
    108     my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists);
     105    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     106    push(@{$hashArgOptLists->{"OptList"}},$options);
     107
     108    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
    109109
    110110    if ($self->{'info_only'}) {
     
    113113    }
    114114
    115     #this is passed through to gsConvert.pl by ConvertToPlug.pm
     115    $self->{'filename_extension'} = "doc";
     116    $self->{'file_type'} = "Word";
     117
     118    #this is passed through to gsConvert.pl by ConvertBinaryFile.pm
    116119    $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'};
    117120
     
    123126    my $secondary_plugin_options = $self->{'secondary_plugin_options'};
    124127    if (defined $self->{'windows_scripting'}) {
    125     if (!defined $secondary_plugin_options->{'StructuredHTMLPlug'}){
    126         $secondary_plugin_options->{'StructuredHTMLPlug'} = [];
    127         my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlug'};
     128    if (!defined $secondary_plugin_options->{'StructuredHTMLPlugin'}){
     129        $secondary_plugin_options->{'StructuredHTMLPlugin'} = [];
     130        my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlugin'};
    128131       
    129         # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
     132        # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj)
    130133        # to extract these metadata fields from the HEAD META fields
    131134        push (@$structhtml_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
     
    142145    }
    143146    }
    144     if (!defined $secondary_plugin_options->{'HTMLPlug'}) {
    145     $secondary_plugin_options->{'HTMLPlug'} = [];
    146     }
    147     if (!defined $secondary_plugin_options->{'TEXTPlug'}) {
    148     $secondary_plugin_options->{'TEXTPlug'} = [];
    149     }
    150 
    151     my $html_options = $secondary_plugin_options->{'HTMLPlug'};
    152     my $text_options = $secondary_plugin_options->{'TextPlug'};
    153     my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlug'};   
    154     # wvWare will always produce html files encoded as utf-8, so make sure the secondary HTMLPlug knows this
     147    if (!defined $secondary_plugin_options->{'HTMLPlugin'}) {
     148    $secondary_plugin_options->{'HTMLPlugin'} = [];
     149    }
     150    if (!defined $secondary_plugin_options->{'TextPlugin'}) {
     151    $secondary_plugin_options->{'TextPlugin'} = [];
     152    }
     153
     154    my $html_options = $secondary_plugin_options->{'HTMLPlugin'};
     155    my $text_options = $secondary_plugin_options->{'TextPlugin'};
     156    my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlugin'};   
     157    # wvWare will always produce html files encoded as utf-8, so make sure the secondary HTMLPlugin knows this
    155158    push(@$html_options,"-input_encoding", "utf8");
    156159    push(@$html_options,"-extract_language") if $self->{'extract_language'};
    157160    push(@$html_options, "-description_tags") if $self->{'description_tags'};
    158161
    159     # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
     162    # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj)
    160163    # to extract these metadata fields from the HEAD META fields
    161164    push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
     
    181184}
    182185
    183 sub convert_post_process
     186sub convert_post_process_old
    184187{
    185188    my $self = shift (@_);
     
    199202    # Write it out again!
    200203    #$self->utf8_write_file (\$text, $conv_filename);
    201 }
    202 
    203 sub get_file_type {
    204     my $self = shift (@_);
    205     my $file_type = "Word";
    206     return $file_type;
    207204}
    208205
     
    230227}
    231228
    232 # do plugin specific processing of doc_obj for HTML type
    233 sub process {
    234     my $self = shift (@_);
    235     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    236 
    237     return $self->process_type("doc", $base_dir, $file, $doc_obj);
    238 }
    239229
    2402301;
Note: See TracChangeset for help on using the changeset viewer.