Ignore:
Timestamp:
2005-07-25T14:16:44+12:00 (19 years ago)
Author:
chi
Message:

A modification to allow a secondary plugin setting through ConvertToPlug

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/WordPlug.pm

    r10254 r10279  
    2929
    3030use ConvertToPlug;
     31require StructuredHTMLPlug;
    3132use strict;
    3233no strict 'refs'; # allow filehandles to be variables and vice versa
    3334
     35
    3436sub BEGIN {
    35     @WordPlug::ISA = ('ConvertToPlug');
     37    @WordPlug::ISA = ('ConvertToPlug','StructuredHTMLPlug');
    3638}
    3739
     
    4143    'type' => "regexp",
    4244    'deft' => &get_default_process_exp(),
    43     'reqd' => "no" } ];
     45    'reqd' => "no" },
     46      { 'name' => "title_header",
     47    'desc' => "{WordPlug.title_header}",
     48    'type' => "string" },
     49      { 'name' => "check_toc",
     50    'desc' => "{WordPlug.check_toc}",
     51    'type' => "flag" },
     52      { 'name' => "toc_header",
     53    'desc' => "{WordPlug.toc_header}",
     54    'type' => "string" },
     55      { 'name' => "tof_header",
     56    'desc' => "{WordPlug.tof_header}",
     57    'type' => "string" },
     58      { 'name' => "level1_header",
     59    'desc' => "{WordPlug.level1_header}",
     60    'type' => "string" },
     61      { 'name' => "level2_header",
     62    'desc' => "{WordPlug.level2_header}",
     63    'type' => "string" },     
     64      { 'name' => "level3_header",
     65    'desc' => "{WordPlug.level3_header}",
     66    'type' => "string" }];
    4467
    4568my $options = { 'name'     => "WordPlug",
     
    5477    push(@$pluginlist, $class);
    5578
     79    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
     80    my $ws_arg = { 'name' => "windows_scripting",
     81               'desc' => "{WordPlug.windows_scripting}",
     82               'type' => "flag" };
     83    push(@$arguments,$ws_arg);
     84    }
     85
    5686    if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    5787    if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     
    5989    my $self = (defined $hashArgOptLists)? new ConvertToPlug($pluginlist,$inputargs,$hashArgOptLists): new ConvertToPlug($pluginlist,$inputargs);
    6090
    61      # wvWare will always produce html files encoded as utf-8
     91    #this is passed through to gsConvert.pl by ConvertToPlug.pm
     92    $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'};
     93   
     94    my $secondary_plugin_options = $self->{'secondary_plugin_options'};
     95   
     96    if (!defined $secondary_plugin_options->{'HTMLPlug'}) {
     97    $secondary_plugin_options->{'HTMLPlug'} = [];
     98    }
     99    if (!defined $secondary_plugin_options->{'TEXTPlug'}) {
     100    $secondary_plugin_options->{'TEXTPlug'} = [];
     101    }
     102
     103    my $html_options = $secondary_plugin_options->{'HTMLPlug'};
     104    my $text_options = $secondary_plugin_options->{'TextPlug'};
     105   
     106    # wvWare will always produce html files encoded as utf-8
    62107    if ($self->{'input_encoding'} eq "auto") {
    63108    $self->{'input_encoding'} = "utf8";
    64109    $self->{'extract_language'} = 1;
     110    push(@$html_options,"-input_encoding", "utf8");
     111    push(@$html_options,"-extract_language");
     112   
     113    # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
     114    # to extract these metadata fields from the HEAD META fields
     115    push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
     116    #push(@$html_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    65117    }
     118   
     119    $self = bless $self, $class;
     120   
     121    $self->load_secondary_plugins($class,$secondary_plugin_options);
    66122
    67     return bless $self, $class;
     123    #return bless $self, $class;
     124    return bless $self;
    68125}
    69126
     
    74131}
    75132
     133sub convert_post_process
     134{
     135    my $self = shift (@_);
     136    my ($conv_filename) = @_;
     137
     138    my $outhandle=$self->{'outhandle'};
     139
     140    my ($language, $encoding) = $self->textcat_get_language_encoding ($conv_filename);
     141
     142    # read in file ($text will be in utf8)
     143    my $text = "";
     144    $self->read_file ($conv_filename, $encoding, $language, \$text);
     145
     146    # turn any high bytes that aren't valid utf-8 into utf-8.
     147    unicode::ensure_utf8(\$text);
     148
     149    # Write it out again!
     150}
     151
     152sub get_file_type {
     153    my $self = shift (@_);
     154    my $file_type = "Word";
     155    return $file_type;
     156}
     157
     158# Modified to cache HTML files for efficiency reasons rather
     159# than delete all.  HTML is modified not to use IE's VML.
     160# VML uses WMZ files, so these can be deleted.
     161sub cleanup_tmp_area {
     162    my ($self) = @_;
     163    if (defined $self->{'files_dir'}) {
     164    my $html_files_dir = $self->{'files_dir'};
     165
     166    if (opendir(DIN,$html_files_dir)) {
     167        my @wmz_files = grep( /\.wmz$/, readdir(DIN));
     168        foreach my $f (@wmz_files) {
     169        my $full_f = &util::filename_cat($html_files_dir,$f);
     170        &util::rm($full_f);
     171        }
     172        closedir(DIN);
     173    }
     174    else {
     175        # if HTML file has no supporting images, then no _files dir made
     176        # => do nothing
     177    }
     178    }
     179}
     180
    76181# do plugin specific processing of doc_obj for HTML type
    77182sub process {
    78183    my $self = shift (@_);
    79     #my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    80 
    81     my $outhandle = $self->{'outhandle'};
    82     print $outhandle "WordPlug: passing $_[3] on to $self->{'converted_to'}Plug\n"
    83     if $self->{'verbosity'} > 1;
    84     print STDERR "<Processing n='$_[3]' p='WordPlug'>\n" if ($_[6]);
    85 
    86     return ConvertToPlug::process_type($self,"doc",@_);
     184    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
     185   
     186    return $self->process_type("doc", $base_dir, $file, $doc_obj);
    87187}
    88188
    891891;
     190
Note: See TracChangeset for help on using the changeset viewer.