Changeset 10279

Show
Ignore:
Timestamp:
25.07.2005 14:16:44 (15 years ago)
Author:
chi
Message:

A modification to allow a secondary plugin setting through ConvertToPlug.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/WordPlug.pm

    r10254 r10279  
    2929 
    3030use ConvertToPlug; 
     31require StructuredHTMLPlug; 
    3132use strict; 
    3233no strict 'refs'; # allow filehandles to be variables and viceversa 
    3334 
     35 
    3436sub BEGIN { 
    35     @WordPlug::ISA = ('ConvertToPlug'); 
     37    @WordPlug::ISA = ('ConvertToPlug','StructuredHTMLPlug'); 
    3638} 
    3739 
     
    4143    'type' => "regexp", 
    4244    'deft' => &get_default_process_exp(), 
    43     'reqd' => "no" } ]; 
     45    'reqd' => "no" }, 
     46      { 'name' => "title_header", 
     47    'desc' => "{WordPlug.title_header}", 
     48    'type' => "string" }, 
     49      { 'name' => "check_toc", 
     50    'desc' => "{WordPlug.check_toc}", 
     51    'type' => "flag" }, 
     52      { 'name' => "toc_header", 
     53    'desc' => "{WordPlug.toc_header}", 
     54    'type' => "string" }, 
     55      { 'name' => "tof_header", 
     56    'desc' => "{WordPlug.tof_header}", 
     57    'type' => "string" }, 
     58      { 'name' => "level1_header", 
     59    'desc' => "{WordPlug.level1_header}", 
     60    'type' => "string" }, 
     61      { 'name' => "level2_header", 
     62    'desc' => "{WordPlug.level2_header}", 
     63    'type' => "string" },      
     64      { 'name' => "level3_header", 
     65    'desc' => "{WordPlug.level3_header}", 
     66    'type' => "string" }]; 
    4467 
    4568my $options = { 'name'     => "WordPlug", 
     
    5477    push(@$pluginlist, $class); 
    5578 
     79    if ($ENV{'GSDLOS'} =~ m/^windows$/i) { 
     80    my $ws_arg = { 'name' => "windows_scripting", 
     81               'desc' => "{WordPlug.windows_scripting}", 
     82               'type' => "flag" }; 
     83    push(@$arguments,$ws_arg); 
     84    } 
     85 
    5686    if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    5787    if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
     
    5989    my $self = (defined $hashArgOptLists)? new ConvertToPlug($pluginlist,$inputargs,$hashArgOptLists): new ConvertToPlug($pluginlist,$inputargs); 
    6090 
    61      # wvWare will always produce html files encoded as utf-8 
     91    #this is passed through to gsConvert.pl by ConvertToPlug.pm 
     92    $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'}; 
     93     
     94    my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 
     95     
     96    if (!defined $secondary_plugin_options->{'HTMLPlug'}) { 
     97    $secondary_plugin_options->{'HTMLPlug'} = []; 
     98    } 
     99    if (!defined $secondary_plugin_options->{'TEXTPlug'}) { 
     100    $secondary_plugin_options->{'TEXTPlug'} = []; 
     101    } 
     102 
     103    my $html_options = $secondary_plugin_options->{'HTMLPlug'}; 
     104    my $text_options = $secondary_plugin_options->{'TextPlug'}; 
     105     
     106    # wvWare will always produce html files encoded as utf-8 
    62107    if ($self->{'input_encoding'} eq "auto") { 
    63108    $self->{'input_encoding'} = "utf8"; 
    64109    $self->{'extract_language'} = 1; 
     110    push(@$html_options,"-input_encoding", "utf8"); 
     111    push(@$html_options,"-extract_language"); 
     112     
     113    # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj) 
     114    # to extract these metadata fields from the HEAD META fields 
     115    push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>"); 
     116    #push(@$html_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 
    65117    } 
     118     
     119    $self = bless $self, $class; 
     120     
     121    $self->load_secondary_plugins($class,$secondary_plugin_options); 
    66122 
    67     return bless $self, $class; 
     123    #return bless $self, $class; 
     124    return bless $self; 
    68125} 
    69126 
     
    74131} 
    75132 
     133sub convert_post_process 
     134{ 
     135    my $self = shift (@_); 
     136    my ($conv_filename) = @_; 
     137 
     138    my $outhandle=$self->{'outhandle'}; 
     139 
     140    my ($language, $encoding) = $self->textcat_get_language_encoding ($conv_filename); 
     141 
     142    # read in file ($text will be in utf8) 
     143    my $text = ""; 
     144    $self->read_file ($conv_filename, $encoding, $language, \$text); 
     145 
     146    # turn any high bytes that aren't valid utf-8 into utf-8. 
     147    unicode::ensure_utf8(\$text); 
     148 
     149    # Write it out again! 
     150} 
     151 
     152sub get_file_type { 
     153    my $self = shift (@_); 
     154    my $file_type = "Word"; 
     155    return $file_type; 
     156} 
     157 
     158# Modified to cache HTML files for efficieny reasons rather 
     159# than delete all.  HTML is modified not to use IE's VML. 
     160# VML uses WML files, so these can be deleted. 
     161sub cleanup_tmp_area { 
     162    my ($self) = @_; 
     163    if (defined $self->{'files_dir'}) { 
     164    my $html_files_dir = $self->{'files_dir'}; 
     165 
     166    if (opendir(DIN,$html_files_dir)) { 
     167        my @wmz_files = grep( /\.wmz$/, readdir(DIN)); 
     168        foreach my $f (@wmz_files) { 
     169        my $full_f = &util::filename_cat($html_files_dir,$f); 
     170        &util::rm($full_f); 
     171        } 
     172        closedir(DIN); 
     173    } 
     174    else { 
     175        # if HTML file has no supporting images, then no _files dir made 
     176        # => do nothing 
     177    } 
     178    } 
     179} 
     180 
    76181# do plugin specific processing of doc_obj for HTML type 
    77182sub process { 
    78183    my $self = shift (@_); 
    79     #my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
    80  
    81     my $outhandle = $self->{'outhandle'};  
    82     print $outhandle "WordPlug: passing $_[3] on to $self->{'converted_to'}Plug\n"  
    83     if $self->{'verbosity'} > 1; 
    84     print STDERR "<Processing n='$_[3]' p='WordPlug'>\n" if ($_[6]); 
    85  
    86     return ConvertToPlug::process_type($self,"doc",@_); 
     184    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
     185    
     186    return $self->process_type("doc", $base_dir, $file, $doc_obj); 
    87187} 
    88188 
    891891; 
     190