Changeset 10279 for trunk/gsdl/perllib/plugins/WordPlug.pm
- Timestamp: 2005-07-25T14:16:44+12:00 (19 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/WordPlug.pm
r10254 r10279 29 29 30 30 use ConvertToPlug; 31 require StructuredHTMLPlug; 31 32 use strict; 32 33 no strict 'refs'; # allow filehandles to be variables and viceversa 33 34 35 34 36 sub BEGIN { 35 @WordPlug::ISA = ('ConvertToPlug' );37 @WordPlug::ISA = ('ConvertToPlug','StructuredHTMLPlug'); 36 38 } 37 39 … … 41 43 'type' => "regexp", 42 44 'deft' => &get_default_process_exp(), 43 'reqd' => "no" } ]; 45 'reqd' => "no" }, 46 { 'name' => "title_header", 47 'desc' => "{WordPlug.title_header}", 48 'type' => "string" }, 49 { 'name' => "check_toc", 50 'desc' => "{WordPlug.check_toc}", 51 'type' => "flag" }, 52 { 'name' => "toc_header", 53 'desc' => "{WordPlug.toc_header}", 54 'type' => "string" }, 55 { 'name' => "tof_header", 56 'desc' => "{WordPlug.tof_header}", 57 'type' => "string" }, 58 { 'name' => "level1_header", 59 'desc' => "{WordPlug.level1_header}", 60 'type' => "string" }, 61 { 'name' => "level2_header", 62 'desc' => "{WordPlug.level2_header}", 63 'type' => "string" }, 64 { 'name' => "level3_header", 65 'desc' => "{WordPlug.level3_header}", 66 'type' => "string" }]; 44 67 45 68 my $options = { 'name' => "WordPlug", … … 54 77 push(@$pluginlist, $class); 55 78 79 if ($ENV{'GSDLOS'} =~ m/^windows$/i) { 80 my $ws_arg = { 'name' => "windows_scripting", 81 'desc' => "{WordPlug.windows_scripting}", 82 'type' => "flag" }; 83 push(@$arguments,$ws_arg); 84 } 85 56 86 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 57 87 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; … … 59 89 my $self = (defined $hashArgOptLists)? 
new ConvertToPlug($pluginlist,$inputargs,$hashArgOptLists): new ConvertToPlug($pluginlist,$inputargs); 60 90 61 # wvWare will always produce html files encoded as utf-8 91 #this is passed through to gsConvert.pl by ConvertToPlug.pm 92 $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'}; 93 94 my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 95 96 if (!defined $secondary_plugin_options->{'HTMLPlug'}) { 97 $secondary_plugin_options->{'HTMLPlug'} = []; 98 } 99 if (!defined $secondary_plugin_options->{'TEXTPlug'}) { 100 $secondary_plugin_options->{'TEXTPlug'} = []; 101 } 102 103 my $html_options = $secondary_plugin_options->{'HTMLPlug'}; 104 my $text_options = $secondary_plugin_options->{'TextPlug'}; 105 106 # wvWare will always produce html files encoded as utf-8 62 107 if ($self->{'input_encoding'} eq "auto") { 63 108 $self->{'input_encoding'} = "utf8"; 64 109 $self->{'extract_language'} = 1; 110 push(@$html_options,"-input_encoding", "utf8"); 111 push(@$html_options,"-extract_language"); 112 113 # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj) 114 # to extract these metadata fields from the HEAD META fields 115 push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>"); 116 #push(@$html_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 65 117 } 118 119 $self = bless $self, $class; 120 121 $self->load_secondary_plugins($class,$secondary_plugin_options); 66 122 67 return bless $self, $class; 123 #return bless $self, $class; 124 return bless $self; 68 125 } 69 126 … … 74 131 } 75 132 133 sub convert_post_process 134 { 135 my $self = shift (@_); 136 my ($conv_filename) = @_; 137 138 my $outhandle=$self->{'outhandle'}; 139 140 my ($language, $encoding) = $self->textcat_get_language_encoding ($conv_filename); 141 142 # read in file ($text will be in utf8) 143 my $text = ""; 144 $self->read_file ($conv_filename, $encoding, $language, \$text); 145 146 # turn any high bytes that 
aren't valid utf-8 into utf-8. 147 unicode::ensure_utf8(\$text); 148 149 # Write it out again! 150 } 151 152 sub get_file_type { 153 my $self = shift (@_); 154 my $file_type = "Word"; 155 return $file_type; 156 } 157 158 # Modified to cache HTML files for efficiency reasons rather 159 # than delete all. HTML is modified not to use IE's VML. 160 # VML uses WML files, so these can be deleted. 161 sub cleanup_tmp_area { 162 my ($self) = @_; 163 if (defined $self->{'files_dir'}) { 164 my $html_files_dir = $self->{'files_dir'}; 165 166 if (opendir(DIN,$html_files_dir)) { 167 my @wmz_files = grep( /\.wmz$/, readdir(DIN)); 168 foreach my $f (@wmz_files) { 169 my $full_f = &util::filename_cat($html_files_dir,$f); 170 &util::rm($full_f); 171 } 172 closedir(DIN); 173 } 174 else { 175 # if HTML file has no supporting images, then no _files dir made 176 # => do nothing 177 } 178 } 179 } 180 76 181 # do plugin specific processing of doc_obj for HTML type 77 182 sub process { 78 183 my $self = shift (@_); 79 #my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 80 81 my $outhandle = $self->{'outhandle'}; 82 print $outhandle "WordPlug: passing $_[3] on to $self->{'converted_to'}Plug\n" 83 if $self->{'verbosity'} > 1; 84 print STDERR "<Processing n='$_[3]' p='WordPlug'>\n" if ($_[6]); 85 86 return ConvertToPlug::process_type($self,"doc",@_); 184 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 185 186 return $self->process_type("doc", $base_dir, $file, $doc_obj); 87 187 } 88 188 89 189 1; 190
Note:
See TracChangeset
for help on using the changeset viewer.