Changeset 12834
- Timestamp: 2006-09-22T16:03:36+12:00 (18 years ago)
- Location: trunk/gsdl/perllib/plugins
- Files: 6 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/perllib/plugins/ExcelPlug.pm
r12169 r12834 74 74 #$self->{'extract_language'} = 1; 75 75 push(@$html_options, "-input_encoding", "utf8"); 76 push(@$html_options,"-extract_language") ;76 push(@$html_options,"-extract_language") if $self->{'extract_language'}; 77 77 $self = bless $self, $class; 78 78 -
trunk/gsdl/perllib/plugins/PDFPlug.pm
r12169 r12834 149 149 150 150 if ($self->{'input_encoding'} eq "auto") { 151 # pdftohtml will always produce html files encoded as utf-8152 # => restrict primary PDFPlug and secondary HTML plugin to use153 # utf8 and extract language.154 151 $self->{'input_encoding'} = "utf8"; 155 $self->{'extract_language'} = 1; 156 157 push(@$html_options,"-extract_language"); 158 } 152 } 153 159 154 # if pdftohtml is always producing utf8, then htmlplug always needs this option 160 155 push(@$html_options,"-input_encoding", "utf8"); 161 156 push(@$html_options,"-extract_language") if $self->{'extract_language'}; 162 157 # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj) 163 158 # to extract these metadata fields from the HEAD META fields -
trunk/gsdl/perllib/plugins/PPTPlug.pm
r12169 r12834 112 112 if ($self->{'input_encoding'} eq "auto") { 113 113 $self->{'input_encoding'} = "utf8"; 114 $self->{'extract_language'} = 1;115 114 if (defined $secondary_plugin_options->{'HTMLPlug'}){ 116 115 push(@$html_options,"-input_encoding", "utf8"); 117 push(@$html_options,"-extract_language") ;116 push(@$html_options,"-extract_language") if $self->{'extract_language'}; 118 117 119 118 # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj) … … 123 122 if (defined $secondary_plugin_options->{'PagedImgPlug'}){ 124 123 push(@$pageimg_options,"-input_encoding", "utf8"); 125 push(@$pageimg_options,"-extract_language") ;124 push(@$pageimg_options,"-extract_language") if $self->{'extract_language'}; 126 125 } 127 126 } -
trunk/gsdl/perllib/plugins/PSPlug.pm
r12169 r12834 124 124 #$self->{'extract_language'} = 1; 125 125 push(@$text_options, "-input_encoding", "utf8"); 126 push(@$text_options,"-extract_language") ;126 push(@$text_options,"-extract_language") if $self->{'extract_language'}; 127 127 push(@$text_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 128 128 -
trunk/gsdl/perllib/plugins/RTFPlug.pm
r12169 r12834 82 82 #$self->{'extract_language'} = 1; 83 83 push(@$text_options, "-input_encoding", "utf8"); 84 push(@$text_options,"-extract_language"); 85 if ($self->{'description_tags'} == 1) { 86 push(@$html_options, "-description_tags"); 87 } 88 84 push(@$text_options,"-extract_language") if $self->{'extract_language'}; 85 push(@$html_options, "-description_tags") if $self->{'description_tags'}; 86 push(@$html_options,"-extract_language") if $self->{'extract_language'}; 89 87 90 88 $self = bless $self, $class; -
trunk/gsdl/perllib/plugins/WordPlug.pm
r12169 r12834 115 115 $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'}; 116 116 117 # we always save as utf-8 118 if ($self->{'input_encoding'} eq "auto") { 119 $self->{'input_encoding'} = "utf8"; 120 } 121 117 122 my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 118 123 if (defined $self->{'windows_scripting'}) { … … 120 125 $secondary_plugin_options->{'StructuredHTMLPlug'} = []; 121 126 my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlug'}; 122 if ($self->{'input_encoding'} eq "auto") { 123 $self->{'input_encoding'} = "utf8"; 124 $self->{'extract_language'} = 1; 125 #push(@$structhtml_options,"-input_encoding", "utf8"); 126 push(@$structhtml_options,"-extract_language"); 127 } 128 127 129 128 # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj) 130 129 # to extract these metadata fields from the HEAD META fields … … 132 131 push (@$structhtml_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 133 132 push (@$structhtml_options, "-description_tags") if $self->{'windows_scripting'}; 133 push(@$structhtml_options,"-extract_language") if $self->{'extract_language'}; 134 134 push (@$structhtml_options, "-delete_toc") if $self->{'delete_toc'}; 135 135 push (@$structhtml_options, "-toc_header", $self->{'toc_header'}) if $self->{'toc_header'}; … … 153 153 # wvWare will always produce html files encoded as utf-8, so make sure the secondary HTMLPlug knows this 154 154 push(@$html_options,"-input_encoding", "utf8"); 155 156 if ($self->{'input_encoding'} eq "auto") { 157 $self->{'input_encoding'} = "utf8"; 158 $self->{'extract_language'} = 1; 159 push(@$html_options,"-extract_language"); 160 } 161 if ($self->{'description_tags'} == 1) { 162 push(@$html_options, "-description_tags"); 163 } 155 push(@$html_options,"-extract_language") if $self->{'extract_language'}; 156 push(@$html_options, "-description_tags") if $self->{'description_tags'}; 157 164 158 # Instruct HTMLPlug (when 
eventually accessed through read_into_doc_obj) 165 159 # to extract these metadata fields from the HEAD META fields
Note: See TracChangeset for help on using the changeset viewer.