Changeset 15872 for gsdl/trunk/perllib/plugins/WordPlugin.pm
- Timestamp: 2008-06-05T09:29:32+12:00 (16 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/WordPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # WordPlug .pm -- plugin for importing Microsoft Word documents3 # WordPlugin.pm -- plugin for importing Microsoft Word documents 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 25 25 # 12/05/02 Added usage datastructure - John Thompson 26 26 27 package WordPlug ;28 29 use Convert ToPlug;27 package WordPlugin; 28 29 use ConvertBinaryFile; 30 30 use strict; 31 31 no strict 'refs'; # allow filehandles to be variables and viceversa 32 32 33 33 sub BEGIN { 34 @WordPlug ::ISA = ('ConvertToPlug');34 @WordPlugin::ISA = ('ConvertBinaryFile'); 35 35 } 36 36 37 37 my $arguments = 38 38 [ { 'name' => "process_exp", 39 'desc' => "{Bas Plug.process_exp}",39 'desc' => "{BasePlugin.process_exp}", 40 40 'type' => "regexp", 41 41 'deft' => &get_default_process_exp(), 42 42 'reqd' => "no" }, 43 43 { 'name' => "description_tags", 44 'desc' => "{HTMLPlug .description_tags}",44 'desc' => "{HTMLPlugin.description_tags}", 45 45 'type' => "flag" } 46 46 ]; 47 47 48 my $options = { 'name' => "WordPlug ",49 'desc' => "{WordPlug .desc}",48 my $options = { 'name' => "WordPlugin", 49 'desc' => "{WordPlugin.desc}", 50 50 'abstract' => "no", 51 51 'inherits' => "yes", … … 60 60 if ($ENV{'GSDLOS'} =~ m/^windows$/i) { 61 61 my $ws_arg = [ { 'name' => "windows_scripting", 62 'desc' => "{WordPlug .windows_scripting}",62 'desc' => "{WordPlugin.windows_scripting}", 63 63 'type' => "flag", 64 64 'reqd' => "no" }, … … 67 67 'deft' => "Title" }, 68 68 { 'name' => "level1_header", 69 'desc' => "{StructuredHTMLPlug .level1_header}",69 'desc' => "{StructuredHTMLPlugin.level1_header}", 70 70 'type' => "regexp", 71 71 'reqd' => "no", 72 72 'deft' => "" }, 73 73 { 'name' => "level2_header", 74 'desc' => "{StructuredHTMLPlug .level2_header}",74 'desc' => "{StructuredHTMLPlugin.level2_header}", 75 75 'type' => "regexp", 76 76 'reqd' => 
"no", 77 77 'deft' => "" }, 78 78 { 'name' => "level3_header", 79 'desc' => "{StructuredHTMLPlug .level3_header}",79 'desc' => "{StructuredHTMLPlugin.level3_header}", 80 80 'type' => "regexp", 81 81 'reqd' => "no", 82 82 'deft' => "" }, 83 83 { 'name' => "title_header", 84 'desc' => "{StructuredHTMLPlug .title_header}",84 'desc' => "{StructuredHTMLPlugin.title_header}", 85 85 'type' => "regexp", 86 86 'reqd' => "no", 87 87 'deft' => "" }, 88 88 { 'name' => "delete_toc", 89 'desc' => "{StructuredHTMLPlug .delete_toc}",89 'desc' => "{StructuredHTMLPlugin.delete_toc}", 90 90 'type' => "flag", 91 91 'reqd' => "no", … … 94 94 'modegli' => "3"}, 95 95 { 'name' => "toc_header", 96 'desc' => "{StructuredHTMLPlug .toc_header}",96 'desc' => "{StructuredHTMLPlugin.toc_header}", 97 97 'type' => "regexp", 98 98 'reqd' => "no", … … 103 103 } 104 104 105 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}106 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};107 108 my $self = new Convert ToPlug($pluginlist, $inputargs, $hashArgOptLists);105 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 106 push(@{$hashArgOptLists->{"OptList"}},$options); 107 108 my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 109 109 110 110 if ($self->{'info_only'}) { … … 113 113 } 114 114 115 #this is passed through to gsConvert.pl by ConvertToPlug.pm 115 $self->{'filename_extension'} = "doc"; 116 $self->{'file_type'} = "Word"; 117 118 #this is passed through to gsConvert.pl by ConvertBinaryFile.pm 116 119 $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'}; 117 120 … … 123 126 my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 124 127 if (defined $self->{'windows_scripting'}) { 125 if (!defined $secondary_plugin_options->{'StructuredHTMLPlug '}){126 $secondary_plugin_options->{'StructuredHTMLPlug '} = [];127 my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlug 
'};128 if (!defined $secondary_plugin_options->{'StructuredHTMLPlugin'}){ 129 $secondary_plugin_options->{'StructuredHTMLPlugin'} = []; 130 my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlugin'}; 128 131 129 # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)132 # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj) 130 133 # to extract these metadata fields from the HEAD META fields 131 134 push (@$structhtml_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>"); … … 142 145 } 143 146 } 144 if (!defined $secondary_plugin_options->{'HTMLPlug '}) {145 $secondary_plugin_options->{'HTMLPlug '} = [];146 } 147 if (!defined $secondary_plugin_options->{'T EXTPlug'}) {148 $secondary_plugin_options->{'T EXTPlug'} = [];149 } 150 151 my $html_options = $secondary_plugin_options->{'HTMLPlug '};152 my $text_options = $secondary_plugin_options->{'TextPlug '};153 my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlug '};154 # wvWare will always produce html files encoded as utf-8, so make sure the secondary HTMLPlug knows this147 if (!defined $secondary_plugin_options->{'HTMLPlugin'}) { 148 $secondary_plugin_options->{'HTMLPlugin'} = []; 149 } 150 if (!defined $secondary_plugin_options->{'TextPlugin'}) { 151 $secondary_plugin_options->{'TextPlugin'} = []; 152 } 153 154 my $html_options = $secondary_plugin_options->{'HTMLPlugin'}; 155 my $text_options = $secondary_plugin_options->{'TextPlugin'}; 156 my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlugin'}; 157 # wvWare will always produce html files encoded as utf-8, so make sure the secondary HTMLPlugin knows this 155 158 push(@$html_options,"-input_encoding", "utf8"); 156 159 push(@$html_options,"-extract_language") if $self->{'extract_language'}; 157 160 push(@$html_options, "-description_tags") if $self->{'description_tags'}; 158 161 159 # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)162 # 
Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj) 160 163 # to extract these metadata fields from the HEAD META fields 161 164 push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>"); … … 181 184 } 182 185 183 sub convert_post_process 186 sub convert_post_process_old 184 187 { 185 188 my $self = shift (@_); … … 199 202 # Write it out again! 200 203 #$self->utf8_write_file (\$text, $conv_filename); 201 }202 203 sub get_file_type {204 my $self = shift (@_);205 my $file_type = "Word";206 return $file_type;207 204 } 208 205 … … 230 227 } 231 228 232 # do plugin specific processing of doc_obj for HTML type233 sub process {234 my $self = shift (@_);235 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;236 237 return $self->process_type("doc", $base_dir, $file, $doc_obj);238 }239 229 240 230 1;
Note: See TracChangeset for help on using the changeset viewer.