Changeset 22428
- Timestamp: 2010-07-19T13:27:02+12:00 (14 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/plugins/WordPlugin.pm
r21801 r22428 26 26 27 27 use ConvertBinaryFile; 28 29 28 30 use strict; 29 31 no strict 'refs'; # allow filehandles to be variables and viceversa 30 32 31 sub BEGIN { 32 @WordPlugin::ISA = ('ConvertBinaryFile'); 33 } 33 34 # @ISA dynamically configured to be either OpenOfficeConverter or ConvertBinaryFile 34 35 35 36 my $arguments = … … 44 45 ]; 45 46 47 48 my $opt_windows_args = [ { 'name' => "windows_scripting", 49 'desc' => "{WordPlugin.windows_scripting}", 50 'type' => "flag", 51 'reqd' => "no" } ]; 52 53 my $opt_office_args = [ { 'name' => "metadata_fields", 54 'desc' => "{WordPlugin.metadata_fields}", 55 'type' => "string", 56 'deft' => "Title" }, 57 { 'name' => "level1_header", 58 'desc' => "{StructuredHTMLPlugin.level1_header}", 59 'type' => "regexp", 60 'reqd' => "no", 61 'deft' => "" }, 62 { 'name' => "level2_header", 63 'desc' => "{StructuredHTMLPlugin.level2_header}", 64 'type' => "regexp", 65 'reqd' => "no", 66 'deft' => "" }, 67 { 'name' => "level3_header", 68 'desc' => "{StructuredHTMLPlugin.level3_header}", 69 'type' => "regexp", 70 'reqd' => "no", 71 'deft' => "" }, 72 { 'name' => "title_header", 73 'desc' => "{StructuredHTMLPlugin.title_header}", 74 'type' => "regexp", 75 'reqd' => "no", 76 'deft' => "" }, 77 { 'name' => "delete_toc", 78 'desc' => "{StructuredHTMLPlugin.delete_toc}", 79 'type' => "flag", 80 'reqd' => "no" }, 81 { 'name' => "toc_header", 82 'desc' => "{StructuredHTMLPlugin.toc_header}", 83 'type' => "regexp", 84 'reqd' => "no", 85 'deft' => "" } ]; 86 87 46 88 my $options = { 'name' => "WordPlugin", 47 89 'desc' => "{WordPlugin.desc}", … … 56 98 push(@$pluginlist, $class); 57 99 100 my $openoffice_ext_installed; 101 eval("require OpenOfficeConverter"); 102 if ($@) { 103 # Useful debugging statement if there is a syntax error in OpenOfficeConverter 104 print STDERR "$@\n"; 105 106 push(@WordPlugin::ISA,"ConvertBinaryFile"); 107 $openoffice_ext_installed = 0; 108 } 109 else { 110 # Successfully found 111 print STDERR "WordPlugin: 
OpenOffice Extension to Greenstone detected\n"; 112 push(@WordPlugin::ISA,"OpenOfficeConverter"); 113 $openoffice_ext_installed = 1; 114 } 115 58 116 if ($ENV{'GSDLOS'} =~ m/^windows$/i) { 59 my $ws_arg = [ { 'name' => "windows_scripting", 60 'desc' => "{WordPlugin.windows_scripting}", 61 'type' => "flag", 62 'reqd' => "no" }, 63 { 'name' => "metadata_fields", 64 'desc' => "{WordPlugin.metadata_fields}", 65 'type' => "string", 66 'deft' => "Title" }, 67 { 'name' => "metadata_field_separator", 68 'desc' => "{HTMLPlugin.metadata_field_separator}", 69 'type' => "string", 70 'deft' => "" }, 71 { 'name' => "level1_header", 72 'desc' => "{StructuredHTMLPlugin.level1_header}", 73 'type' => "regexp", 74 'reqd' => "no", 75 'deft' => "" }, 76 { 'name' => "level2_header", 77 'desc' => "{StructuredHTMLPlugin.level2_header}", 78 'type' => "regexp", 79 'reqd' => "no", 80 'deft' => "" }, 81 { 'name' => "level3_header", 82 'desc' => "{StructuredHTMLPlugin.level3_header}", 83 'type' => "regexp", 84 'reqd' => "no", 85 'deft' => "" }, 86 { 'name' => "title_header", 87 'desc' => "{StructuredHTMLPlugin.title_header}", 88 'type' => "regexp", 89 'reqd' => "no", 90 'deft' => "" }, 91 { 'name' => "delete_toc", 92 'desc' => "{StructuredHTMLPlugin.delete_toc}", 93 'type' => "flag", 94 'reqd' => "no" }, 95 { 'name' => "toc_header", 96 'desc' => "{StructuredHTMLPlugin.toc_header}", 97 'type' => "regexp", 98 'reqd' => "no", 99 'deft' => "" } 100 ]; 101 102 push(@$arguments,@$ws_arg); 117 push(@$arguments,@$opt_windows_args); 118 push(@$arguments,@$opt_office_args); 119 } 120 elsif ($openoffice_ext_installed) { 121 push(@$arguments,@$opt_office_args); 103 122 } 104 123 … … 106 125 push(@{$hashArgOptLists->{"OptList"}},$options); 107 126 108 my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 127 my $self = {}; 128 my $outhandle; 129 130 if ($openoffice_ext_installed) { 131 $self = new OpenOfficeConverter($pluginlist, $inputargs, $hashArgOptLists); 132 $outhandle = 
$self->{'outhandle'}; 133 134 if ($self->{'openoffice_conversion_available'}) { 135 print $outhandle "WordPlugin: OpenOffice scripting functionality available\n"; 136 137 # Override default process expression 138 $self->{'process_exp'} = q^(?i)\.(doc|dot|docx|odt)$^; 139 140 if ($self->{'openoffice_scripting'}) { 141 print $outhandle "WordPlugin: Activating OpenOffice scripting functionality\n"; 142 } 143 } 144 else { 145 print $outhandle "WordPlugin: Unable to run 'soffice'\n"; 146 print $outhandle "WordPlugin: Defaulting to ConvertBinaryFile inheritence\n"; 147 } 148 } 149 else { 150 $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 151 $outhandle = $self->{'outhandle'}; 152 } 109 153 110 154 if ($self->{'info_only'}) { … … 116 160 $self->{'file_type'} = "Word"; 117 161 118 #this is passed through to gsConvert.pl by ConvertBinaryFile.pm 119 $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'}; 162 if ($self->{'windows_scripting'}) { 163 $self->{'convert_options'} = "-windows_scripting"; 164 $self->{'office_scripting'} = 1; 165 } 166 if ($self->{'openoffice_scripting'}) { 167 if ($self->{'windows_scripting'}) { 168 print $outhandle "Warning: Cannot have -windows_scripting and -openoffice_scripting\n"; 169 print $outhandle " on at the same time. 
Defaulting to -windows_scripting\n"; 170 } 171 else { 172 $self->{'convert_options'} = "-openoffice_scripting"; 173 $self->{'office_scripting'} = 1; 174 } 175 } 120 176 121 177 # we always save as utf-8 … … 125 181 126 182 my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 127 if (defined $self->{' windows_scripting'}) {183 if (defined $self->{'office_scripting'}) { 128 184 if (!defined $secondary_plugin_options->{'StructuredHTMLPlugin'}){ 129 185 $secondary_plugin_options->{'StructuredHTMLPlugin'} = []; … … 134 190 push (@$structhtml_options, "-metadata_fields","Title,GENERATOR,date,author<Creator>"); 135 191 push (@$structhtml_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 136 push (@$structhtml_options, "-description_tags") if $self->{'windows_scripting'}; 192 push (@$structhtml_options, "-description_tags") if $self->{'windows_scripting'}; # What about openoffice??? 137 193 push (@$structhtml_options, "-extract_language") if $self->{'extract_language'}; 138 194 push (@$structhtml_options, "-delete_toc") if $self->{'delete_toc'}; … … 187 243 } 188 244 245 sub init { 246 my $self = shift (@_); 247 my ($verbosity, $outhandle, $failhandle) = @_; 248 249 $self->SUPER::init($verbosity,$outhandle,$failhandle); 250 } 251 252 sub deinit { 253 # called only once, after all plugin passes have been done 254 my ($self) = @_; 255 256 $self->SUPER::deinit(); 257 } 258 189 259 sub get_default_process_exp { 190 260 my $self = shift (@_);
Note: See TracChangeset for help on using the changeset viewer.