Changeset 22861
- Timestamp:
- 2010-09-07T12:08:44+12:00 (13 years ago)
- Location:
- main/trunk/greenstone2/perllib/plugins
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/plugins/ExcelPlugin.pm
r22709 r22861 34 34 use gsprintf 'gsprintf'; 35 35 36 use AutoloadConverterScripting; 36 use AutoLoadConverters; 37 use ConvertBinaryFile; 37 38 38 @ExcelPlugin::ISA = ('AutoloadConverterScripting'); 39 sub BEGIN { 40 @ExcelPlugin::ISA = ('ConvertBinaryFile', 'AutoLoadConverters'); 41 } 39 42 43 my $openoffice_available = 0; 40 44 41 45 my $arguments = … … 44 48 'type' => "regexp", 45 49 'reqd' => "no", 46 'deft' => "&get_default_process_exp( \$self)" # delayed (see below)50 'deft' => "&get_default_process_exp()" # delayed (see below) 47 51 } 48 52 ]; … … 60 64 push(@$pluginlist, $class); 61 65 62 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 66 # this bit needs to happen later after the arguments array has been 67 # finished - used for parsing the input args. 68 # push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 69 # this one needs to go in first, to get the print info in the right order 63 70 push(@{$hashArgOptLists->{"OptList"}},$options); 64 71 65 my $self 66 = new AutoloadConverterScripting("OpenOfficeConverter",$pluginlist, 67 $inputargs, $hashArgOptLists); 72 my $auto_converter_self = new AutoLoadConverters($pluginlist,$inputargs,$hashArgOptLists,["OpenOfficeConverter"],1); 68 73 69 # plugin's process_exp can only be correctly determined once autoloading 70 # has taken place 71 my $plug_options = $self->{'option_list'}->[0]; 72 my $plug_args = $plug_options->{'args'}; 73 74 foreach my $a (@$plug_args) { 75 # consider changing this to search for all values that are 76 # tagged as 'deft-delayed' = 1 ?!? 77 74 # evaluate the default for process_exp - it needs to be delayed till here so we know if openoffice is available or not. But needs to be done before parsing the args. 
75 foreach my $a (@$arguments) { 78 76 if ($a->{'name'} eq "process_exp") { 79 77 my $eval_expr = $a->{'deft'}; 80 78 $a->{'deft'} = eval "$eval_expr"; 81 82 # Now see if process_exp needs updating 83 my $process_exp = $self->{'process_exp'}; 84 if (!$self->{'info_only'} && ($process_exp eq $eval_expr)) { 85 # process_exp is only defined if not 'info_only' 86 # 87 # if it does exist and it equals the unevaluated $eval_expr 88 # then it was set to the default (rather than overriden by 89 # the collect.cfg file) 90 91 $self->{'process_exp'} = $a->{'deft'}; 92 } 79 last; 93 80 } 94 81 } 82 83 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 84 my $cbf_self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 85 my $self = BasePlugin::merge_inheritance($auto_converter_self, $cbf_self); 86 95 87 96 88 if ($self->{'info_only'}) { … … 99 91 } 100 92 93 $self = bless $self, $class; 101 94 $self->{'filename_extension'} = "xls"; 102 95 $self->{'file_type'} = "Excel"; … … 109 102 } 110 103 111 $self = bless $self, $class;112 104 # set convert_to_plugin and convert_to_ext 113 105 $self->set_standard_convert_settings(); … … 133 125 134 126 127 sub get_default_process_exp { 128 my $self = shift (@_); 129 130 if ($openoffice_available) { 131 return q^(?i)\.(xls|xlsx|ods)$^; 132 } 133 134 return q^(?i)\.xls$^; 135 } 136 137 sub init { 138 my $self = shift (@_); 139 140 # ConvertBinaryFile init 141 $self->SUPER::init(@_); 142 $self->AutoLoadConverters::init(); 143 144 } 145 146 sub begin { 147 my $self = shift (@_); 148 149 $self->AutoLoadConverters::begin(); 150 $self->SUPER::begin(@_); 151 152 } 153 154 sub deinit { 155 my $self = shift (@_); 156 157 $self->AutoLoadConverters::deinit(); 158 $self->SUPER::deinit(@_); 159 160 } 161 162 sub tmp_area_convert_file { 163 164 my $self = shift (@_); 165 return $self->AutoLoadConverters::tmp_area_convert_file(@_); 166 167 } 168 135 169 sub convert_post_process_old 136 170 { … … 153 187 } 154 188 155 sub 
get_default_process_exp { 156 my $self = shift (@_); 157 158 if ($self->{'scripting_ext_working'}) { 159 return q^(?i)\.(xls|xlsx|ods)$^; 160 } 161 162 return q^(?i)\.xls$^; 163 } 164 165 166 189 1; -
main/trunk/greenstone2/perllib/plugins/PDFPlugin.pm
r22705 r22861 31 31 use unicode; 32 32 33 use AutoloadConverterScripting; 34 35 @PDFPlugin::ISA = ('AutoloadConverterScripting', 'ReadTextFile'); 33 use AutoLoadConverters; 34 35 @PDFPlugin::ISA = ('ConvertBinaryFile', 'AutoLoadConverters', 'ReadTextFile'); 36 36 37 37 … … 120 120 push(@{$hashArgOptLists->{"OptList"}},$options); 121 121 122 my $self = new AutoloadConverterScripting("PDFBoxConverter",$pluginlist, $inputargs, $hashArgOptLists); 122 my $auto_converter_self = new AutoLoadConverters($pluginlist,$inputargs,$hashArgOptLists,["PDFBoxConverter"],1); 123 my $cbf_self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 124 my $self = BasePlugin::merge_inheritance($auto_converter_self, $cbf_self); 123 125 124 126 if ($self->{'info_only'}) { … … 126 128 return bless $self, $class; 127 129 } 128 130 131 $self = bless $self, $class; 129 132 $self->{'filename_extension'} = "pdf"; 130 133 $self->{'file_type'} = "PDF"; … … 169 172 170 173 if ($secondary_plugin_name eq "HTMLPlugin") { 171 # pdftohtml always produces utf8 174 # pdftohtml always produces utf8 - What about pdfbox??? 172 175 push(@$specific_options, "-input_encoding", "utf8"); 173 176 push(@$specific_options, "-extract_language") if $self->{'extract_language'}; … … 209 212 return ""; 210 213 } 211 214 215 sub init { 216 my $self = shift (@_); 217 218 # ConvertBinaryFile init 219 $self->SUPER::init(@_); 220 $self->AutoLoadConverters::init(); 221 222 } 223 224 sub begin { 225 my $self = shift (@_); 226 227 $self->AutoLoadConverters::begin(); 228 $self->SUPER::begin(@_); 229 230 } 231 232 sub deinit { 233 my $self = shift (@_); 234 235 $self->AutoLoadConverters::deinit(); 236 $self->SUPER::deinit(@_); 237 238 } 239 240 241 sub tmp_area_convert_file { 242 243 my $self = shift (@_); 244 return $self->AutoLoadConverters::tmp_area_convert_file(@_); 245 246 } 247 212 248 sub convert_post_process 213 249 { -
main/trunk/greenstone2/perllib/plugins/PowerPointPlugin.pm
r22709 r22861 3 3 # PowerPointPlugin.pm -- plugin for importing Microsoft PowerPoint files. 4 4 # (basic version supports versions 95 and 97) 5 # (through OpenOffice extension, supports all contempo ary formats)5 # (through OpenOffice extension, supports all contemporary formats) 6 6 # 7 7 # A component of the Greenstone digital library software … … 35 35 use gsprintf 'gsprintf'; 36 36 37 use AutoloadConverterScripting; 38 39 @PowerPointPlugin::ISA = ('AutoloadConverterScripting'); 40 37 use AutoLoadConverters; 38 use ConvertBinaryFile; 39 40 sub BEGIN { 41 @PowerPointPlugin::ISA = ('ConvertBinaryFile', 'AutoLoadConverters'); 42 } 43 44 my $openoffice_available = 0; 41 45 42 46 my $windows_convert_to_list = … … 55 59 ]; 56 60 61 my $openoffice_convert_to_list = 62 [ { 'name' => "auto", 63 'desc' => "{ConvertBinaryFile.convert_to.auto}" }, 64 { 'name' => "html", 65 'desc' => "{PowerPointPlugin.convert_to.oo_html}" }, 66 { 'name' => "text", 67 'desc' => "{ConvertBinaryFile.convert_to.text}" }, 68 { 'name' => "pagedimg", 69 'desc' => "{PowerPointPlugin.convert_to.pagedimg}" } 70 ]; 71 57 72 my $arguments = 58 73 [ { 'name' => "process_exp", … … 60 75 'type' => "regexp", 61 76 'reqd' => "no", 62 'deft' => "&get_default_process_exp( \$self)", # delayed (see below)77 'deft' => "&get_default_process_exp()", # delayed (see below) 63 78 } 64 79 ]; … … 77 92 ]; 78 93 94 my $opt_office_args = 95 [ { 'name' => "convert_to", 96 'desc' => "{ConvertBinaryFile.convert_to}", 97 'type' => "enum", 98 'reqd' => "yes", 99 'list' => $openoffice_convert_to_list, 100 'deft' => "html" } 101 ]; 102 79 103 my $options = { 'name' => "PowerPointPlugin", 80 104 'desc' => "{PowerPointPlugin.desc}", … … 89 113 push(@$pluginlist, $class); 90 114 115 # this bit needs to happen later after the arguments array has been 116 # finished - used for parsing the input args. 
117 # push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 118 # this one needs to go in first, to get the print info in the right order 119 push(@{$hashArgOptLists->{"OptList"}},$options); 120 91 121 if ($ENV{'GSDLOS'} =~ m/^windows$/i) { 92 122 push(@$arguments,@$opt_windows_args); 93 123 } 94 124 95 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 96 push(@{$hashArgOptLists->{"OptList"}},$options); 97 98 99 my $self 100 = new AutoloadConverterScripting("OpenOfficeConverter", 101 $pluginlist, $inputargs, 102 $hashArgOptLists); 103 104 105 # plugin's process_exp can only be correctly determined once autoloading 106 # has taken place 107 my $plug_options = $self->{'option_list'}->[0]; 108 my $plug_args = $plug_options->{'args'}; 109 110 foreach my $a (@$plug_args) { 111 # consider changing this to search for all values that are 112 # tagged as 'deft-delayed' = 1 ?!? 113 125 my $auto_converter_self = new AutoLoadConverters($pluginlist,$inputargs,$hashArgOptLists,["OpenOfficeConverter"],1); 126 127 if ($auto_converter_self->{'openoffice_available'}) { 128 push (@$arguments,@$opt_office_args); 129 $openoffice_available = 1; 130 } 131 # TODO need to do the case where they are both enabled!!! what will the convert to list be??? 132 133 # evaluate the default for process_exp - it needs to be delayed till here so we know if openoffice is available or not. But needs to be done before parsing the args. 
134 foreach my $a (@$arguments) { 114 135 if ($a->{'name'} eq "process_exp") { 115 136 my $eval_expr = $a->{'deft'}; 116 137 $a->{'deft'} = eval "$eval_expr"; 117 118 # Now see if process_exp needs updating 119 my $process_exp = $self->{'process_exp'}; 120 if (!$self->{'info_only'} && ($process_exp eq $eval_expr)) { 121 # process_exp is only defined if not 'info_only' 122 # 123 # if it does exist and it equals the unevaluated $eval_expr 124 # then it was set to the default (rather than overriden by 125 # the collect.cfg file) 126 127 $self->{'process_exp'} = $a->{'deft'}; 128 } 129 } 130 } 131 132 138 last; 139 } 140 } 141 142 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 143 144 my $cbf_self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 145 my $self = BasePlugin::merge_inheritance($auto_converter_self, $cbf_self); 146 133 147 if ($self->{'info_only'}) { 134 148 # don't worry about any options etc … … 136 150 } 137 151 152 $self = bless $self, $class; 138 153 $self->{'filename_extension'} = "ppt"; 139 154 $self->{'file_type'} = "PPT"; … … 150 165 my $outhandle = $self->{'outhandle'}; 151 166 152 # can't have windows_scripting and openoffice_ scriptingat the same time153 if ($self->{'windows_scripting'} && $self->{'openoffice_ scripting'}) {154 print $outhandle "Warning: Cannot have -windows_scripting and -openoffice_ scripting\n";167 # can't have windows_scripting and openoffice_conversion at the same time 168 if ($self->{'windows_scripting'} && $self->{'openoffice_conversion'}) { 169 print $outhandle "Warning: Cannot have -windows_scripting and -openoffice_conversion\n"; 155 170 print $outhandle " on at the same time. 
Defaulting to -windows_scripting\n"; 156 $self->{'openoffice_ scripting'} = 0;171 $self->{'openoffice_conversion'} = 0; 157 172 } 158 173 … … 193 208 my $self = shift (@_); 194 209 195 if ($ self->{'scripting_ext_working'}) {210 if ($openoffice_available) { 196 211 return q^(?i)\.(ppt|pptx|odp)$^; 197 212 } … … 200 215 } 201 216 217 sub init { 218 my $self = shift (@_); 219 220 # ConvertBinaryFile init 221 $self->SUPER::init(@_); 222 $self->AutoLoadConverters::init(); 223 224 } 225 226 sub begin { 227 my $self = shift (@_); 228 229 $self->AutoLoadConverters::begin(); 230 $self->SUPER::begin(@_); 231 232 } 233 234 sub deinit { 235 my $self = shift (@_); 236 237 $self->AutoLoadConverters::deinit(); 238 $self->SUPER::deinit(@_); 239 240 } 241 242 # override AutoLoadConverters version, as we need to do more stuff once its converted if we are converting to item file 243 sub tmp_area_convert_file { 244 my $self = shift (@_); 245 my ($output_ext, $input_filename, $textref) = @_; 246 247 if ($self->{'openoffice_conversion'}) { 248 if ($self->{'convert_to'} eq "pagedimg") { 249 $output_ext = "html"; # first convert to html 250 } 251 my ($result, $result_str, $new_filename) = $self->OpenOfficeConverter::convert($input_filename, $output_ext); 252 if ($result == 0) { 253 my $outhandle=$self->{'outhandle'}; 254 print $outhandle "OpenOfficeConverter Conversion error\n"; 255 print $outhandle $result_str; 256 return ""; 257 258 } 259 #print STDERR "result = $result\n"; 260 if ($self->{'convert_to'} eq "pagedimg") { 261 #my $item_filename = $self->generate_item_file($new_filename); 262 #return $item_filename; 263 return "/research/kjdon/home/gsdl/collect/openoffice/test.item"; 264 } 265 return $new_filename; 266 267 } 268 else { 269 return $self->ConvertBinaryFile::tmp_area_convert_file(@_); 270 } 271 # get tmp filename 272 } 273 274 # override default read in some situations, as the conversion of ppt to html results in many files, and we want them all to be processed. 
275 sub read_XX { 276 my $self = shift (@_); 277 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 278 279 # can we process this file?? 280 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file); 281 282 return undef unless $self->can_process_this_file($filename_full_path); 283 284 my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_); 285 286 if ((defined $process_status) && ($process_status == 1)) { 287 288 # process the document 289 $processor->process($doc_obj); 290 291 $self->{'num_processed'} ++; 292 undef $doc_obj; 293 } 294 # delete any temp files that we may have created 295 $self->clean_up_after_doc_obj_processing(); 296 297 298 # if process_status == 1, then the file has been processed. 299 return $process_status; 300 301 } 302 202 303 1; 203 304 -
main/trunk/greenstone2/perllib/plugins/WordPlugin.pm
r22709 r22861 31 31 use gsprintf 'gsprintf'; 32 32 33 use AutoloadConverterScripting; 34 35 @WordPlugin::ISA = ('AutoloadConverterScripting'); 36 33 use AutoLoadConverters; 34 use ConvertBinaryFile; 35 36 sub BEGIN { 37 @WordPlugin::ISA = ('ConvertBinaryFile', 'AutoLoadConverters'); 38 } 39 40 my $openoffice_available = 0; 37 41 38 42 my $arguments = … … 40 44 'desc' => "{BasePlugin.process_exp}", 41 45 'type' => "regexp", 42 'deft' => "&get_default_process_exp( \$self)",# delayed (see below)46 'deft' => "&get_default_process_exp()", # delayed (see below) 43 47 'reqd' => "no" }, 44 48 { 'name' => "description_tags", … … 51 55 'desc' => "{WordPlugin.windows_scripting}", 52 56 'type' => "flag", 57 53 58 'reqd' => "no" } ]; 54 59 … … 100 105 push(@$pluginlist, $class); 101 106 107 # this bit needs to happen later after the arguments array has been 108 # finished - used for parsing the input args. 109 # push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 110 # this one needs to go in first, to get the print info in the right order 111 push(@{$hashArgOptLists->{"OptList"}},$options); 112 102 113 my $office_capable = 0; 103 114 if ($ENV{'GSDLOS'} =~ m/^windows$/i) { 104 115 push(@$arguments,@$opt_windows_args); 105 116 $office_capable = 1; 106 } 107 if ($AutoloadConverterScripting::openoffice_ext_working) { 117 } 118 119 my $auto_converter_self = new AutoLoadConverters($pluginlist,$inputargs,$hashArgOptLists,["OpenOfficeConverter"],1); 120 121 if ($auto_converter_self->{'openoffice_available'}) { 108 122 $office_capable = 1; 109 } 110 # these office args apply to windows scripting or to openoffice scripting 123 $openoffice_available = 1; 124 } 125 126 # these office args apply to windows scripting or to openoffice conversion 111 127 if ($office_capable) { 112 128 push(@$arguments,@$opt_office_args); 113 129 } 114 130 115 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 116 push(@{$hashArgOptLists->{"OptList"}},$options); 117 118 my $self 119 = new 
AutoloadConverterScripting("OpenOfficeConverter",$pluginlist, 120 $inputargs, $hashArgOptLists); 121 122 # plugin's process_exp can only be correctly determined once autoloading 123 # has taken place 124 my $plug_options = $self->{'option_list'}->[0]; 125 my $plug_args = $plug_options->{'args'}; 126 127 foreach my $a (@$plug_args) { 128 # consider changing this to search for all values that are 129 # tagged as 'deft-delayed' = 1 ?!? 130 131 # evaluate the default for process_exp - it needs to be delayed till here so we know if openoffice is available or not. But needs to be done before parsing the args. 132 foreach my $a (@$arguments) { 131 133 if ($a->{'name'} eq "process_exp") { 132 134 my $eval_expr = $a->{'deft'}; 133 135 $a->{'deft'} = eval "$eval_expr"; 134 135 # Now see if process_exp needs updating 136 my $process_exp = $self->{'process_exp'}; 137 if (!$self->{'info_only'} && ($process_exp eq $eval_expr)) { 138 # process_exp is only defined if not 'info_only' 139 # 140 # if it does exist and it equals the unevaluated $eval_expr 141 # then it was set to the default (rather than overriden by 142 # the collect.cfg file) 143 144 $self->{'process_exp'} = $a->{'deft'}; 145 } 146 } 147 } 148 136 last; 137 } 138 } 139 140 # have finished modifying our arguments, add them to ArgList 141 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 142 143 my $cbf_self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 144 my $self = BasePlugin::merge_inheritance($auto_converter_self, $cbf_self); 149 145 150 146 if ($self->{'info_only'}) { … … 153 149 } 154 150 151 $self = bless $self, $class; 155 152 $self->{'filename_extension'} = "doc"; 156 153 $self->{'file_type'} = "Word"; … … 162 159 $self->{'office_scripting'} = 1; 163 160 } 164 if ($self->{'openoffice_ scripting'}) {161 if ($self->{'openoffice_conversion'}) { 165 162 if ($self->{'windows_scripting'}) { 166 print $outhandle "Warning: Cannot have -windows_scripting and -openoffice_ scripting\n";163 
print $outhandle "Warning: Cannot have -windows_scripting and -openoffice_conversion\n"; 167 164 print $outhandle " on at the same time. Defaulting to -windows_scripting\n"; 168 $self->{'openoffice_scripting'} = 0; 165 $self->{'openoffice_conversion'} = 0; 169 166 } 170 167 else { … … 234 231 $self->load_secondary_plugins($class,$secondary_plugin_options,$hashArgOptLists); 235 232 236 return bless $self, $class; 233 return $self; 237 234 } 238 235 239 236 sub get_default_process_exp { 240 237 my $self = shift (@_); 241 if ($self->{'scripting_ext_working'}) { 238 239 if ($openoffice_available) { 242 240 return q^(?i)\.(doc|dot|docx|odt)$^; 243 241 } 244 242 return q^(?i)\.(doc|dot)$^; 245 243 } 244 245 sub init { 246 my $self = shift (@_); 247 248 # ConvertBinaryFile init 249 $self->SUPER::init(@_); 250 $self->AutoLoadConverters::init(); 251 252 } 253 254 sub begin { 255 my $self = shift (@_); 256 257 $self->AutoLoadConverters::begin(); 258 $self->SUPER::begin(@_); 259 260 } 261 262 sub deinit { 263 my $self = shift (@_); 264 265 $self->AutoLoadConverters::deinit(); 266 $self->SUPER::deinit(@_); 267 268 } 269 270 sub tmp_area_convert_file { 271 272 my $self = shift (@_); 273 return $self->AutoLoadConverters::tmp_area_convert_file(@_); 274 275 } 276 246 277 247 278 sub convert_post_process_old
Note: See TracChangeset for help on using the changeset viewer.