Changeset 22880
- Timestamp: 2010-09-08T12:58:08+12:00 (13 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/plugins/PowerPointPlugin.pm
r22874 r22880 62 62 [ { 'name' => "auto", 63 63 'desc' => "{ConvertBinaryFile.convert_to.auto}" }, 64 { 'name' => "html ",65 'desc' => "{PowerPointPlugin.convert_to. oo_html}" },64 { 'name' => "html_multi", 65 'desc' => "{PowerPointPlugin.convert_to.html_multi}" }, 66 66 { 'name' => "text", 67 67 'desc' => "{ConvertBinaryFile.convert_to.text}" }, … … 274 274 275 275 # override default read in some situations, as the conversion of ppt to html results in many files, and we want them all to be processed. 276 sub read _XX{276 sub read { 277 277 my $self = shift (@_); 278 278 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; … … 283 283 return undef unless $self->can_process_this_file($filename_full_path); 284 284 285 my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_); 286 287 if ((defined $process_status) && ($process_status == 1)) { 285 # we are only doing something special for html_multi 286 if (!($self->{'openoffice_conversion'} && $self->{'convert_to'} eq "html_multi")) { 287 return $self->BasePlugin::read(@_); 288 } 289 my $outhandle = $self->{'outhandle'}; 290 print STDERR "<Processing n='$file' p='$self->{'plugin_type'}'>\n" if ($gli); 291 print $outhandle "$self->{'plugin_type'} processing $file\n" 292 if $self->{'verbosity'} > 1; 293 294 my $conv_filename = $self->tmp_area_convert_file("html", $filename_full_path); 295 if ("$conv_filename" eq "") {return -1;} # had an error, will be passed down pipeline 296 if (! 
-e "$conv_filename") {return -1;} 297 298 my ($tailname, $html_dirname, $suffix) 299 = &File::Basename::fileparse($conv_filename, "\\.[^\\.]+\$"); 300 301 my $collect_file = &util::filename_within_collection($filename_full_path); 302 my $dirname_within_collection = &util::filename_within_collection($html_dirname); 303 my $secondary_plugin = $self->{'secondary_plugins'}->{"HTMLPlugin"}; 304 305 my @dir; 306 if (!opendir (DIR, $html_dirname)) { 307 print $outhandle "PowerPointPlugin: Couldn't read directory $html_dirname\n"; 308 # just process the original file 309 @dir = ("$tailname.$suffix"); 310 311 } else { 312 @dir = readdir (DIR); 313 closedir (DIR); 314 } 315 316 foreach my $file (@dir) { 317 next unless $file =~ /\.html$/; 318 319 my ($rv, $doc_obj) = 320 $secondary_plugin->read_into_doc_obj ($pluginfo,"", &util::filename_cat($html_dirname,$file), $block_hash, {}, $processor, $maxdocs, $total_count, $gli); 321 if ((!defined $rv) || ($rv<1)) { 322 # wasn't processed 323 return $rv; 324 } 325 326 # next block copied from ConvertBinaryFile 327 # from here ... 328 # Override previous gsdlsourcefilename set by secondary plugin 329 330 $doc_obj->set_source_filename ($collect_file, $self->{'file_rename_method'}); 331 ## set_source_filename does not set the doc_obj source_path which is used in archives dbs for incremental 332 # build. so set it manually. 
333 $doc_obj->{'source_path'} = $filename_full_path; 334 $doc_obj->set_converted_filename(&util::filename_cat($dirname_within_collection, $file)); 335 336 $self->set_Source_metadata($doc_obj, $filename_no_path); 337 338 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 339 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename_full_path)); 340 341 342 my ($tailname, $dirname, $suffix) 343 = &File::Basename::fileparse($filename_full_path, "\\.[^\\.]+\$"); 344 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FilenameRoot", $tailname); 345 346 347 my $topsection = $doc_obj->get_top_section(); 348 $self->add_associated_files($doc_obj, $filename_full_path); 349 350 # extra_metadata is already called by sec plugin in process?? 351 $self->extra_metadata($doc_obj, $topsection, $metadata); # do we need this here?? 352 # do any automatic metadata extraction 353 $self->auto_extract_metadata ($doc_obj); 354 355 # have we found a Title?? 356 $self->title_fallback($doc_obj,$topsection,$filename_no_path); 357 358 # use the one generated by HTMLPlugin, otherwise they all end up with same id. 359 #$self->add_OID($doc_obj); 360 # to here... 361 362 # process it 363 $processor->process($doc_obj); 364 undef $doc_obj; 365 } 366 $self->{'num_processed'} ++; 367 368 # my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_); 369 370 # if ((defined $process_status) && ($process_status == 1)) { 288 371 289 372 # process the document 290 $processor->process($doc_obj);291 292 $self->{'num_processed'} ++;293 undef $doc_obj;294 }373 # $processor->process($doc_obj); 374 375 # $self->{'num_processed'} ++; 376 # undef $doc_obj; 377 # } 295 378 # delete any temp files that we may have created 296 379 $self->clean_up_after_doc_obj_processing(); … … 298 381 299 382 # if process_status == 1, then the file has been processed. 300 return $process_status;383 return 1; 301 384 302 385 }
Note: See TracChangeset for help on using the changeset viewer.