Changeset 22880

Show
Ignore:
Timestamp:
08.09.2010 12:58:08 (9 years ago)
Author:
kjdon
Message:

Implemented the read method for the case where OpenOffice is used to convert to html_multi. The PowerPoint file gets converted to individual HTML files, two per slide: one for the image and one for the text. Each one gets passed to HTMLPlugin for processing, so all the slides end up as individual documents, but the first page, back, continue, etc. links still work to link them all together.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/PowerPointPlugin.pm

    r22874 r22880  
    6262    [ { 'name' => "auto", 
    6363    'desc' => "{ConvertBinaryFile.convert_to.auto}" }, 
    64       { 'name' => "html", 
    65     'desc' => "{PowerPointPlugin.convert_to.oo_html}" }, 
     64      { 'name' => "html_multi", 
     65    'desc' => "{PowerPointPlugin.convert_to.html_multi}" }, 
    6666      { 'name' => "text", 
    6767    'desc' => "{ConvertBinaryFile.convert_to.text}" }, 
     
    274274 
    275275# override default read in some situations, as the conversion of ppt to html results in many files, and we want them all to be processed. 
    276 sub read_XX { 
     276sub read { 
    277277    my $self = shift (@_);   
    278278    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     
    283283    return undef unless $self->can_process_this_file($filename_full_path); 
    284284     
    285     my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_); 
    286      
    287     if ((defined $process_status) && ($process_status == 1)) { 
     285    # we are only doing something special for html_multi 
     286    if (!($self->{'openoffice_conversion'} && $self->{'convert_to'} eq "html_multi")) { 
     287    return $self->BasePlugin::read(@_); 
     288    } 
     289    my $outhandle = $self->{'outhandle'}; 
     290    print STDERR "<Processing n='$file' p='$self->{'plugin_type'}'>\n" if ($gli); 
     291    print $outhandle "$self->{'plugin_type'} processing $file\n" 
     292        if $self->{'verbosity'} > 1; 
     293 
     294    my $conv_filename = $self->tmp_area_convert_file("html", $filename_full_path); 
     295    if ("$conv_filename" eq "") {return -1;} # had an error, will be passed down pipeline  
     296    if (! -e "$conv_filename") {return -1;}  
     297 
     298    my ($tailname, $html_dirname, $suffix) 
     299    = &File::Basename::fileparse($conv_filename, "\\.[^\\.]+\$"); 
     300 
     301    my $collect_file = &util::filename_within_collection($filename_full_path); 
     302    my $dirname_within_collection = &util::filename_within_collection($html_dirname); 
     303    my $secondary_plugin = $self->{'secondary_plugins'}->{"HTMLPlugin"}; 
     304 
     305    my @dir; 
     306    if (!opendir (DIR, $html_dirname)) { 
     307    print $outhandle "PowerPointPlugin: Couldn't read directory $html_dirname\n"; 
     308    # just process the original file 
     309    @dir = ("$tailname.$suffix"); 
     310     
     311    } else { 
     312    @dir = readdir (DIR); 
     313    closedir (DIR); 
     314    } 
     315 
     316    foreach my $file (@dir) { 
     317    next unless $file =~ /\.html$/; 
     318     
     319    my ($rv, $doc_obj) =  
     320        $secondary_plugin->read_into_doc_obj ($pluginfo,"", &util::filename_cat($html_dirname,$file), $block_hash, {}, $processor, $maxdocs, $total_count, $gli); 
     321    if ((!defined $rv) || ($rv<1)) { 
     322        # wasn't processed 
     323        return $rv; 
     324    } 
     325 
     326    # next block copied from ConvertBinaryFile 
     327    # from here ... 
     328    # Override previous gsdlsourcefilename set by secondary plugin 
     329     
     330    $doc_obj->set_source_filename ($collect_file, $self->{'file_rename_method'});  
     331    ## set_source_filename does not set the doc_obj source_path which is used in archives dbs for incremental 
     332    # build. so set it manually. 
     333    $doc_obj->{'source_path'} = $filename_full_path; 
     334    $doc_obj->set_converted_filename(&util::filename_cat($dirname_within_collection, $file)); 
     335     
     336    $self->set_Source_metadata($doc_obj, $filename_no_path); 
     337         
     338    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 
     339    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename_full_path)); 
     340 
     341     
     342    my ($tailname, $dirname, $suffix) 
     343        = &File::Basename::fileparse($filename_full_path, "\\.[^\\.]+\$"); 
     344    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FilenameRoot", $tailname); 
     345     
     346 
     347    my $topsection = $doc_obj->get_top_section(); 
     348    $self->add_associated_files($doc_obj, $filename_full_path); 
     349     
     350    # extra_metadata is already called by sec plugin in process?? 
     351    $self->extra_metadata($doc_obj, $topsection, $metadata); # do we need this here?? 
     352    # do any automatic metadata extraction 
     353    $self->auto_extract_metadata ($doc_obj); 
     354     
     355    # have we found a Title?? 
     356    $self->title_fallback($doc_obj,$topsection,$filename_no_path); 
     357     
     358    # use the one generated by HTMLPlugin, otherwise they all end up with same id. 
     359    #$self->add_OID($doc_obj); 
     360    # to here... 
     361 
     362    # process it 
     363    $processor->process($doc_obj); 
     364    undef $doc_obj; 
     365    } 
     366    $self->{'num_processed'} ++; 
     367 
     368#    my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_); 
     369     
     370#    if ((defined $process_status) && ($process_status == 1)) { 
    288371     
    289372    # process the document 
    290     $processor->process($doc_obj); 
    291  
    292     $self->{'num_processed'} ++; 
    293     undef $doc_obj;  
    294     } 
     373#   $processor->process($doc_obj); 
     374 
     375#   $self->{'num_processed'} ++; 
     376#   undef $doc_obj;  
     377#    } 
    295378    # delete any temp files that we may have created 
    296379    $self->clean_up_after_doc_obj_processing(); 
     
    298381 
    299382    # if process_status == 1, then the file has been processed. 
    300     return $process_status; 
     383    return 1; 
    301384 
    302385}