Changeset 22880


Ignore:
Timestamp:
2010-09-08T12:58:08+12:00 (14 years ago)
Author:
kjdon
Message:

Implemented the read method for the case where OpenOffice is used to convert to html_multi. The PowerPoint file gets converted to individual HTML files, two per slide: one for the image and one for the text. Each one is passed to HTMLPlugin for processing, so all the slides end up as individual documents, but the first page, back, continue, etc. links still work to link them all together.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/PowerPointPlugin.pm

    r22874 r22880  
    6262    [ { 'name' => "auto",
    6363    'desc' => "{ConvertBinaryFile.convert_to.auto}" },
    64       { 'name' => "html",
    65     'desc' => "{PowerPointPlugin.convert_to.oo_html}" },
     64      { 'name' => "html_multi",
     65    'desc' => "{PowerPointPlugin.convert_to.html_multi}" },
    6666      { 'name' => "text",
    6767    'desc' => "{ConvertBinaryFile.convert_to.text}" },
     
    274274
    275275# override default read in some situations, as the conversion of ppt to html results in many files, and we want them all to be processed.
    276 sub read_XX {
     276sub read {
    277277    my $self = shift (@_); 
    278278    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     
    283283    return undef unless $self->can_process_this_file($filename_full_path);
    284284   
    285     my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_);
    286    
    287     if ((defined $process_status) && ($process_status == 1)) {
     285    # we are only doing something special for html_multi
     286    if (!($self->{'openoffice_conversion'} && $self->{'convert_to'} eq "html_multi")) {
     287    return $self->BasePlugin::read(@_);
     288    }
     289    my $outhandle = $self->{'outhandle'};
     290    print STDERR "<Processing n='$file' p='$self->{'plugin_type'}'>\n" if ($gli);
     291    print $outhandle "$self->{'plugin_type'} processing $file\n"
     292        if $self->{'verbosity'} > 1;
     293
     294    my $conv_filename = $self->tmp_area_convert_file("html", $filename_full_path);
     295    if ("$conv_filename" eq "") {return -1;} # had an error, will be passed down pipeline
     296    if (! -e "$conv_filename") {return -1;}
     297
     298    my ($tailname, $html_dirname, $suffix)
     299    = &File::Basename::fileparse($conv_filename, "\\.[^\\.]+\$");
     300
     301    my $collect_file = &util::filename_within_collection($filename_full_path);
     302    my $dirname_within_collection = &util::filename_within_collection($html_dirname);
     303    my $secondary_plugin = $self->{'secondary_plugins'}->{"HTMLPlugin"};
     304
     305    my @dir;
     306    if (!opendir (DIR, $html_dirname)) {
     307    print $outhandle "PowerPointPlugin: Couldn't read directory $html_dirname\n";
     308    # just process the original file
     309    @dir = ("$tailname.$suffix");
     310   
     311    } else {
     312    @dir = readdir (DIR);
     313    closedir (DIR);
     314    }
     315
     316    foreach my $file (@dir) {
     317    next unless $file =~ /\.html$/;
     318   
     319    my ($rv, $doc_obj) =
     320        $secondary_plugin->read_into_doc_obj ($pluginfo,"", &util::filename_cat($html_dirname,$file), $block_hash, {}, $processor, $maxdocs, $total_count, $gli);
     321    if ((!defined $rv) || ($rv<1)) {
     322        # wasn't processed
     323        return $rv;
     324    }
     325
     326    # next block copied from ConvertBinaryFile
     327    # from here ...
     328    # Override previous gsdlsourcefilename set by secondary plugin
     329   
     330    $doc_obj->set_source_filename ($collect_file, $self->{'file_rename_method'});
     331    ## set_source_filename does not set the doc_obj source_path which is used in archives dbs for incremental
     332    # build. so set it manually.
     333    $doc_obj->{'source_path'} = $filename_full_path;
     334    $doc_obj->set_converted_filename(&util::filename_cat($dirname_within_collection, $file));
     335   
     336    $self->set_Source_metadata($doc_obj, $filename_no_path);
     337       
     338    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
     339    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename_full_path));
     340
     341   
     342    my ($tailname, $dirname, $suffix)
     343        = &File::Basename::fileparse($filename_full_path, "\\.[^\\.]+\$");
     344    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FilenameRoot", $tailname);
     345   
     346
     347    my $topsection = $doc_obj->get_top_section();
     348    $self->add_associated_files($doc_obj, $filename_full_path);
     349   
     350    # extra_metadata is already called by sec plugin in process??
     351    $self->extra_metadata($doc_obj, $topsection, $metadata); # do we need this here??
     352    # do any automatic metadata extraction
     353    $self->auto_extract_metadata ($doc_obj);
     354   
     355    # have we found a Title??
     356    $self->title_fallback($doc_obj,$topsection,$filename_no_path);
     357   
     358    # use the one generated by HTMLPlugin, otherwise they all end up with same id.
     359    #$self->add_OID($doc_obj);
     360    # to here...
     361
     362    # process it
     363    $processor->process($doc_obj);
     364    undef $doc_obj;
     365    }
     366    $self->{'num_processed'} ++;
     367
     368#    my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_);
     369   
     370#    if ((defined $process_status) && ($process_status == 1)) {
    288371   
    289372    # process the document
    290     $processor->process($doc_obj);
    291 
    292     $self->{'num_processed'} ++;
    293     undef $doc_obj;
    294     }
     373#   $processor->process($doc_obj);
     374
     375#   $self->{'num_processed'} ++;
     376#   undef $doc_obj;
     377#    }
    295378    # delete any temp files that we may have created
    296379    $self->clean_up_after_doc_obj_processing();
     
    298381
    299382    # if process_status == 1, then the file has been processed.
    300     return $process_status;
     383    return 1;
    301384
    302385}
Note: See TracChangeset for help on using the changeset viewer.