Ignore:
Timestamp:
2010-09-07T12:08:44+12:00 (14 years ago)
Author:
kjdon
Message:

Now uses the new AutoLoadConverters instead of AutoloadConverterScripting. AutoLoadConverters doesn't inherit from ConvertBinaryFile, so these plugins all inherit directly from ConvertBinaryFile again. This means we can initialise the converters and finish modifying the argument list before the arguments are parsed in the call to new ConvertBinaryFile. PowerPointPlugin is incomplete and still needs a lot of work on processing the result of the OpenOffice conversion.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/PowerPointPlugin.pm

    r22709 r22861  
    33# PowerPointPlugin.pm -- plugin for importing Microsoft PowerPoint files.
    44#  (basic version supports versions 95 and 97)
    5 #  (through OpenOffice extension, supports all contempoary formats)
     5#  (through OpenOffice extension, supports all contemporary formats)
    66#
    77# A component of the Greenstone digital library software
     
    3535use gsprintf 'gsprintf';
    3636
    37 use AutoloadConverterScripting;
    38 
    39 @PowerPointPlugin::ISA = ('AutoloadConverterScripting');
    40 
     37use AutoLoadConverters;
     38use ConvertBinaryFile;
     39
     40sub BEGIN {
     41    @PowerPointPlugin::ISA = ('ConvertBinaryFile', 'AutoLoadConverters');
     42}
     43
     44my $openoffice_available = 0;
    4145
    4246my $windows_convert_to_list =
     
    5559      ];
    5660
     61my $openoffice_convert_to_list =
     62    [ { 'name' => "auto",
     63    'desc' => "{ConvertBinaryFile.convert_to.auto}" },
     64      { 'name' => "html",
     65    'desc' => "{PowerPointPlugin.convert_to.oo_html}" },
     66      { 'name' => "text",
     67    'desc' => "{ConvertBinaryFile.convert_to.text}" },
     68      { 'name' => "pagedimg",
     69    'desc' => "{PowerPointPlugin.convert_to.pagedimg}" }
     70      ];
     71
    5772my $arguments =
    5873    [ { 'name' => "process_exp",
     
    6075    'type' => "regexp",
    6176    'reqd' => "no",
    62     'deft' => "&get_default_process_exp(\$self)",  # delayed (see below)
     77    'deft' => "&get_default_process_exp()",  # delayed (see below)
    6378    }
    6479      ];
     
    7792      ];
    7893
     94my $opt_office_args =
     95    [ { 'name' => "convert_to",
     96    'desc' => "{ConvertBinaryFile.convert_to}",
     97    'type' => "enum",
     98    'reqd' => "yes",
     99    'list' => $openoffice_convert_to_list,
     100    'deft' => "html" }
     101      ];
     102
    79103my $options = { 'name'     => "PowerPointPlugin",
    80104        'desc'     => "{PowerPointPlugin.desc}",
     
    89113    push(@$pluginlist, $class);
    90114
     115    # this bit needs to happen later after the arguments array has been
     116    # finished - used for parsing the input args.
     117    # push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     118    # this one needs to go in first, to get the print info in the right order
     119    push(@{$hashArgOptLists->{"OptList"}},$options);
     120
    91121    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
    92122    push(@$arguments,@$opt_windows_args);
    93123    }
    94124
    95     push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    96     push(@{$hashArgOptLists->{"OptList"}},$options);
    97 
    98 
    99     my $self
    100     = new AutoloadConverterScripting("OpenOfficeConverter",
    101                      $pluginlist, $inputargs,
    102                      $hashArgOptLists);
    103 
    104 
    105     # plugin's process_exp can only be correctly determined once autoloading
    106     # has taken place
    107     my $plug_options = $self->{'option_list'}->[0];
    108     my $plug_args = $plug_options->{'args'};
    109    
    110     foreach my $a (@$plug_args) {
    111     # consider changing this to search for all values that are
    112     # tagged as 'deft-delayed' = 1 ?!?
    113 
     125    my $auto_converter_self = new AutoLoadConverters($pluginlist,$inputargs,$hashArgOptLists,["OpenOfficeConverter"],1);
     126
     127    if ($auto_converter_self->{'openoffice_available'}) {
     128    push (@$arguments,@$opt_office_args);
     129    $openoffice_available = 1;
     130    }
     131    # TODO need to do the case where they are both enabled!!! what will the convert to list be???
     132
     133    # evaluate the default for process_exp  - it needs to be delayed till here so we know if openoffice is available or not. But needs to be done before parsing the args.
     134    foreach my $a (@$arguments) {
    114135    if ($a->{'name'} eq "process_exp") {
    115136        my $eval_expr = $a->{'deft'};
    116137        $a->{'deft'} = eval "$eval_expr";
    117 
    118         # Now see if process_exp needs updating
    119         my $process_exp = $self->{'process_exp'};
    120         if (!$self->{'info_only'} && ($process_exp eq $eval_expr)) {
    121         # process_exp is only defined if not 'info_only'
    122         #
    123         # if it does exist and it equals the unevaluated $eval_expr
    124         # then it was set to the default (rather than overriden by
    125         # the collect.cfg file)
    126 
    127         $self->{'process_exp'} = $a->{'deft'};
    128         }
    129     }
    130     }
    131 
    132  
     138        last;
     139    }
     140    }
     141
     142    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     143
     144    my $cbf_self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
     145    my $self = BasePlugin::merge_inheritance($auto_converter_self, $cbf_self);
     146
    133147    if ($self->{'info_only'}) {
    134148    # don't worry about any options etc
     
    136150    }
    137151
     152    $self = bless $self, $class;
    138153    $self->{'filename_extension'} = "ppt";
    139154    $self->{'file_type'} = "PPT";
     
    150165   my $outhandle = $self->{'outhandle'};
    151166
    152     # can't have windows_scripting and openoffice_scripting at the same time
    153     if ($self->{'windows_scripting'} && $self->{'openoffice_scripting'}) {
    154     print $outhandle "Warning: Cannot have -windows_scripting and -openoffice_scripting\n";
     167    # can't have windows_scripting and openoffice_conversion at the same time
     168    if ($self->{'windows_scripting'} && $self->{'openoffice_conversion'}) {
     169    print $outhandle "Warning: Cannot have -windows_scripting and -openoffice_conversion\n";
    155170    print $outhandle "         on at the same time.  Defaulting to -windows_scripting\n";
    156     $self->{'openoffice_scripting'} = 0;
     171    $self->{'openoffice_conversion'} = 0;
    157172    }
    158173   
     
    193208    my $self = shift (@_);
    194209
    195     if ($self->{'scripting_ext_working'}) {
     210    if ($openoffice_available) {
    196211    return q^(?i)\.(ppt|pptx|odp)$^;
    197212    }
     
    200215}
    201216
     217sub init {
     218    my $self = shift (@_);
     219
     220    # ConvertBinaryFile init
     221    $self->SUPER::init(@_);
     222    $self->AutoLoadConverters::init();
     223
     224}
     225
     226sub begin {
     227    my $self = shift (@_);
     228
     229    $self->AutoLoadConverters::begin();
     230    $self->SUPER::begin(@_);
     231
     232}
     233
     234sub deinit {
     235    my $self = shift (@_);
     236   
     237    $self->AutoLoadConverters::deinit();
     238    $self->SUPER::deinit(@_);
     239
     240}
     241
     242# override AutoLoadConverters version, as we need to do more stuff once its converted if we are converting to item file
     243sub tmp_area_convert_file {
     244    my $self = shift (@_);
     245    my ($output_ext, $input_filename, $textref) = @_;
     246
     247    if ($self->{'openoffice_conversion'}) {
     248    if ($self->{'convert_to'} eq "pagedimg") {
     249        $output_ext = "html"; # first convert to html
     250    }
     251    my ($result, $result_str, $new_filename) = $self->OpenOfficeConverter::convert($input_filename, $output_ext);
     252    if ($result == 0) {
     253        my $outhandle=$self->{'outhandle'};
     254        print $outhandle "OpenOfficeConverter Conversion error\n";
     255        print $outhandle $result_str;
     256        return "";
     257
     258    }
     259    #print STDERR "result = $result\n";
     260    if ($self->{'convert_to'} eq "pagedimg") {
     261        #my $item_filename = $self->generate_item_file($new_filename);
     262        #return $item_filename;
     263        return "/research/kjdon/home/gsdl/collect/openoffice/test.item";
     264    }
     265    return $new_filename;
     266
     267    }
     268    else {
     269    return $self->ConvertBinaryFile::tmp_area_convert_file(@_);
     270    }
     271    # get tmp filename
     272}
     273
     274# override default read in some situations, as the conversion of ppt to html results in many files, and we want them all to be processed.
     275sub read_XX {
     276    my $self = shift (@_); 
     277    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     278
     279    # can we process this file??
     280    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
     281
     282    return undef unless $self->can_process_this_file($filename_full_path);
     283   
     284    my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_);
     285   
     286    if ((defined $process_status) && ($process_status == 1)) {
     287   
     288    # process the document
     289    $processor->process($doc_obj);
     290
     291    $self->{'num_processed'} ++;
     292    undef $doc_obj;
     293    }
     294    # delete any temp files that we may have created
     295    $self->clean_up_after_doc_obj_processing();
     296
     297
     298    # if process_status == 1, then the file has been processed.
     299    return $process_status;
     300
     301}
     302
    2023031;
    203304
Note: See TracChangeset for help on using the changeset viewer.