Changeset 22861


Ignore:
Timestamp:
2010-09-07T12:08:44+12:00 (14 years ago)
Author:
kjdon
Message:

Now uses the new AutoLoadConverters instead of AutoloadConverterScripting. AutoLoadConverters doesn't inherit from ConvertBinaryFile, so these plugins all inherit from ConvertBinaryFile directly again. This lets us initialise the converters and fix up the modifications to the arguments before they are parsed when we do new ConvertBinaryFile. PowerPointPlugin is incomplete and still needs lots of work on processing the result of the OpenOffice conversion.

Location:
main/trunk/greenstone2/perllib/plugins
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/ExcelPlugin.pm

    r22709 r22861  
    3434use gsprintf 'gsprintf';
    3535
    36 use AutoloadConverterScripting;
     36use AutoLoadConverters;
     37use ConvertBinaryFile;
    3738
    38 @ExcelPlugin::ISA = ('AutoloadConverterScripting');
     39sub BEGIN {
     40    @ExcelPlugin::ISA = ('ConvertBinaryFile', 'AutoLoadConverters');
     41}
    3942
     43my $openoffice_available = 0;
    4044
    4145my $arguments =
     
    4448    'type' => "regexp",
    4549    'reqd' => "no",
    46     'deft' => "&get_default_process_exp(\$self)"  # delayed (see below)
     50    'deft' => "&get_default_process_exp()"  # delayed (see below)
    4751    }
    4852      ];
     
    6064    push(@$pluginlist, $class);
    6165   
    62     push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     66    # this bit needs to happen later after the arguments array has been
     67    # finished - used for parsing the input args.
     68    # push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     69    # this one needs to go in first, to get the print info in the right order
    6370    push(@{$hashArgOptLists->{"OptList"}},$options);
    6471
    65     my $self
    66     = new AutoloadConverterScripting("OpenOfficeConverter",$pluginlist,
    67                      $inputargs, $hashArgOptLists);
     72    my $auto_converter_self = new AutoLoadConverters($pluginlist,$inputargs,$hashArgOptLists,["OpenOfficeConverter"],1);
    6873
    69     # plugin's process_exp can only be correctly determined once autoloading
    70     # has taken place
    71     my $plug_options = $self->{'option_list'}->[0];
    72     my $plug_args = $plug_options->{'args'};
    73    
    74     foreach my $a (@$plug_args) {
    75     # consider changing this to search for all values that are
    76     # tagged as 'deft-delayed' = 1 ?!?
    77 
     74    # evaluate the default for process_exp  - it needs to be delayed till here so we know if openoffice is available or not. But needs to be done before parsing the args.
     75    foreach my $a (@$arguments) {
    7876    if ($a->{'name'} eq "process_exp") {
    7977        my $eval_expr = $a->{'deft'};
    8078        $a->{'deft'} = eval "$eval_expr";
    81 
    82         # Now see if process_exp needs updating
    83         my $process_exp = $self->{'process_exp'};
    84         if (!$self->{'info_only'} && ($process_exp eq $eval_expr)) {
    85         # process_exp is only defined if not 'info_only'
    86         #
    87         # if it does exist and it equals the unevaluated $eval_expr
    88         # then it was set to the default (rather than overriden by
    89         # the collect.cfg file)
    90 
    91         $self->{'process_exp'} = $a->{'deft'};
    92         }
     79        last;
    9380    }
    9481    }
     82
     83    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     84    my $cbf_self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
     85    my $self = BasePlugin::merge_inheritance($auto_converter_self, $cbf_self);
     86
    9587   
    9688    if ($self->{'info_only'}) {
     
    9991    }
    10092
     93    $self = bless $self, $class;
    10194    $self->{'filename_extension'} = "xls";
    10295    $self->{'file_type'} = "Excel";
     
    109102    }
    110103
    111     $self = bless $self, $class;
    112104    # set convert_to_plugin and convert_to_ext
    113105    $self->set_standard_convert_settings();
     
    133125
    134126
     127sub get_default_process_exp {
     128    my $self = shift (@_);
     129
     130    if ($openoffice_available) {
     131    return q^(?i)\.(xls|xlsx|ods)$^;
     132    }
     133
     134    return q^(?i)\.xls$^;
     135}
     136 
     137sub init {
     138    my $self = shift (@_);
     139
     140    # ConvertBinaryFile init
     141    $self->SUPER::init(@_);
     142    $self->AutoLoadConverters::init();
     143
     144}
     145
     146sub begin {
     147    my $self = shift (@_);
     148
     149    $self->AutoLoadConverters::begin();
     150    $self->SUPER::begin(@_);
     151
     152}
     153
     154sub deinit {
     155    my $self = shift (@_);
     156   
     157    $self->AutoLoadConverters::deinit();
     158    $self->SUPER::deinit(@_);
     159
     160}
     161
     162sub tmp_area_convert_file {
     163
     164    my $self = shift (@_);
     165    return $self->AutoLoadConverters::tmp_area_convert_file(@_);
     166
     167}
     168   
    135169sub convert_post_process_old
    136170{
     
    153187}
    154188
    155 sub get_default_process_exp {
    156     my $self = shift (@_);
    157 
    158     if ($self->{'scripting_ext_working'}) {
    159     return q^(?i)\.(xls|xlsx|ods)$^;
    160     }
    161 
    162     return q^(?i)\.xls$^;
    163 }
    164    
    165 
    1661891;
  • main/trunk/greenstone2/perllib/plugins/PDFPlugin.pm

    r22705 r22861  
    3131use unicode;
    3232
    33 use AutoloadConverterScripting;
    34 
    35 @PDFPlugin::ISA = ('AutoloadConverterScripting', 'ReadTextFile');
     33use AutoLoadConverters;
     34
     35@PDFPlugin::ISA = ('ConvertBinaryFile', 'AutoLoadConverters', 'ReadTextFile');
    3636
    3737
     
    120120    push(@{$hashArgOptLists->{"OptList"}},$options);
    121121
    122     my $self = new AutoloadConverterScripting("PDFBoxConverter",$pluginlist, $inputargs, $hashArgOptLists);
     122    my $auto_converter_self = new AutoLoadConverters($pluginlist,$inputargs,$hashArgOptLists,["PDFBoxConverter"],1);
     123    my $cbf_self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
     124    my $self = BasePlugin::merge_inheritance($auto_converter_self, $cbf_self);
    123125   
    124126    if ($self->{'info_only'}) {
     
    126128    return bless $self, $class;
    127129    }
    128 
     130   
     131    $self = bless $self, $class;
    129132    $self->{'filename_extension'} = "pdf";
    130133    $self->{'file_type'} = "PDF";
     
    169172   
    170173    if ($secondary_plugin_name eq "HTMLPlugin") {
    171     # pdftohtml always produces utf8
     174    # pdftohtml always produces utf8 - What about pdfbox???
    172175    push(@$specific_options, "-input_encoding", "utf8");
    173176    push(@$specific_options, "-extract_language") if $self->{'extract_language'};
     
    209212    return "";
    210213}
    211    
     214
     215sub init {
     216    my $self = shift (@_);
     217
     218    # ConvertBinaryFile init
     219    $self->SUPER::init(@_);
     220    $self->AutoLoadConverters::init();
     221
     222}
     223
     224sub begin {
     225    my $self = shift (@_);
     226
     227    $self->AutoLoadConverters::begin();
     228    $self->SUPER::begin(@_);
     229
     230}
     231
     232sub deinit {
     233    my $self = shift (@_);
     234   
     235    $self->AutoLoadConverters::deinit();
     236    $self->SUPER::deinit(@_);
     237
     238}
     239 
     240
     241sub tmp_area_convert_file {
     242
     243    my $self = shift (@_);
     244    return $self->AutoLoadConverters::tmp_area_convert_file(@_);
     245
     246}
     247
    212248sub convert_post_process
    213249{
  • main/trunk/greenstone2/perllib/plugins/PowerPointPlugin.pm

    r22709 r22861  
    33# PowerPointPlugin.pm -- plugin for importing Microsoft PowerPoint files.
    44#  (basic version supports versions 95 and 97)
    5 #  (through OpenOffice extension, supports all contempoary formats)
     5#  (through OpenOffice extension, supports all contemporary formats)
    66#
    77# A component of the Greenstone digital library software
     
    3535use gsprintf 'gsprintf';
    3636
    37 use AutoloadConverterScripting;
    38 
    39 @PowerPointPlugin::ISA = ('AutoloadConverterScripting');
    40 
     37use AutoLoadConverters;
     38use ConvertBinaryFile;
     39
     40sub BEGIN {
     41    @PowerPointPlugin::ISA = ('ConvertBinaryFile', 'AutoLoadConverters');
     42}
     43
     44my $openoffice_available = 0;
    4145
    4246my $windows_convert_to_list =
     
    5559      ];
    5660
     61my $openoffice_convert_to_list =
     62    [ { 'name' => "auto",
     63    'desc' => "{ConvertBinaryFile.convert_to.auto}" },
     64      { 'name' => "html",
     65    'desc' => "{PowerPointPlugin.convert_to.oo_html}" },
     66      { 'name' => "text",
     67    'desc' => "{ConvertBinaryFile.convert_to.text}" },
     68      { 'name' => "pagedimg",
     69    'desc' => "{PowerPointPlugin.convert_to.pagedimg}" }
     70      ];
     71
    5772my $arguments =
    5873    [ { 'name' => "process_exp",
     
    6075    'type' => "regexp",
    6176    'reqd' => "no",
    62     'deft' => "&get_default_process_exp(\$self)",  # delayed (see below)
     77    'deft' => "&get_default_process_exp()",  # delayed (see below)
    6378    }
    6479      ];
     
    7792      ];
    7893
     94my $opt_office_args =
     95    [ { 'name' => "convert_to",
     96    'desc' => "{ConvertBinaryFile.convert_to}",
     97    'type' => "enum",
     98    'reqd' => "yes",
     99    'list' => $openoffice_convert_to_list,
     100    'deft' => "html" }
     101      ];
     102
    79103my $options = { 'name'     => "PowerPointPlugin",
    80104        'desc'     => "{PowerPointPlugin.desc}",
     
    89113    push(@$pluginlist, $class);
    90114
     115    # this bit needs to happen later after the arguments array has been
     116    # finished - used for parsing the input args.
     117    # push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     118    # this one needs to go in first, to get the print info in the right order
     119    push(@{$hashArgOptLists->{"OptList"}},$options);
     120
    91121    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
    92122    push(@$arguments,@$opt_windows_args);
    93123    }
    94124
    95     push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    96     push(@{$hashArgOptLists->{"OptList"}},$options);
    97 
    98 
    99     my $self
    100     = new AutoloadConverterScripting("OpenOfficeConverter",
    101                      $pluginlist, $inputargs,
    102                      $hashArgOptLists);
    103 
    104 
    105     # plugin's process_exp can only be correctly determined once autoloading
    106     # has taken place
    107     my $plug_options = $self->{'option_list'}->[0];
    108     my $plug_args = $plug_options->{'args'};
    109    
    110     foreach my $a (@$plug_args) {
    111     # consider changing this to search for all values that are
    112     # tagged as 'deft-delayed' = 1 ?!?
    113 
     125    my $auto_converter_self = new AutoLoadConverters($pluginlist,$inputargs,$hashArgOptLists,["OpenOfficeConverter"],1);
     126
     127    if ($auto_converter_self->{'openoffice_available'}) {
     128    push (@$arguments,@$opt_office_args);
     129    $openoffice_available = 1;
     130    }
     131    # TODO: handle the case where both the Windows args and the OpenOffice args are enabled - what should the convert_to list be?
     132
     133    # evaluate the default for process_exp  - it needs to be delayed till here so we know if openoffice is available or not. But needs to be done before parsing the args.
     134    foreach my $a (@$arguments) {
    114135    if ($a->{'name'} eq "process_exp") {
    115136        my $eval_expr = $a->{'deft'};
    116137        $a->{'deft'} = eval "$eval_expr";
    117 
    118         # Now see if process_exp needs updating
    119         my $process_exp = $self->{'process_exp'};
    120         if (!$self->{'info_only'} && ($process_exp eq $eval_expr)) {
    121         # process_exp is only defined if not 'info_only'
    122         #
    123         # if it does exist and it equals the unevaluated $eval_expr
    124         # then it was set to the default (rather than overriden by
    125         # the collect.cfg file)
    126 
    127         $self->{'process_exp'} = $a->{'deft'};
    128         }
    129     }
    130     }
    131 
    132  
     138        last;
     139    }
     140    }
     141
     142    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     143
     144    my $cbf_self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
     145    my $self = BasePlugin::merge_inheritance($auto_converter_self, $cbf_self);
     146
    133147    if ($self->{'info_only'}) {
    134148    # don't worry about any options etc
     
    136150    }
    137151
     152    $self = bless $self, $class;
    138153    $self->{'filename_extension'} = "ppt";
    139154    $self->{'file_type'} = "PPT";
     
    150165   my $outhandle = $self->{'outhandle'};
    151166
    152     # can't have windows_scripting and openoffice_scripting at the same time
    153     if ($self->{'windows_scripting'} && $self->{'openoffice_scripting'}) {
    154     print $outhandle "Warning: Cannot have -windows_scripting and -openoffice_scripting\n";
     167    # can't have windows_scripting and openoffice_conversion at the same time
     168    if ($self->{'windows_scripting'} && $self->{'openoffice_conversion'}) {
     169    print $outhandle "Warning: Cannot have -windows_scripting and -openoffice_conversion\n";
    155170    print $outhandle "         on at the same time.  Defaulting to -windows_scripting\n";
    156     $self->{'openoffice_scripting'} = 0;
     171    $self->{'openoffice_conversion'} = 0;
    157172    }
    158173   
     
    193208    my $self = shift (@_);
    194209
    195     if ($self->{'scripting_ext_working'}) {
     210    if ($openoffice_available) {
    196211    return q^(?i)\.(ppt|pptx|odp)$^;
    197212    }
     
    200215}
    201216
     217sub init {
     218    my $self = shift (@_);
     219
     220    # ConvertBinaryFile init
     221    $self->SUPER::init(@_);
     222    $self->AutoLoadConverters::init();
     223
     224}
     225
     226sub begin {
     227    my $self = shift (@_);
     228
     229    $self->AutoLoadConverters::begin();
     230    $self->SUPER::begin(@_);
     231
     232}
     233
     234sub deinit {
     235    my $self = shift (@_);
     236   
     237    $self->AutoLoadConverters::deinit();
     238    $self->SUPER::deinit(@_);
     239
     240}
     241
     242# override AutoLoadConverters version, as we need to do more stuff once it's converted if we are converting to an item file
     243sub tmp_area_convert_file {
     244    my $self = shift (@_);
     245    my ($output_ext, $input_filename, $textref) = @_;
     246
     247    if ($self->{'openoffice_conversion'}) {
     248    if ($self->{'convert_to'} eq "pagedimg") {
     249        $output_ext = "html"; # first convert to html
     250    }
     251    my ($result, $result_str, $new_filename) = $self->OpenOfficeConverter::convert($input_filename, $output_ext);
     252    if ($result == 0) {
     253        my $outhandle=$self->{'outhandle'};
     254        print $outhandle "OpenOfficeConverter Conversion error\n";
     255        print $outhandle $result_str;
     256        return "";
     257
     258    }
     259    #print STDERR "result = $result\n";
     260    if ($self->{'convert_to'} eq "pagedimg") {
     261        #my $item_filename = $self->generate_item_file($new_filename);
     262        #return $item_filename;
     263        return "/research/kjdon/home/gsdl/collect/openoffice/test.item";
     264    }
     265    return $new_filename;
     266
     267    }
     268    else {
     269    return $self->ConvertBinaryFile::tmp_area_convert_file(@_);
     270    }
     271    # get tmp filename
     272}
     273
     274# override default read in some situations, as the conversion of ppt to html results in many files, and we want them all to be processed.
     275sub read_XX {
     276    my $self = shift (@_); 
     277    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     278
     279    # can we process this file??
     280    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
     281
     282    return undef unless $self->can_process_this_file($filename_full_path);
     283   
     284    my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_);
     285   
     286    if ((defined $process_status) && ($process_status == 1)) {
     287   
     288    # process the document
     289    $processor->process($doc_obj);
     290
     291    $self->{'num_processed'} ++;
     292    undef $doc_obj;
     293    }
     294    # delete any temp files that we may have created
     295    $self->clean_up_after_doc_obj_processing();
     296
     297
     298    # if process_status == 1, then the file has been processed.
     299    return $process_status;
     300
     301}
     302
    2023031;
    203304
  • main/trunk/greenstone2/perllib/plugins/WordPlugin.pm

    r22709 r22861  
    3131use gsprintf 'gsprintf';
    3232
    33 use AutoloadConverterScripting;
    34 
    35 @WordPlugin::ISA = ('AutoloadConverterScripting');
    36 
     33use AutoLoadConverters;
     34use ConvertBinaryFile;
     35
     36sub BEGIN {
     37    @WordPlugin::ISA = ('ConvertBinaryFile', 'AutoLoadConverters');
     38}
     39
     40my $openoffice_available = 0;
    3741
    3842my $arguments =
     
    4044    'desc' => "{BasePlugin.process_exp}",
    4145    'type' => "regexp",
    42     'deft' => "&get_default_process_exp(\$self)", # delayed (see below)
     46    'deft' => "&get_default_process_exp()", # delayed (see below)
    4347    'reqd' => "no" },
    4448      { 'name' => "description_tags",
     
    5155               'desc' => "{WordPlugin.windows_scripting}",
    5256               'type' => "flag",
     57
    5358               'reqd' => "no" } ];
    5459
     
    100105    push(@$pluginlist, $class);
    101106
     107    # this bit needs to happen later after the arguments array has been
     108    # finished - used for parsing the input args.
     109    # push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     110    # this one needs to go in first, to get the print info in the right order
     111    push(@{$hashArgOptLists->{"OptList"}},$options);
     112
    102113    my $office_capable = 0;
    103114    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
    104115    push(@$arguments,@$opt_windows_args);
    105116    $office_capable = 1;
    106     }
    107     if ($AutoloadConverterScripting::openoffice_ext_working) {
     117    }
     118
     119    my $auto_converter_self = new AutoLoadConverters($pluginlist,$inputargs,$hashArgOptLists,["OpenOfficeConverter"],1);
     120
     121   if ($auto_converter_self->{'openoffice_available'}) {
    108122    $office_capable = 1;
    109     }
    110     # these office args apply to windows scripting or to openoffice scripting
     123    $openoffice_available = 1;
     124    }
     125
     126    # these office args apply to windows scripting or to openoffice conversion
    111127    if ($office_capable) {
    112128    push(@$arguments,@$opt_office_args);
    113129    }
    114130   
    115     push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    116     push(@{$hashArgOptLists->{"OptList"}},$options);
    117 
    118     my $self
    119     = new AutoloadConverterScripting("OpenOfficeConverter",$pluginlist,
    120                      $inputargs, $hashArgOptLists);
    121 
    122     # plugin's process_exp can only be correctly determined once autoloading
    123     # has taken place
    124     my $plug_options = $self->{'option_list'}->[0];
    125     my $plug_args = $plug_options->{'args'};
    126    
    127     foreach my $a (@$plug_args) {
    128     # consider changing this to search for all values that are
    129     # tagged as 'deft-delayed' = 1 ?!?
    130 
     131    # evaluate the default for process_exp  - it needs to be delayed till here so we know if openoffice is available or not. But needs to be done before parsing the args.
     132    foreach my $a (@$arguments) {
    131133    if ($a->{'name'} eq "process_exp") {
    132134        my $eval_expr = $a->{'deft'};
    133135        $a->{'deft'} = eval "$eval_expr";
    134 
    135         # Now see if process_exp needs updating
    136         my $process_exp = $self->{'process_exp'};
    137         if (!$self->{'info_only'} && ($process_exp eq $eval_expr)) {
    138         # process_exp is only defined if not 'info_only'
    139         #
    140         # if it does exist and it equals the unevaluated $eval_expr
    141         # then it was set to the default (rather than overriden by
    142         # the collect.cfg file)
    143 
    144         $self->{'process_exp'} = $a->{'deft'};
    145         }
    146     }
    147     }
    148 
     136        last;
     137    }
     138    }
     139   
     140    # have finished modifying our arguments, add them to ArgList
     141    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     142
     143    my $cbf_self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
     144    my $self = BasePlugin::merge_inheritance($auto_converter_self, $cbf_self);
    149145
    150146    if ($self->{'info_only'}) {
     
    153149    }
    154150
     151    $self = bless $self, $class;
    155152    $self->{'filename_extension'} = "doc";
    156153    $self->{'file_type'} = "Word";
     
    162159    $self->{'office_scripting'} = 1;
    163160    }   
    164     if ($self->{'openoffice_scripting'}) {
     161    if ($self->{'openoffice_conversion'}) {
    165162    if ($self->{'windows_scripting'}) {
    166         print $outhandle "Warning: Cannot have -windows_scripting and -openoffice_scripting\n";
     163        print $outhandle "Warning: Cannot have -windows_scripting and -openoffice_conversion\n";
    167164        print $outhandle "         on at the same time.  Defaulting to -windows_scripting\n";
    168         $self->{'openoffice_scripting'} = 0;
     165        $self->{'openoffice_conversion'} = 0;
    169166    }
    170167    else {
     
    234231    $self->load_secondary_plugins($class,$secondary_plugin_options,$hashArgOptLists);
    235232
    236     return bless $self, $class;
     233    return $self;
    237234}
    238235
    239236sub get_default_process_exp {
    240237    my $self = shift (@_);
    241     if ($self->{'scripting_ext_working'}) {
     238
     239    if ($openoffice_available) {
    242240    return q^(?i)\.(doc|dot|docx|odt)$^;
    243241    }
    244242    return q^(?i)\.(doc|dot)$^;
    245243}
     244
     245sub init {
     246    my $self = shift (@_);
     247
     248    # ConvertBinaryFile init
     249    $self->SUPER::init(@_);
     250    $self->AutoLoadConverters::init();
     251
     252}
     253
     254sub begin {
     255    my $self = shift (@_);
     256
     257    $self->AutoLoadConverters::begin();
     258    $self->SUPER::begin(@_);
     259
     260}
     261
     262sub deinit {
     263    my $self = shift (@_);
     264   
     265    $self->AutoLoadConverters::deinit();
     266    $self->SUPER::deinit(@_);
     267
     268}
     269
     270sub tmp_area_convert_file {
     271
     272    my $self = shift (@_);
     273    return $self->AutoLoadConverters::tmp_area_convert_file(@_);
     274
     275}
     276
    246277
    247278sub convert_post_process_old
Note: See TracChangeset for help on using the changeset viewer.