Ignore:
Timestamp:
2010-08-10T14:31:53+12:00 (14 years ago)
Author:
kjdon
Message:

Code tidy-up. Rearranged how ConvertBinaryFile plugins set up their secondary plugins: each plugin now sets up the options only for the secondary plugin it is actually using. All subclass-specific code has been moved out of ConvertBinaryFile::new into the appropriate plugin file.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/WordPlugin.pm

    r22514 r22597  
    4141    eval("require OpenOfficeConverter");
    4242    if ($@) {
    43     # Useful debugging statement if there is a syntax error in OpenOfficeConverter 
     43    # Useful debugging statement if there is a syntax error in OpenOfficeConverter:
    4444    #print STDERR "$@\n";
    4545    @WordPlugin::ISA = ('ConvertBinaryFile');
     
    174174    }
    175175
    176     my $outhandle = $self->{'outhandle'};
    177176    $self->{'filename_extension'} = "doc";
    178177    $self->{'file_type'} = "Word";
     178
     179    my $outhandle = $self->{'outhandle'};
    179180
    180181    if ($self->{'windows_scripting'}) {
     
    193194    }
    194195
    195     # we always save as utf-8
    196 #    if ($self->{'input_encoding'} eq "auto") {
    197 #   $self->{'input_encoding'} = "utf8";
    198 #    }
    199 
     196    # check convert_to
     197    if ($self->{'convert_to'} eq "auto") {
     198    $self->{'convert_to'} = "html";
     199    }
     200    # windows or open office scripting, outputs structuredHTML
     201    if (defined $self->{'office_scripting'}) {
     202    $self->{'convert_to'} = "structuredhtml";
     203    }
     204
     205    # set convert_to_plugin and convert_to_ext
     206    $self->ConvertBinaryFile::set_standard_convert_settings();
     207 
     208    my $secondary_plugin_name = $self->{'convert_to_plugin'};
    200209    my $secondary_plugin_options = $self->{'secondary_plugin_options'};
    201     if (defined $self->{'office_scripting'}) {
    202     if (!defined $secondary_plugin_options->{'StructuredHTMLPlugin'}){
    203         $secondary_plugin_options->{'StructuredHTMLPlugin'} = [];
    204         my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlugin'};
    205        
    206         # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj)
    207         # to extract these metadata fields from the HEAD META fields
    208         push (@$structhtml_options, "-metadata_fields","Title,GENERATOR,date,author<Creator>");
    209         push (@$structhtml_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    210         push (@$structhtml_options, "-description_tags") if $self->{'office_scripting'};
    211         push (@$structhtml_options, "-extract_language") if $self->{'extract_language'};
    212         push (@$structhtml_options, "-delete_toc") if $self->{'delete_toc'};
    213         push (@$structhtml_options, "-toc_header", $self->{'toc_header'}) if $self->{'toc_header'};
    214         push (@$structhtml_options, "-title_header", $self->{'title_header'}) if $self->{'title_header'};
    215         push (@$structhtml_options, "-level1_header", $self->{'level1_header'}) if $self->{'level1_header'};
    216         push (@$structhtml_options, "-level2_header", $self->{'level2_header'})if $self->{'level2_header'};
    217         push (@$structhtml_options, "-level3_header", $self->{'level3_header'}) if $self->{'level3_header'};
    218         push (@$structhtml_options, "-metadata_fields", $self->{'metadata_fields'}) if $self->{'metadata_fields'};
    219         push (@$structhtml_options, "-metadata_field_separator", $self->{'metadata_field_separator'}) if $self->{'metadata_field_separator'};
    220     }
    221     }
    222     if (!defined $secondary_plugin_options->{'HTMLPlugin'}) {
    223     $secondary_plugin_options->{'HTMLPlugin'} = [];
    224     }
    225     if (!defined $secondary_plugin_options->{'TextPlugin'}) {
    226     $secondary_plugin_options->{'TextPlugin'} = [];
    227     }
    228 
    229     my $html_options = $secondary_plugin_options->{'HTMLPlugin'};
    230     my $text_options = $secondary_plugin_options->{'TextPlugin'};
    231     my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlugin'};   
    232     # tell the secondary plugins that they are processing tmp files
    233     push(@$html_options, "-processing_tmp_files");
    234     push(@$structhtml_options, "-processing_tmp_files");
    235    
    236     # wvWare will always produce html files encoded as utf-8, so make sure the secondary HTMLPlugin knows this
    237     push(@$html_options,"-input_encoding", "utf8");
    238     push(@$html_options,"-extract_language") if $self->{'extract_language'};
    239     push(@$html_options, "-description_tags") if $self->{'description_tags'};
    240 
    241     # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj)
    242     # to extract these metadata fields from the HEAD META fields
    243     push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
    244     push(@$html_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
     210
     211    if (!defined $secondary_plugin_options->{$secondary_plugin_name}) {
     212    $secondary_plugin_options->{$secondary_plugin_name} = [];
     213    }
     214    my $specific_options = $secondary_plugin_options->{$secondary_plugin_name};
     215
     216    # The following title_sub removes a leading "Page N" and a leading
     217    # "1", which is often the page number at the top of the page. Bad luck
     218    # if your document title actually starts with "1 " - is there a better way?
     219    push(@$specific_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    245220
    246221    my $associate_tail_re = $self->{'associate_tail_re'};
    247222    if ((defined $associate_tail_re) && ($associate_tail_re ne "")) {
    248     push(@$html_options, "-associate_tail_re", $associate_tail_re);
    249     push(@$text_options, "-associate_tail_re", $associate_tail_re);
    250     push(@$structhtml_options, "-associate_tail_re", $associate_tail_re) if defined $structhtml_options;
    251     }
    252 
    253     push(@$html_options, "-file_rename_method", "none");
    254     push(@$text_options, "-file_rename_method", "none");
    255     push(@$structhtml_options, "-file_rename_method", "none") if defined $structhtml_options;
     223    push(@$specific_options, "-associate_tail_re", $associate_tail_re);
     224    }
     225    push(@$specific_options, "-file_rename_method", "none");
     226
     227    if ($secondary_plugin_name eq "StructuredHTMLPlugin") {
     228    # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj)
     229    # to extract these metadata fields from the HEAD META fields
     230    push (@$specific_options, "-metadata_fields","Title,GENERATOR,date,author<Creator>");
     231    push (@$specific_options, "-description_tags") if $self->{'office_scripting'};
     232    push (@$specific_options, "-extract_language") if $self->{'extract_language'};
     233    push (@$specific_options, "-delete_toc") if $self->{'delete_toc'};
     234    push (@$specific_options, "-toc_header", $self->{'toc_header'}) if $self->{'toc_header'};
     235    push (@$specific_options, "-title_header", $self->{'title_header'}) if $self->{'title_header'};
     236    push (@$specific_options, "-level1_header", $self->{'level1_header'}) if $self->{'level1_header'};
     237    push (@$specific_options, "-level2_header", $self->{'level2_header'})if $self->{'level2_header'};
     238    push (@$specific_options, "-level3_header", $self->{'level3_header'}) if $self->{'level3_header'};
     239    push (@$specific_options, "-metadata_fields", $self->{'metadata_fields'}) if $self->{'metadata_fields'};
     240    push (@$specific_options, "-metadata_field_separator", $self->{'metadata_field_separator'}) if $self->{'metadata_field_separator'};
     241    push(@$specific_options, "-processing_tmp_files");
     242   
     243    }
     244   
     245    elsif ($secondary_plugin_name eq "HTMLPlugin") {
     246    push(@$specific_options, "-processing_tmp_files");
     247    push(@$specific_options,"-input_encoding", "utf8");
     248    push(@$specific_options,"-extract_language") if $self->{'extract_language'};
     249    push(@$specific_options, "-description_tags") if $self->{'description_tags'};
     250    # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj)
     251    # to extract these metadata fields from the HEAD META fields
     252    push(@$specific_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
     253    }
    256254
    257255    $self = bless $self, $class;
Note: See TracChangeset for help on using the changeset viewer.