Show
Ignore:
Timestamp:
10.08.2010 14:31:53 (9 years ago)
Author:
kjdon
Message:

Code tidy-up. Rearranged how ConvertBinaryFile plugins set up their secondary plugins: they now only set up the options for the one they are using. All subclass-specific code has been moved out of ConvertBinaryFile::new into the appropriate plugin file.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/WordPlugin.pm

    r22514 r22597  
    4141    eval("require OpenOfficeConverter"); 
    4242    if ($@) { 
    43     # Useful debugging statement if there is a syntax error in OpenOfficeConverter   
     43    # Useful debugging statement if there is a syntax error in OpenOfficeConverter: 
    4444    #print STDERR "$@\n"; 
    4545    @WordPlugin::ISA = ('ConvertBinaryFile'); 
     
    174174    } 
    175175 
    176     my $outhandle = $self->{'outhandle'}; 
    177176    $self->{'filename_extension'} = "doc"; 
    178177    $self->{'file_type'} = "Word"; 
     178 
     179    my $outhandle = $self->{'outhandle'}; 
    179180 
    180181    if ($self->{'windows_scripting'}) { 
     
    193194    } 
    194195 
    195     # we always save as utf-8 
    196 #    if ($self->{'input_encoding'} eq "auto") { 
    197 #   $self->{'input_encoding'} = "utf8"; 
    198 #    } 
    199  
     196    # check convert_to 
     197    if ($self->{'convert_to'} eq "auto") { 
     198    $self->{'convert_to'} = "html"; 
     199    } 
     200    # windows or open office scripting, outputs structuredHTML 
     201    if (defined $self->{'office_scripting'}) { 
     202    $self->{'convert_to'} = "structuredhtml"; 
     203    }  
     204 
     205    # set convert_to_plugin and convert_to_ext 
     206    $self->ConvertBinaryFile::set_standard_convert_settings(); 
     207  
     208    my $secondary_plugin_name = $self->{'convert_to_plugin'}; 
    200209    my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 
    201     if (defined $self->{'office_scripting'}) {  
    202     if (!defined $secondary_plugin_options->{'StructuredHTMLPlugin'}){ 
    203         $secondary_plugin_options->{'StructuredHTMLPlugin'} = []; 
    204         my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlugin'}; 
    205          
    206         # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj) 
    207         # to extract these metadata fields from the HEAD META fields 
    208         push (@$structhtml_options, "-metadata_fields","Title,GENERATOR,date,author<Creator>"); 
    209         push (@$structhtml_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 
    210         push (@$structhtml_options, "-description_tags") if $self->{'office_scripting'};  
    211         push (@$structhtml_options, "-extract_language") if $self->{'extract_language'}; 
    212         push (@$structhtml_options, "-delete_toc") if $self->{'delete_toc'}; 
    213         push (@$structhtml_options, "-toc_header", $self->{'toc_header'}) if $self->{'toc_header'}; 
    214         push (@$structhtml_options, "-title_header", $self->{'title_header'}) if $self->{'title_header'}; 
    215         push (@$structhtml_options, "-level1_header", $self->{'level1_header'}) if $self->{'level1_header'}; 
    216         push (@$structhtml_options, "-level2_header", $self->{'level2_header'})if $self->{'level2_header'}; 
    217         push (@$structhtml_options, "-level3_header", $self->{'level3_header'}) if $self->{'level3_header'}; 
    218         push (@$structhtml_options, "-metadata_fields", $self->{'metadata_fields'}) if $self->{'metadata_fields'}; 
    219         push (@$structhtml_options, "-metadata_field_separator", $self->{'metadata_field_separator'}) if $self->{'metadata_field_separator'}; 
    220     } 
    221     } 
    222     if (!defined $secondary_plugin_options->{'HTMLPlugin'}) { 
    223     $secondary_plugin_options->{'HTMLPlugin'} = []; 
    224     } 
    225     if (!defined $secondary_plugin_options->{'TextPlugin'}) { 
    226     $secondary_plugin_options->{'TextPlugin'} = []; 
    227     } 
    228  
    229     my $html_options = $secondary_plugin_options->{'HTMLPlugin'}; 
    230     my $text_options = $secondary_plugin_options->{'TextPlugin'}; 
    231     my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlugin'};     
    232     # tell the secondary plugins that they are processing tmp files 
    233     push(@$html_options, "-processing_tmp_files"); 
    234     push(@$structhtml_options, "-processing_tmp_files"); 
    235      
    236     # wvWare will always produce html files encoded as utf-8, so make sure the secondary HTMLPlugin knows this 
    237     push(@$html_options,"-input_encoding", "utf8"); 
    238     push(@$html_options,"-extract_language") if $self->{'extract_language'}; 
    239     push(@$html_options, "-description_tags") if $self->{'description_tags'}; 
    240  
    241     # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj) 
    242     # to extract these metadata fields from the HEAD META fields 
    243     push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>"); 
    244     push(@$html_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 
     210 
     211    if (!defined $secondary_plugin_options->{$secondary_plugin_name}) { 
     212    $secondary_plugin_options->{$secondary_plugin_name} = []; 
     213    } 
     214    my $specific_options = $secondary_plugin_options->{$secondary_plugin_name}; 
     215 
     216    # following title_sub removes "Page 1" and a leading 
     217    # "1", which is often the page number at the top of the page. Bad Luck 
     218    # if your document title actually starts with "1 " - is there a better way? 
     219    push(@$specific_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 
    245220 
    246221    my $associate_tail_re = $self->{'associate_tail_re'}; 
    247222    if ((defined $associate_tail_re) && ($associate_tail_re ne "")) { 
    248     push(@$html_options, "-associate_tail_re", $associate_tail_re); 
    249     push(@$text_options, "-associate_tail_re", $associate_tail_re); 
    250     push(@$structhtml_options, "-associate_tail_re", $associate_tail_re) if defined $structhtml_options; 
    251     } 
    252  
    253     push(@$html_options, "-file_rename_method", "none"); 
    254     push(@$text_options, "-file_rename_method", "none"); 
    255     push(@$structhtml_options, "-file_rename_method", "none") if defined $structhtml_options; 
     223    push(@$specific_options, "-associate_tail_re", $associate_tail_re); 
     224    } 
     225    push(@$specific_options, "-file_rename_method", "none"); 
     226 
     227    if ($secondary_plugin_name eq "StructuredHTMLPlugin") { 
     228    # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj) 
     229    # to extract these metadata fields from the HEAD META fields 
     230    push (@$specific_options, "-metadata_fields","Title,GENERATOR,date,author<Creator>"); 
     231    push (@$specific_options, "-description_tags") if $self->{'office_scripting'};  
     232    push (@$specific_options, "-extract_language") if $self->{'extract_language'}; 
     233    push (@$specific_options, "-delete_toc") if $self->{'delete_toc'}; 
     234    push (@$specific_options, "-toc_header", $self->{'toc_header'}) if $self->{'toc_header'}; 
     235    push (@$specific_options, "-title_header", $self->{'title_header'}) if $self->{'title_header'}; 
     236    push (@$specific_options, "-level1_header", $self->{'level1_header'}) if $self->{'level1_header'}; 
     237    push (@$specific_options, "-level2_header", $self->{'level2_header'})if $self->{'level2_header'}; 
     238    push (@$specific_options, "-level3_header", $self->{'level3_header'}) if $self->{'level3_header'}; 
     239    push (@$specific_options, "-metadata_fields", $self->{'metadata_fields'}) if $self->{'metadata_fields'}; 
     240    push (@$specific_options, "-metadata_field_separator", $self->{'metadata_field_separator'}) if $self->{'metadata_field_separator'}; 
     241    push(@$specific_options, "-processing_tmp_files"); 
     242     
     243    } 
     244     
     245    elsif ($secondary_plugin_name eq "HTMLPlugin") { 
     246    push(@$specific_options, "-processing_tmp_files"); 
     247    push(@$specific_options,"-input_encoding", "utf8"); 
     248    push(@$specific_options,"-extract_language") if $self->{'extract_language'}; 
     249    push(@$specific_options, "-description_tags") if $self->{'description_tags'}; 
     250    # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj) 
     251    # to extract these metadata fields from the HEAD META fields 
     252    push(@$specific_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>"); 
     253    } 
    256254 
    257255    $self = bless $self, $class;