Ignore:
Timestamp:
2006-09-22T16:03:36+12:00 (18 years ago)
Author:
kjdon
Message:

These convertto plugins were all setting extract_language=1 for their secondary plugins. We don't want this: the option should only be passed to the secondary plugin if the user has asked for it. textcat can be very slow, so we don't want to run it unless we have to.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/WordPlug.pm

    r12169 r12834  
    115115    $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'};
    116116
     117    # we always save as utf-8
     118    if ($self->{'input_encoding'} eq "auto") {
     119    $self->{'input_encoding'} = "utf8";
     120    }
     121
    117122    my $secondary_plugin_options = $self->{'secondary_plugin_options'};
    118123    if (defined $self->{'windows_scripting'}) {
     
    120125        $secondary_plugin_options->{'StructuredHTMLPlug'} = [];
    121126        my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlug'};
    122         if ($self->{'input_encoding'} eq "auto") {
    123         $self->{'input_encoding'} = "utf8";
    124         $self->{'extract_language'} = 1;
    125         #push(@$structhtml_options,"-input_encoding", "utf8");
    126         push(@$structhtml_options,"-extract_language");
    127         }
    128                
     127       
    129128        # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
    130129        # to extract these metadata fields from the HEAD META fields
     
    132131        push (@$structhtml_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    133132        push (@$structhtml_options, "-description_tags") if $self->{'windows_scripting'};
     133        push(@$structhtml_options,"-extract_language") if $self->{'extract_language'};
    134134        push (@$structhtml_options, "-delete_toc") if $self->{'delete_toc'};
    135135        push (@$structhtml_options, "-toc_header", $self->{'toc_header'}) if $self->{'toc_header'};
     
    153153    # wvWare will always produce html files encoded as utf-8, so make sure the secondary HTMLPlug knows this
    154154    push(@$html_options,"-input_encoding", "utf8");
    155 
    156     if ($self->{'input_encoding'} eq "auto") {
    157     $self->{'input_encoding'} = "utf8";
    158     $self->{'extract_language'} = 1;
    159     push(@$html_options,"-extract_language");
    160     }
    161     if ($self->{'description_tags'} == 1) {
    162     push(@$html_options, "-description_tags");
    163     }
     155    push(@$html_options,"-extract_language") if $self->{'extract_language'};
     156    push(@$html_options, "-description_tags") if $self->{'description_tags'};
     157
    164158    # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
    165159    # to extract these metadata fields from the HEAD META fields
Note: See TracChangeset for help on using the changeset viewer.