Ignore:
Timestamp:
2005-08-09T10:43:13+12:00 (19 years ago)
Author:
chi
Message:

Modifications for pushing the required option and argument lists to StructuredHTMLPlug; also comments out
ensure_utf8(), as it seems to be messing up the document text with UTF-8 encoding.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/WordPlug.pm

    r10428 r10441  
    104104    if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    105105    if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    106     my @arg_array = @$inputargs;
     106
    107107    my $self = (defined $hashArgOptLists)? new ConvertToPlug($pluginlist,$inputargs,$hashArgOptLists): new ConvertToPlug($pluginlist,$inputargs);
    108108
     
    120120        push(@$structhtml_options,"-input_encoding", "utf8");
    121121        push(@$structhtml_options,"-extract_language");
     122        }
    122123               
    123         # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
    124         # to extract these metadata fields from the HEAD META fields
    125         push(@$structhtml_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
    126         push(@$structhtml_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    127         }   
     124        # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
     125        # to extract these metadata fields from the HEAD META fields
     126        push (@$structhtml_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
     127        push (@$structhtml_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
     128        push (@$structhtml_options, "-description_tags") if $self->{'windows_scripting'};
     129        push (@$structhtml_options, "-checkout_toc") if $self->{'checkout_toc'};
     130        push (@$structhtml_options, "-toc_header", $self->{'toc_header'}) if $self->{'toc_header'};
     131        push (@$structhtml_options, "-tof_header", $self->{'tof_header'}) if $self->{'tof_header'};
     132        push (@$structhtml_options, "-title_header", $self->{'title_header'}) if $self->{'title_header'};
     133        push (@$structhtml_options, "-level1_header", $self->{'level1_header'}) if $self->{'level1_header'};
     134        push (@$structhtml_options, "-level2_header", $self->{'level2_header'})if $self->{'level2_header'};
     135        push (@$structhtml_options, "-level3_header", $self->{'level3_header'}) if $self->{'level3_header'};
     136        push (@$structhtml_options, "-extracted_word_metadata_fields", $self->{'extracted_word_metadata_fields'}) if $self->{'extracted_word_metadata_fields'};
    128137    }
    129138    }
     
    144153    push(@$html_options,"-input_encoding", "utf8");
    145154    push(@$html_options,"-extract_language");
    146 
    147     # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
    148     # to extract these metadata fields from the HEAD META fields
    149     push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
    150     push(@$html_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    151     }
    152    
     155    }
     156
     157    # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
     158    # to extract these metadata fields from the HEAD META fields
     159    push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
     160    push(@$html_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
     161       
    153162    $self = bless $self, $class;
    154163    $self->load_secondary_plugins($class,$secondary_plugin_options,$hashArgOptLists);
     
    169178
    170179    my $outhandle=$self->{'outhandle'};
    171 
     180     
    172181    my ($language, $encoding) = $self->textcat_get_language_encoding ($conv_filename);
    173182
     
    177186
    178187    # turn any high bytes that aren't valid utf-8 into utf-8.
    179     unicode::ensure_utf8(\$text);
    180 
     188    #unicode::ensure_utf8(\$text);
     189   
    181190    # Write it out again!
    182     $self->utf8_write_file (\$text, $conv_filename);
     191    #$self->utf8_write_file (\$text, $conv_filename);
    183192}
    184193
     
    216225    my $self = shift (@_);
    217226    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    218    
     227
    219228    return $self->process_type("doc", $base_dir, $file, $doc_obj);
    220229}
Note: See TracChangeset for help on using the changeset viewer.