Changeset 15963


Ignore:
Timestamp:
2008-06-12T12:16:20+12:00 (16 years ago)
Author:
kjdon
Message:

Commented out the textcat language/encoding detection in post_process; the converted file is now read as "utf8" with no language. Still need to decide whether textcat is needed here: does the conversion always produce utf8?

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/PDFPlugin.pm

    r15904 r15963  
    212212    my $outhandle=$self->{'outhandle'};
    213213
    214     my ($language, $encoding) = $self->textcat_get_language_encoding ($conv_filename);
     214    #$self->{'input_encoding'} = "utf8"; # The output is always in utf8 (is it?? it is for html, but what about other types?)
     215    #my ($language, $encoding) = $self->textcat_get_language_encoding ($conv_filename);
    215216
    216217    # read in file ($text will be in utf8)
    217218    my $text = "";
    218     $self->read_file ($conv_filename, $encoding, $language, \$text);
     219    # encoding will be utf8 for html files - what about other types? will we do this step for them anyway?
     220    $self->read_file ($conv_filename, "utf8", "", \$text);
    219221
    220222    # Calculate number of pages based on <a ...> tags (we have a <a name=1> etc
     
    302304    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    303305
    304     my $result = $self->process_type("pdf",$base_dir,$file,$doc_obj);
     306    my $result = $self->process_type($base_dir,$file,$doc_obj);
    305307
    306308    # fix up the extracted date metadata to be in Greenstone date format,
Note: See TracChangeset for help on using the changeset viewer.