Changeset 15963
- Timestamp:
- 2008-06-12T12:16:20+12:00 (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/PDFPlugin.pm
r15904 → r15963
@@ -212,9 +212,11 @@
 my $outhandle=$self->{'outhandle'};
 
-my ($language, $encoding) = $self->textcat_get_language_encoding ($conv_filename);
+#$self->{'input_encoding'} = "utf8"; # The output is always in utf8 (is it?? it is for html, but what about other types?)
+#my ($language, $encoding) = $self->textcat_get_language_encoding ($conv_filename);
 
 # read in file ($text will be in utf8)
 my $text = "";
-$self->read_file ($conv_filename, $encoding, $language, \$text);
+# encoding will be utf8 for html files - what about other types? will we do this step for them anyway?
+$self->read_file ($conv_filename, "utf8", "", \$text);
 
 # Calculate number of pages based on <a ...> tags (we have a <a name=1> etc
…
@@ -302,5 +304,5 @@
 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
 
-my $result = $self->process_type( "pdf",$base_dir,$file,$doc_obj);
+my $result = $self->process_type($base_dir,$file,$doc_obj);
 
 # fix up the extracted date metadata to be in Greenstone date format,
Note: See TracChangeset for help on using the changeset viewer.