Context Navigation

source: main/trunk/greenstone2/perllib/plugins/ReadTextFile.pm@ 31742

Last change on this file since 31742 was 31492, checked in by kjdon, 7 years ago
renamed EncodingUtil to CommonUtil, BasePlugin to BaseImporter. The idea is that only top level plugins that you can specify in your collection get to have plugin in their name. Modified all other plugins to reflect these name changes
Property svn:executable set to ``*
File size: 19.6 KB

Rev	Line
[15868]	1	###########################################################################
	2	#
	3	# ReadTxtFile.pm -- base class for import plugins that have plain text files
	4	# A component of the Greenstone digital library software
	5	# from the New Zealand Digital Library Project at the
	6	# University of Waikato, New Zealand.
	7	#
	8	# Copyright (C) 1999-2005 New Zealand Digital Library Project
	9	#
	10	# This program is free software; you can redistribute it and/or modify
	11	# it under the terms of the GNU General Public License as published by
	12	# the Free Software Foundation; either version 2 of the License, or
	13	# (at your option) any later version.
	14	#
	15	# This program is distributed in the hope that it will be useful,
	16	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	# GNU General Public License for more details.
	19	#
	20	# You should have received a copy of the GNU General Public License
	21	# along with this program; if not, write to the Free Software
	22	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
	23	#
	24	###########################################################################
	25
	26	package ReadTextFile;
	27
	28	use strict;
	29	no strict 'subs';
	30	no strict 'refs'; # allow filehandles to be variables and viceversa
	31
[22844]	32	use Encode;
[15868]	33
	34	use multiread;
	35	use encodings;
	36	use unicode;
	37	use textcat;
	38	use doc;
	39	use ghtml;
	40	use gsprintf 'gsprintf';
	41
	42	use AutoExtractMetadata;
	43
	44	sub BEGIN {
	45	@ReadTextFile::ISA = ( 'AutoExtractMetadata' );
	46	}
	47
[16014]	48	my $encoding_plus_auto_list =
	49	[ { 'name' => "auto",
	50	'desc' => "{ReadTextFile.input_encoding.auto}" } ];
[31492]	51	push(@{$encoding_plus_auto_list},@{$CommonUtil::encoding_list});
[15868]	52
	53	my $arguments =
	54	[ { 'name' => "input_encoding",
	55	'desc' => "{ReadTextFile.input_encoding}",
	56	'type' => "enum",
[16014]	57	'list' => $encoding_plus_auto_list,
[15868]	58	'reqd' => "no" ,
	59	'deft' => "auto" } ,
	60	{ 'name' => "default_encoding",
	61	'desc' => "{ReadTextFile.default_encoding}",
	62	'type' => "enum",
[31492]	63	'list' => $CommonUtil::encoding_list,
[15868]	64	'reqd' => "no",
	65	'deft' => "utf8" },
	66	{ 'name' => "extract_language",
	67	'desc' => "{ReadTextFile.extract_language}",
	68	'type' => "flag",
	69	'reqd' => "no" },
	70	{ 'name' => "default_language",
	71	'desc' => "{ReadTextFile.default_language}",
	72	'type' => "string",
	73	'deft' => "en",
[16642]	74	'reqd' => "no" }
	75	];
[15868]	76
	77
	78	my $options = { 'name' => "ReadTextFile",
[16014]	79	'desc' => "{ReadTextFile.desc}",
[15868]	80	'abstract' => "yes",
	81	'inherits' => "no",
	82	'args' => $arguments };
	83
	84
	85
	86	sub new {
	87	my $class = shift (@_);
[16699]	88	my ($pluginlist,$inputargs,$hashArgOptLists, $auxiliary) = @_;
[15868]	89	push(@$pluginlist, $class);
	90
	91	push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
	92	push(@{$hashArgOptLists->{"OptList"}},$options);
	93
[16699]	94	my $self = new AutoExtractMetadata($pluginlist, $inputargs, $hashArgOptLists, $auxiliary);
[15868]	95
	96	return bless $self, $class;
	97
	98	}
	99
	100
	101
	102	# The ReadTextFile read_into_doc_obj() function. This function does all the
	103	# right things to make general options work for a given plugin. It reads in
	104	# a file and sets up a slew of metadata all saved in doc_obj, which
	105	# it then returns as part of a tuple (process_status,doc_obj)
	106	#
	107	# Much of this functionality used to reside in read, but it was broken
	108	# down into a supporting routine to make the code more flexible.
	109	#
	110	# recursive plugins (e.g. RecPlug) and specialized plugins like those
	111	# capable of processing many documents within a single file (e.g.
	112	# GMLPlug) will normally want to implement their own version of
	113	# read_into_doc_obj()
	114	#
	115	# Note that $base_dir might be "" and that $file might
	116	# include directories
	117	sub read_into_doc_obj {
	118	my $self = shift (@_);
[16392]	119	my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
[15868]	120
	121	my $outhandle = $self->{'outhandle'};
	122	# should we move this to read? What about secondary plugins?
	123	print STDERR "<Processing n='$file' p='$self->{'plugin_type'}'>\n" if ($gli);
[23484]	124	my $pp_file = &util::prettyprint_file($base_dir,$file,$gli);
[23363]	125	print $outhandle "$self->{'plugin_type'} processing $pp_file\n"
[15868]	126	if $self->{'verbosity'} > 1;
	127
[16392]	128	my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
	129
[15868]	130	# Do encoding stuff
[23335]	131	my ($language, $content_encoding) = $self->textcat_get_language_encoding ($filename_full_path);
[15868]	132	if ($self->{'verbosity'} > 2) {
[23335]	133	print $outhandle "ReadTextFile: reading $file as ($content_encoding,$language)\n";
[15868]	134	}
	135
	136	# create a new document
[18320]	137	my $doc_obj = new doc ($filename_full_path, "indexed_doc", $self->{'file_rename_method'});
[15868]	138	my $top_section = $doc_obj->get_top_section();
	139
	140	# this should look at the plugin option too...
	141	$doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}");
[16724]	142	$doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename_full_path));
[15868]	143
[23352]	144	my $plugin_filename_encoding = $self->{'filename_encoding'};
[23348]	145	my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
[23352]	146	$self->set_Source_metadata($doc_obj, $filename_full_path, $filename_encoding);
[23335]	147
[15868]	148	$doc_obj->add_utf8_metadata($top_section, "Language", $language);
[23335]	149	$doc_obj->add_utf8_metadata($top_section, "Encoding", $content_encoding);
[15868]	150
[31439]	151	# read in file ($text will be in perl internal unicode aware format)
[15868]	152	my $text = "";
[23335]	153	$self->read_file ($filename_full_path, $content_encoding, $language, \$text);
[15868]	154
	155	if (!length ($text)) {
	156	if ($gli) {
	157	print STDERR "<ProcessingError n='$file' r='File contains no text'>\n";
	158	}
	159	gsprintf($outhandle, "$self->{'plugin_type'}: {ReadTextFile.file_has_no_text}\n", $filename_full_path) if $self->{'verbosity'};
	160
	161	my $failhandle = $self->{'failhandle'};
	162	gsprintf($failhandle, "$file: " . ref($self) . ": {ReadTextFile.empty_file}\n");
	163	# print $failhandle "$file: " . ref($self) . ": file contains no text\n";
	164	$self->{'num_not_processed'} ++;
	165
	166	return (0,undef); # what should we return here?? error but don't want to pass it on
	167	}
	168
	169	# do plugin specific processing of doc_obj
	170	unless (defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli))) {
	171	$text = '';
	172	undef $text;
	173	print STDERR "<ProcessingError n='$file'>\n" if ($gli);
	174	return (-1,undef);
	175	}
	176	$text='';
	177	undef $text;
	178
	179	# include any metadata passed in from previous plugins
	180	# note that this metadata is associated with the top level section
	181	$self->add_associated_files($doc_obj, $filename_full_path);
	182	$self->extra_metadata ($doc_obj, $top_section, $metadata);
	183
	184	# do any automatic metadata extraction
	185	$self->auto_extract_metadata ($doc_obj);
	186
	187
	188	# if we haven't found any Title so far, assign one
	189	$self->title_fallback($doc_obj,$top_section,$filename_no_path);
	190
	191	$self->add_OID($doc_obj);
	192
	193	return (1,$doc_obj);
	194	}
	195
	196	# uses the multiread package to read in the entire file pointed to
	197	# by filename and loads the resulting text into $$textref. Input text
	198	# may be in any of the encodings handled by multiread, output text
	199	# will be in utf8
	200	sub read_file {
	201	my $self = shift (@_);
	202	my ($filename, $encoding, $language, $textref) = @_;
	203
	204	if (!-r $filename)
	205	{
	206	my $outhandle = $self->{'outhandle'};
	207	gsprintf($outhandle, "{ReadTextFile.read_denied}\n", $filename) if $self->{'verbosity'};
	208	# print $outhandle "Read permission denied for $filename\n" if $self->{'verbosity'};
	209	return;
	210	}
	211	$$textref = "";
	212	if (!open (FILE, $filename)) {
	213	gsprintf(STDERR, "ReadTextFile::read_file {ReadTextFile.could_not_open_for_reading} ($!)\n", $filename);
	214	die "\n";
	215	}
	216
	217	if ($encoding eq "ascii") {
[24449]	218	# Replace file 'slurp' with faster implementation
	219	sysread(FILE, $$textref, -s FILE);
	220
	221	# The old slow way of reading in a file
	222	#undef $/;
	223	#$$textref = <FILE>;
	224	#$/ = "\n";
[15868]	225	} else {
	226	my $reader = new multiread();
	227	$reader->set_handle ('ReadTextFile::FILE');
	228	$reader->set_encoding ($encoding);
	229	$reader->read_file ($textref);
	230	}
[22844]	231
[22857]	232	# At this point $$textref is a binary byte string
[22844]	233	# => turn it into a Unicode aware string, so full
	234	# Unicode aware pattern matching can be used.
	235	# For instance: 's/\x{0101}//g' or '[[:upper:]]'
	236	#
	237
	238	$$textref = decode("utf8",$$textref);
	239
[15868]	240	close FILE;
	241	}
	242
	243
[22844]	244	# Not currently used
[22857]	245	sub UNUSED_read_file_usingPerlsEncodeModule {
	246	##sub read_file {
[22844]	247	my $self = shift (@_);
	248	my ($filename, $encoding, $language, $textref) = @_;
	249
	250	if (!-r $filename)
	251	{
	252	my $outhandle = $self->{'outhandle'};
	253	gsprintf($outhandle, "{ReadTextFile.read_denied}\n", $filename) if $self->{'verbosity'};
	254	# print $outhandle "Read permission denied for $filename\n" if $self->{'verbosity'};
	255	return;
	256	}
	257	$$textref = "";
	258	if (!open (FILE, $filename)) {
	259	gsprintf(STDERR, "ReadTextFile::read_file {ReadTextFile.could_not_open_f
	260	or_reading} ($!)\n", $filename);
	261	die "\n";
	262	}
	263
	264	my $store_slash = $/;
	265	undef $/;
	266	my $text = <FILE>;
	267	$/ = $store_slash;
	268
	269	$$textref = decode($encoding,$text);
	270
	271	close FILE;
	272	}
	273
	274
[16724]	275	sub read_file_no_decoding {
	276	my $self = shift (@_);
	277	my ($filename, $textref) = @_;
	278
	279	if (!-r $filename)
	280	{
	281	my $outhandle = $self->{'outhandle'};
	282	gsprintf($outhandle, "{ReadTextFile.read_denied}\n", $filename) if $self->{'verbosity'};
	283	# print $outhandle "Read permission denied for $filename\n" if $self->{'verbosity'};
	284	return;
	285	}
	286	$$textref = "";
	287	if (!open (FILE, $filename)) {
	288	gsprintf(STDERR, "ReadTextFile::read_file {ReadTextFile.could_not_open_for_reading} ($!)\n", $filename);
	289	die "\n";
	290	}
	291
	292	my $reader = new multiread();
	293	$reader->set_handle ('ReadTextFile::FILE');
	294	$reader->read_file_no_decoding ($textref);
	295
	296	$self->{'reader'} = $reader;
	297
	298	close FILE;
	299	}
	300
	301
	302	sub decode_text {
	303	my $self = shift (@_);
	304	my ($raw_text, $encoding, $language, $textref) = @_;
	305
	306	my $reader = $self->{'reader'};
	307	if (!defined $reader) {
	308	gsprintf(STDERR, "ReadTextFile::decode_text needs to call ReadTextFile::read_file_no_decoding first\n");
	309	}
	310	else {
	311	$reader->set_encoding($encoding);
	312	$reader->decode_text($raw_text,$textref);
[23387]	313
	314	# At this point $$textref is a binary byte string
	315	# => turn it into a Unicode aware string, so full
	316	# Unicode aware pattern matching can be used.
	317	# For instance: 's/\x{0101}//g' or '[[:upper:]]'
	318
	319	$$textref = decode("utf8",$$textref);
[16724]	320	}
	321	}
	322
	323
[15868]	324	sub textcat_get_language_encoding {
	325	my $self = shift (@_);
	326	my ($filename) = @_;
	327
	328	my ($language, $encoding, $extracted_encoding);
	329	if ($self->{'input_encoding'} eq "auto") {
	330	# use textcat to automatically work out the input encoding and language
	331	($language, $encoding) = $self->get_language_encoding ($filename);
	332	} elsif ($self->{'extract_language'}) {
[16724]	333	# use textcat to get language metadata
[15868]	334	($language, $extracted_encoding) = $self->get_language_encoding ($filename);
	335	$encoding = $self->{'input_encoding'};
	336	# don't print this message for english... english in utf8 is identical
	337	# to english in iso-8859-1 (except for some punctuation). We don't have
	338	# a language model for en_utf8, so textcat always says iso-8859-1!
[16724]	339	if ($extracted_encoding ne $encoding && $language ne "en" && $self->{'verbosity'}) {
[15868]	340	my $plugin_name = ref ($self);
	341	my $outhandle = $self->{'outhandle'};
	342	gsprintf($outhandle, "$plugin_name: {ReadTextFile.wrong_encoding}\n", $filename, $encoding, $extracted_encoding);
	343	}
	344	} else {
	345	$language = $self->{'default_language'};
[16724]	346	$encoding = $self->{'input_encoding'};
[15868]	347	}
[16724]	348
	349	# print STDERR "** language encoding of contents of file $filename:\n\t**$language $encoding\n";
[15868]	350
	351	return ($language, $encoding);
	352	}
	353
[17213]	354
[15868]	355	# Uses textcat to work out the encoding and language of the text in
	356	# $filename. All html tags are removed before processing.
	357	# returns an array containing "language" and "encoding"
	358	sub get_language_encoding {
	359	my $self = shift (@_);
	360	my ($filename) = @_;
	361	my $outhandle = $self->{'outhandle'};
	362	my $unicode_format = "";
	363	my $best_language = "";
	364	my $best_encoding = "";
	365
[17213]	366
[15868]	367	# read in file
	368	if (!open (FILE, $filename)) {
	369	gsprintf(STDERR, "ReadTextFile::get_language_encoding {ReadTextFile.could_not_open_for_reading} ($!)\n", $filename);
	370	# this is a pretty bad error, but try to continue anyway
	371	return ($self->{'default_language'}, $self->{'input_encoding'});
	372	}
	373	undef $/;
	374	my $text = <FILE>;
	375	$/ = "\n";
	376	close FILE;
	377
	378	# check if first few bytes have a Byte Order Marker
	379	my $bom=substr($text,0,2); # check 16bit unicode
	380	if ($bom eq "\xff\xfe") { # little endian 16bit unicode
	381	$unicode_format="unicode";
	382	} elsif ($bom eq "\xfe\xff") { # big endian 16bit unicode
	383	$unicode_format="unicode";
	384	} else {
	385	$bom=substr($text,0,3); # check utf-8
	386	if ($bom eq "\xef\xbb\xbf") { # utf-8 coded FEFF bom
	387	$unicode_format="utf8";
	388	# } elsif ($bom eq "\xef\xbf\xbe") { # utf-8 coded FFFE bom. Error!?
	389	# $unicode_format="utf8";
	390	}
	391	}
	392
[16667]	393	my $found_html_encoding = 0;
[15868]	394	# handle html files specially
	395	# XXX this doesn't match plugins derived from HTMLPlug (except ConvertTo)
[15970]	396	if (ref($self) eq 'HTMLPlugin' \|\|
[15868]	397	(exists $self->{'converted_to'} && $self->{'converted_to'} eq 'HTML')){
	398
[16765]	399	# remove comments in head, including multiline ones, so that we don't match on
[16753]	400	# inactive tags (those that are nested inside comments)
[16765]	401	my ($head) = ($text =~ m/<head>(.*)<\/head>/si);
[17099]	402	$head = "" unless defined $head; # some files are not proper HTML eg php files
[16765]	403	$head =~ s/<!--.*?-->//sg;
[16753]	404
[15868]	405	# remove <title>stuff</title> -- as titles tend often to be in English
	406	# for foreign language documents
	407	$text =~ s!<title>.*?</title>!!si;
	408
	409	# see if this html file specifies its encoding
	410	if ($text =~ /^<\?xml.*encoding="(.+?)"/) {
	411	$best_encoding = $1;
[16724]	412	}
[16765]	413	# check the meta http-equiv charset tag
	414	elsif ($head =~ m/<meta http-equiv.content-type.charset=(.+?)\"/si) {
[15868]	415	$best_encoding = $1;
	416	}
	417	if ($best_encoding) { # we extracted an encoding
	418	$best_encoding =~ s/-+/_/g;
	419	$best_encoding = lc($best_encoding); # lowercase
	420	if ($best_encoding eq "utf_8") { $best_encoding = "utf8" }
[16667]	421	$found_html_encoding = 1;
	422	# We shouldn't be modifying this here!!
	423	#$self->{'input_encoding'} = $best_encoding;
[15868]	424	}
	425
	426	# remove all HTML tags
	427	$text =~ s/<[^>]*>//sg;
	428	}
	429
[17213]	430	# don't need to do textcat if we know the encoding now AND don't need to extract language
	431	if($found_html_encoding && !$self->{'extract_language'}) { # encoding specified in html file
	432	$best_language = $self->{'default_language'};
	433	}
[15868]	434
[17213]	435	else { # need to use textcat to get either the language, or get both language and encoding
	436	$self->{'textcat'} = new textcat() if (!defined($self->{'textcat'}));
	437
	438	if($found_html_encoding) { # know encoding, find language by limiting search to known encoding
	439	my $results = $self->{'textcat'}->classify_contents_for_encoding(\$text, $filename, $best_encoding);
	440
	441	my $language;
	442	($language) = $results->[0] =~ m/^([^-])(?:-(?:.))?$/ if (scalar @$results > 0);
[15868]	443
[17213]	444	if (!defined $language \|\| scalar @$results > 3) {
	445	# if there were too many results even when restricting results by encoding,
	446	# or if there were no results, use default language with the known encoding
	447	$best_language = $self->use_default_language($filename);
	448	}
	449	else { # fewer than 3 results means textcat is more certain, use the first result
	450	$best_language = $language;
	451	}
[15868]	452	}
[17213]	453	else { # don't know encoding or language yet, therefore we use textcat
	454	my $results = $self->{'textcat'}->classify_contents(\$text, $filename);
	455
	456	# if textcat returns 3 or less possibilities we'll use the first one in the list
	457	if (scalar @$results <= 3) { # results will be > 0 when we don't constrain textcat by an encoding
	458	my ($language, $encoding) = $results->[0] =~ m/^([^-])(?:-(.))?$/;
[15868]	459
[17213]	460	$language = $self->use_default_language($filename) unless defined $language;
	461	$encoding = $self->use_default_encoding($filename) unless defined $encoding;
[15868]	462
[17213]	463	$best_language = $language;
	464	$best_encoding = $encoding;
[15868]	465	}
[17213]	466	else { # if (scalar @$results > 3) {
	467	if ($unicode_format) { # in case the first had a BOM
	468	$best_encoding=$unicode_format;
	469	}
	470	else {
	471	# Find the most frequent encoding in the textcat results returned
	472	# Returns "" if there's no encoding more frequent than another
	473	$best_encoding = $self->{'textcat'}->most_frequent_encoding($results);
	474	}
	475
	476	if ($best_encoding eq "") { # encoding still not set, use defaults
	477	$best_language = $self->use_default_language($filename);
	478	$best_encoding = $self->use_default_encoding($filename);
	479	}
	480	elsif (!$self->{'extract_language'}) { # know encoding but don't need to discover language
	481	$best_language = $self->use_default_language($filename);
	482	}
	483	else { # textcat again using the most frequent encoding or the $unicode_format set above
	484	$results = $self->{'textcat'}->classify_contents_for_encoding(\$text, $filename, $best_encoding);
	485	my $language;
	486	($language) = $results->[0] =~ m/^([^-])(?:-(.))?$/ if (scalar @$results > 0);
	487	if (!defined $language \|\| scalar @$results > 3) {
	488	# if no result or too many results, use default language for the encoding previously found
	489	$best_language = $self->use_default_language($filename);
	490	}
	491	else { # fewer than 3 results, use the language of the first result
	492	$best_language = $language;
	493	}
	494	}
[15868]	495	}
	496	}
	497	}
	498
[17213]	499	if($best_encoding eq "" \|\| $best_language eq "") {
	500	print STDERR "****Shouldn't happen: encoding and/or language still not set. Using defaults.\n";
	501	$best_encoding = $self->use_default_encoding($filename) if $best_encoding eq "";
	502	$best_language = $self->use_default_language($filename) if $best_language eq "";
	503	}
	504	# print STDERR "****Content language: $best_language; Encoding: $best_encoding.\n";
	505
	506
[16555]	507	if ($best_encoding =~ /^iso_8859/ && &unicode::check_is_utf8($text)) {
[15868]	508	# the text is valid utf8, so assume that's the real encoding
	509	# (since textcat is based on probabilities)
	510	$best_encoding = 'utf8';
	511	}
	512
	513	# check for equivalents where textcat doesn't have some encodings...
	514	# eg MS versions of standard encodings
	515	if ($best_encoding =~ /^iso_8859_(\d+)/) {
	516	my $iso = $1; # which variant of the iso standard?
	517	# iso-8859 sets don't use chars 0x80-0x9f, windows codepages do
	518	if ($text =~ /[\x80-\x9f]/) {
	519	# Western Europe
	520	if ($iso == 1 or $iso == 15) { $best_encoding = 'windows_1252' }
	521	elsif ($iso == 2) {$best_encoding = 'windows_1250'} # Central Europe
	522	elsif ($iso == 5) {$best_encoding = 'windows_1251'} # Cyrillic
	523	elsif ($iso == 6) {$best_encoding = 'windows_1256'} # Arabic
	524	elsif ($iso == 7) {$best_encoding = 'windows_1253'} # Greek
	525	elsif ($iso == 8) {$best_encoding = 'windows_1255'} # Hebrew
	526	elsif ($iso == 9) {$best_encoding = 'windows_1254'} # Turkish
	527	}
	528	}
	529
	530	if ($best_encoding !~ /^(ascii\|utf8\|unicode)$/ &&
	531	!defined $encodings::encodings->{$best_encoding}) {
	532	if ($self->{'verbosity'}) {
	533	gsprintf($outhandle, "ReadTextFile: {ReadTextFile.unsupported_encoding}\n",
	534	$filename, $best_encoding, $self->{'default_encoding'});
	535	}
	536	$best_encoding = $self->{'default_encoding'};
	537	}
	538
	539	return ($best_language, $best_encoding);
	540	}
	541
	542
[17213]	543	sub use_default_language {
	544	my $self = shift (@_);
	545	my ($filename) = @_;
[15868]	546
[17213]	547	if ($self->{'verbosity'}>2) {
	548	gsprintf($self->{'outhandle'},
	549	"ReadTextFile: {ReadTextFile.could_not_extract_language}\n",
	550	$filename, $self->{'default_language'});
	551	}
	552	return $self->{'default_language'};
	553	}
	554
	555	sub use_default_encoding {
	556	my $self = shift (@_);
	557	my ($filename) = @_;
	558
	559	if ($self->{'verbosity'}>2) {
	560	gsprintf($self->{'outhandle'},
	561	"ReadTextFile: {ReadTextFile.could_not_extract_encoding}\n",
	562	$filename, $self->{'default_encoding'});
	563	}
	564	return $self->{'default_encoding'};
	565	}
	566
[15868]	567	# Overridden by exploding plugins (eg. ISISPlug)
	568	sub clean_up_after_exploding
	569	{
	570	my $self = shift(@_);
	571	}
	572
	573
	574	1;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: