Changeset 15872 for gsdl/trunk/perllib/plugins/HBPlugin.pm
- Timestamp: 2008-06-05T09:29:32+12:00 (16 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
gsdl/trunk/perllib/plugins/HBPlugin.pm
r15865 r15872
(first column: line number in r15865; second column: line number in r15872; "-" marks a removed line, "+" an added line, unmarked lines are unmodified; "…" marks lines not shown; indentation is approximate)

   1    1   ###########################################################################
   2    2   #
   3      - # HBPlug.pm --
        3 + # HBPlugin.pm --
   4    4   # A component of the Greenstone digital library software
   5    5   # from the New Zealand Digital Library Project at the
   …    …
  38   38   # Humanity Library collections
  39   39
  40      - package HBPlug;
       40 + package HBPlugin;
  41   41
  42   42   use ghtml;
  43      - use BasPlug;
       43 + use BasePlugin;
  44   44   use unicode;
  45   45   use util;
   …    …
  50   50
  51   51   sub BEGIN {
  52      -     @HBPlug::ISA = ('BasPlug');
  53      - }
  54      -
       52 +     @HBPlugin::ISA = ('BasePlugin');
       53 + }
       54 + my $encoding_list =
       55 +     [ { 'name' => "ascii",
       56 +         'desc' => "{ReadTextFile.input_encoding.ascii}" },
       57 +       { 'name' => "iso_8859_1",
       58 +         'desc' => "Latin1 (western languages)" } ];
       59 +
  55   60   my $arguments =
  56   61       [ { 'name' => "process_exp",
  57      -         'desc' => "{BasPlug.process_exp}",
       62 +         'desc' => "{BasePlugin.process_exp}",
  58   63           'type' => "regexp",
  59   64           'reqd' => "no",
  60      -         'deft' => &get_default_process_exp() }
       65 +         'deft' => &get_default_process_exp() },
       66 +       { 'name' => "input_encoding",
       67 +         'desc' => "{ReadTextFile.input_encoding}",
       68 +         'type' => "enum",
       69 +         'deft' => "iso_8859_1",
       70 +         'list' => $encoding_list,
       71 +         'reqd' => "no" }
  61   72       ];
  62   73
  63      - my $options = { 'name' => "HBPlug",
  64      -                 'desc' => "{HBPlug.desc}",
       74 + my $options = { 'name' => "HBPlugin",
       75 +                 'desc' => "{HBPlugin.desc}",
  65   76                   'abstract' => "no",
  66   77                   'inherits' => "yes",
   …    …
  72   83       push(@$pluginlist, $class);
  73   84
  74      -     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
  75      -     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
  76      -
  77      -     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
       85 +     push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
       86 +     push(@{$hashArgOptLists->{"OptList"}},$options);
       87 +
       88 +     my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
  78   89
  79   90       return bless $self, $class;
  80   91   }
  81   92
  82      - sub init {
  83      -     my $self = shift (@_);
  84      -     my ($verbosity, $outhandle) = @_;
  85      -
  86      -     $self->BasPlug::init($verbosity, $outhandle);
  87      -     $self->{'input_encoding'} = "iso_8859_1";
  88      -
  89      -     # this plugin only handles ascii encodings
  90      -     if ($self->{'input_encoding'} !~ /^(iso_8859_1|ascii)$/) {
  91      -         die "ERROR: HBPlug can handle only iso_8859_1 or ascii encodings.\n" .
  92      -             $self->{'input_encoding'} . " is not an acceptable input_encoding value\n";
  93      -     }
  94      - }
  95      -
  96   93   # this is included only to prevent warnings being printed out
  97      - # from BasPlug::init. The process_exp is not used by this plugin
       94 + # from BasePlugin::init. The process_exp is not used by this plugin
  98   95   sub get_default_process_exp {
  99   96       my $self = shift (@_);
   …    …
 148  145       if ($line =~ /<font [^>]*?face\s*=\s*\"?(\w+)\"?/i) {
 149  146           my $font = $1;
 150      -         print $outhandle "HBPlug::HB_gettext - warning removed font $font\n"
      147 +         print $outhandle "HBPlugin::HB_gettext - warning removed font $font\n"
 151  148               if ($font !~ /^arial$/i);
 152  149       }
   …    …
 217  214   }
 218  215
 219      - # if input_encoding is ascii we can call add_utf8_metadata
 220      - # directly but if it's iso_8859_1 (the default) we need to call
 221      - # add_metadata so that the ascii2utf8 conversion is done first
 222      - # this should speed things up a little if processing an ascii only
 223      - # document with input_encoding set to ascii
 224      - sub HB_add_metadata {
 225      -     my $self = shift (@_);
 226      -     my ($doc_obj, $cursection, $field, $value) = @_;
 227      -
 228      -     # All text should now be in utf-8
 229      -     # if ($self->{'input_encoding'} eq "ascii") {
 230      -     $doc_obj->add_utf8_metadata ($cursection, $field, $value);
 231      -     # } else {
 232      -     # $doc_obj->add_metadata ($cursection, $field, $value);
 233      -     # }
 234      - }
 235      -
 236  216   # return number of files processed, undef if can't process
 237  217   # Note that $base_dir might be "" and that $file might
   …    …
 251  231       return undef unless -e $htmlfile;
 252  232
 253      -     print STDERR "<Processing n='$file' p='HBPlug'>\n" if ($gli);
 254      -     print $outhandle "HBPlug: processing $file\n";
      233 +     print STDERR "<Processing n='$file' p='HBPlugin'>\n" if ($gli);
      234 +     print $outhandle "HBPlugin: processing $file\n";
 255  235
 256  236       # read in the file and do basic html cleaning (removing header etc)
   …    …
 276  256           # $metadata->{$field} may be an array reference
 277  257           if (ref ($metadata->{$field}) eq "ARRAY") {
 278      -             map {
 279      -                 $self->HB_add_metadata ($doc_obj, $cursection, $field, $_);
      258 +             map {
      259 +                 $doc_obj->add_utf8_metadata($cursection, $field, $_);
 280  260               } @{$metadata->{$field}};
 281  261           } else {
 282      -             $self->HB_add_metadata ($doc_obj,$cursection, $field, $metadata->{$field});
      262 +             $doc_obj->add_utf8_metadata($cursection, $field, $metadata->{$field});
 283  263           }
 284  264       }
   …    …
 321  301
 322  302           # add the metadata to this section
 323      -         $self->HB_add_metadata ($doc_obj,$cursection, "Title", $title);
      303 +         $doc_obj->add_utf8_metadata($cursection, "Title", $title);
 324  304
 325  305           # clean up the section html
   …    …
 332  312
 333  313           # add the text for this section
 334      -         # All read text should now be in utf-8
 335      -         # if ($self->{'input_encoding'} eq "ascii") {
 336  314           $doc_obj->add_utf8_text ($cursection, $sectiontext);
 337      -         # } else {
 338      -         # $doc_obj->add_text ($cursection, $sectiontext);
 339      -         # }
 340  315       } else {
 341  316           print $outhandle "WARNING - leftover text\n" , $self->shorten($html),
Note: See TracChangeset for help on using the changeset viewer.