Ignore:
Timestamp:
2008-06-05T09:29:32+12:00 (16 years ago)
Author:
kjdon
Message:

plugin overhaul: plugins renamed to xxPlugin, and in some cases the names are made more sensible. They now use the new base plugins. Hopefully we have better code reuse. Some of the plugins still need work done as I didn't want to spend another month doing this before committing it. Also, I haven't really tested anything yet...

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/HBPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # HBPlug.pm --
     3# HBPlugin.pm --
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    3838# Humanity Library collections
    3939
    40 package HBPlug;
     40package HBPlugin;
    4141
    4242use ghtml;
    43 use BasPlug;
     43use BasePlugin;
    4444use unicode;
    4545use util;
     
    5050
    5151sub BEGIN {
    52     @HBPlug::ISA = ('BasPlug');
    53 }
    54 
     52    @HBPlugin::ISA = ('BasePlugin');
     53}
     54my $encoding_list =     
     55    [ { 'name' => "ascii",
     56    'desc' => "{ReadTextFile.input_encoding.ascii}" },
     57      { 'name' => "iso_8859_1",
     58    'desc' => "Latin1 (western languages)" } ];
     59 
    5560my $arguments =
    5661    [ { 'name' => "process_exp",
    57     'desc' => "{BasPlug.process_exp}",
     62    'desc' => "{BasePlugin.process_exp}",
    5863    'type' => "regexp",
    5964    'reqd' => "no",
    60     'deft' => &get_default_process_exp() }
     65    'deft' => &get_default_process_exp() },
     66      { 'name' => "input_encoding",
     67    'desc' => "{ReadTextFile.input_encoding}",
     68    'type' => "enum",
     69    'deft' => "iso_8859_1",
     70    'list' => $encoding_list,
     71    'reqd' => "no" }
    6172      ];
    6273
    63 my $options = { 'name'     => "HBPlug",
    64         'desc'     => "{HBPlug.desc}",
     74my $options = { 'name'     => "HBPlugin",
     75        'desc'     => "{HBPlugin.desc}",
    6576        'abstract' => "no",
    6677        'inherits' => "yes",
     
    7283    push(@$pluginlist, $class);
    7384
    74     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    75     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    76 
    77     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     85    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     86    push(@{$hashArgOptLists->{"OptList"}},$options);
     87
     88    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
    7889
    7990    return bless $self, $class;
    8091}
    8192
    82 sub init {
    83     my $self = shift (@_);
    84     my ($verbosity, $outhandle) = @_;
    85 
    86     $self->BasPlug::init($verbosity, $outhandle);
    87     $self->{'input_encoding'} = "iso_8859_1";
    88 
    89     # this plugin only handles ascii encodings
    90     if ($self->{'input_encoding'} !~ /^(iso_8859_1|ascii)$/) {
    91     die "ERROR: HBPlug can handle only iso_8859_1 or ascii encodings.\n" .
    92         $self->{'input_encoding'} . " is not an acceptable input_encoding value\n";
    93     }
    94 }
    95 
    9693# this is included only to prevent warnings being printed out
    97 # from BasPlug::init. The process_exp is not used by this plugin
     94# from BasePlugin::init. The process_exp is not used by this plugin
    9895sub get_default_process_exp {
    9996    my $self = shift (@_);
     
    148145    if ($line =~ /<font [^>]*?face\s*=\s*\"?(\w+)\"?/i) {
    149146        my $font = $1;
    150         print $outhandle "HBPlug::HB_gettext - warning removed font $font\n"
     147        print $outhandle "HBPlugin::HB_gettext - warning removed font $font\n"
    151148        if ($font !~ /^arial$/i);
    152149    }
     
    217214}
    218215
    219 # if input_encoding is ascii we can call add_utf8_metadata
    220 # directly but if it's iso_8859_1 (the default) we need to call
    221 # add_metadata so that the ascii2utf8 conversion is done first
    222 # this should speed things up a little if processing an ascii only
    223 # document with input_encoding set to ascii
    224 sub HB_add_metadata {
    225     my $self = shift (@_);
    226     my ($doc_obj, $cursection, $field, $value) = @_;
    227 
    228 # All text should now be in utf-8
    229 #    if ($self->{'input_encoding'} eq "ascii") {
    230     $doc_obj->add_utf8_metadata ($cursection, $field, $value);
    231 #    } else {
    232 #   $doc_obj->add_metadata ($cursection, $field, $value);
    233 #    }
    234 }
    235 
    236216# return number of files processed, undef if can't process
    237217# Note that $base_dir might be "" and that $file might
     
    251231    return undef unless -e $htmlfile;
    252232
    253     print STDERR "<Processing n='$file' p='HBPlug'>\n" if ($gli);
    254     print $outhandle "HBPlug: processing $file\n";
     233    print STDERR "<Processing n='$file' p='HBPlugin'>\n" if ($gli);
     234    print $outhandle "HBPlugin: processing $file\n";
    255235
    256236    # read in the file and do basic html cleaning (removing header etc)
     
    276256    # $metadata->{$field} may be an array reference
    277257    if (ref ($metadata->{$field}) eq "ARRAY") {
    278         map { 
    279         $self->HB_add_metadata ($doc_obj, $cursection, $field, $_);
     258        map {
     259        $doc_obj->add_utf8_metadata($cursection, $field, $_);
    280260        } @{$metadata->{$field}};
    281261    } else {
    282         $self->HB_add_metadata ($doc_obj, $cursection, $field, $metadata->{$field});
     262        $doc_obj->add_utf8_metadata($cursection, $field, $metadata->{$field});
    283263    }
    284264    }
     
    321301
    322302        # add the metadata to this section
    323         $self->HB_add_metadata ($doc_obj, $cursection, "Title", $title);
     303        $doc_obj->add_utf8_metadata($cursection, "Title", $title);
    324304
    325305        # clean up the section html
     
    332312
    333313        # add the text for this section
    334 # All read text should now be in utf-8
    335 #       if ($self->{'input_encoding'} eq "ascii") {
    336314        $doc_obj->add_utf8_text ($cursection, $sectiontext);
    337 #       } else {
    338 #       $doc_obj->add_text ($cursection, $sectiontext);
    339 #       }
    340315    } else {
    341316        print $outhandle "WARNING - leftover text\n" , $self->shorten($html),
Note: See TracChangeset for help on using the changeset viewer.