Ignore:
Timestamp:
2000-07-13T10:21:53+12:00 (24 years ago)
Author:
sjboddie
Message:

merged changes to trunk into New_Config_Format branch

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/New_Config_Format-branch/gsdl/perllib/plugins/HBPlug.pm

    r1020 r1279  
    2424###########################################################################
    2525
    26 # plugin which process an HTML book directory
     26# plugin which processes an HTML book directory
     27
     28# This plugin is used by the Humanity Library collections and does not handle
     29# input encodings other than ascii or extended ascii
     30
     31# this code is kind of ugly and could no doubt be made to run faster; by leaving
     32# it in this state I hope to encourage people to make their collections use
     33# HBSPlug instead ;-)
     34
     35# Use HBSPlug if creating a new collection and marking up files like the
     36# Humanity Library collections. HBSPlug accepts all input encodings but
     37# expects the marked up files to be cleaner than those used by the
     38# Humanity Library collections
    2739
    2840package HBPlug;
    2941
    30 use plugin;
    3142use ghtml;
    3243use BasPlug;
    3344use util;
    34 use lang;
    3545use doc;
    36 use cfgread;
    3746
    3847
     
    4352sub new {
    4453    my ($class) = @_;
    45     $self = new BasPlug ();
     54    my $self = new BasPlug ("HBPlug", @_);
    4655
    4756    return bless $self, $class;
    4857}
    4958
    50 sub is_recursive {
    51     my $self = shift (@_);
    52 
    53     return 0; # this is not a recursive plugin
    54 }
     59sub init {
     60    my $self = shift (@_);
     61    my ($verbosity) = @_;
     62
     63    $self->BasPlug::init();
     64
     65    # this plugin only handles ascii encodings
     66    if ($self->{'input_encoding'} !~ /^(iso_8859_1|ascii)$/) {
     67    die "ERROR: HBPlug can handle only iso_8859_1 or ascii encodings.\n" .
     68        $self->{'input_encoding'} . " is not an acceptable input_encoding value\n";
     69    }
     70}
     71
     72# this is included only to prevent warnings being printed out
     73# from BasPlug::init. The process_exp is not used by this plugin
     74sub get_default_process_exp {
     75    my $self = shift (@_);
     76
     77    return "This plugin does not use a process_exp\n";
     78}
     79
    5580
    5681sub HB_read_html_file {
     
    6590
    6691    my $foundbody = 0;
    67     $self->HB_gettext (\$foundbody, $text, FILE);
     92    $self->HB_gettext (\$foundbody, $text, "FILE");
    6893    close FILE;
    6994
     
    7297    $foundbody = 1;
    7398    open (FILE, $htmlfile) || return;
    74     $self->HB_gettext (\$foundbody, $text, FILE);   
     99    $self->HB_gettext (\$foundbody, $text, "FILE");
    75100    close FILE;
    76101    }
     
    159184}
    160185
     186# if input_encoding is ascii we can call add_utf8_metadata
     187# directly, but if it's iso_8859_1 (the default) we need to call
     188# add_metadata so that the ascii2utf8 conversion is done first.
     189# this should speed things up a little if processing an ascii only
     190# document with input_encoding set to ascii
     191sub HB_add_metadata {
     192    my $self = shift (@_);
     193    my ($doc_obj, $cursection, $field, $value) = @_;
     194
     195    if ($self->{'input_encoding'} eq "ascii") {
     196    $doc_obj->add_utf8_metadata ($cursection, $field, $value);
     197    } else {
     198    $doc_obj->add_metadata ($cursection, $field, $value);
     199    }
     200}
    161201
    162202# return number of files processed, undef if can't process
     
    192232
    193233    # add metadata for top level of document
    194     foreach $field (keys(%$metadata)) {
     234    foreach my $field (keys(%$metadata)) {
    195235    # $metadata->{$field} may be an array reference
    196236    if (ref ($metadata->{$field}) eq "ARRAY") {
    197237        map {
    198         $doc_obj->add_metadata ($cursection, $field, $_);
     238        $self->HB_add_metadata ($doc_obj, $cursection, $field, $_);
    199239        } @{$metadata->{$field}};
    200240    } else {
    201         $doc_obj->add_metadata ($cursection, $field, $metadata->{$field});
     241        $self->HB_add_metadata ($doc_obj, $cursection, $field, $metadata->{$field});
    202242    }
    203243    }
     
    240280
    241281        # add the metadata to this section
    242         $doc_obj->add_metadata ($cursection, "Title", $title);
     282        $self->HB_add_metadata ($doc_obj, $cursection, "Title", $title);
    243283
    244284        # clean up the section html
     
    251291
    252292        # add the text for this section
    253         $doc_obj->add_text ($cursection, $sectiontext);
    254        
     293        if ($self->{'input_encoding'} eq "ascii") {
     294        $doc_obj->add_utf8_text ($cursection, $sectiontext);
     295        } else {
     296        $doc_obj->add_text ($cursection, $sectiontext);
     297        }
    255298    } else {
    256299        print STDERR "WARNING - leftover text\n" , $self->shorten($html),
Note: See TracChangeset for help on using the changeset viewer.