Ignore:
Timestamp:
2008-06-05T09:26:56+12:00 (16 years ago)
Author:
kjdon
Message:

Plugin overhaul: SplitPlug renamed to SplitTextFile, XMLPlug renamed to ReadXMLFile, ConvertToPlug renamed to ConvertBinaryFile. With the exception of BasePlugin, only 'real' (top-level) plugins are named xxPlugin.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/ReadXMLFile.pm

    r15865 r15871  
    11###########################################################################
    22#
    3 # XMLPlug.pm -- base class for XML plugins
     3# ReadXMLFile.pm -- base class for XML plugins
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2424###########################################################################
    2525
    26 package XMLPlug;
    27 
    28 use BasPlug;
     26package ReadXMLFile;
     27
     28use BasePlugin;
    2929use doc;
    3030use strict;
     
    3232
    3333sub BEGIN {
    34     @XMLPlug::ISA = ('BasPlug');
     34    @ReadXMLFile::ISA = ('BasePlugin');
    3535    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
    3636}
     
    4040my $arguments =
    4141    [ { 'name' => "process_exp",
    42     'desc' => "{BasPlug.process_exp}",
     42    'desc' => "{BasePlugin.process_exp}",
    4343    'type' => "regexp",
    4444    'deft' => &get_default_process_exp(),
    4545    'reqd' => "no" },
    4646      { 'name' => "xslt",
    47     'desc' => "{XMLPlug.xslt}",
     47    'desc' => "{ReadXMLFile.xslt}",
    4848    'type' => "string",
    4949    'deft' => "",
    5050    'reqd' => "no" } ];
    5151
    52 my $options = { 'name'     => "XMLPlug",
    53         'desc'     => "{XMLPlug.desc}",
     52my $options = { 'name'     => "ReadXMLFile",
     53        'desc'     => "{ReadXMLFile.desc}",
    5454        'abstract' => "yes",
    5555        'inherits' => "yes",
     
    6161    push(@$pluginlist, $class);
    6262
    63     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    64     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    65    
    66     # $self is global for use within subroutines called by XML::Parser
    67     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     63    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     64    push(@{$hashArgOptLists->{"OptList"}},$options);
     65   
     66    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
    6867
    6968    if ($self->{'info_only'}) {
    70     # don't worry about any options etc
     69    # don't worry about creating the XML parser as all we want is the
     70    # list of plugin options
    7171    return bless $self, $class;
    7272    }
    7373
    7474    my $parser = new XML::Parser('Style' => 'Stream',
    75                                  'Pkg' => 'XMLPlug',
     75                                 'Pkg' => 'ReadXMLFile',
    7676                                 'PluginObj' => $self,
    7777                 'Handlers' => {'Char' => \&Char,
     
    198198    if (defined $result) {
    199199    # we think we are processing this, but check that we actually are
    200     my $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
     200    my $filename = $self->get_full_filename($base_dir, $file);
    201201
    202202    if ($self->check_doctype($filename)) {
     
    207207}
    208208
     209# we need to implement read cos we are not just using process_exp to determine
     210# whether to process this or not.
    209211sub read {
    210212    my $self = shift (@_); 
     
    213215
    214216    # Make sure we're processing the correct file, do blocking etc
    215     my ($block_status,$filename) = $self->read_block(@_);   
     217    my ($block_status,$filename_full_path) = $self->read_block(@_);   
    216218    return $block_status if ((!defined $block_status) || ($block_status==0));
    217219
    218220    ## check the doctype to see whether we really want to process the file
    219     if (!$self->check_doctype($filename)) {
     221    if (!$self->check_doctype($filename_full_path)) {
    220222    # this file is not for us
    221223    return undef;
     
    225227    $self->{'base_dir'} = $base_dir;
    226228    $self->{'file'} = $file;
    227     $self->{'filename'} = $filename;
     229    $self->{'filename'} = $filename_full_path;
    228230    $self->{'processor'} = $processor;
    229231    $self->{'metadata'} = $metadata;
     
    233235    if (defined $xslt && ($xslt ne "")) {
    234236        # perform xslt
    235         my $transformed_xml = $self->apply_xslt($xslt,$filename);
     237        my $transformed_xml = $self->apply_xslt($xslt,$filename_full_path);
    236238
    237239        # feed transformed file (now in memory as string) into XML parser
     
    239241    }
    240242    else {
    241         $self->{'parser'}->parsefile($filename);
     243        $self->{'parser'}->parsefile($filename_full_path);
    242244    }
    243245    };
     
    246248
    247249    # parsefile may either croak somewhere in XML::Parser (e.g. because
    248     # the document is not well formed) or die somewhere in XMLPlug or a
     250    # the document is not well formed) or die somewhere in ReadXMLFile or a
    249251    # derived plugin (e.g. because we're attempting to process a
    250252    # document whose DOCTYPE is not meant for this plugin). For the
     
    271273}
    272274
    273 # the following two methods are for if you want to do the parsing from a
    274 # plugin that inherits from this. it seems that you can't call the parse
    275 # methods directly. WHY???
    276 #
    277 # [Stefan 27/5/07] These two methods may not be necessary any more as I've
    278 # fixed XMLPlug so $self is no longer required to be a global variable
    279 # (that was why inheritance wasn't working quite right with XMLPlug I
    280 # think). I don't really know what other plugins rely on these methods
    281 # though so have left them here for now.
    282 sub parse_file {
    283     my $self = shift (@_);
    284     my ($filename) = @_;
    285     $self->{'parser'}->parsefile($filename);
    286 }
    287 
    288 sub parse_string {
    289     my $self = shift (@_);
    290     my ($xml_string) = @_;
    291     $self->{'parser'}->parse($xml_string);
    292 }
    293275
    294276sub get_default_process_exp {
     
    344326
    345327    my ($expat, $name, $sysid, $pubid, $internal) = @_;
    346     die "XMLPlug Cannot process XML document with DOCTYPE of $name";
     328    die "ReadXMLFile Cannot process XML document with DOCTYPE of $name";
    347329}
    348330
     
    395377    $self->{'doc_obj'} = new doc ($self->{'filename'}, "indexed_doc");
    396378    $self->{'doc_obj'}->set_OIDtype ($self->{'processor'}->{'OIDtype'}, $self->{'processor'}->{'OIDmetadata'});
     379    $self->{'doc_obj'}->add_utf8_metadata($self->{'doc_obj'}->get_top_section(), "Plugin", "$self->{'plugin_type'}");
     380
     381    # do we want other auto metadata here (see BasePlugin.read_into_doc_obj)
    397382}
    398383
     
    400385    my $self = shift(@_);
    401386    my $doc_obj = $self->{'doc_obj'};
     387
     388    # do we want other auto stuff here, see BasePlugin.read_into_doc_obj
     389
    402390    # include any metadata passed in from previous plugins
    403391    # note that this metadata is associated with the top level section
     
    410398   
    411399    # add an OID
    412     $doc_obj->set_OID();
     400    $self->add_OID();
    413401   
    414402    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
     
    419407   
    420408    $self->{'num_processed'} ++;
     409    undef $self->{'doc_obj'};
     410    undef $doc_obj; # is this the same as above??
    421411}
    422412
Note: See TracChangeset for help on using the changeset viewer.