Ignore:
Timestamp:
2008-06-05T09:26:56+12:00 (16 years ago)
Author:
kjdon
Message:

plugin overhaul: SplitPlug renamed to SplitTextFile, XMLPlug renamed to ReadXMLFile, ConvertToPlug renamed to ConvertBinaryFile. With the exception of BasePlugin, only 'real' plugins (top-level ones) are named xxPlugin.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/SplitTextFile.pm

    r15865 r15871  
    11###########################################################################
    22#
    3 # SplitPlug.pm - a plugin for splitting input files into segments that
     3# SplitTextFile.pm - a plugin for splitting input files into segments that
    44#                will then be individually processed.
    55#
     
    2929
    3030
    31 # SplitPlug is a plugin for splitting input files into segments that will
     31# SplitTextFile is a plugin for splitting input files into segments that will
    3232# then be individually processed. 
    3333
     
    3535# process input files that contain several documents, you should write a
    3636# plugin with a process function that will handle one of those documents
    37 # and have it inherit from SplitPlug.  See ReferPlug for an example.
    38 
    39 
    40 package SplitPlug;
    41 
    42 use BasPlug;
     37# and have it inherit from SplitTextFile.  See ReferPlug for an example.
     38
     39
     40package SplitTextFile;
     41
     42use ReadTextFile;
    4343use gsprintf 'gsprintf';
    4444use util;
     
    4747no strict 'refs'; # allow filehandles to be variables and viceversa
    4848
    49 # SplitPlug is a sub-class of BasPlug.
     49# SplitTextFile is a sub-class of ReadTextFile.
    5050sub BEGIN {
    51     @SplitPlug::ISA = ('BasPlug');
     51    @SplitTextFile::ISA = ('ReadTextFile');
    5252}
    5353
     
    5555my $arguments =
    5656    [ { 'name' => "split_exp",
    57     'desc' => "{SplitPlug.split_exp}",
     57    'desc' => "{SplitTextFile.split_exp}",
    5858    'type' => "regexp",
    5959    #'deft' => &get_default_split_exp(),
     
    6161    'reqd' => "no" } ];
    6262
    63 my $options = { 'name'     => "SplitPlug",
    64         'desc'     => "{SplitPlug.desc}",
     63my $options = { 'name'     => "SplitTextFile",
     64        'desc'     => "{SplitTextFile.desc}",
    6565        'abstract' => "yes",
    6666        'inherits' => "yes",
     
    7373    push(@$pluginlist, $class);
    7474
    75     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    76     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    77 
    78     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     75    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     76    push(@{$hashArgOptLists->{"OptList"}},$options);
     77
     78    my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
    7979
    8080    $self->{'textcat_store'} = {};
     
    8787    my ($verbosity, $outhandle, $failhandle) = @_;
    8888
    89     $self->BasPlug::init($verbosity, $outhandle, $failhandle);
    90 
     89    $self->ReadTextFile::init($verbosity, $outhandle, $failhandle);
     90
     91    # why is this in init and not in new??
    9192    if ((!defined $self->{'process_exp'}) || ($self->{'process_exp'} eq "")) {
    9293
     
    119120    my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;
    120121
     122    # returns 1 if matches process_exp, and has done blocking in the meantime
    121123    my $matched = $self->SUPER::metadata_read($pluginfo, $base_dir, $file,
    122124                          $metadata, $extrametakeys,
     
    146148 
    147149    if ($text !~ /\w/) {
    148         gsprintf($outhandle, "$plugin_name: {BasPlug.file_has_no_text}\n",
     150        gsprintf($outhandle, "$plugin_name: {ReadTextFile.file_has_no_text}\n",
    149151             $file)
    150152        if $self->{'verbosity'};
     
    171173    }
    172174
    173     print $outhandle "SplitPlug found " . (scalar @segments) . " documents in $filename\n"
     175    print $outhandle "SplitTextFile found " . (scalar @segments) . " documents in $filename\n"
    174176        if $self->{'verbosity'};
    175177   
     
    231233    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Encoding", $encoding);
    232234    my ($filemeta) = $file =~ /([^\\\/]+)$/;
    233     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));
     235    $self->set_Source_metadata($doc_obj, $filemeta, $encoding);
    234236    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "SourceSegment", "$segment");
    235237    if ($self->{'cover_image'}) {
Note: See TracChangeset for help on using the changeset viewer.