Changeset 15872


Ignore:
Timestamp:
2008-06-05T09:29:32+12:00 (16 years ago)
Author:
kjdon
Message:

plugin overhaul: plugins renamed to xxPlugin, and in some cases the names are made more sensible. They now use the new base plugins. Hopefully we have better code reuse. Some of the plugins still need work done as I didn't want to spend another month doing this before committing it. Also, I haven't really tested anything yet...

Location:
gsdl/trunk/perllib/plugins
Files:
49 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/BibTexPlugin.pm

    r15864 r15872  
    11###########################################################################
    22#
    3 # BibTexPlug.pm - a plugin for bibliography records in BibTex format
     3# BibTexPlugin.pm - a plugin for bibliography records in BibTex format
    44#
    55# A component of the Greenstone digital library software
     
    2727
    2828
    29 # BibTexPlug reads bibliography files in BibTex format.
     29# BibTexPlugin reads bibliography files in BibTex format.
    3030#
    3131# by Gordon W. Paynter ([email protected]), November 2000
    3232# Based on ReferPlug.  See ReferPlug for geneology.
    3333#
    34 # BibTexPlug creates a document object for every reference a the file.
    35 # It is a subclass of SplitPlug, so if there are multiple records, all
     34# BibTexPlugin creates a document object for every reference a the file.
     35# It is a subclass of SplitTextFile, so if there are multiple records, all
    3636# are read.
    3737#
     
    4242
    4343
    44 package BibTexPlug;
    45 
    46 use SplitPlug;
     44package BibTexPlugin;
     45
     46use SplitTextFile;
    4747use strict;
    4848no strict 'refs'; # allow filehandles to be variables and viceversa
    4949
    50 # BibTexPlug is a sub-class of BasPlug.
     50# BibTexPlugin is a sub-class of SplitTextFile.
    5151sub BEGIN {
    52     @BibTexPlug::ISA = ('SplitPlug');
     52    @BibTexPlugin::ISA = ('SplitTextFile');
    5353}
    5454
    5555my $arguments =
    5656    [ { 'name' => "process_exp",
    57     'desc' => "{BasPlug.process_exp}",
     57    'desc' => "{BasePlugin.process_exp}",
    5858    'type' => "regexp",
    5959    'reqd' => "no",
    6060    'deft' => &get_default_process_exp() },
    6161      { 'name' => "split_exp",
    62     'desc' => "{SplitPlug.split_exp}",
     62    'desc' => "{SplitTextFile.split_exp}",
    6363    'type' => "regexp",
    6464    'deft' => &get_default_split_exp(),
     
    6666      ];
    6767
    68 my $options = { 'name'     => "BibTexPlug",
    69         'desc'     => "{BibTexPlug.desc}",
     68my $options = { 'name'     => "BibTexPlugin",
     69        'desc'     => "{BibTexPlugin.desc}",
    7070        'abstract' => "no",
    7171        'inherits' => "yes",
     
    8282    return q^\n+(?=@)^;
    8383}
     84
    8485sub new {
    8586    my ($class) = shift (@_);
     
    8788    push(@$pluginlist, $class);
    8889
    89     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    90     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    91 
    92     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists);
     90    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     91    push(@{$hashArgOptLists->{"OptList"}},$options);
     92
     93    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists);
    9394
    9495    return bless $self, $class;
     
    115116
    116117    # Report that we're processing the file
    117     print STDERR "<Processing n='$file' p='BibTexPlug'>\n" if ($gli);
    118     print $outhandle "BibTexPlug: processing $file\n"
     118    print STDERR "<Processing n='$file' p='BibTexPlugin'>\n" if ($gli);
     119    print $outhandle "BibTexPlugin: processing $file\n"
    119120    if ($self->{'verbosity'}) > 1;
    120121
     
    312313            $vonlast=shift @parts;
    313314            if (scalar(@parts) > 0) {
    314             print $outhandle "BibTexPlug: couldn't parse name $a\n";
     315            print $outhandle "BibTexPlugin: couldn't parse name $a\n";
    315316            # but we continue anyway...
    316317            }
     
    331332            # some non-English names do start with lowercase
    332333            # eg "Marie desJardins". Also we can get typos...
    333             print $outhandle "BibTexPlug: couldn't parse surname $vonlast\n";
     334            print $outhandle "BibTexPlugin: couldn't parse surname $vonlast\n";
    334335            $von="";
    335336            if ($vonlast =~ /^[a-z]+$/) {
     
    724725        my $replacement=$utf8_chars{$tex};
    725726        if (!defined($replacement)) {
    726         print STDERR "BibTexPlug: Warning: unknown latex accent \"$tex\" in \"$text\"\n";
     727        print STDERR "BibTexPlugin: Warning: unknown latex accent \"$tex\" in \"$text\"\n";
    727728        $replacement=$char;
    728729        }
     
    737738        my $replacement=$special_utf8_chars{$tex};
    738739        if (!defined($replacement)) {
    739         print STDERR "BibTexPlug: Warning: unknown latex accent \"$tex\" in \"$text\"\n";
     740        print STDERR "BibTexPlugin: Warning: unknown latex accent \"$tex\" in \"$text\"\n";
    740741        $replacement=$tex;
    741742        }
     
    749750          my $replacement=$special_utf8_chars{$tex};
    750751          if (!defined($replacement)) {
    751           print STDERR "BibTexPlug: Warning: unknown latex accent \"$tex\" in \"$text\"\n";
     752          print STDERR "BibTexPlugin: Warning: unknown latex accent \"$tex\" in \"$text\"\n";
    752753          $replacement=$char;
    753754      }
  • gsdl/trunk/perllib/plugins/BookPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # BookPlug.pm (formally called HBSPlug) -- plugin for processing simple
     3# BookPlugin.pm (formally called HBSPlug) -- plugin for processing simple
    44# html (or text) books
    55#
     
    4040# taken as the cover image (jpg files are blocked by this plugin)
    4141
    42 # BookPlug is a simplification (and extension) of the HBPlug used
    43 # by the Humanity Library collections. BookPlug is faster as it expects
     42# BookPlugin is a simplification (and extension) of the HBPlug used
     43# by the Humanity Library collections. BookPlugin is faster as it expects
    4444# the input files to be cleaner (The input to the HDL collections
    4545# contains lots of excess html tags around <<TOC>> tags, uses <<I>>
     
    4949# use this plugin instead of HBPlug.
    5050
    51 # 12/05/02 Added usage datastructure - John Thompson
    52 
    53 package BookPlug;
    54 
    55 use BasPlug;
     51package BookPlugin;
     52
     53use AutoExtractMetadata;
    5654use util;
    5755use strict;
     
    5957
    6058sub BEGIN {
    61     @BookPlug::ISA = ('BasPlug');
     59    @BookPlugin::ISA = ('AutoExtractMetadata');
    6260}
    6361
    6462my $arguments =
    6563    [ { 'name' => "process_exp",
    66     'desc' => "{BasPlug.process_exp}",
     64    'desc' => "{BasePlugin.process_exp}",
    6765    'type' => "regexp",
    6866    'reqd' => "no",
    6967    'deft' => &get_default_process_exp() },
    7068      { 'name' => "block_exp",
    71     'desc' => "{BasPlug.block_exp}",
     69    'desc' => "{BasePlugin.block_exp}",
    7270    'type' => "regexp",
    7371    'reqd' => "no",
    7472    'deft' => &get_default_block_exp() } ];
    7573
    76 my $options = { 'name'     => "BookPlug",
    77         'desc'     => "{BookPlug.desc}",
     74my $options = { 'name'     => "BookPlugin",
     75        'desc'     => "{BookPlugin.desc}",
    7876        'abstract' => "no",
    7977        'inherits' => "yes",
     
    8583    push(@$pluginlist, $class);
    8684
    87     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    88     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    89 
    90     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     85    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     86    push(@{$hashArgOptLists->{"OptList"}},$options);
     87
     88    my $self = new AutoExtractMetadata($pluginlist, $inputargs, $hashArgOptLists);
    9189
    9290    return bless $self, $class;
     
    111109    my $outhandle = $self->{'outhandle'};
    112110
    113     print STDERR "<Processing n='$file' p='BookPlug'>\n" if ($gli);
    114     print $outhandle "BookPlug: processing $file\n"
     111    print STDERR "<Processing n='$file' p='BookPlugin'>\n" if ($gli);
     112    print $outhandle "BookPlugin: processing $file\n"
    115113    if $self->{'verbosity'} > 1;
    116114   
     
    211209    if ($imagetype eq "jpg") {$imagetype = "jpeg";}
    212210    if ($imagetype !~ /^(jpeg|gif|png)$/) {
    213     print $outhandle "BookPlug: Warning - unknown image type ($imagetype)\n";
     211    print $outhandle "BookPlugin: Warning - unknown image type ($imagetype)\n";
    214212    }
    215213    my ($imagefile) = $link =~ /([^\/]*)$/;
     
    223221        $foundimage = 1;
    224222    } else {
    225         $error = "BookPlug: Warning - couldn't find image file $imagefile in either $filename or";
     223        $error = "BookPlugin: Warning - couldn't find image file $imagefile in either $filename or";
    226224    }
    227225    }
     
    235233        print $outhandle "$error $filename\n";
    236234    } else {
    237         print $outhandle "BookPlug: Warning - couldn't find image file $imagefile in $filename\n";
     235        print $outhandle "BookPlugin: Warning - couldn't find image file $imagefile in $filename\n";
    238236    }
    239237    }
  • gsdl/trunk/perllib/plugins/CONTENTdmPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # CONTENTdmPlug.pm -- reasonably with-it pdf plugin
     3# CONTENTdmPlugin.pm -- reasonably with-it pdf plugin
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2323#
    2424###########################################################################
    25 package CONTENTdmPlug;
    26 
    27 use ConvertToPlug;
     25package CONTENTdmPlugin;
     26
     27use ConvertBinaryFile;
     28use ReadXMLFile;
     29
    2830use unicode;
    2931use ghtml;
     
    3537use XMLParser;
    3638
     39# inherit ReadXMLFile for the apply_xslt method
    3740sub BEGIN {
    38     @CONTENTdmPlug::ISA = ('ConvertToPlug');
     41    @CONTENTdmPlugin::ISA = ('ConvertBinaryFile', 'ReadXMLFile');
    3942}
    4043
     
    4245my $convert_to_list =
    4346    [ { 'name' => "auto",
    44     'desc' => "{ConvertToPlug.convert_to.auto}" },
     47    'desc' => "{ConvertBinaryFile.convert_to.auto}" },
    4548      { 'name' => "html",
    46     'desc' => "{ConvertToPlug.convert_to.html}" },
     49    'desc' => "{ConvertBinaryFile.convert_to.html}" },
    4750      { 'name' => "text",
    48     'desc' => "{ConvertToPlug.convert_to.text}" },
     51    'desc' => "{ConvertBinaryFile.convert_to.text}" },
    4952      { 'name' => "pagedimg",
    50     'desc' => "{ConvertToPlug.convert_to.pagedimg}"},
     53    'desc' => "{ConvertBinaryFile.convert_to.pagedimg}"},
    5154      ];
    5255
     
    5659      [
    5760       { 'name' => "convert_to",
    58     'desc' => "{ConvertToPlug.convert_to}",
     61    'desc' => "{ConvertBinaryFile.convert_to}",
    5962    'type' => "enum",
    6063    'reqd' => "yes",
     
    6265    'deft' => "html" }, 
    6366      { 'name' => "xslt",
    64     'desc' => "{XMLPlug.xslt}",
     67    'desc' => "{ReadXMLFile.xslt}",
    6568    'type' => "string",
    6669    'deft' => "",
    6770    'reqd' => "no" },
    6871       { 'name' => "process_exp",
    69     'desc' => "{BasPlug.process_exp}",
     72    'desc' => "{BasePlugin.process_exp}",
    7073    'type' => "regexp",
    7174    'deft' => &get_default_process_exp(),
    7275    'reqd' => "no" },
    7376      { 'name' => "block_exp",
    74     'desc' => "{BasPlug.block_exp}",
     77    'desc' => "{BasePlugin.block_exp}",
    7578    'type' => "regexp",
    7679    'deft' => &get_default_block_exp() }
    7780];
    7881
    79 my $options = { 'name'     => "CONTENTdmPlug",
    80         'desc'     => "{CONTENTdmPlug.desc}",
     82my $options = { 'name'     => "CONTENTdmPlugin",
     83        'desc'     => "{CONTENTdmPlugin.desc}",
    8184        'abstract' => "no",
    8285        'inherits' => "yes",
    83         # CONTENTdmPlug is one of the few ConvertToPlug subclasses whose source doc can't be replaced by a GS-generated html
     86        # CONTENTdmPlugin is one of the few ConvertBinaryFile subclasses whose source doc can't be replaced by a GS-generated html
    8487        'srcreplaceable' => "no",
    8588        'args'     => $arguments };
    86 
    87 our ($self);
    8889
    8990sub new {
     
    9596    push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
    9697
    97     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    98     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     98    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     99    push(@{$hashArgOptLists->{"OptList"}},$options);
    99100
    100101    my @arg_array = @$inputargs;
    101     $self = (defined $hashArgOptLists)? new ConvertToPlug($pluginlist,$inputargs,$hashArgOptLists): new ConvertToPlug($pluginlist,$inputargs);
     102    my $self = new ConvertBinaryFile($pluginlist,$inputargs,$hashArgOptLists);
    102103   
    103104    if ($self->{'info_only'}) {
     
    107108
    108109    my $parser = new XML::Parser('Style' => 'Stream',
     110                 'Pkg' => 'CONTENTdmPlugin',
     111                 'PluginObj' => $self,
    109112                 'Handlers' => {'Char' => \&Char,
    110113                        'XMLDecl' => \&XMLDecl,
     
    119122    $self->{'metadata_value'} = undef;
    120123
    121     $self->{'convert_to'} = "PagedImg";
     124    $self->{'convert_to'} = "PagedImage";
    122125    my $secondary_plugin_options = $self->{'secondary_plugin_options'};
    123126
    124     if (!defined $secondary_plugin_options->{'PagedImgPlug'}){
    125     $secondary_plugin_options->{'PagedImgPlug'} = [];
    126     my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'};
    127     push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    128     push(@$pagedimg_options, "-thumbnail", "-screenview");
    129     }
     127    if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){
     128    $secondary_plugin_options->{'PagedImagePlugin'} = [];
     129    }
     130    my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
     131    push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
     132    push(@$pagedimg_options, "-thumbnail", "-screenview");
     133
    130134
    131135    $self = bless $self, $class;
     
    140144}
    141145
    142 # so we don't inherit HTMLPlug's block exp...
    143146sub get_default_block_exp {
    144147    return q^(?i)\.(jpg|jpeg|gif)$^;
    145 }
    146 
    147 
    148 
    149 
    150 # A smarter (?) option would be to add XMLPlug into inheritence above
    151 # thereby avoiding a fair amount of code duplication
    152 
    153 sub apply_xslt
    154 {
    155     my $self = shift @_;
    156     my ($xslt,$filename) = @_;
    157    
    158     my $outhandle = $self->{'outhandle'};
    159 
    160     my $xslt_filename = $xslt;
    161 
    162     if (! -e $xslt_filename) {
    163     # Look in main site directory
    164     my $gsdlhome = $ENV{'GSDLHOME'};
    165     $xslt_filename = &util::filename_cat($gsdlhome,$xslt);
    166     }
    167 
    168     if (! -e $xslt_filename) {
    169     # Look in collection directory
    170     my $coldir = $ENV{'GSDLCOLLECTDIR'};
    171     $xslt_filename = &util::filename_cat($coldir,$xslt);
    172     }
    173 
    174     if (! -e $xslt_filename) {
    175     print $outhandle "Warning: Unable to find XSLT $xslt\n";
    176     if (open(XMLIN,"<$filename")) {
    177 
    178         my $untransformed_xml = "";
    179         while (defined (my $line = <XMLIN>)) {
    180 
    181         $untransformed_xml .= $line;
    182         }
    183         close(XMLIN);
    184        
    185         return $untransformed_xml;
    186     }
    187     else {
    188         print $outhandle "Error: Unable to open file $filename\n";
    189         print $outhandle "       $!\n";
    190         return "";
    191     }
    192    
    193     }
    194 
    195     my $bin_java = &util::filename_cat($ENV{'GSDLHOME'},"bin","java");
    196     my $jar_filename = &util::filename_cat($bin_java,"xalan.jar");
    197     my $xslt_base_cmd = "java -jar $jar_filename";
    198     my $xslt_cmd = "$xslt_base_cmd -IN \"$filename\" -XSL \"$xslt_filename\"";
    199 
    200     my $transformed_xml = "";
    201 
    202     if (open(XSLT_IN,"$xslt_cmd |")) {
    203     while (defined (my $line = <XSLT_IN>)) {
    204 
    205         $transformed_xml .= $line;
    206     }
    207     close(XSLT_IN);
    208     }
    209     else {
    210     print $outhandle "Error: Unable to run command $xslt_cmd\n";
    211     print $outhandle "       $!\n";
    212     }
    213 
    214     return $transformed_xml;
    215 
    216148}
    217149
     
    523455
    524456
    525 # Override ConvertToPlug tmp_area_convert_file() to provide solution specific
     457# Override ConvertBinaryFile tmp_area_convert_file() to provide solution specific
    526458# to CONTENTdm
    527459#
     
    612544    $self->{'converted_to'} = "HTML";
    613545    } elsif ($output_type =~ /te?xt/i) {
    614     $self->{'converted_to'} = "TEXT";
     546    $self->{'converted_to'} = "Text";
    615547    } elsif ($output_type =~ /item/i){
    616     $self->{'converted_to'} = "PagedImg";
     548    $self->{'converted_to'} = "PagedImage";
    617549    }
    618550
     
    624556
    625557
    626 # Override ConvertToPlug read
     558# Override ConvertBinaryFile read
    627559# Needed so multiple .item files generate are sent down secondary plugin
    628560
     
    640572    my ($block_status,$filename) = $self->read_block(@_);
    641573    return $block_status if ((!defined $block_status) || ($block_status==0));
    642     $file = $self->read_tidy_file($file);
    643    
    644     # read() deviates at this point from ConvertToPlug
     574    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
     575       
     576    # read() deviates at this point from ConvertBinaryFile
    645577    # Need to work with list of filename returned
    646578
     
    693625   
    694626    my ($filemeta) = $file =~ /([^\\\/]+)$/;
    695     $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));
     627    $self->set_Source_metadata($doc_obj, $filemeta);
    696628    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
    697629    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename));
     
    713645
    714646    # add an OID
    715     $doc_obj->set_OID();
     647    $self->add_OID($doc_obj);
    716648    # process the document
    717649    $processor->process($doc_obj);
     
    723655}
    724656
    725 
    726 
    727657sub process {
    728     my $self = shift (@_);
    729     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
     658
     659}
     660# do we need this? sec pluginn process would have already been called as part of read_into_doc_obj??
     661sub process_old {
     662    my $self = shift (@_);
     663    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    730664
    731665   
     
    741675}
    742676
    743 
    744 
    745 
    746 sub StartDocument {$self->xml_start_document(@_);}
    747 sub XMLDecl {$self->xml_xmldecl(@_);}
    748 sub Entity {$self->xml_entity(@_);}
    749 sub Doctype {$self->xml_doctype(@_);}
    750 sub StartTag {$self->xml_start_tag(@_);}
    751 sub EndTag {$self->xml_end_tag(@_);}
    752 sub Text {$self->xml_text(@_);}
    753 sub PI {$self->xml_pi(@_);}
    754 sub EndDocument {$self->xml_end_document(@_);}
    755 sub Default {$self->xml_default(@_);}
    756 
    757 # This Char function overrides the one in XML::Parser::Stream to overcome a
    758 # problem where $expat->{Text} is treated as the return value, slowing
    759 # things down significantly in some cases.
    760 sub Char {
    761     use bytes;  # Necessary to prevent encoding issues with XML::Parser 2.31+
    762     $_[0]->{'Text'} .= $_[1];
    763     return undef;
    764 }
    765677
    766678# Called at the beginning of the XML document.
     
    772684}
    773685
    774 # Called for XML declarations
    775 sub xml_xmldecl {
    776     my $self = shift(@_);
    777     my ($expat, $version, $encoding, $standalone) = @_;
    778 }
    779 
    780 # Called for XML entities
    781 sub xml_entity {
    782   my $self = shift(@_);
    783   my ($expat, $name, $val, $sysid, $pubid, $ndata) = @_;
    784 }
    785686
    786687# Called for DOCTYPE declarations - use die to bail out if this doctype
     
    793694
    794695    my $outhandle = $self->{'outhandle'};
    795     print $outhandle "CONTENTdmPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
     696    print $outhandle "CONTENTdmPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
    796697
    797698}
     
    873774}
    874775
    875 # Called for processing instructions. The $_ variable will contain a copy
    876 # of the pi.
    877 sub xml_pi {
    878     my $self = shift(@_);
    879     my ($expat, $target, $data) = @_;
    880 }
    881 
    882776# Called at the end of the XML document.
    883777sub xml_end_document {
     
    887781}
    888782
    889 # Called for any characters not handled by the above functions.
    890 sub xml_default {
    891     my $self = shift(@_);
    892     my ($expat, $text) = @_;
    893 }
    894 
    895783
    8967841;
  • gsdl/trunk/perllib/plugins/CSVPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # CSVPlug.pm -- A plugin for files in comma-separated value format
     3# CSVPlugin.pm -- A plugin for files in comma-separated value format
    44#
    55# A component of the Greenstone digital library software
     
    2525###########################################################################
    2626
    27 package CSVPlug;
     27package CSVPlugin;
    2828
    2929
    30 use SplitPlug;
     30use SplitTextFile;
    3131use strict;
    3232no strict 'refs'; # allow filehandles to be variables and viceversa
    3333
    3434
    35 # CSVPlug is a sub-class of SplitPlug.
     35# CSVPlugin is a sub-class of SplitTextFile.
    3636sub BEGIN {
    37     @CSVPlug::ISA = ('SplitPlug');
     37    @CSVPlugin::ISA = ('SplitTextFile');
    3838}
    3939
     
    4141my $arguments =
    4242    [ { 'name' => "process_exp",
    43     'desc' => "{BasPlug.process_exp}",
     43    'desc' => "{BasePlugin.process_exp}",
    4444    'type' => "regexp",
    4545    'reqd' => "no",
    4646    'deft' => &get_default_process_exp() },
    4747      { 'name' => "split_exp",
    48     'desc' => "{SplitPlug.split_exp}",
     48    'desc' => "{SplitTextFile.split_exp}",
    4949    'type' => "regexp",
    5050    'reqd' => "no",
     
    5454
    5555
    56 my $options = { 'name'     => "CSVPlug",
    57         'desc'     => "{CSVPlug.desc}",
     56my $options = { 'name'     => "CSVPlugin",
     57        'desc'     => "{CSVPlugin.desc}",
    5858        'abstract' => "no",
    5959        'inherits' => "yes",
     
    8080    push(@$pluginlist, $class);
    8181
    82     if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});}
    83     if (defined $options) { push(@{$hashArgOptLists->{"OptList"}}, $options)};
     82    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
     83    push(@{$hashArgOptLists->{"OptList"}}, $options);
    8484
    85     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists);
     85    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists);
    8686
    8787    return bless $self, $class;
     
    9898    open(FILE, $filename);
    9999    my $reader = new multiread();
    100     $reader->set_handle('CSVPlug::FILE');
     100    $reader->set_handle('CSVPlugin::FILE');
    101101    $reader->set_encoding($encoding);
    102102    $reader->read_file($textref);
     
    144144
    145145    # Report that we're processing the file
    146     print STDERR "\n<Processing n='$file' p='CSVPlug'>\n" if ($gli);
    147     print $outhandle "CSVPlug: processing $file\n" if ($self->{'verbosity'}) > 1;
     146    print STDERR "\n<Processing n='$file' p='CSVPlugin'>\n" if ($gli);
     147    print $outhandle "CSVPlugin: processing $file\n" if ($self->{'verbosity'}) > 1;
    148148
    149149    # Add the raw line as the document text
  • gsdl/trunk/perllib/plugins/ConvertToRogPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # ConvertToRogPlug.pm -- plugin that inherits from RogPlug
     3# ConvertToRogPlugin.pm -- plugin that inherits from RogPlugin
    44#
    55# A component of the Greenstone digital library software
     
    2626
    2727
    28 package ConvertToRogPlug;
    29 
    30 use BasPlug;
    31 use RogPlug;
     28package ConvertToRogPlugin;
     29
     30use RogPlugin;
    3231use strict;
    3332no strict 'refs'; # allow filehandles to be variables and viceversa
    3433
    3534sub BEGIN {
    36     @ConvertToRogPlug::ISA = ('RogPlug');
     35    @ConvertToRogPlugin::ISA = ('RogPlugin');
    3736}
    3837
    3938my $arguments = [
    4039         ];
    41 my $options = { 'name'     => "ConvertToRogPlug",
    42         'desc'     => "{ConvertToRogPlug.desc}",
     40my $options = { 'name'     => "ConvertToRogPlugin",
     41        'desc'     => "{ConvertToRogPlugin.desc}",
    4342        'abstract' => "yes",
    4443        'inherits' => "yes" };
     
    4948    push(@$pluginlist, $class);
    5049
    51     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    52     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    53 
    54     my $self = new RogPlug($pluginlist, $inputargs, $hashArgOptLists);
     50    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     51    push(@{$hashArgOptLists->{"OptList"}},$options);
     52
     53    my $self = new RogPlugin($pluginlist, $inputargs, $hashArgOptLists);
    5554
    5655    $self->{'convert_to'} = "Rog";
     
    171170
    172171
    173 # Exact copy of read_rog_record from RogPlug
     172# Exact copy of read_rog_record from RogPlugin
    174173# Needed for FILE in right scope
    175174
     
    271270}
    272271
    273 # Override RogPlug function so rog files are stored as sections (not docs)
     272# Override RogPlugin function so rog files are stored as sections (not docs)
    274273
    275274sub process_rog_record
     
    307306
    308307
    309 # Override BasPlug read
     308# Override BasePlugin read
    310309# We don't want to get language encoding stuff until after we've converted
    311310# our file to Rog format
     
    347346    $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
    348347    my ($filemeta) = $file =~ /([^\\\/]+)$/;
    349     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));
     348    $self->set_Source_metadata($doc_obj, $filemeta);
     349   
    350350    if ($self->{'cover_image'}) {
    351351    $self->associate_cover_image($doc_obj, $filename);
     
    436436    my $ret_val = 1;   
    437437
    438 #   $ret_val = &RogPlug::process($self, $textref, $pluginfo,
     438#   $ret_val = &RogPlugin::process($self, $textref, $pluginfo,
    439439#                $tmp_dirname, $tmp_tailname,
    440440#                $metadata, $doc_obj);
  • gsdl/trunk/perllib/plugins/DBPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # DBPlug.pm -- plugin to import records from a database
     3# DBPlugin.pm -- plugin to import records from a database
    44#
    55# A component of the Greenstone digital library software
     
    3434# Mar, Apr 2003
    3535
    36 package DBPlug;
     36package DBPlugin;
    3737
    3838use strict;
    3939no strict 'refs'; # allow variable as a filehandle
    4040
    41 use BasPlug;
     41use AutoExtractMetadata;
    4242use unicode;
    4343
    44 #use DBI; # database independent stuff
    45 
    4644sub BEGIN {
    47     @DBPlug::ISA = ('BasPlug');
     45    @DBPlugin::ISA = ('AutoExtractMetadata');
    4846}
    4947
    5048my $arguments =
    5149    [ { 'name' => "process_exp",
    52     'desc' => "{BasPlug.process_exp}",
     50    'desc' => "{AutoExtractMetadata.process_exp}",
    5351    'type' => "regexp",
    5452    'deft' => &get_default_process_exp(),
    5553    'reqd' => "no" }];
    5654
    57 my $options = { 'name'     => "DBPlug",
    58         'desc'     => "{DBPlug.desc}",
     55my $options = { 'name'     => "DBPlugin",
     56        'desc'     => "{DBPlugin.desc}",
    5957        'abstract' => "no",
    6058        'inherits' => "yes",
     
    6664    push(@$pluginlist, $class);
    6765
    68     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    69     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    70 
    71     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     66    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     67    push(@{$hashArgOptLists->{"OptList"}},$options);
     68
     69    my $self = new AutoExtractMetadata($pluginlist, $inputargs, $hashArgOptLists);
    7270
    7371    return bless $self, $class;
     
    7876
    7977    return q^(?i)\.dbi$^;
    80 }
    81 # we don't have a per-greenstone document process() function!
    82 sub process {
    83 
    8478}
    8579
     
    9589    my $verbosity = $self->{'verbosity'};
    9690
    97     print $outhandle "DBPlug: processing $file\n"
     91    print $outhandle "DBPlugin: processing $file\n"
    9892    if $self->{'verbosity'} > 1;
    9993   
     
    114108    my $db=undef;
    115109
    116 # get id of pages from "nonempty", get latest version number from "recent", and
    117 # then get pagename from "page" and content from "version" !
     110    # get id of pages from "nonempty", get latest version number from
     111    # "recent", and then get pagename from "page" and content from "version" !
    118112
    119113    my $sql_query_prime = undef ;
     
    126120    # read in config file.
    127121    if (!open (CONF, $filename)) {
    128         print $outhandle "DBPlug: can't read $filename: $!\n";
     122        print $outhandle "DBPlugin: can't read $filename: $!\n";
    129123        return 0;
    130124    }
     
    145139        $callback =~ /[\`]|\|\-/) {
    146140        # no backticks or functions that start new processes allowed
    147         print $outhandle "DBPlug: bad function in callback\n";
     141        print $outhandle "DBPlugin: bad function in callback\n";
    148142        return 0;
    149143        }
     
    152146        my $ret = eval "\$callbacks{'$fieldname'} = $callback ; 1";
    153147        if (!defined($ret)) {
    154         print $outhandle "DBPlug: error eval'ing callback: $@\n";
     148        print $outhandle "DBPlugin: error eval'ing callback: $@\n";
    155149        exit(1);
    156150        }
    157151        $callback="";
    158         print $outhandle "DBPlug: callback registered for '$fieldname'\n"
     152        print $outhandle "DBPlugin: callback registered for '$fieldname'\n"
    159153            if $dbplug_debug;
    160154    } elsif ($callback) {
     
    176170            chomp $err;
    177171            $err =~ s/\.$//; # remove a trailing .
    178             print $outhandle "DBPlug: error evaluating `$statement'\n";
     172            print $outhandle "DBPlugin: error evaluating `$statement'\n";
    179173            print $outhandle " $err (in $filename)\n";
    180174            return 0; # there was an error reading the config file
     
    185179        $statement = "";
    186180        } else {
    187         print $outhandle "DBPlug: skipping statement `$statement'\n";
     181        print $outhandle "DBPlugin: skipping statement `$statement'\n";
    188182        }
    189183        $statement = "";
     
    194188   
    195189    if (!defined($db)) {
    196     print $outhandle "DBPlug: error: $filename does not specify a db!\n";
     190    print $outhandle "DBPlugin: error: $filename does not specify a db!\n";
    197191    return 0;
    198192    }
    199193    if (!defined($sql_query)) {
    200         print $outhandle "DBPlug: error: no SQL query specified!\n";
     194        print $outhandle "DBPlugin: error: no SQL query specified!\n";
    201195    return 0;
    202196    }
     
    205199
    206200    if (!defined($dbhandle)) {
    207     die "DBPlug: could not connect to database, exiting.\n";
     201    die "DBPlugin: could not connect to database, exiting.\n";
    208202    }
    209203    if (defined($dbplug_debug) && $dbplug_debug==1) {
    210     print $outhandle "DBPlug (debug): connected ok\n";
     204    print $outhandle "DBPlugin (debug): connected ok\n";
    211205    }
    212206
     
    238232    if (defined($db_to_greenstone_fields{$fieldname})) {
    239233        if (defined($dbplug_debug) && $dbplug_debug==1) {
    240         print $outhandle "DBPlug (debug): mapping db field "
     234        print $outhandle "DBPlugin (debug): mapping db field "
    241235            . "'$fieldname' to "
    242236            . $db_to_greenstone_fields{$fieldname} . "\n";
     
    255249    while (scalar(@row_array)) {
    256250    if (defined($dbplug_debug) && $dbplug_debug==1) {
    257         print $outhandle "DBPlug (debug): retrieved a row from query\n";
     251        print $outhandle "DBPlugin (debug): retrieved a row from query\n";
    258252    }
    259253
     
    263257    my $cursection = $doc_obj->get_top_section();
    264258
    265     # if $language not set in config file, will use BasPlug's default
     259    # if $language not set in config file, will use BasePlugin's default
    266260    if (defined($language)) {
    267261        $doc_obj->add_utf8_metadata($cursection, "Language", $language);
    268262    }
    269     # if $encoding not set in config file, will use BasPlug's default
     263    # if $encoding not set in config file, will use BasePlugin's default
    270264    if (defined($encoding)) {
    271265        # allow some common aliases
     
    274268        $doc_obj->add_utf8_metadata($cursection, "Encoding", $encoding);
    275269    }
    276     $doc_obj->add_utf8_metadata($cursection,
    277                     "Source", &ghtml::dmsafe($db));
     270    $self->set_Source_metadata($doc_obj, $db, $encoding);
     271
    278272    if ($self->{'cover_image'}) {
    279273        $self->associate_cover_image($doc_obj, $filename);
     
    358352    # check "$sth->err" if empty array for error
    359353    if ($statement_hand->err) {
    360     print $outhandle "DBPlug: received error: \"" .
     354    print $outhandle "DBPlugin: received error: \"" .
    361355        $statement_hand->errstr . "\"\n";
    362356    }
     
    370364
    371365    if (defined($dbplug_debug) && $dbplug_debug==1) {
    372         print $outhandle "DBPlug: imported $count DB records as documents.\n";
     366        print $outhandle "DBPlugin: imported $count DB records as documents.\n";
    373367    }
    374368    $count;
  • gsdl/trunk/perllib/plugins/DSpacePlugin.pm

    r15865 r15872  
    22###########################################################################
    33#
    4 # DSpacePlug.pm -- plugin for import the collection from DSpace
     4# DSpacePlugin.pm -- plugin for importing a collection from DSpace
    55#
    66# A component of the Greenstone digital library software
     
    88# University of Waikato, New Zealand.
    99#
    10 # Copyright (C) 1999 New Zealand Digital Library Project
     10# Copyright (C) 2004 New Zealand Digital Library Project
    1111#
    1212# This program is free software; you can redistribute it and/or modify
     
    2626###########################################################################
    2727
    28 # DSpace Plug - 10/2004
    29 #
    30 #
     28
    3129# This plugin takes "contents" and dublin_core.xml file, which contain
    3230# Metadata and lists of associated files for a particular document
     
    4745#
    4846
    49 package DSpacePlug;
    50 
    51 use BasPlug;
     47package DSpacePlugin;
     48
     49use BasePlugin;
    5250use plugin;
    53 #use ghtml;
    5451use XMLParser;
    5552use strict;
     
    5754
    5855sub BEGIN {
    59     @DSpacePlug::ISA = ('BasPlug');
     56    @DSpacePlugin::ISA = ('BasePlugin');
    6057}
    6158
    6259my $arguments =
    6360    [ { 'name' => "process_exp",
    64     'desc' => "{BasPlug.process_exp}",
     61    'desc' => "{BasePlugin.process_exp}",
    6562    'type' => "string",
    6663    'deft' => &get_default_process_exp(),
    6764    'reqd' => "no" },
    6865      { 'name' => "only_first_doc",
    69     'desc' => "{DSpacePlug.only_first_doc}",
     66    'desc' => "{DSpacePlugin.only_first_doc}",
    7067    'type' => "flag",
    7168    'reqd' => "no" },
    7269      { 'name' => "first_inorder_ext",
    73     'desc' => "{DSpacePlug.first_inorder_ext}",
     70    'desc' => "{DSpacePlugin.first_inorder_ext}",
    7471    'type' => "string",
    7572    'reqd' => "no" },
    7673      { 'name' => "first_inorder_mime",
    77     'desc' => "{DSpacePlug.first_inorder_mime}",
     74    'desc' => "{DSpacePlugin.first_inorder_mime}",
    7875    'type' => "flag",
    7976    'reqd' => "no" },
    8077      { 'name' => "block_exp",
    81     'desc' => "{BasPlug.block_exp}",
     78    'desc' => "{BasePlugin.block_exp}",
    8279    'type' => "regexp",
    8380    'deft' => &get_default_block_exp(),
     
    8582
    8683
    87 my $options = { 'name'     => "DSpacePlug",
    88         'desc'     => "{DSpacePlug.desc}",
     84my $options = { 'name'     => "DSpacePlugin",
     85        'desc'     => "{DSpacePlugin.desc}",
    8986        'inherits' => "yes",
    9087        'abstract' => "no",
     
    104101    push(@$pluginlist, $class);
    105102
    106     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    107     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    108 
    109     $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
    110    
     103    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     104    push(@{$hashArgOptLists->{"OptList"}},$options);
     105
     106    $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
     107   
     108    if ($self->{'info_only'}) {
     109    # don't worry about creating the XML parser as all we want is the
     110    # list of plugin options
     111    return bless $self, $class;
     112    }
     113
    111114    #create XML::Parser object for parsing dublin_core.xml files
    112115    my $parser = new XML::Parser('Style' => 'Stream',
     
    252255    }
    253256   
    254     print $outhandle "DSpacePlug: extracting metadata from $file\n"
     257    print $outhandle "DSpacePlugin: extracting metadata from $file\n"
    255258    if $self->{'verbosity'} > 1;
    256259   
     
    262265   
    263266    if ($@) {
    264     die "DSpacePlug: ERROR $filename is not a well formed dublin_core.xml file ($@)\n";
     267    die "DSpacePlugin: ERROR $filename is not a well formed dublin_core.xml file ($@)\n";
    265268    }
    266269
     
    287290
    288291    # Temporarily store associate file info in metadata table
    289     # This will be removed in 'extra_metadata' in BasPlug and used
     292    # This will be removed in 'extra_metadata' in BasePlugin and used
    290293    # to perform the actual file association (once the doc obj has
    291294    # been formed
     
    313316
    314317
    315 # The DSpacePlug read() function. This function does all the right things
    316 # to make general options work for a given plugin. It calls the process()
    317 # function which does all the work specific to a plugin (like the old
    318 # read functions used to do). Most plugins should define their own
    319 # process() function and let this read() function keep control.
    320 #
    321 # DSpace overrides read() because there is no need to read the actual
    322 # text of the file in, because the contents of the file is not text...
    323 #
    324 # Return number of files processed, undef if can't process
    325 # Note that $base_dir might be "" and that $file might
    326 # include directories
    327 
     318# The DSpacePlugin read() function. We are not actually reading any documents
     319# here, just blocking ones that have been processed by metadata read.
     320#
     321# Returns 0 for a file it is blocking, undef for any other file
    328322sub read {
    329323    my $self = shift (@_);
     
    340334    return 0 if (defined $self->{'extra_blocks'}->{$filename});
    341335    return undef;
    342 }
    343 
    344 # do plugin specific processing of doc_obj
    345 sub process {
    346     my $self = shift (@_);
    347     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
    348     my $outhandle = $self->{'outhandle'};
    349    
    350     return 1;
    351336}
    352337
  • gsdl/trunk/perllib/plugins/EmailPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # EMAILPlug.pm - a plugin for parsing email files
     3# EmailPlugin.pm - a plugin for parsing email files
    44#
    55# A component of the Greenstone digital library software
     
    2727
    2828
    29 # EMAILPlug
     29# EmailPlugin
    3030#
    3131# by Gordon Paynter ([email protected])
     
    6363
    6464# 12/05/02 Added usage datastructure - John Thompson
    65 package EMAILPlug;
     65package EmailPlugin;
    6666
    6767use strict;
     
    6969
    7070
    71 use SplitPlug;
     71use SplitTextFile;
    7272use unicode;  # gs conv functions
    7373use gsprintf 'gsprintf'; # translations
     
    7777
    7878sub BEGIN {
    79     @EMAILPlug::ISA = ('SplitPlug');
     79    @EmailPlugin::ISA = ('SplitTextFile');
    8080}
    8181
     
    8383my $arguments =
    8484    [ { 'name' => "process_exp",
    85     'desc' => "{BasPlug.process_exp}",
     85    'desc' => "{BasePlugin.process_exp}",
    8686    'type' => "regexp",
    8787    'reqd' => "no",
    8888    'deft' => &get_default_process_exp() },
    8989      { 'name' => "no_attachments",
    90     'desc' => "{EMAILPlug.no_attachments}",
     90    'desc' => "{EmailPlugin.no_attachments}",
    9191    'type' => "flag",
    9292    'reqd' => "no" },
    9393      { 'name' => "headers",
    94     'desc' => "{EMAILPlug.headers}",
     94    'desc' => "{EmailPlugin.headers}",
    9595    'type' => "flag",
    9696    'reqd' => "no" },
    9797      { 'name' => "split_exp",
    98     'desc' => "{EMAILPlug.split_exp}",
     98    'desc' => "{EmailPlugin.split_exp}",
    9999    'type' => "regexp",
    100100    'reqd' => "no",
     
    102102      ];
    103103
    104 my $options = { 'name'     => "EMAILPlug",
    105         'desc'     => "{EMAILPlug.desc}",
     104my $options = { 'name'     => "EmailPlugin",
     105        'desc'     => "{EmailPlugin.desc}",
    106106        'abstract' => "no",
    107107        'inherits' => "yes",
    108108        'args'     => $arguments };
    109109
    110 # Create a new EMAILPlug object with which to parse a file.
    111 # Accomplished by creating a new BasPlug and using bless to
    112 # turn it into an EMAILPlug.
     110# Create a new EmailPlugin object with which to parse a file.
     111# Accomplished by creating a new SplitTextFile and using bless to
     112# turn it into an EmailPlugin.
    113113
    114114sub new {
     
    117117    push(@$pluginlist, $class);
    118118
    119     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    120     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    121 
    122     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists);
     119    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     120    push(@{$hashArgOptLists->{"OptList"}},$options);
     121
     122    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists);
    123123
    124124    $self->{'assoc_filenames'} = {}; # to save attach names so we don't clobber
     
    166166
    167167
    168     print STDERR "<Processing n='$file' p='EMAILPlug'>\n" if ($gli);
    169 
    170     gsprintf($outhandle, "EMAILPlug: {common.processing} $file\n")
     168    print STDERR "<Processing n='$file' p='EmailPlugin'>\n" if ($gli);
     169
     170    gsprintf($outhandle, "EmailPlugin: {common.processing} $file\n")
    171171    if $self->{'verbosity'} > 1;
    172172
     
    524524        }
    525525    } else {
    526         print $outhandle "EMAILPlug: (warning) couldn't parse MIME boundary\n";
     526        print $outhandle "EmailPlugin: (warning) couldn't parse MIME boundary\n";
    527527    }
    528528    # parts start with "--$boundary"
     
    540540    # make sure it is only -- and whitespace
    541541    if ($last !~ /^\-\-\s*$/ms) {
    542         print $outhandle "EMAILPlug: (warning) last part of MIME message isn't empty\n";
     542        print $outhandle "EmailPlugin: (warning) last part of MIME message isn't empty\n";
    543543    }
    544544    foreach my $message_part (@message_parts) {
     
    579579        # or it was an empty message...
    580580        # do nothing...
    581         gsprintf($outhandle, "{BasPlug.empty_file} - empty body?\n");
     581        gsprintf($outhandle, "{BasePlugin.empty_file} - empty body?\n");
    582582        } else {
    583583        $text = $part_text;
     
    814814        }
    815815        open (SAVE, ">$tmpdir/$save_filename") ||
    816         warn "EMAILPlug: Can't save attachment as $tmpdir/$save_filename: $!";
     816        warn "EmailPlugin: Can't save attachment as $tmpdir/$save_filename: $!";
    817817        my $part_text = $message_part;
    818818        $part_text =~ s/(.*?)\r?\n\r?\n//s; # remove header
     
    834834#           &util::rm("$tmpdir/$save_filename");
    835835        my $outhandle=$self->{'outhandle'};
    836         print $outhandle "EMAILPlug: saving attachment \"$filename\"\n"; #
     836        print $outhandle "EmailPlugin: saving attachment \"$filename\"\n"; #
    837837       
    838838        # be nice if "download" was a translatable macro :(
     
    905905        # rfc2045 also allows binary, which we ignore (for now).
    906906        my $outhandle=$self->{'outhandle'};
    907         print $outhandle "EMAILPlug: unknown transfer encoding: $encoding\n";
     907        print $outhandle "EmailPlugin: unknown transfer encoding: $encoding\n";
    908908        return "";
    909909    }
     
    10671067      if ($badbytesfound==1) {
    10681068          # claims to be utf8, but it isn't!
    1069           print $outhandle "EMAILPlug: Headers claim utf-8 but bad bytes "
     1069          print $outhandle "EmailPlugin: Headers claim utf-8 but bad bytes "
    10701070          . "detected and removed.\n";
    10711071
     
    10921092      # 1252 has characters between 0x80 and 0x9f, 8859-1 doesn't
    10931093      if ($$textref =~ m/[\x80-\x9f]/) {
    1094       print $outhandle "EMAILPlug: Headers claim ISO charset but MS ";
     1094      print $outhandle "EmailPlugin: Headers claim ISO charset but MS ";
    10951095      print $outhandle "codepage 1252 detected.\n";
    10961096      $charset = "windows_1252";
     
    11061106      # characters out here if this causes problems...
    11071107      my $outhandle=$self->{'outhandle'};
    1108       print $outhandle "EMAILPlug: falling back to iso-8859-1\n";
     1108      print $outhandle "EmailPlugin: falling back to iso-8859-1\n";
    11091109      $$textref=&unicode::unicode2utf8(&unicode::convert2unicode("iso_8859_1",$textref));
    11101110
  • gsdl/trunk/perllib/plugins/ExcelPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # ExcelPlug.pm -- plugin for importing Microsoft Excel files.
     3# ExcelPlugin.pm -- plugin for importing Microsoft Excel files.
    44#  (currently only versions 95 and 97)
    55#
     
    2626###########################################################################
    2727
    28 package ExcelPlug;
     28package ExcelPlugin;
    2929
    30 use ConvertToPlug;
     30use ConvertBinaryFile;
    3131use strict;
    3232no strict 'refs'; # allow filehandles to be variables and viceversa
    3333
    3434sub BEGIN {
    35     @ExcelPlug::ISA = ('ConvertToPlug');
     35    @ExcelPlugin::ISA = ('ConvertBinaryFile');
    3636}
    3737
    3838my $arguments =
    3939    [ { 'name' => "process_exp",
    40     'desc' => "{BasPlug.process_exp}",
     40    'desc' => "{BasePlugin.process_exp}",
    4141    'type' => "regexp",
    4242    'reqd' => "no",
     
    4444      ];
    4545
    46 my $options = { 'name'     => "ExcelPlug",
    47         'desc'     => "{ExcelPlug.desc}",
     46my $options = { 'name'     => "ExcelPlugin",
     47        'desc'     => "{ExcelPlugin.desc}",
    4848        'abstract' => "no",
    4949        'inherits' => "yes",
     
    5959    if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    6060   
    61     my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists);
     61    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
    6262
    6363    if ($self->{'info_only'}) {
     
    6666    }
    6767
     68    $self->{'filename_extension'} = "xls";
     69    $self->{'file_type'} = "Excel";
     70
    6871    my $secondary_plugin_options = $self->{'secondary_plugin_options'};
    69     if (!defined $secondary_plugin_options->{'HTMLPlug'}) {
    70     $secondary_plugin_options->{'HTMLPlug'} = [];
     72    if (!defined $secondary_plugin_options->{'HTMLPlugin'}) {
     73    $secondary_plugin_options->{'HTMLPlugin'} = [];
    7174    }
    72     my $html_options = $secondary_plugin_options->{'HTMLPlug'};
     75    my $html_options = $secondary_plugin_options->{'HTMLPlugin'};
    7376   
    74     #$self->{'input_encoding'} = "utf8";
    75     #$self->{'extract_language'} = 1;
    7677    push(@$html_options, "-input_encoding", "utf8");
    7778    push(@$html_options,"-extract_language") if $self->{'extract_language'};
     
    8283}
    8384
    84 sub convert_post_process
     85sub convert_post_process_old
    8586{
    8687    my $self = shift (@_);
     
    107108}
    108109   
    109 sub process {
    110     my $self = shift (@_);
    111     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    112 
    113     return $self->process_type("xls",$base_dir,$file,$doc_obj);
    114 }
    115110
    1161111;
  • gsdl/trunk/perllib/plugins/FOXPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # FOXPlug.pm
     3# FOXPlugin.pm
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2929# the appropriate fields in the file.
    3030
    31 # 12/05/02 Added usage datastructure - John Thompson
    32 
    33 package FOXPlug;
    34 
    35 use BasPlug;
     31package FOXPlugin;
     32
     33use BasePlugin;
    3634use util;
    3735use doc;
    3836use unicode;
    39 use cnseg;
    40 # use gb;
    4137
    4238use strict;
     
    4541
    4642sub BEGIN {
    47     @FOXPlug::ISA = ('BasPlug');
     43    @FOXPlugin::ISA = ('BasePlugin');
    4844}
    4945
    5046my $arguments =
    5147    [ { 'name' => "process_exp",
    52     'desc' => "{BasPlug.process_exp}",
     48    'desc' => "{BasePlugin.process_exp}",
    5349    'type' => "regexp",
    5450    'reqd' => "no",
    5551    'deft' => &get_default_process_exp() },
    5652      { 'name' => "block_exp",
    57     'desc' => "{BasPlug.block_exp}",
     53    'desc' => "{BasePlugin.block_exp}",
    5854    'type' => "regexp",
    5955    'reqd' => "no",
    6056    'deft' => &get_default_block_exp() } ];
    6157
    62 my $options = { 'name'     => "FOXPlug",
    63         'desc'     => "{FOXPlug.desc}",
     58my $options = { 'name'     => "FOXPlugin",
     59        'desc'     => "{FOXPlugin.desc}",
    6460        'abstract' => "no",
    6561        'inherits' => "yes",
     
    7167    push(@$pluginlist, $class);
    7268
    73     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    74     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    75 
    76     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     69    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     70    push(@{$hashArgOptLists->{"OptList"}},$options);
     71
     72    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
    7773
    7874    return bless $self, $class;
     
    10399    return $block_status if ((!defined $block_status) || ($block_status==0));
    104100
    105     print STDERR "<Processing n='$file' p='FOXPlug'>\n" if ($gli);
    106     print STDERR "FOXPlug: processing $file\n" if $self->{'verbosity'} > 1;
     101    print STDERR "<Processing n='$file' p='FOXPlugin'>\n" if ($gli);
     102    print STDERR "FOXPlugin: processing $file\n" if $self->{'verbosity'} > 1;
    107103
    108104    my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i;
     
    113109        print STDERR "<ProcessingError n='$file' r='Could not read $fullname'>\n";
    114110    }
    115     print STDERR "FOXPlug::read - couldn't read $fullname\n";
     111    print STDERR "FOXPlugin::read - couldn't read $fullname\n";
    116112    return -1; # error in processing
    117113    }
     
    125121        print STDERR "<ProcessingError n='$file' r='EOF while reading database header'>\n";
    126122    }
    127     print STDERR "FOXPlug::read - eof while reading database header\n";
     123    print STDERR "FOXPlugin::read - eof while reading database header\n";
    128124    close (FOXBASEIN);
    129125    return -1;
     
    145141        print STDERR "<ProcessingError n='$file' r='Does not seem to be a Foxbase file'>\n";
    146142    }
    147     print STDERR "FOXPlug:read - $fullname doesn't seem to be a Foxbase file\n";
     143    print STDERR "FOXPlugin:read - $fullname doesn't seem to be a Foxbase file\n";
    148144    return -1;
    149145    }
     
    177173        print STDERR "<ProcessingError n='$file' r='Could not read $dbtfullname'>\n";
    178174    }
    179     print STDERR "FOXPlug::read - couldn't read $dbtfullname\n";
     175    print STDERR "FOXPlugin::read - couldn't read $dbtfullname\n";
    180176    close (FOXBASEIN);
    181177    return -1;
  • gsdl/trunk/perllib/plugins/FavouritesPlugin.pm

    r15865 r15872  
    2828# especially SRCPlug by John McPherson Nov 2000
    2929
    30 package FavouritesPlug;
     30package FavouritesPlugin;
    3131
    32 use BasPlug;
     32use ReadTextFile;
    3333use strict;
    3434no strict 'refs'; # allow filehandles to be variables and viceversa
    3535
    3636sub BEGIN {
    37     @FavouritesPlug::ISA = ('BasPlug');
     37    @FavouritesPlugin::ISA = ('ReadTextFile');
    3838}
    3939
    4040my $arguments =
    4141    [ { 'name' => "process_exp",
    42     'desc' => "{BasPlug.process_exp}",
     42    'desc' => "{ReadTextFile.process_exp}",
    4343    'type' => "regexp",
    4444    'deft' => &get_default_process_exp(),
    4545    'reqd' => "no" } ];
    4646
    47 my $options = { 'name'     => "FavouritesPlug",
    48         'desc'     => "FavouritesPlug imports Internet Explorer style Favourites. Favourites are often found in the \"C:\\Documents and Settings\\[your username]\\Favorites\" folder on your computer, but can also be made by dragging a bookmark or location from your browser (any) to the desktop.",
     47my $options = { 'name'     => "FavouritesPlugin",
     48        'desc'     => "{FavouritesPlugin.desc}",
    4949        'abstract' => "no",
    5050        'inherits' => "yes",
     
    5757    push(@$pluginlist, $class);
    5858
    59     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    60     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     59    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     60    push(@{$hashArgOptLists->{"OptList"}},$options);
    6161
    62     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     62    my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
    6363
    6464    return bless $self, $class;
     
    8080
    8181    my $section = $doc_obj->get_top_section();
    82     print STDERR "<Processing n='$file' p='FavouritesPlug'>\n" if ($gli);
    83     print $outhandle "FavouritesPlug: processing $file\n" if $self->{'verbosity'} > 1;
     82    print STDERR "<Processing n='$file' p='FavouritesPlugin'>\n" if ($gli);
     83    print $outhandle "FavouritesPlugin: processing $file\n" if $self->{'verbosity'} > 1;
    8484
    8585    # don't want mg to turn escape chars into actual values
  • gsdl/trunk/perllib/plugins/GAPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # GAPlug.pm
     3# GAPlugin.pm
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2929# to their DTD.
    3030
    31 package GAPlug;
    32 
    33 use XMLPlug;
     31package GAPlugin;
     32
     33use ReadXMLFile;
    3434
    3535use strict;
     
    3737
    3838sub BEGIN {
    39     @GAPlug::ISA = ('XMLPlug');
     39    @GAPlugin::ISA = ('ReadXMLFile');
    4040}
    4141
     
    4949my $arguments =
    5050    [ { 'name' => "process_exp",
    51     'desc' => "{BasPlug.process_exp}",
     51    'desc' => "{BasePlugin.process_exp}",
    5252    'type' => "regexp",
    5353    'deft' => &get_default_process_exp(),
    5454    'reqd' => "no" } ];
    5555
    56 my $options = { 'name'     => "GAPlug",
    57         'desc'     => "{GAPlug.desc}",
     56my $options = { 'name'     => "GAPlugin",
     57        'desc'     => "{GAPlugin.desc}",
    5858        'abstract' => "no",
    5959        'inherits' => "yes",
     
    6565    push(@$pluginlist, $class);
    6666
    67     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    68     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    69 
    70     my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);
     67    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     68    push(@{$hashArgOptLists->{"OptList"}},$options);
     69
     70    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists);
    7171
    7272    $self->{'section'} = "";
     
    106106
    107107    my $outhandle = $self->{'outhandle'};
    108     print $outhandle "GAPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
    109     print STDERR "<Processing n='$self->{'file'}' p='GAPlug'>\n" if $self->{'gli'};
     108    print $outhandle "GAPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
     109    print STDERR "<Processing n='$self->{'file'}' p='GAPlugin'>\n" if $self->{'gli'};
    110110
    111111}
  • gsdl/trunk/perllib/plugins/GISExtractor.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # GISBasPlug.pm -- base class to enhance plugins with GIS capabilities
     3# GISExtractor.pm -- extension base class to enhance plugins with GIS capabilities
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2424###########################################################################
    2525
    26 package GISBasPlug;
     26package GISExtractor;
     27
     28use PrintInfo;
    2729
    2830use util;
    29 use locale;
    3031
    3132use gsprintf 'gsprintf';
     
    3334no strict 'refs'; # allow filehandles to be variables and viceversa
    3435no strict 'subs';
     36
    3537#field categories in DataBase files
    3638#$LAT = 3;
     
    4244
    4345BEGIN {
    44     die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    45 }
    46 
    47 
    48 use BasPlug; # uses BasPlug, but is not inherited
    49 
    50 
    51 my $options = { 'name'     => "GISBasPlug",
    52         'desc'     => "{GISBasPlug.desc}",
     46    @GISExtractor::ISA = ('PrintInfo');
     47}
     48
     49
     50my $arguments =
     51    [ { 'name' => "extract_placenames",
     52    'desc' => "{GISExtractor.extract_placenames}",
     53    'type' => "flag",
     54    'reqd' => "no" },
     55      { 'name' => "gazetteer",
     56    'desc' => "{GISExtractor.gazetteer}",
     57    'type' => "string",
     58    'reqd' => "no" },
     59      { 'name' => "place_list",
     60    'desc' => "{GISExtractor.place_list}",
     61    'type' => "flag",
     62    'reqd' => "no" } ];
     63
     64
     65my $options = { 'name'     => "GISExtractor",
     66        'desc'     => "{GISExtractor.desc}",
    5367        'abstract' => "yes",
    54         'inherits' => "no" };
     68        'inherits' => "yes",
     69        'args' => $arguments };
    5570
    5671
    5772sub new {
    58     my $class = shift (@_);
    59     my $plugin_name = shift (@_);
    60 
    61     my $self = {};
    62     $self->{'plugin_type'} = "GISBasPlug";
    63 
    64     $self->{'option_list'} = [ $options ];
     73    my ($class) = shift (@_);
     74    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
     75    push(@$pluginlist, $class);
     76
     77    # can we indicate that these are not available if the map data is not there??
     78    #if (has_mapdata()) {
     79    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     80    push(@{$hashArgOptLists->{"OptList"}},$options);
     81    #}
     82    my $self = new PrintInfo($pluginlist, $inputargs, $hashArgOptLists);
     83
     84    if ($self->{'extract_placenames'}) {
     85
     86    my $outhandle = $self->{'outhandle'};
     87   
     88    my $places_ref
     89        = $self->loadGISDatabase($outhandle,$self->{'gazetteer'});
     90   
     91    if (!defined $places_ref) {
     92        print $outhandle "Warning: Error loading mapdata gazetteer \"$self->{'gazetteer'}\"\n";
     93        print $outhandle "         No placename extraction will take place.\n";
     94        $self->{'extract_placenames'} = undef;
     95    }
     96    else {
     97        $self->{'places'} = $places_ref;
     98    }
     99    }
    65100
    66101    return bless $self, $class;
    67 }
    68 
    69 sub init {
    70 }
    71 
    72 sub print_xml_usage
     102
     103}
     104
     105
     106sub extract_gis_metadata
    73107{
    74     BasPlug::print_xml_usage(@_);
    75 }
    76 
    77 sub print_xml
    78 {
    79     BasPlug::print_xml(@_);
    80 }
    81 
    82 sub print_txt_usage
    83 {
    84    BasPlug::print_txt_usage(@_);
    85 }
    86 
    87 sub determine_description_offset
    88 {
    89     BasPlug::determine_description_offset(@_);
    90 }
    91 sub print_plugin_usage
    92 {
    93     my $plugindesc = $options->{'desc'};
    94 
    95     if (defined($plugindesc)) {
    96     gsprintf(STDERR, "$plugindesc\n\n");
    97     }
    98  
    99 }
    100 
    101 sub set_incremental
    102 {
    103     BasPlug::set_incremental(@_);
     108    my $self = shift (@_);
     109    my ($doc_obj) = @_;
     110   
     111    if ($self->{'extract_placenames'}) {
     112    my $thissection = $doc_obj->get_top_section();
     113    while (defined $thissection) {
     114        my $text = $doc_obj->get_text($thissection);
     115        $self->extract_placenames (\$text, $doc_obj, $thissection) if $text =~ /./;
     116        $thissection = $doc_obj->get_next_section ($thissection);
     117    }
     118    }
     119
    104120}
    105121
     
    255271    $doc_obj->associate_file($tempfile, "places.txt", "text/plain");
    256272    $self->{'places_filename'} = $tempfile;
     273   
    257274    my %countries = ();
    258275   
     
    283300   
    284301    #this line removes apostrophes from placenames (they break the javascript function)
    285     $$textref =~ s/(javascript:popUp.*?)(\w)'(\w)/$1$2$3/g;
     302    $$textref =~ s/(javascript:popUp.*?)(\w)'(\w)/$1$2$3/g; #' (to get emacs colours back)
    286303       
    287304    #for displaying map of document, count num of places from each country
     
    314331    if ($self->{'verbosity'} > 2);
    315332}
     333
     334sub clean_up_temp_files {
     335    my $self = shift(@_);
     336   
     337    if(defined($self->{'places_filename'}) && -e $self->{'places_filename'}){
     338    &util::rm($self->{'places_filename'});
     339    }
     340    $self->{'places_filename'} = undef;
     341
     342}
  • gsdl/trunk/perllib/plugins/GMLPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # GMLPlug.pm --
     3# GMLPlugin.pm --
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2929# 12/05/02 Added usage datastructure - John Thompson
    3030
    31 package GMLPlug;
    32 
    33 use BasPlug;
     31package GMLPlugin;
     32
     33use BasePlugin;
    3434use util;
    3535use doc;
     
    3939
    4040sub BEGIN {
    41     @GMLPlug::ISA = ('BasPlug');
     41    @GMLPlugin::ISA = ('BasePlugin');
    4242}
    4343
    4444my $arguments =
    4545    [ { 'name' => "process_exp",
    46     'desc' => "{BasPlug.process_exp}",
     46    'desc' => "{BasePlugin.process_exp}",
    4747    'type' => "regexp",
    4848    'deft' =>  &get_default_process_exp() }
    4949    ];
    5050
    51 my $options = { 'name'     => "GMLPlug",
    52         'desc'     => "{GMLPlug.desc}",
     51my $options = { 'name'     => "GMLPlugin",
     52        'desc'     => "{GMLPlugin.desc}",
    5353        'abstract' => "no",
    5454        'inherits' => "yes",
     
    6060    push(@$pluginlist, $class);
    6161
    62     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    63     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    64 
    65     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     62    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     63    push(@{$hashArgOptLists->{"OptList"}},$options);
     64
     65    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
    6666
    6767    return bless $self, $class;
     
    8888    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
    8989
    90     print STDERR "<Processing n='$file' p='GMLPlug'>\n" if ($gli);
    91     print $outhandle "GMLPlug: processing $file\n";
     90    print STDERR "<Processing n='$file' p='GMLPlugin'>\n" if ($gli);
     91    print $outhandle "GMLPlugin: processing $file\n";
    9292
    9393    my $parent_dir = $file;
     
    9999        print STDERR "<ProcessingError n='$file' r='Could not read $filename'>\n";
    100100    }
    101     print $outhandle "GMLPlug::read - couldn't read $filename\n";
     101    print $outhandle "GMLPlugin::read - couldn't read $filename\n";
    102102    return -1;
    103103    }
     
    130130        if ($gml =~ /^\s*([^>]*)>(.*)$/so) {
    131131            $tags = $1 if defined $1;
    132             $text = &GMLPlug::_unescape_text($2);
     132            $text = &GMLPlugin::_unescape_text($2);
    133133
    134134        } else {
    135             print $outhandle "GMLPlug::read - error in file $filename\n";
     135            print $outhandle "GMLPlugin::read - error in file $filename\n";
    136136            print $outhandle "text: \"$gml\"\n";
    137137            last;
     
    158158        # could be stored as either attributes or ....
    159159        while ((defined $tags) && ($tags =~ s/^\s*(\S+)=\"([^\"]*)\"//o)) {
    160             $doc_obj->add_utf8_metadata($section, $1, &GMLPlug::_unescape_text($2))
     160            $doc_obj->add_utf8_metadata($section, $1, &GMLPlugin::_unescape_text($2))
    161161            if (defined $1 and defined $2);
    162162
     
    183183                $tagname =~ s/^&\#47;/\//;
    184184               
    185                 $doc_obj->add_utf8_metadata($section, $tagname, &GMLPlug::_unescape_text($tagvalue));
     185                $doc_obj->add_utf8_metadata($section, $tagname, &GMLPlugin::_unescape_text($tagvalue));
    186186            }
    187187            }
  • gsdl/trunk/perllib/plugins/HBPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # HBPlug.pm --
     3# HBPlugin.pm --
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    3838# Humanity Library collections
    3939
    40 package HBPlug;
     40package HBPlugin;
    4141
    4242use ghtml;
    43 use BasPlug;
     43use BasePlugin;
    4444use unicode;
    4545use util;
     
    5050
    5151sub BEGIN {
    52     @HBPlug::ISA = ('BasPlug');
    53 }
    54 
     52    @HBPlugin::ISA = ('BasePlugin');
     53}
     54my $encoding_list =     
     55    [ { 'name' => "ascii",
     56    'desc' => "{ReadTextFile.input_encoding.ascii}" },
     57      { 'name' => "iso_8859_1",
     58    'desc' => "Latin1 (western languages)" } ];
     59 
    5560my $arguments =
    5661    [ { 'name' => "process_exp",
    57     'desc' => "{BasPlug.process_exp}",
     62    'desc' => "{BasePlugin.process_exp}",
    5863    'type' => "regexp",
    5964    'reqd' => "no",
    60     'deft' => &get_default_process_exp() }
     65    'deft' => &get_default_process_exp() },
     66      { 'name' => "input_encoding",
     67    'desc' => "{ReadTextFile.input_encoding}",
     68    'type' => "enum",
     69    'deft' => "iso_8859_1",
     70    'list' => $encoding_list,
     71    'reqd' => "no" }
    6172      ];
    6273
    63 my $options = { 'name'     => "HBPlug",
    64         'desc'     => "{HBPlug.desc}",
     74my $options = { 'name'     => "HBPlugin",
     75        'desc'     => "{HBPlugin.desc}",
    6576        'abstract' => "no",
    6677        'inherits' => "yes",
     
    7283    push(@$pluginlist, $class);
    7384
    74     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    75     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    76 
    77     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     85    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     86    push(@{$hashArgOptLists->{"OptList"}},$options);
     87
     88    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
    7889
    7990    return bless $self, $class;
    8091}
    8192
    82 sub init {
    83     my $self = shift (@_);
    84     my ($verbosity, $outhandle) = @_;
    85 
    86     $self->BasPlug::init($verbosity, $outhandle);
    87     $self->{'input_encoding'} = "iso_8859_1";
    88 
    89     # this plugin only handles ascii encodings
    90     if ($self->{'input_encoding'} !~ /^(iso_8859_1|ascii)$/) {
    91     die "ERROR: HBPlug can handle only iso_8859_1 or ascii encodings.\n" .
    92         $self->{'input_encoding'} . " is not an acceptable input_encoding value\n";
    93     }
    94 }
    95 
    9693# this is included only to prevent warnings being printed out
    97 # from BasPlug::init. The process_exp is not used by this plugin
     94# from BasePlugin::init. The process_exp is not used by this plugin
    9895sub get_default_process_exp {
    9996    my $self = shift (@_);
     
    148145    if ($line =~ /<font [^>]*?face\s*=\s*\"?(\w+)\"?/i) {
    149146        my $font = $1;
    150         print $outhandle "HBPlug::HB_gettext - warning removed font $font\n"
     147        print $outhandle "HBPlugin::HB_gettext - warning removed font $font\n"
    151148        if ($font !~ /^arial$/i);
    152149    }
     
    217214}
    218215
    219 # if input_encoding is ascii we can call add_utf8_metadata
    220 # directly but if it's iso_8859_1 (the default) we need to call
    221 # add_metadata so that the ascii2utf8 conversion is done first
    222 # this should speed things up a little if processing an ascii only
    223 # document with input_encoding set to ascii
    224 sub HB_add_metadata {
    225     my $self = shift (@_);
    226     my ($doc_obj, $cursection, $field, $value) = @_;
    227 
    228 # All text should now be in utf-8
    229 #    if ($self->{'input_encoding'} eq "ascii") {
    230     $doc_obj->add_utf8_metadata ($cursection, $field, $value);
    231 #    } else {
    232 #   $doc_obj->add_metadata ($cursection, $field, $value);
    233 #    }
    234 }
    235 
    236216# return number of files processed, undef if can't process
    237217# Note that $base_dir might be "" and that $file might
     
    251231    return undef unless -e $htmlfile;
    252232
    253     print STDERR "<Processing n='$file' p='HBPlug'>\n" if ($gli);
    254     print $outhandle "HBPlug: processing $file\n";
     233    print STDERR "<Processing n='$file' p='HBPlugin'>\n" if ($gli);
     234    print $outhandle "HBPlugin: processing $file\n";
    255235
    256236    # read in the file and do basic html cleaning (removing header etc)
     
    276256    # $metadata->{$field} may be an array reference
    277257    if (ref ($metadata->{$field}) eq "ARRAY") {
    278         map { 
    279         $self->HB_add_metadata ($doc_obj, $cursection, $field, $_);
     258        map {
     259        $doc_obj->add_utf8_metadata($cursection, $field, $_);
    280260        } @{$metadata->{$field}};
    281261    } else {
    282         $self->HB_add_metadata ($doc_obj, $cursection, $field, $metadata->{$field});
     262        $doc_obj->add_utf8_metadata($cursection, $field, $metadata->{$field});
    283263    }
    284264    }
     
    321301
    322302        # add the metadata to this section
    323         $self->HB_add_metadata ($doc_obj, $cursection, "Title", $title);
     303        $doc_obj->add_utf8_metadata($cursection, "Title", $title);
    324304
    325305        # clean up the section html
     
    332312
    333313        # add the text for this section
    334 # All read text should now be in utf-8
    335 #       if ($self->{'input_encoding'} eq "ascii") {
    336314        $doc_obj->add_utf8_text ($cursection, $sectiontext);
    337 #       } else {
    338 #       $doc_obj->add_text ($cursection, $sectiontext);
    339 #       }
    340315    } else {
    341316        print $outhandle "WARNING - leftover text\n" , $self->shorten($html),
  • gsdl/trunk/perllib/plugins/HTMLPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # HTMLPlug.pm -- basic html plugin
     3# HTMLPlugin.pm -- basic html plugin
    44#
    55# A component of the Greenstone digital library software
     
    3434#
    3535
    36 package HTMLPlug;
    37 
    38 use BasPlug;
     36package HTMLPlugin;
     37
     38use ReadTextFile;
     39use HBPlugin;
    3940use ghtml;
    4041use unicode;
     
    4647
    4748sub BEGIN {
    48     @HTMLPlug::ISA = ('BasPlug');
     49    @HTMLPlugin::ISA = ('ReadTextFile', 'HBPlugin');
    4950}
    5051
     
    5455my $arguments =
    5556    [ { 'name' => "process_exp",
    56     'desc' => "{BasPlug.process_exp}",
     57    'desc' => "{BasePlugin.process_exp}",
    5758    'type' => "regexp",
    5859    'deft' =>  &get_default_process_exp() },
    5960      { 'name' => "block_exp",
    60     'desc' => "{BasPlug.block_exp}",
     61    'desc' => "{BasePlugin.block_exp}",
    6162    'type' => 'regexp',
    6263    'deft' =>  &get_default_block_exp() },
    6364      { 'name' => "nolinks",
    64     'desc' => "{HTMLPlug.nolinks}",
     65    'desc' => "{HTMLPlugin.nolinks}",
    6566    'type' => "flag" },
    6667      { 'name' => "keep_head",
    67     'desc' => "{HTMLPlug.keep_head}",
     68    'desc' => "{HTMLPlugin.keep_head}",
    6869    'type' => "flag" },
    6970      { 'name' => "no_metadata",
    70     'desc' => "{HTMLPlug.no_metadata}",
     71    'desc' => "{HTMLPlugin.no_metadata}",
    7172    'type' => "flag" },
    7273      { 'name' => "metadata_fields",
    73     'desc' => "{HTMLPlug.metadata_fields}",
     74    'desc' => "{HTMLPlugin.metadata_fields}",
    7475    'type' => "string",
    7576    'deft' => "Title" },
    7677      { 'name' => "hunt_creator_metadata",
    77     'desc' => "{HTMLPlug.hunt_creator_metadata}",
     78    'desc' => "{HTMLPlugin.hunt_creator_metadata}",
    7879    'type' => "flag" },
    7980      { 'name' => "file_is_url",
    80     'desc' => "{HTMLPlug.file_is_url}",
     81    'desc' => "{HTMLPlugin.file_is_url}",
    8182    'type' => "flag" },
    8283      { 'name' => "assoc_files",
    83     'desc' => "{HTMLPlug.assoc_files}",
     84    'desc' => "{HTMLPlugin.assoc_files}",
    8485    'type' => "regexp",
    8586    'deft' => &get_default_block_exp() },
    8687      { 'name' => "rename_assoc_files",
    87     'desc' => "{HTMLPlug.rename_assoc_files}",
     88    'desc' => "{HTMLPlugin.rename_assoc_files}",
    8889    'type' => "flag" },
    8990      { 'name' => "title_sub",
    90     'desc' => "{HTMLPlug.title_sub}",
     91    'desc' => "{HTMLPlugin.title_sub}",
    9192    'type' => "string",
    9293    'deft' => "" },
    9394      { 'name' => "description_tags",
    94     'desc' => "{HTMLPlug.description_tags}",
     95    'desc' => "{HTMLPlugin.description_tags}",
    9596    'type' => "flag" },
    9697      # retain this for backward compatibility (w3mir option was replaced by
    9798      # file_is_url)
    9899      { 'name' => "w3mir",
    99 #   'desc' => "{HTMLPlug.w3mir}",
     100#   'desc' => "{HTMLPlugin.w3mir}",
    100101    'type' => "flag",
    101102    'hiddengli' => "yes"},
    102103      { 'name' => "no_strip_metadata_html",
    103     'desc' => "{HTMLPlug.no_strip_metadata_html}",
     104    'desc' => "{HTMLPlugin.no_strip_metadata_html}",
    104105    'type' => "string",
    105106    'deft' => "",
    106107    'reqd' => "no"},
    107108      { 'name' => "sectionalise_using_h_tags",
    108     'desc' => "{HTMLPlug.sectionalise_using_h_tags}",
     109    'desc' => "{HTMLPlugin.sectionalise_using_h_tags}",
    109110    'type' => "flag" },
    110111      { 'name' => "use_realistic_book",
    111         'desc' => "{HTMLPlug.tidy_html}",
     112        'desc' => "{HTMLPlugin.tidy_html}",
    112113    'type' => "flag"},
    113       { 'name' => "is_old_HDL_tags",
    114         'desc' => "{HTMLPlug.old_style_HDL}",
    115     'type' => "flag"},
    116       { 'name' => "no_image_links",            # in future think about removing this option,
    117         'desc' => "{HTMLPlug.no_image_links}", # since it has become the default behaviour
    118     'type' => "flag"}, 
     114      { 'name' => "old_style_HDL",
     115        'desc' => "{HTMLPlugin.old_style_HDL}",
     116    'type' => "flag"}
    119117      ];
    120118
    121 my $options = { 'name'     => "HTMLPlug",
    122         'desc'     => "{HTMLPlug.desc}",
     119my $options = { 'name'     => "HTMLPlugin",
     120        'desc'     => "{HTMLPlugin.desc}",
    123121        'abstract' => "no",
    124122        'inherits' => "yes",
     
    506504    if (($self->{'tidy_html'}) || ($self->{'old_style_HDL'}))
    507505    {
    508         # because the document has to be sectionalized set the description tags
    509         $self->{'description_tags'} = 1;
    510 
    511         # set the file to be tidied
    512             $input_filename = &util::filename_cat($base_dir,$file) if $base_dir =~ /\w/;
    513        
    514             # get the tidied file
    515             #my $tidy_filename = $self->tmp_tidy_file($input_filename);
    516         my $tidy_filename = $self->convert_tidy_or_oldHDL_file($input_filename);
    517        
    518             # derive tmp filename from input filename
    519             my ($tailname, $dirname, $suffix) = &File::Basename::fileparse($tidy_filename, "\\.[^\\.]+\$");
     506    # because the document has to be sectionalized set the description tags
     507    $self->{'description_tags'} = 1;
    520508   
    521         # set the new input file and base_dir to be from the tidied file
    522         $file = "$tailname$suffix";
    523         $base_dir = $dirname;
     509    # set the file to be tidied
     510    $input_filename = &util::filename_cat($base_dir,$file) if $base_dir =~ /\w/;
     511   
     512    # get the tidied file
     513    #my $tidy_filename = $self->tmp_tidy_file($input_filename);
     514    my $tidy_filename = $self->convert_tidy_or_oldHDL_file($input_filename);
     515   
     516    # derive tmp filename from input filename
     517    my ($tailname, $dirname, $suffix) = &File::Basename::fileparse($tidy_filename, "\\.[^\\.]+\$");
     518   
     519    # set the new input file and base_dir to be from the tidied file
     520    $file = "$tailname$suffix";
     521    $base_dir = $dirname;
    524522    }
    525523   
    526524    # call the parent read_into_doc_obj
    527     my ($process_status,$doc_obj) = &BasPlug::read_into_doc_obj($self,$pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli);
     525    my ($process_status,$doc_obj) = $self->SUPER::read_into_doc_obj($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli);
    528526   
    529527    return ($process_status,$doc_obj);
     
    535533    push(@$pluginlist, $class);
    536534   
    537     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    538     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     535    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     536    push(@{$hashArgOptLists->{"OptList"}},$options);
    539537   
    540538
    541     my $self = (defined $hashArgOptLists)? new BasPlug($pluginlist,$inputargs,$hashArgOptLists): new BasPlug($pluginlist,$inputargs);
     539    my $self = new ReadTextFile($pluginlist,$inputargs,$hashArgOptLists);
    542540   
    543541    if ($self->{'w3mir'}) {
     
    618616    my $outhandle = $self->{'outhandle'};
    619617
    620     print STDERR "<Processing n='$file' p='HTMLPlug'>\n" if ($gli);
    621 
    622     print $outhandle "HTMLPlug: processing $file\n"
     618    print STDERR "<Processing n='$file' p='HTMLPlugin'>\n" if ($gli);
     619
     620    print $outhandle "HTMLPlugin: processing $file\n"
    623621    if $self->{'verbosity'} > 1;
    624622
     
    669667    # URL metadata (even invalid ones) are used to support internal
    670668    # links, so even if 'file_is_url' is off, still need to store info
    671    
    672     $file = &BasPlug::filename_to_metadata($self, $file); # ensures filename is in UTF8 character encoding
    673     my $web_url = "http://$file";
    674     $doc_obj->add_utf8_metadata($cursection, "URL", $web_url); # will eventually ensure it is utf8 anyway
     669
     670    my $utf8_file = $self->filename_to_utf8_metadata($file);
     671    my $web_url = "http://$utf8_file";
     672    $doc_obj->add_utf8_metadata($cursection, "URL", $web_url);
    675673
    676674    if ($self->{'file_is_url'}) {
     
    752750    }
    753751    if ($cursection ne "") {
    754         print $outhandle "HTMLPlug: WARNING: $file contains unmatched <Section></Section> tags\n";
     752        print $outhandle "HTMLPlugin: WARNING: $file contains unmatched <Section></Section> tags\n";
    755753    }
    756754
     
    760758        if (!$found_something) {
    761759        if ($self->{'verbosity'} > 2) {
    762             print $outhandle "HTMLPlug: WARNING: $file appears to contain no Section tags so\n";
     760            print $outhandle "HTMLPlugin: WARNING: $file appears to contain no Section tags so\n";
    763761            print $outhandle "          will be processed as a single section document\n";
    764762        }
     
    775773
    776774        } else {
    777         print $outhandle "HTMLPlug: WARNING: $file contains the following text outside\n";
     775        print $outhandle "HTMLPlugin: WARNING: $file contains the following text outside\n";
    778776        print $outhandle "          of the final closing </Section> tag. This text will\n";
    779777        print $outhandle "          be ignored.";
     
    795793        # been processed already but we should print the warning
    796794        # as above and extract metadata
    797         print $outhandle "HTMLPlug: WARNING: $file appears to contain no Section tags and\n";
     795        print $outhandle "HTMLPlugin: WARNING: $file appears to contain no Section tags and\n";
    798796        print $outhandle "          is blank or empty.  Metadata will be assigned if present.\n";
    799797        }
     
    892890    # trap images
    893891
    894     # Previously, by default, HTMLPlug would embed <img> tags inside anchor tags
     892    # Previously, by default, HTMLPlugin would embed <img> tags inside anchor tags
    895893    # i.e. <a href="image"><img src="image"></a> in order to overcome a problem that
    896894    # turned regular text succeeding images into links. That is, by embedding <imgs>
     
    907905
    908906    # If at any time, there is a need for having images embedded in <a> anchor tags,
    909     # then it might be better to turn that into an HTMLPlug option rather than make
     907    # then it might be better to turn that into an HTMLPlugin option rather than make
    910908    # it the default behaviour. Also, eventually, no_image_links needs to become
    911     # a deprecated option for HTMLPlug as it has now become the default behaviour.
     909    # a deprecated option for HTMLPlugin as it has now become the default behaviour.
    912910
    913911    #if(!$self->{'no_image_links'}){
    914912    $$textref =~ s/(<(?:img|embed|table|tr|td)[^>]*?(?:src|background)\s*=\s*)([\"][^\"]+[\"]|[\'][^\']+[\']|[^\s\/>]+)([^>]*>)/
    915         $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge;
     913    $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge;
    916914    #}
    917915
     
    936934    $back="\"$back";
    937935    }
     936
    938937    $link =~ s/\n/ /g;
    939938
     
    10741073
    10751074    my ($before_hash, $hash_part) = $link =~ /^([^\#]*)(\#?.*)$/;
    1076 
     1075   
    10771076    $hash_part = "" if !defined $hash_part;
    10781077    if (!defined $before_hash || $before_hash !~ /[\w\.\/]/) {
    10791078    my $outhandle = $self->{'outhandle'};
    1080     print $outhandle "HTMLPlug: ERROR - badly formatted tag ignored ($link)\n"
     1079    print $outhandle "HTMLPlugin: ERROR - badly formatted tag ignored ($link)\n"
    10811080        if $self->{'verbosity'};
    10821081    return ($link, "", 0);
     
    12571256
    12581257    if (!defined $tag) {
    1259         print $outhandle "HTMLPlug: can't find NAME in \"$metatag\"\n";
     1258        print $outhandle "HTMLPlugin: can't find NAME in \"$metatag\"\n";
    12601259        next;
    12611260    }
     
    12741273    }
    12751274    if (!defined $value) {
    1276         print $outhandle "HTMLPlug: can't find VALUE in \"$metatag\"\n";
     1275        print $outhandle "HTMLPlugin: can't find VALUE in \"$metatag\"\n";
    12771276        next;
    12781277    }
     
    14251424
    14261425
    1427 # Extend the BasPlug read_file so that strings like &eacute; are
     1426# Extend read_file so that strings like &eacute; are
    14281427# converted to UTF8 internally. 
    14291428#
     
    14321431
    14331432sub read_file {
    1434     my ($self, $filename, $encoding, $language, $textref) = @_;
    1435 
    1436     &BasPlug::read_file($self, $filename, $encoding, $language, $textref);
     1433    my $self = shift(@_);
     1434    my ($filename, $encoding, $language, $textref) = @_;
     1435
     1436    $self->SUPER::read_file($filename, $encoding, $language, $textref);
    14371437
    14381438    # Convert entities to their UTF8 equivalents
  • gsdl/trunk/perllib/plugins/ISISPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # ISISPlug.pm -- A plugin for CDS/ISIS databases
     3# ISISPlugin.pm -- A plugin for CDS/ISIS databases
    44#
    55# A component of the Greenstone digital library software
     
    2525###########################################################################
    2626
    27 package ISISPlug;
     27package ISISPlugin;
    2828
    2929
    3030use multiread;
    31 use SplitPlug;
     31use SplitTextFile;
    3232
    3333use strict;
    3434no strict 'refs'; # allow filehandles to be variables and viceversa
    3535
    36 # ISISPlug is a sub-class of SplitPlug.
     36# ISISPlugin is a sub-class of SplitTextFile.
    3737sub BEGIN {
    38     @ISISPlug::ISA = ('SplitPlug');
     38    @ISISPlugin::ISA = ('SplitTextFile');
    3939}
    4040
     
    4242my $arguments =
    4343    [ { 'name' => "process_exp",
    44     'desc' => "{BasPlug.process_exp}",
     44    'desc' => "{BasePlugin.process_exp}",
    4545    'type' => "regexp",
    4646    'reqd' => "no",
    4747    'deft' => &get_default_process_exp() },
    4848      { 'name' => "block_exp",
    49     'desc' => "{BasPlug.block_exp}",
     49    'desc' => "{BasePlugin.block_exp}",
    5050    'type' => "regexp",
    5151    'reqd' => "no",
     
    5353    'hiddengli' => "yes" },
    5454      { 'name' => "split_exp",
    55     'desc' => "{SplitPlug.split_exp}",
     55    'desc' => "{SplitTextFile.split_exp}",
    5656    'type' => "regexp",
    5757    'reqd' => "no",
     
    6161      # The interesting options
    6262      { 'name' => "entry_separator",
    63     'desc' => "{ISISPlug.entry_separator}",
     63    'desc' => "{ISISPlugin.entry_separator}",
    6464    'type' => "string",
    6565    'reqd' => "no",
    6666    'deft' => "<br>" },
    6767      { 'name' => "subfield_separator",
    68     'desc' => "{ISISPlug.subfield_separator}",
     68    'desc' => "{ISISPlugin.subfield_separator}",
    6969    'type' => "string",
    7070    'reqd' => "no",
     
    7272      ];
    7373
    74 my $options = { 'name'     => "ISISPlug",
    75         'desc'     => "{ISISPlug.desc}",
     74my $options = { 'name'     => "ISISPlugin",
     75        'desc'     => "{ISISPlugin.desc}",
    7676        'abstract' => "no",
    7777        'inherits' => "yes",
     
    104104    push(@$pluginlist, $class);
    105105
    106     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    107     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    108 
    109     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists);
     106    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     107    push(@{$hashArgOptLists->{"OptList"}},$options);
     108
     109    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists);
    110110
    111111    if ($self->{'info_only'}) {
     
    157157
    158158    my $reader = new multiread();
    159     $reader->set_handle('ISISPlug::FILE');
     159    $reader->set_handle('ISISPlugin::FILE');
    160160    $reader->set_encoding($encoding);
    161161    $reader->read_file($textref);
     
    186186
    187187    # Report that we're processing the file
    188     print STDERR "\n<Processing n='$file' p='ISISPlug'>\n" if ($gli);
     188    print STDERR "\n<Processing n='$file' p='ISISPlugin'>\n" if ($gli);
    189189    print $outhandle "IsisPlug: processing $file\n" if ($self->{'verbosity'}) > 1;
    190190
     
    348348    my $fdtfiletext = "";
    349349    my $reader = new multiread();
    350     $reader->set_handle('ISISPlug::FDT_FILE');
     350    $reader->set_handle('ISISPlugin::FDT_FILE');
    351351    $reader->set_encoding($encoding);
    352352    $reader->read_file($fdtfiletext);
  • gsdl/trunk/perllib/plugins/ImagePlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # ImagePlug.pm -- simple text plugin
     3# ImagePlugin.pm -- simple text plugin
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2424###########################################################################
    2525
    26 package ImagePlug;
     26package ImagePlugin;
    2727
    28 use BasPlug;
     28use BasePlugin;
     29use ImageConverter;
    2930
    3031use strict;
     
    3233
    3334sub BEGIN {
    34     @ImagePlug::ISA = ('BasPlug');
     35    @ImagePlugin::ISA = ('BasePlugin', 'ImageConverter');
    3536}
    3637
    3738my $arguments =
    3839    [ { 'name' => "process_exp",
    39     'desc' => "{BasPlug.process_exp}",
     40    'desc' => "{BasePlugin.process_exp}",
    4041    'type' => "regexp",
    4142    'deft' => &get_default_process_exp(),
    4243    'reqd' => "no" },
    43       { 'name' => "cache_generated_images",
    44     'desc' => "{ImagePlug.cache_generated_image}",
    45     'type' => "flag",
    46     'reqd' => "no" },
    47       { 'name' => "noscaleup",
    48     'desc' => "{ImagePlug.noscaleup}",
    49     'type' => "flag",
    50     'reqd' => "no" },
    51       { 'name' => "nothumbnail",
    52     'desc' => "{ImagePlug.generatethumbnail}",
    53     'type' => "flag",
    54     'reqd' => "no" },
    55       { 'name' => "thumbnailsize",
    56     'desc' => "{ImagePlug.thumbnailsize}",
    57     'type' => "int",
    58     'deft' => "100",
    59     'range' => "1,",
    60     'reqd' => "no" },
    61       { 'name' => "thumbnailtype",
    62     'desc' => "{ImagePlug.thumbnailtype}",
    63     'type' => "string",
    64     'deft' => "gif",
    65     'reqd' => "no" },
    66       { 'name' => "noscreenview",
    67     'desc' => "{ImagePlug.generatescreenview}",
    68     'type' => "flag",
    69     'reqd' => "no" },
    70       { 'name' => "screenviewsize",
    71     'desc' => "{ImagePlug.screenviewsize}",
    72     'type' => "int",
    73     'deft' => "0",
    74     'range' => "1,",
    75     'reqd' => "no" },
    76       { 'name' => "screenviewtype",
    77     'desc' => "{ImagePlug.screenviewtype}",
    78     'type' => "string",
    79     'deft' => "jpg",
    80     'reqd' => "no" },
    81       { 'name' => "converttotype",
    82     'desc' => "{ImagePlug.converttotype}",
    83     'type' => "string",
    84     'deft' => "",
    85     'reqd' => "no" },
    86       { 'name' => "minimumsize",
    87     'desc' => "{ImagePlug.minimumsize}",
    88     'type' => "int",
    89     'deft' => "100",
    90     'range' => "1,",
    91     'reqd' => "no" } ];
     44      ];
    9245
    93 my $options = { 'name'     => "ImagePlug",
    94         'desc'     => "{ImagePlug.desc}",
     46my $options = { 'name'     => "ImagePlugin",
     47        'desc'     => "{ImagePlugin.desc}",
    9548        'abstract' => "no",
    9649        'inherits' => "yes",
     
    10457    push(@$pluginlist, $class);
    10558
    106     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    107     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     59    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     60    push(@{$hashArgOptLists->{"OptList"}},$options);
    10861
    109     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
    110     $self->{'tmp_file_paths'} = ();
     62    new ImageConverter($pluginlist, $inputargs, $hashArgOptLists);
     63    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
    11164
    112     # Check that ImageMagick is installed and available on the path (except for Windows 95/98)
    113     if (!($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT())) {
    114     my $result = `identify 2>&1`;
    115     if ($? == -1 || $? == 256) {  # Linux and Windows return different values for "program not found"
    116         $self->{'imagemagick_not_installed'} = 1;
    117     }
    118     }
     65    return bless $self, $class;
     66}
    11967
    120        
    121     return bless $self, $class;
     68sub init {
     69    my $self = shift (@_);
     70    my ($verbosity, $outhandle, $failhandle) = @_;
     71
     72    $self->SUPER::init(@_);
     73    $self->ImageConverter::init();
    12274}
    12375
     
    13688    return;
    13789}
    138 # Create the thumbnail and screenview images, and discover the Image's
    139 # size, width, and height using the convert utility.
    140 
    141 sub generate_images
    142 {
    143     my $self = shift (@_);
    144     my $filename = shift (@_);   # filename with full path
    145     my $file = shift (@_);       # filename without path
    146     my $doc_obj = shift (@_);
    147     my $section = $doc_obj->get_top_section();
    148    
    149     my $verbosity = $self->{'verbosity'};
    150     my $outhandle = $self->{'outhandle'};
    151 
    152     # check the filename is okay
    153     return 0 if ($file eq "" || $filename eq "");
    154 
    155 #    Code now extended to quote filenames in 'convert' commnads
    156 #    Allows spaces in filenames, but note needs spaces to be escaped in URL as well
    157 #    if ($filename =~ m/ /) {
    158 #   print $outhandle "ImagePlug: \"$filename\" contains a space. choking.\n";
    159 #   return undef;
    160 #    }
    161 
    162     my $minimumsize = $self->{'minimumsize'};
    163     if (defined $minimumsize && (-s $filename < $minimumsize)) {
    164         print $outhandle "ImagePlug: \"$filename\" too small, skipping\n"
    165         if ($verbosity > 1);
    166     }
    167 
    168 
    169     # Convert the image to a new type (if required).
    170     my $converttotype = $self->{'converttotype'};
    171     my $originalfilename = "";  # only set if we do a conversion
    172     my $type = "unknown";
    173 
    174     if ($converttotype ne "" && $filename !~ m/$converttotype$/) {
    175     $originalfilename = $filename;
    176 
    177     my $result = $self->convert($originalfilename, $converttotype, "", "");
    178     ($filename) = ($result =~ /=>(.*\.$converttotype)/);
    179 
    180     $type = $converttotype;
    181     $file =~ s/\..*$/\.$type/;
    182     }
    183    
    184 
    185     # Add the image metadata
    186     my $url = $file;
    187    
    188     ##not know why it is required at the first place, it seems all works fine without it, so I comment it out
    189     ##$url =~ s/ /%20/g;
    190 
    191     my $utf8_filename_meta = $self->filename_to_metadata($url);
    192     $doc_obj->add_utf8_metadata ($section, "Image", $utf8_filename_meta);
    193 
    194     # Also want to set filename as 'Source' metadata to be
    195     # consistent with other plugins
    196     $doc_obj->add_utf8_metadata ($section, "Source", $utf8_filename_meta);
    197 
    198     my ($image_type, $image_width, $image_height, $image_size)
    199     = &identify($filename, $outhandle, $verbosity);
    200 
    201     if ($image_type ne " ") {
    202     $type = $image_type;
    203     }
    204    
    205     $doc_obj->add_metadata ($section, "FileFormat", $type);
    206     $doc_obj->add_metadata ($section, "FileSize",   $image_size);
    207 
    208     $doc_obj->add_metadata ($section, "ImageType",   $image_type);
    209     $doc_obj->add_metadata ($section, "ImageWidth",  $image_width);
    210     $doc_obj->add_metadata ($section, "ImageHeight", $image_height);
    211     $doc_obj->add_metadata ($section, "ImageSize",   $image_size);
    212     $doc_obj->add_metadata ($section, "NoText",    "1");
    213 
    214     $doc_obj->add_metadata ($section, "srclink",
    215                 "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\">");
    216     $doc_obj->add_metadata ($section, "/srclink", "</a>");
    217 
    218     $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\" width=100>");
    219 
    220    
    221     # Add the image as an associated file
    222     $doc_obj->associate_file($filename,$file,"image/$type",$section);
    223 
    224 
    225     if (!$self->{'nothumbnail'}) {
    226 
    227     # Make the thumbnail image
    228     my $thumbnailsize = $self->{'thumbnailsize'} || 100;
    229     my $thumbnailtype = $self->{'thumbnailtype'} || 'gif';
    230 
    231     # Generate the thumbnail with convert
    232     my $result = $self->convert($filename, $thumbnailtype, "-geometry $thumbnailsize" . "x$thumbnailsize", "THUMB");
    233     my ($thumbnailfile) = ($result =~ /=>(.*\.$thumbnailtype)/);
    234    
    235     # Add the thumbnail as an associated file ...
    236     if (-e "$thumbnailfile") {
    237         $doc_obj->associate_file("$thumbnailfile", "thumbnail.$thumbnailtype",
    238                      "image/$thumbnailtype",$section);
    239         $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype);
    240         $doc_obj->add_metadata ($section, "Thumb", "thumbnail.$thumbnailtype");
    241        
    242         $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>");
    243     }
    244 
    245     # Extract Thumnail metadata from convert output
    246     if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
    247         $doc_obj->add_metadata ($section, "ThumbWidth", $1);
    248         $doc_obj->add_metadata ($section, "ThumbHeight", $2);
    249     }
    250 
    251     }
    252 
    253 
    254     # Make a screen-sized version of the picture if requested
    255     if (!$self->{'noscreenview'}) {
    256 
    257     # To do: if the actual image smaller than the screenview size,
    258     # we should use the original !
    259 
    260     my $screenviewsize = $self->{'screenviewsize'};
    261     my $screenviewtype = $self->{'screenviewtype'} || 'jpeg';
    262 
    263     # make the screenview image
    264     my $result = $self->convert($filename, $screenviewtype, "-geometry $screenviewsize" . "x$screenviewsize", "SCREEN");
    265     my ($screenviewfilename) = ($result =~ /=>(.*\.$screenviewtype)/);
    266 
    267     # get screenview dimensions, size and type
    268         if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
    269         $doc_obj->add_metadata ($section, "ScreenWidth", $1);
    270         $doc_obj->add_metadata ($section, "ScreenHeight", $2);
    271     }
    272     else {
    273         $doc_obj->add_metadata ($section, "ScreenWidth", $image_width);
    274         $doc_obj->add_metadata ($section, "ScreenHeight", $image_height);
    275     }
    276 
    277     #add the screenview as an associated file ...
    278     if (-e "$screenviewfilename") {
    279         $doc_obj->associate_file("$screenviewfilename", "screenview.$screenviewtype",
    280                      "image/$screenviewtype",$section);
    281         $doc_obj->add_metadata ($section, "ScreenType", $screenviewtype);
    282         $doc_obj->add_metadata ($section, "Screen", "screenview.$screenviewtype");
    283 
    284         $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>");
    285     } else {
    286         print $outhandle "ImagePlug: couldn't find \"$screenviewfilename\"\n";
    287     }
    288     }
    289 
    290     return $type;
    291 
    292 
    293 }
    294 
    295 
    296 
    297 # Discover the characteristics of an image file with the ImageMagick
    298 # "identify" command.
    299 
    300 sub identify {
    301     my ($image, $outhandle, $verbosity) = @_;
    302 
    303     # Use the ImageMagick "identify" command to get the file specs
    304     my $command = "identify \"$image\" 2>&1";
    305     print $outhandle "$command\n" if ($verbosity > 2);
    306     my $result = '';
    307     $result = `$command`;
    308     print $outhandle "$result\n" if ($verbosity > 3);
    309 
    310     # Read the type, width, and height
    311     my $type =   'unknown';
    312     my $width =  'unknown';
    313     my $height = 'unknown';
    314 
    315     my $image_safe = quotemeta $image;
    316     if ($result =~ /^$image_safe (\w+) (\d+)x(\d+)/) {
    317     $type = $1;
    318     $width = $2;
    319     $height = $3;
    320     }
    321 
    322     # Read the size
    323     my $size = "unknown";
    324     if ($result =~ m/^.* ([0-9]+)b/) {
    325     $size = $1;
    326     }
    327     elsif ($result =~ m/^.* ([0-9]+)(\.([0-9]+))?kb?/) {
    328     $size = 1024 * $1;
    329     if (defined($2)) {
    330         $size = $size + (1024 * $2);
    331         # Truncate size (it isn't going to be very accurate anyway)
    332         $size = int($size);
    333     }
    334     }
    335     elsif ($result =~ m/^.* (([0-9]+)(\.([0-9]+))?e\+([0-9]+))(kb|b)?/) {
    336     # Deals with file sizes on Linux of type "3.4e+02kb" where e+02 is 1*10^2.
    337     # 3.4e+02 therefore evaluates to 3.4 x 1 x 10^2 = 340kb.
    338     # Programming languages including Perl know how that 3.4e+02 is a number,
    339     # so we don't need to do any calculations.
    340     $size = $1*1; # turn the string into a number by multiplying it by 1
    341            #if we did $size = $1; $size would be merely the string "3.4e+02"
    342     $size = int($size); # truncate size
    343     }
    344     print $outhandle "file: $image:\t $type, $width, $height, $size\n"
    345     if ($verbosity > 2);
    346 
    347     # Return the specs
    348     return ($type, $width, $height, $size);
    349 }
    350 
    351 
    352 sub convert
    353 {
    354     my $self = shift(@_);
    355     my $source_file_path = shift(@_);
    356     my $target_file_type = shift(@_);
    357     my $convert_options = shift(@_) || "";
    358     my $convert_type = shift(@_) || "";
    359 
    360     my $outhandle = $self->{'outhandle'};
    361     my $verbosity = $self->{'verbosity'};
    362 
    363     # Determine the full name and path of the output file
    364     my $target_file_path = &util::get_tmp_filename() . "." . $target_file_type;
    365     push(@{$self->{'tmp_file_paths'}}, $target_file_path);
    366 
    367     # Generate and run the convert command
    368     my $convert_command = "convert -interlace plane -verbose $convert_options \"$source_file_path\" \"$target_file_path\"";
    369     print $outhandle "$convert_type $convert_command\n" if ($verbosity > 2);
    370     my $result = `$convert_command 2>&1`;
    371     print $outhandle "$convert_type RESULT = $result\n" if ($verbosity > 2);
    372 
    373     return $result;
    374 }
    375 
    376 
    377 # The ImagePlug read() function.
    378 # ImagePlug overrides read() because there is no need to read the actual
    379 # text of the file in, because the contents of the file is not text...
    380 #
    381 # Return number of files processed, undef if can't process
    382 # Note that $base_dir might be "" and that $file might
    383 # include directories
    384 
    385 sub read {
    386     my $self = shift (@_);
    387     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    388 
    389     my $outhandle = $self->{'outhandle'};
    390 
    391     #check process and block exps, smart block, etc
    392     my ($block_status,$filename) = $self->read_block(@_);   
    393     return $block_status if ((!defined $block_status) || ($block_status==0));
    394 
    395     print STDERR "<Processing n='$file' p='ImagePlug'>\n" if ($gli);
    396     print $outhandle "ImagePlug processing $file\n"
    397         if $self->{'verbosity'} > 1;
    398 
    399     # None of this works very well on Windows 95/98...
    400     if ($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT()) {
    401     if ($gli) {
    402         print STDERR "<ProcessingError n='$file' r='Windows 95/98 not supported'>\n";
    403     }
    404     print $outhandle "ImagePlug: Windows 95/98 not supported\n";
    405     return -1;
    406     }
    407 
    408     # None of this is going to work very well without ImageMagick...
    409     if ($self->{'imagemagick_not_installed'}) {
    410     if ($gli) {
    411         print STDERR "<ProcessingError n='$file' r='ImageMagick not installed'>\n";
    412     }
    413     print $outhandle "ImagePlug: ImageMagick not installed\n";
    414     return -1;
    415     }
    416 
    417     #if there's a leading directory name, eat it...
    418     $file =~ s/^.*[\/\\]//;
    419    
    420     # create a new document
    421     my $doc_obj = new doc ($filename, "indexed_doc");
    422     $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});   
    423     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
    424 
    425     #run convert to get the thumbnail and extract size and type info
    426     my $result = generate_images($self, $filename, $file, $doc_obj);
    427    
    428     if (!defined $result)
    429     {
    430     if ($gli) {
    431         print STDERR "<ProcessingError n='$file'>\n";
    432     }
    433     print $outhandle "ImagePlug: couldn't process \"$filename\"\n";
    434     return -1; # error during processing
    435     }
    436 
    437      
    438     #create an empty text string so we don't break downstream plugins
    439     my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
    440    
    441     # include any metadata passed in from previous plugins
    442     # note that this metadata is associated with the top level section
    443     my $section = $doc_obj->get_top_section();
    444     $self->extra_metadata ($doc_obj, $section, $metadata);
    445 
    446     # do plugin specific processing of doc_obj
    447     unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
    448     print STDERR "<ProcessingError n='$file'>\n" if ($gli);
    449     return -1;
    450     }
    451 
    452     # do any automatic metadata extraction
    453     $self->auto_extract_metadata ($doc_obj);
    454 
    455     # if we haven't found any Title so far, assign one
    456     # this was shifted to here from inside read()
    457     $self->title_fallback($doc_obj,$section,$file);
    458     # add an OID
    459     $doc_obj->set_OID();
    460     $doc_obj->add_utf8_text($section, $text);
    461 
    462     # process the document
    463     $processor->process($doc_obj);
    464 
    465     # clean up temporary files - we do this here instead of in 
    466     # generate_images becuase associated files aren't actually copied
    467     # until after process has been run.
    468     foreach my $tmp_file_path (@{$self->{'tmp_file_paths'}})
    469     {
    470     if (-e $tmp_file_path)
    471     {
    472         &util::rm($tmp_file_path);
    473     }
    474     }
    475 
    476     $self->{'num_processed'}++;
    477 
    478     return 1;
    479 }
    48090
    48191# do plugin specific processing of doc_obj
    48292sub process {
    48393    my $self = shift (@_);
    484     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
     94    # options??
     95    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
     96
    48597    my $outhandle = $self->{'outhandle'};
     98    my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
     99    if ($self->check_image_magick()) {
     100    $self->generate_images($filename_full_path, $filename_no_path, $doc_obj, $doc_obj->get_top_section()); # should we check the return value?
     101    } else {
     102    # do some basic stuff
     103    # associate the image, fileformat, mimetype, srclink, srcicon
     104    # do this if image magick not installed. but also if generate hasn't worked?? what about images too small?
     105    }
     106    #we have no text - adds dummy text and NoText metadata
     107    $self->add_dummy_text($doc_obj, $doc_obj->get_top_section());
     108
     109    return 1;
     110
     111}
     112
     113sub clean_up_after_doc_obj_processing {
     114    my $self = shift(@_);
    486115   
    487     return 1;
     116    $self->ImageConverter::clean_up_temporary_files();
    488117}
    489118
  • gsdl/trunk/perllib/plugins/IndexPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # IndexPlug.pm --
     3# IndexPlugin.pm --
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    5050# named 'Subject'.
    5151
    52 # 12/05/02 Added usage datastructure - John Thompson
    53 
    54 package IndexPlug;
     52package IndexPlugin;
    5553
    5654use plugin;
    57 use BasPlug;
     55use BasePlugin;
    5856use doc;
    5957use util;
     
    6462
    6563sub BEGIN {
    66     @IndexPlug::ISA = ('BasPlug');
     64    @IndexPlugin::ISA = ('BasePlugin');
    6765}
    6866
    69 my $arguments = [
    70          ];
     67#my $arguments = [
     68#        ];
    7169
    72 my $options = { 'name'     => "IndexPlug",
    73         'desc'     => "{IndexPlug.desc}",
     70my $options = { 'name'     => "IndexPlugin",
     71        'desc'     => "{IndexPlugin.desc}",
    7472        'abstract' => "no",
    7573        'inherits' => "yes" };
     
    8078    push(@$pluginlist, $class);
    8179
    82     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    83     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     80    #push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     81    push(@{$hashArgOptLists->{"OptList"}},$options);
    8482
    85     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     83    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
    8684
    8785    return bless $self, $class;
     
    110108
    111109    # found an index.txt file
    112     print STDERR "<Processing n='$file' p='IndexPlug'>\n" if ($gli);
    113     print $outhandle "IndexPlug: processing $indexfile\n";
     110    print STDERR "<Processing n='$file' p='IndexPlugin'>\n" if ($gli);
     111    print $outhandle "IndexPlugin: processing $indexfile\n";
    114112
    115113    # read in the index.txt
  • gsdl/trunk/perllib/plugins/LOMPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # LOMPlug.pm -- plugin for import the collection from LOM
     3# LOMPlugin.pm -- plugin for import the collection from LOM
    44#
    55# A component of the Greenstone digital library software
     
    2727### Note this plugin currently can't download source documents from outside if you are behind a firewall.
    2828
    29 package LOMPlug;
    30 
    31 use BasPlug;
     29package LOMPlugin;
     30
     31use ReadTextFile;
    3232use MetadataPass;
    3333use XMLParser;
     
    3535
    3636sub BEGIN {
    37     @ISA = ('BasPlug', 'MetadataPass');
     37    @ISA = ('ReadTextFile', 'MetadataPass');
    3838}
    3939
     
    4444my $arguments =
    4545    [ { 'name' => "process_exp",
    46     'desc' => "{BasPlug.process_exp}",
     46    'desc' => "{ReadTextFile.process_exp}",
    4747    'type' => "string",
    4848    'deft' => &get_default_process_exp(),
    4949    'reqd' => "no" },
    5050      { 'name' => "root_tag",
    51     'desc' => "{LOMPlug.root_tag}",
     51    'desc' => "{LOMPlugin.root_tag}",
    5252    'type' => "regexp",
    5353    'deft' => q/^(?i)lom$/,
    5454    'reqd' => "no" },
    5555      { 'name' => "check_timestamp",
    56     'desc' => "{LOMPlug.check_timestamp}",
     56    'desc' => "{LOMPlugin.check_timestamp}",
    5757    'type' => "flag" },
    5858      { 'name' => "download_srcdocs",
    59     'desc' => "{LOMPlug.download_srcdocs}",
     59    'desc' => "{LOMPlugin.download_srcdocs}",
    6060    'type' => "regexp",
    6161    'deft' => "",
    6262    'reqd' => "no" }];
    6363
    64 my $options = { 'name'     => "LOMPlug",
    65         'desc'     => "{LOMPlug.desc}",
     64my $options = { 'name'     => "LOMPlugin",
     65        'desc'     => "{LOMPlugin.desc}",
    6666        'inherits' => "yes",
    6767        'args'     => $arguments };
     
    7575    push(@$pluginlist, $class);
    7676   
    77     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    78     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     77    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     78    push(@{$hashArgOptLists->{"OptList"}},$options);
    7979   
    80     $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     80    $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
     81
     82    if ($self->{'info_only'}) {
     83    # don't worry about creating the XML parser as all we want is the
     84    # list of plugin options
     85    return bless $self, $class;
     86    }
    8187
    8288    #create XML::Parser object for parsing dublin_core.xml files
     
    120126    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
    121127   
    122     print $outhandle "LOMPlug: extracting metadata from $file\n"
     128    print $outhandle "LOMPlugin: extracting metadata from $file\n"
    123129    if $self->{'verbosity'} > 1;
    124130
     
    131137   
    132138    if ($@) {
    133     print $outhandle "LOMPlug: skipping $filename as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1);
     139    print $outhandle "LOMPlugin: skipping $filename as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1);
    134140    print $outhandle "\n Perl Error:\n $@\n" if ($self->{'verbosity'}>2);
    135141    return 0;
     
    262268    my $outhandle = $self->{'outhandle'};
    263269
    264     print STDERR "<Processing n='$file' p='LOMPlug'>\n" if ($gli);
     270    print STDERR "<Processing n='$file' p='LOMPlugin'>\n" if ($gli);
    265271
    266272    print $outhandle "LOMPLug: processing $file\n";
  • gsdl/trunk/perllib/plugins/LaTeXPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # LaTeXPlug.pm
     3# LaTeXPlugin.pm
    44#
    55# A component of the Greenstone digital library software
     
    2626#  parse/remove tex \if ... macros
    2727
    28 package LaTeXPlug;
     28package LaTeXPlugin;
    2929
    3030# System complains about $arguments if the strict is set
     
    3333
    3434# greenstone packages
    35 use BasPlug;
     35use ReadTextFile;
    3636use unicode;
    3737use util;
     
    3939my $arguments =
    4040    [ { 'name' => "process_exp",
    41     'desc' => "{BasPlug.process_exp}",
     41    'desc' => "{ReadTextFile.process_exp}",
    4242    'type' => "regexp",
    4343    'reqd' => "no",
    4444    'deft' => &get_default_process_exp() } ];
    4545
    46 my $options = { 'name'     => 'LaTeXPlug',
    47         'desc'     => '{LaTeXPlug.desc}',
     46my $options = { 'name'     => 'LaTeXPlugin',
     47        'desc'     => '{LaTeXPlugin.desc}',
    4848        'abstract' => 'no',
    4949        'inherits' => 'yes',
     
    5151
    5252sub BEGIN {
    53     @LaTeXPlug::ISA = ('BasPlug');
    54 }
    55 
    56 sub print_usage {
    57     print STDERR "\n  usage: plugin LaTeXPlug [options]\n\n";
     53    @LaTeXPlugin::ISA = ('ReadTextFile');
    5854}
    5955
     
    6359    push(@$pluginlist, $class);
    6460
    65     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    66     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    67 
    68     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     61    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     62    push(@{$hashArgOptLists->{"OptList"}},$options);
     63
     64    my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
    6965
    7066    $self->{'aux_files'} = {};
     
    9995    my $outhandle = $self->{'outhandle'};
    10096    if ($gli) {
    101     print STDERR "<Processing n='$file' p='LaTeXPlug'>\n";
     97    print STDERR "<Processing n='$file' p='LaTeXPlugin'>\n";
    10298    } elsif ($self->{'verbosity'} > 1) {
    103     print $outhandle "LaTeXPlug: processing $file\n"
     99    print $outhandle "LaTeXPlugin: processing $file\n"
    104100    }
    105101    my $cursection = $doc_obj->get_top_section();
  • gsdl/trunk/perllib/plugins/MARCPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # MARCPlug.pm -- basic MARC plugin
     3# MARCPlugin.pm -- basic MARC plugin
    44#
    55# A component of the Greenstone digital library software
     
    2525###########################################################################
    2626
    27 package MARCPlug;
    28 
    29 use SplitPlug;
     27package MARCPlugin;
     28
     29use SplitTextFile;
    3030
    3131use unicode;
     
    3636
    3737sub BEGIN {
    38     @MARCPlug::ISA = ('SplitPlug');
     38    @MARCPlugin::ISA = ('SplitTextFile');
    3939    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
    4040}
     
    4242my $arguments =
    4343    [ { 'name' => "metadata_mapping",
    44     'desc' => "{MARCPlug.metadata_mapping}",
     44    'desc' => "{MARCPlugin.metadata_mapping}",
    4545    'type' => "string",
    4646    'deft' => "marctodc.txt",
     
    5353    'reqd' => "no" },
    5454      { 'name' => "process_exp",
    55     'desc' => "{BasPlug.process_exp}",
     55    'desc' => "{BasePlugin.process_exp}",
    5656    'type' => "regexp",
    5757    'reqd' => "no",
    5858    'deft' => &get_default_process_exp() },
    5959      { 'name' => "split_exp",
    60     'desc' => "{SplitPlug.split_exp}",
     60    'desc' => "{SplitTextFile.split_exp}",
    6161    'type' => "regexp",
    6262    'reqd' => "no",
     
    6464      ];
    6565
    66 my $options = { 'name'     => "MARCPlug",
    67         'desc'     => "{MARCPlug.desc}",
     66my $options = { 'name'     => "MARCPlugin",
     67        'desc'     => "{MARCPlugin.desc}",
    6868        'abstract' => "no",
    6969        'inherits' => "yes",
     
    8181    push(@$pluginlist, $class);
    8282
    83     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    84     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    85 
    86     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists);
     83    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     84    push(@{$hashArgOptLists->{"OptList"}},$options);
     85
     86    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists);
    8787
    8888    # 'metadata_mapping' was used in two ways in the plugin: as a plugin
     
    119119    {
    120120
    121     my $msg = "MARCPlug ERROR: Can't locate mapping file \"" .
     121    my $msg = "MARCPlugin ERROR: Can't locate mapping file \"" .
    122122        $self->{'metadata_mapping_file'} . "\".\n" .
    123123        "    No marc files can be processed.\n";
     
    245245        push(@marc_entries,$marc);
    246246    $$textref .= $marc->as_formatted();
    247     $$textref .= "\n\n"; # for SplitPlug - see default_split_exp above...
     247    $$textref .= "\n\n"; # for SplitTextFile - see default_split_exp above...
    248248    }
    249249
     
    254254
    255255# do plugin specific processing of doc_obj
    256 # This gets done for each record found by SplitPlug in marc files.
     256# This gets done for each record found by SplitTextFile in marc files.
    257257sub process {
    258258    my $self = shift (@_);
     
    264264    if (! defined($self->{'metadata_mapping'}))
    265265    {
    266     print $outhandle "MARCPlug: no metadata file! Can't process $file\n";
     266    print $outhandle "MARCPlugin: no metadata file! Can't process $file\n";
    267267    return undef;
    268268    }
    269269
    270     print STDERR "<Processing n='$file' p='MARCPlug'>\n" if ($gli);
    271     print $outhandle "MARCPlug: processing $file\n"
     270    print STDERR "<Processing n='$file' p='MARCPlugin'>\n" if ($gli);
     271    print $outhandle "MARCPlugin: processing $file\n"
    272272    if $self->{'verbosity'} > 1;
    273273
  • gsdl/trunk/perllib/plugins/MARCXMLPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # MARCXMLPlug.pm
     3# MARCXMLPlugin.pm
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2828# well-formedness).
    2929
    30 package MARCXMLPlug;
    31 
    32 use XMLPlug;
     30package MARCXMLPlugin;
     31
     32use ReadXMLFile;
    3333
    3434use strict;
     
    3636
    3737sub BEGIN {
    38     @MARCXMLPlug::ISA = ('XMLPlug');
     38    @MARCXMLPlugin::ISA = ('ReadXMLFile');
    3939}
    4040
    4141my $arguments = [{'name' => "metadata_mapping_file",
    42           'desc' => "{MARCXMLPlug.metadata_mapping_file}",
     42          'desc' => "{MARCXMLPlugin.metadata_mapping_file}",
    4343          'type' => "string",
    4444          'deft' => "marctodc.txt",
    4545          'reqd' => "no" }];
    4646
    47 my $options = { 'name'     => "MARCXMLPlug",
    48         'desc'     => "{MARCXMLPlug.desc}",
     47my $options = { 'name'     => "MARCXMLPlugin",
     48        'desc'     => "{MARCXMLPlugin.desc}",
    4949        'abstract' => "no",
    5050        'inherits' => "yes",
     
    5757    push(@$pluginlist, $class);
    5858
    59     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    60     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    61    
    62     my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);
     59    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     60    push(@{$hashArgOptLists->{"OptList"}},$options);
     61   
     62    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists);
    6363   
    6464    $self->{'content'} = "";
     
    222222    if (scalar(@$mm_files)==0)
    223223    {
    224     my $msg = "MARCXMLPlug ERROR: Can't locate mapping file \"" .
     224    my $msg = "MARCXMLPlugin ERROR: Can't locate mapping file \"" .
    225225        $self->{'metadata_mapping_file'} . "\".\n " .
    226226        "    No marc files can be processed.\n";
     
    269269    $self->{'indent'} = 0;
    270270    my $outhandle = $self->{'outhandle'};
    271     print $outhandle "MARCXMLPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
    272     print STDERR "<Processing n='$self->{'file'}' p='MARCXMLPlug'>\n" if $self->{'gli'};
     271    print $outhandle "MARCXMLPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
     272    print STDERR "<Processing n='$self->{'file'}' p='MARCXMLPlugin'>\n" if $self->{'gli'};
    273273 
    274274}
     
    310310    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Encoding", $encoding);
    311311    my ($filemeta) = $file =~ /([^\\\/]+)$/;
    312     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));
     312    $self->set_Source_metadata($doc_obj, $filemeta, $encoding);
    313313    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "SourceSegment", "$self->{'record_count'}");
    314314        if ($self->{'cover_image'}) {
     
    319319
    320320    my $outhandle = $self->{'outhandle'};
    321     print $outhandle "Record $self->{'record_count'} - MARCXMLPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
     321    print $outhandle "Record $self->{'record_count'} - MARCXMLPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
    322322
    323323        $self->{'record_count'}++;
  • gsdl/trunk/perllib/plugins/METSPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # METSPlug.pm
     3# METSPlugin.pm
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    3030
    3131
    32 package METSPlug;
     32package METSPlugin;
    3333
    3434use ghtml;
     
    3737no strict 'refs'; # allow filehandles to be variables and viceversa
    3838
    39 use XMLPlug;
     39use ReadXMLFile;
    4040use XML::XPath;
    4141use XML::XPath::XMLParser;
    4242
    4343sub BEGIN {
    44     @METSPlug::ISA = ('XMLPlug');
     44    @METSPlugin::ISA = ('ReadXMLFile');
    4545}
    4646
    4747my $arguments = [
    4848         ];
    49 my $options = { 'name'     => "METSPlug",
    50         'desc'     => "{METSPlug.desc}",
     49my $options = { 'name'     => "METSPlugin",
     50        'desc'     => "{METSPlugin.desc}",
    5151        'abstract' => "no",
    5252        'inherits' => "yes" };
     
    6464    push(@$pluginlist, $class);
    6565
    66     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    67     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    68 
    69     my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);
     66    # have no args - do we still want this?
     67    #push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     68    push(@{$hashArgOptLists->{"OptList"}},$options);
     69
     70    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists);
    7071
    7172    $self->{'section'} = "";
     
    117118    }
    118119    my $outhandle = $self->{'outhandle'};
    119     print $outhandle "METSPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
    120     print STDERR "<Processing n='$self->{'file'}' p='METSPlug'>\n" if ($self->{'gli'});
     120    print $outhandle "METSPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
     121    print STDERR "<Processing n='$self->{'file'}' p='METSPlugin'>\n" if ($self->{'gli'});
    121122
    122123}
  • gsdl/trunk/perllib/plugins/MP3Plugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # MP3Plug.pm -- Plugin for MP3 files (MPEG audio layer 3).
     3# MP3Plugin.pm -- Plugin for MP3 files (MPEG audio layer 3).
    44#
    55# A component of the Greenstone digital library software from the New
     
    2626
    2727
    28 package MP3Plug;
    29 
    30 use UnknownPlug;
     28package MP3Plugin;
     29
     30use BasePlugin;
    3131
    3232use strict;
    3333no strict 'refs'; # allow filehandles to be variables and viceversa
     34no strict 'subs';
    3435
    3536use MP3::Info;
     
    3839
    3940sub BEGIN {
    40     @MP3Plug::ISA = ('UnknownPlug');
     41    @MP3Plugin::ISA = ('BasePlugin');
    4142}
    4243
    4344my $arguments =
    4445    [ { 'name' => "process_exp",
    45     'desc' => "{BasPlug.process_exp}",
     46    'desc' => "{BasePlugin.process_exp}",
    4647    'type' => "regexp",
    4748    'deft' => &get_default_process_exp(),
    4849    'reqd' => "no" },
    4950      { 'name' => "assoc_images",
    50         'desc' => "{MP3Plug.assoc_images}",
     51        'desc' => "{MP3Plugin.assoc_images}",
    5152        'type' => "flag",
    5253        'deft' => "",
    5354        'reqd' => "no" },
    5455      { 'name' => "applet_metadata",
    55     'desc' => "{MP3Plug.applet_metadata}",
     56    'desc' => "{MP3Plugin.applet_metadata}",
    5657    'type' => "flag",
    5758    'deft' => "" },
    5859      { 'name' => "metadata_fields",
    59     'desc' => "{MP3Plug.metadata_fields}",
     60    'desc' => "{MP3Plugin.metadata_fields}",
    6061    'type' => "string",
    6162    'deft' => "Title,Artist,Genre" } ];
    6263
    63 my $options = { 'name'     => "MP3Plug",
    64         'desc'     => "{MP3Plug.desc}",
     64my $options = { 'name'     => "MP3Plugin",
     65        'desc'     => "{MP3Plugin.desc}",
    6566        'abstract' => "no",
    6667        'inherits' => "yes",
     
    7273    push(@$pluginlist, $class);
    7374
    74     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    75     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    76 
    77     my $self = new UnknownPlug($pluginlist, $inputargs, $hashArgOptLists);
     75    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     76    push(@{$hashArgOptLists->{"OptList"}},$options);
     77
     78    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
    7879
    7980    return bless $self, $class;
     
    8283sub get_default_process_exp {
    8384    return q^(?i)\.mp3$^;
     85}
     86
     87sub process {
     88    my $self = shift (@_);
     89    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
     90
     91    my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
     92    # do something about OIDtype so no hashing
     93     
     94    # old code was in effect the following.
     95    if ($doc_obj->{'OIDtype'} =~ /^hash$/) {
     96    $doc_obj->set_OIDtype ("incremental");
     97    }
     98
     99
     100    # associate the file with the document
     101    if ($self->associate_mp3_file($filename_full_path, $filename_no_path, $doc_obj) != 1)
     102    {
     103    print "MP3Plugin: couldn't process \"$filename_full_path\"\n";
     104    return 0;
     105    }
     106   
     107    #whats this crap?
     108   my $text = &gsprintf::lookup_string("{BasePlugin.dummy_text}",1);
     109    if ($self->{'assoc_images'}) {
     110    $text .= "[img1]<br>";
     111    $text .= "[img2]<br>";
     112    }
     113    $doc_obj->add_utf8_text($doc_obj->get_top_section(), $text);
     114
    84115}
    85116
     
    155186
    156187    $doc_obj->associate_file($filename, $dst_file, $mime_type, $section);
    157     $doc_obj->add_metadata ($section, "Source", $file);
    158188    $doc_obj->add_metadata ($section, $assoc_field, $assoc_name);
    159189    $doc_obj->add_metadata ($section, "srcurl", $assoc_url);
     
    295325
    296326
    297 
    298 # The MP3Plug read() function is based on UnknownPlug read().  This
    299 # function does all the right things to make general options work for
    300 # a given plugin.
    301 
    302 my $mp3_doc_count = 0; ## is this used anywhere now !!???
    303 
    304 sub read {
    305     my $self = shift (@_);
    306     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    307 
    308     my $outhandle = $self->{'outhandle'};
    309 
    310     #check for associate_ext, blocking etc
    311     my ($block_status,$filename) = $self->read_block(@_);   
    312     return $block_status if ((!defined $block_status) || ($block_status==0));
    313 
    314     print STDERR "<Processing n='$file' p='MP3Plug'>\n" if ($gli);
    315     print $outhandle "MP3Plug processing \"$filename\"\n"
    316         if $self->{'verbosity'} > 1;
    317 
    318     #if there's a leading directory name, eat it...
    319     $file =~ s/^.*[\/\\]//;
    320    
    321     # create a new document
    322     my $doc_obj = new doc ($filename, "indexed_doc");
    323     $mp3_doc_count++;
    324    
    325 ##    $doc_obj->set_OIDtype ($processor->{'OIDtype'}); 
    326     if ($processor->{'OIDtype'} =~ /^(assigned|dirname)$/) {
    327     $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
    328     }
    329     else {
    330     $doc_obj->set_OIDtype ("incremental");    # this is done to avoid hashing content of file
    331     }
    332     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
    333     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "FileSize", (-s $filename));
    334 
    335     # associate the file with the document
    336     if (associate_mp3_file($self, $filename, $file, $doc_obj) != 1)
    337     {
    338     print "MP3Plug: couldn't process \"$filename\"\n";
    339     return 0;
    340     }
    341 
    342     #create an empty text string so we don't break downstream plugins
    343     my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
    344     if ($self->{'assoc_images'}) {
    345     $text .= "[img1]<br>";
    346     $text .= "[img2]<br>";
    347     }
    348     # include any metadata passed in from previous plugins
    349     my $section = $doc_obj->get_top_section();
    350     $self->extra_metadata ($doc_obj, $section, $metadata);
    351 
    352     $self->title_fallback($doc_obj,$section,$file);
    353 
    354     # do plugin specific processing of doc_obj
    355     return undef unless defined ($self->process (\$text, $pluginfo, $base_dir,
    356                          $file, $metadata, $doc_obj));
    357 
    358     # do any automatic metadata extraction
    359     $self->auto_extract_metadata ($doc_obj);
    360 
    361     # add an OID
    362     $doc_obj->set_OID();
    363     $doc_obj->add_utf8_text($section, $text);
    364 
    365     # process the document
    366     $processor->process($doc_obj);
    367 
    368     $self->{'num_processed'} ++;
    369     return 1;
    370 }
    371 
    372 
     327# we want to use mp3:Title if it's there, otherwise we'll use the BasePlugin method
    373328sub title_fallback
    374329{
     
    382337    }
    383338    else {
    384         &BasPlug::title_fallback($self, $doc_obj, $section, $file);
     339        $self->BasePlugin::title_fallback($doc_obj, $section, $file);
    385340    }
    386341    }
  • gsdl/trunk/perllib/plugins/MediaWikiPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # MediaWikiPlug.pm -- html plugin with extra facilities for wiki page
     3# MediaWikiPlugin.pm -- html plugin with extra facilities for wiki page
    44#
    55# A component of the Greenstone digital library software
     
    3131# collection's Home page.
    3232
    33 package MediaWikiPlug;
    34 
    35 use HTMLPlug;
    36 # use ImagePlug;
     33package MediaWikiPlugin;
     34
     35use HTMLPlugin;
     36# use ImagePlugin;
    3737# use File::Copy;
    3838use unicode;
     
    4343
    4444sub BEGIN {
    45     @MediaWikiPlug::ISA = ('HTMLPlug');       
     45    @MediaWikiPlugin::ISA = ('HTMLPlugin');       
    4646}
    4747
     
    5050     # show the table of contents on collection's home page
    5151     { 'name' => "show_toc",
    52        'desc' => "{MediaWikiPlug.show_toc}",
     52       'desc' => "{MediaWikiPlugin.show_toc}",
    5353       'type' => "flag",
    5454       'reqd' => "no"},
    5555     # set to delete the table of contents section on each MediaWiki page
    5656     { 'name' => "delete_toc",
    57        'desc' => "{MediaWikiPlug.delete_toc}",
     57       'desc' => "{MediaWikiPlugin.delete_toc}",
    5858       'type' => "flag",
    5959       'reqd' => "no"},
    6060     # regexp to match the table of contents
    6161     { 'name' => "toc_exp",
    62        'desc' => "{MediaWikiPlug.toc_exp}",
     62       'desc' => "{MediaWikiPlugin.toc_exp}",
    6363       'type' => "regexp",
    6464       'reqd' => "no",
     
    6666     # set to delete the navigation section
    6767     { 'name' => "delete_nav",
    68        'desc' => "{MediaWikiPlug.delete_nav}",
     68       'desc' => "{MediaWikiPlugin.delete_nav}",
    6969       'type' => "flag",
    7070       'reqd' => "no",
     
    7272     # regexp to match the navigation section   
    7373     { 'name' => "nav_div_exp",
    74        'desc' => "{MediaWikiPlug.nav_div_exp}",
     74       'desc' => "{MediaWikiPlugin.nav_div_exp}",
    7575       'type' => "regexp",
    7676       'reqd' => "no",
     
    7878     # set to delete the searchbox section
    7979     { 'name' => "delete_searchbox",
    80        'desc' => "{MediaWikiPlug.delete_searchbox}",
     80       'desc' => "{MediaWikiPlugin.delete_searchbox}",
    8181       'type' => "flag",
    8282       'reqd' => "no",
     
    8484     # regexp to match the searchbox section
    8585     { 'name' => "searchbox_div_exp",
    86        'desc' => "{MediaWikiPlug.searchbox_div_exp}",
     86       'desc' => "{MediaWikiPlugin.searchbox_div_exp}",
    8787       'type' => "regexp",
    8888       'reqd' => "no",
    8989       'deft' => "<div([^>]*)id=(\\\"|')p-search(\\\"|')(.|\\n)*?<\/div>"},     
    9090     # regexp to match title suffix
    91      # can't use the title_sub option in HTMLPlug instead
     91     # can't use the title_sub option in HTMLPlugin instead
    9292     # because title_sub always matches from the begining     
    9393     { 'name' => "remove_title_suffix_exp",
    94        'desc' => "{MediaWikiPlug.remove_title_suffix_exp}",
     94       'desc' => "{MediaWikiPlugin.remove_title_suffix_exp}",
    9595       'type' => "regexp",
    9696       'reqd' => "no",
     
    9898     ];
    9999
    100 my $options = { 'name'     => "MediaWikiPlug",
    101         'desc'     => "{MediaWikiPlug.desc}",
     100my $options = { 'name'     => "MediaWikiPlugin",
     101        'desc'     => "{MediaWikiPlugin.desc}",
    102102        'abstract' => "no",
    103103        'inherits' => "yes",
     
    109109    push(@$pluginlist, $class);
    110110   
    111     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    112     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    113    
    114     my $self = new HTMLPlug($pluginlist, $inputargs, $hashArgOptLists);   
     111    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     112    push(@{$hashArgOptLists->{"OptList"}},$options);
     113   
     114    my $self = new HTMLPlugin($pluginlist, $inputargs, $hashArgOptLists);   
    115115    return bless $self, $class;
    116116}
     
    123123    my $outhandle = $self->{'outhandle'};
    124124
    125     print $outhandle "MediaWikiPlug: processing $file\n" if $self->{'verbosity'} > 1;
     125    print $outhandle "MediaWikiPlugin: processing $file\n" if $self->{'verbosity'} > 1;
    126126         
    127127    my @head_and_body = split(/<body/i,$$textref);
     
    205205                                        # linux: /research/lh92/greenstone/greenstone2.73/collect/wiki/import
    206206    # $file use different delimiters : forward slash for linux; backward slash for windows
    207     # print "\nfile : $file\n\n";         # windows: greenstone.sourceforge.net\wiki\index.php\Access_Processing_using_DBPlug.html   
     207    # print "\nfile : $file\n\n";         # windows: greenstone.sourceforge.net\wiki\index.php\Access_Processing_using_DBPlugin.html   
    208208                                        # linux: greenstone.sourceforge.net/wiki/index.php/Using_GreenstoneWiki.html
    209209   
     
    618618        $value = $1;
    619619        if (!defined $value || !defined $tag){
    620         #print $outhandle "StructuredHTMLPlug: can't find VALUE in \"$tag\"\n";
     620        #print $outhandle "MediaWikiPlugin: can't find VALUE in \"$tag\"\n";
    621621        next;
    622622        } else {
  • gsdl/trunk/perllib/plugins/MetadataCSVPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # MetadataCSVPlug.pm -- A plugin for metadata in comma-separated value format
     3# MetadataCSVPlugin.pm -- A plugin for metadata in comma-separated value format
    44#
    55# A component of the Greenstone digital library software
     
    2525###########################################################################
    2626
    27 package MetadataCSVPlug;
     27package MetadataCSVPlugin;
    2828
    2929
    30 use BasPlug;
     30use BasePlugin;
    3131use strict;
    3232
    3333
    3434sub BEGIN {
    35     @MetadataCSVPlug::ISA = ('BasPlug');
     35    @MetadataCSVPlugin::ISA = ('BasePlugin');
    3636}
    3737
     
    3939my $arguments =
    4040    [ { 'name' => "block_exp",
    41     'desc' => "{BasPlug.block_exp}",
     41    'desc' => "{BasePlugin.block_exp}",
    4242    'type' => "regexp",
    4343    'reqd' => "no",
     
    4545
    4646
    47 my $options = { 'name'     => "MetadataCSVPlug",
    48         'desc'     => "{MetadataCSVPlug.desc}",
     47my $options = { 'name'     => "MetadataCSVPlugin",
     48        'desc'     => "{MetadataCSVPlugin.desc}",
    4949        'abstract' => "no",
    5050        'inherits' => "yes",
     
    5858    push(@$pluginlist, $class);
    5959
    60     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    61     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     60    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     61    push(@{$hashArgOptLists->{"OptList"}},$options);
    6262
    63     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     63    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
    6464
    6565    return bless $self, $class;
     
    7474
    7575
    76 # We don't want any other plugins to see .csv files
     76# Used by BasePlugin read to block this file
    7777sub get_default_block_exp
    7878{
     
    9191    return undef;
    9292    }
    93     print STDERR "\n<Processing n='$file' p='MetadataCSVPlug'>\n" if ($gli);
    94     print STDERR "MetadataCSVPlug: processing $file\n" if ($self->{'verbosity'}) > 1;
     93    print STDERR "\n<Processing n='$file' p='MetadataCSVPlugin'>\n" if ($gli);
     94    print STDERR "MetadataCSVPlugin: processing $file\n" if ($self->{'verbosity'}) > 1;
    9595
    9696    # Read the CSV file to get the metadata
     
    9898    open(CSV_FILE, "$filename");
    9999    my $csv_file_reader = new multiread();
    100     $csv_file_reader->set_handle('MetadataCSVPlug::CSV_FILE');
     100    $csv_file_reader->set_handle('MetadataCSVPlugin::CSV_FILE');
    101101    $csv_file_reader->read_file(\$csv_file_content);
    102102    close(CSV_FILE);
     
    118118
    119119    if (!$found_filename_field) {
    120     print STDERR "MetadataCSVPlug Error: No Filename field in CSV file: $filename\n";
     120    print STDERR "MetadataCSVPlugin Error: No Filename field in CSV file: $filename\n";
    121121    return -1; # error
    122122    }
     
    153153        # The line must be formatted incorrectly
    154154        else {
    155         print STDERR "MetadataCSVPlug Error: Badly formatted CSV line: $csv_line.\n";
     155        print STDERR "MetadataCSVPlugin Error: Badly formatted CSV line: $csv_line.\n";
    156156        last;
    157157        }
     
    163163    my $csv_line_filename_array = $csv_line_metadata{"Filename"};
    164164    if (!defined $csv_line_filename_array) {
    165         print STDERR "MetadataCSVPlug Error: No Filename metadata in CSV line: $orig_csv_line\n";
     165        print STDERR "MetadataCSVPlugin Error: No Filename metadata in CSV line: $orig_csv_line\n";
    166166        next;
    167167    }
  • gsdl/trunk/perllib/plugins/MetadataPass.pm

    r12970 r15872  
    2929no strict 'refs'; # allow filehandles to be variables and viceversa
    3030
    31 use BasPlug; # uses BasPlug, but is not inherited
     31use PrintInfo; # uses PrintInfo, but is not inherited
    3232
    3333
     
    5555sub print_xml_usage
    5656{
    57     BasPlug::print_xml_usage(@_);
     57    PrintInfo::print_xml_usage(@_);
    5858}
    5959
    6060sub print_xml
    6161{
    62     BasPlug::print_xml(@_);
     62    PrintInfo::print_xml(@_);
    6363}
    6464
    6565sub set_incremental
    6666{
    67     BasPlug::set_incremental(@_);
     67    PrintInfo::set_incremental(@_);
    6868}
    6969
  • gsdl/trunk/perllib/plugins/MetadataXMLPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # MetadataXMLPlug.pm --
     3# MetadataXMLPlugin.pm --
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2424###########################################################################
    2525
    26 # MetadataXMLPlug processes metadata.xml files in a collection
     26# MetadataXMLPlugin processes metadata.xml files in a collection
    2727
    2828# Here's an example of a metadata file that uses three FileSet structures
     
    8585# metadata is explictly overridden later in the import.
    8686
    87 package MetadataXMLPlug;
     87package MetadataXMLPlugin;
    8888
    8989use strict;
    9090no strict 'refs';
    91 use BasPlug;
     91use BasePlugin;
    9292use util;
    9393use metadatautil;
    9494
    9595sub BEGIN {
    96     @MetadataXMLPlug::ISA = ('BasPlug');
     96    @MetadataXMLPlugin::ISA = ('BasePlugin');
    9797    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
    9898}
     
    102102my $arguments = [
    103103      { 'name' => "block_exp",
    104     'desc' => "{BasPlug.block_exp}",
     104    'desc' => "{BasePlugin.block_exp}",
    105105    'type' => "regexp",
    106106    'reqd' => "no",
     
    108108];
    109109
    110 my $options = { 'name'     => "MetadataXMLPlug",
    111         'desc'     => "{MetadataXMLPlug.desc}",
     110my $options = { 'name'     => "MetadataXMLPlugin",
     111        'desc'     => "{MetadataXMLPlugin.desc}",
    112112        'abstract' => "no",
    113113        'inherits' => "yes",
     
    121121    push(@$pluginlist, $class);
    122122
    123     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    124     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    125 
    126     $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     123    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     124    push(@{$hashArgOptLists->{"OptList"}},$options);
     125
     126    $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
    127127
    128128    if ($self->{'info_only'}) {
     
    180180    }
    181181
    182     print STDERR "\n<Processing n='$file' p='MetadataXMLPlug'>\n" if ($gli);
    183     print STDERR "MetadataXMLPlug: processing $file\n" if ($self->{'verbosity'})> 1;
     182    print STDERR "\n<Processing n='$file' p='MetadataXMLPlugin'>\n" if ($gli);
     183    print STDERR "MetadataXMLPlugin: processing $file\n" if ($self->{'verbosity'})> 1;
    184184
    185185    $self->{'metadataref'} = $extrametadata;
  • gsdl/trunk/perllib/plugins/NulPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # NULPlug.pm -- Plugin for dummy (.nul) files
     3# NulPlugin.pm -- Plugin for dummy (.nul) files
    44#
    55# A component of the Greenstone digital library software from the New
     
    2626###########################################################################
    2727
    28 # NULPlug - a plugin for dummy files
     28# NulPlugin - a plugin for dummy files
    2929
    3030# This is a simple Plugin for importing dummy files, along with
     
    3535# databases
    3636
    37 package NULPlug;
     37package NulPlugin;
    3838
    39 use BasPlug;
     39use BasePlugin;
    4040
    4141use strict;
     
    4343
    4444sub BEGIN {
    45     @NULPlug::ISA = ('BasPlug');
     45    @NulPlugin::ISA = ('BasePlugin');
    4646}
    4747
    4848my $arguments =
    4949    [ { 'name' => "process_exp",
    50     'desc' => "{BasPlug.process_exp}",
     50    'desc' => "{BasePlugin.process_exp}",
    5151    'type' => "regexp",
    5252    'reqd' => "no",
    5353    'deft' => &get_default_process_exp() },
    5454      { 'name' => "assoc_field",
    55     'desc' => "{NULPlug.assoc_field}",
     55    'desc' => "{NulPlugin.assoc_field}",
    5656    'type' => "string",
    57     'deft' => "",
     57    'deft' => "null_file",
    5858    'reqd' => "no" },
    5959      { 'name' => "add_metadata_as_text",
    60     'desc' => "{NULPlug.add_metadata_as_text}",
     60    'desc' => "{NulPlugin.add_metadata_as_text}",
    6161    'type' => "flag" },
    6262      { 'name' => "remove_namespace_for_text",
    63     'desc' => "{NULPlug.remove_namespace_for_text}",
     63    'desc' => "{NulPlugin.remove_namespace_for_text}",
    6464    'type' => "flag" }
    6565      ];
    6666
    67 my $options = { 'name'     => "NULPlug",
    68         'desc'     => "{NULPlug.desc}",
     67my $options = { 'name'     => "NulPlugin",
     68        'desc'     => "{NulPlugin.desc}",
    6969        'abstract' => "no",
    7070        'inherits' => "yes",
     
    7777    push(@$pluginlist, $class);
    7878
    79     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    80     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     79    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     80    push(@{$hashArgOptLists->{"OptList"}},$options);
    8181
    82     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     82    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
    8383   
    8484    return bless $self, $class;
     
    8989}
    9090
    91 # The NULPlug read() function. This function does all the right
    92 # things to make general options work for a given plugin.  NULPlug
    93 # overrides read() because there is no need to read the actual text of
    94 # the file in, because the contents of the file is not text...
    95 #
    96 #
    97 # Return number of files processed, undef if can't process
    98 #
    99 # Note that $base_dir might be "" and that $file might include directories
     91# NulPlugin specific processing of doc_obj.
     92sub process {
     93    my $self = shift (@_);
     94    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
     95   
     96    my $topsection = $doc_obj->get_top_section();
     97     
     98    my $assoc_field = $self->{'assoc_field'}; # || "null_file"; TODO, check this
     99    $doc_obj->add_metadata ($topsection, $assoc_field, $file);
    100100
    101 sub read {
    102     my $self = shift (@_);
    103     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    104 
    105     my $outhandle = $self->{'outhandle'};
    106 
    107     #check for associate_ext, blocking etc
    108     my ($block_status,$filename) = $self->read_block(@_);   
    109     return $block_status if ((!defined $block_status) || ($block_status==0));
    110 
    111     print STDERR "<Processing n='$file' p='NULPlug'>\n" if ($gli);
    112     print $outhandle "NULPlug processing \"$filename\"\n"
    113         if $self->{'verbosity'} > 1;
    114 
    115     #if there's a leading directory name, eat it...
    116     $file =~ s/^.*[\/\\]//;
    117    
    118     # create a new document
    119     my $doc_obj = new doc ($filename, "indexed_doc");
    120     my $top_section = $doc_obj->get_top_section();
    121 
    122     $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});   
    123     #$doc_obj->set_OIDtype ("incremental");
    124     $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}");
    125     $doc_obj->add_metadata($top_section, "Source", $file); # set the filename as Source metadata to be consistent with other plugins
    126 
    127     $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename));
    128 
    129     # the metadata NoText is used to suppress the dummy text 'This document has no text.'
    130     $doc_obj->add_metadata ($top_section, "NoText",    "1");
    131 
    132     my $assoc_field = $self->{'assoc_field'} || "null_file";
    133     $doc_obj->add_metadata ($top_section, $assoc_field, $file);
    134    
    135      if ($self->{'cover_image'}) {
    136     $self->associate_cover_image($doc_obj, $filename);
    137     }
    138 
    139     # include any metadata passed in from previous plugins
    140     my $section = $doc_obj->get_top_section();
    141     $self->extra_metadata ($doc_obj, $section, $metadata);
    142    
    143101    # format the metadata passed in (presumably from metadata.xml)
    144102    my $text = "";
    145103    if ($self->{'add_metadata_as_text'}) {
    146104    $text = &metadatautil::format_metadata_as_table($metadata, $self->{'remove_namespace_for_text'});
     105    $doc_obj->add_utf8_text($topsection, $text);
    147106    } else {
    148     #create an empty text string so we don't break downstream plugins
    149     $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
     107    $self->add_dummy_text($doc_obj, $topsection);
    150108    }
    151     $self->title_fallback($doc_obj,$section,$file);
    152    
    153     # do plugin specific processing of doc_obj
    154     unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
    155     print STDERR "<ProcessingError n='$file'>\n" if ($gli);
    156     return -1;
    157     }
    158 
    159     # do any automatic metadata extraction
    160     $self->auto_extract_metadata ($doc_obj);
    161 
    162     # add an OID
    163     $doc_obj->set_OID();
    164     $doc_obj->add_utf8_text($section, $text);
    165    
    166     # process the document
    167     $processor->process($doc_obj);
    168 
    169     $self->{'num_processed'} ++;
    170     return 1;
    171 }
    172 
    173 
    174 # NULPlug processing of doc_obj.  In practice we don't need to do
    175 # anything here because the read function takes care of everything.
    176 
    177 sub process {
    178     my $self = shift (@_);
    179     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
    180     my $outhandle = $self->{'outhandle'};
    181109   
    182110    return 1;
  • gsdl/trunk/perllib/plugins/OAIPlugin.pm

    r15865 r15872  
    2525###########################################################################
    2626
    27 package OAIPlug;
    28 
    29 use BasPlug;
     27package OAIPlugin;
     28
    3029use unicode;
    3130use util;
     
    3433no strict 'refs'; # allow filehandles to be variables and viceversa
    3534
    36 use XMLPlug;
     35use ReadXMLFile;
    3736
    3837sub BEGIN {
    39     @OAIPlug::ISA = ('XMLPlug');
     38    @OAIPlugin::ISA = ('ReadXMLFile');
    4039}
    4140
     
    4948      ];
    5049
    51 my $options = { 'name'     => "OAIPlug",
    52         'desc'     => "{OAIPlug.desc}",
     50my $options = { 'name'     => "OAIPlugin",
     51        'desc'     => "{OAIPlugin.desc}",
    5352        'abstract' => "no",
    5453        'inherits' => "yes",
     
    6160    push(@$pluginlist, $class);
    6261
    63     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    64     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    65 
    66     my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);
     62    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     63    push(@{$hashArgOptLists->{"OptList"}},$options);
     64
     65    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists);
    6766
    6867    return bless $self, $class;
     
    9897
    9998    my $outhandle = $self->{'outhandle'};
    100     print $outhandle "OAIPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
    101     print STDERR "<Processing n='$self->{'file'}' p='OAIPlug'>\n" if $self->{'gli'};
     99    print $outhandle "OAIPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
     100    print STDERR "<Processing n='$self->{'file'}' p='OAIPlugin'>\n" if $self->{'gli'};
    102101
    103102}
     
    199198    if ($srcdoc_exists)
    200199    {
    201         print $outhandle "OAIPlug: passing metadata on to $url_array->[0]\n"
     200        print $outhandle "OAIPlugin: passing metadata on to $url_array->[0]\n"
    202201        if ($self->{'verbosity'}>1);
    203202       
     
    265264    my $outhandle = $self->{'outhandle'};
    266265
    267     print STDERR "<Processing n='$file' p='OAIPlug'>\n" if ($gli);
    268     print $outhandle "OAIPlug: processing $file\n"
     266    print STDERR "<Processing n='$file' p='OAIPlugin'>\n" if ($gli);
     267    print $outhandle "OAIPlugin: processing $file\n"
    269268    if $self->{'verbosity'} > 1;
    270269
     
    411410
    412411    if ($top_level_prefix !~ /dc$/) {
    413         print $outhandle "Warning: OAIPlug currently only designed for Dublin Core (or variant) metadata\n";
     412        print $outhandle "Warning: OAIPlugin currently only designed for Dublin Core (or variant) metadata\n";
    414413        print $outhandle "         This recorded metadata section '$top_level_prefix' does not appear to match.\n";
    415414        print $outhandle "         Metadata assumed to be in form: <prefix:tag>value</prefix:tag> and will be converted\n";
  • gsdl/trunk/perllib/plugins/OggVorbisPlugin.pm

    r15865 r15872  
    2727###########################################################################
    2828
    29 package OggVorbisPlug;
     29package OggVorbisPlugin;
    3030
    3131
    32 use UnknownPlug;
     32use BasePlugin;
    3333use Ogg::Vorbis::Header::PurePerl;
    3434
    3535use strict;
    3636no strict 'refs'; # allow filehandles to be variables and viceversa
     37no strict 'subs';
    3738
    3839sub BEGIN {
    39     @OggVorbisPlug::ISA = ('UnknownPlug');
     40    @OggVorbisPlugin::ISA = ('BasePlugin');
    4041}
    4142
     
    4344my $arguments =
    4445    [ { 'name' => "process_exp",
    45     'desc' => "{BasPlug.process_exp}",
     46    'desc' => "{BasePlugin.process_exp}",
    4647    'type' => "string",
    4748    'deft' => &get_default_process_exp(),
    4849    'reqd' => "no" },
    4950      { 'name' => "add_technical_metadata",
    50     'desc' => "{OggVorbisPlug.add_technical_metadata}",
     51    'desc' => "{OggVorbisPlugin.add_technical_metadata}",
    5152    'type' => "flag",
    5253    'deft' => "" } ];
    5354
    54 my $options = { 'name'     => "OggVorbisPlug",
    55         'desc'     => "{OggVorbisPlug.desc}",
     55my $options = { 'name'     => "OggVorbisPlugin",
     56        'desc'     => "{OggVorbisPlugin.desc}",
    5657        'inherits' => "yes",
    5758        'abstract' => "no",
     
    7273    push(@$pluginlist, $class);
    7374   
    74     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    75     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     75    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     76    push(@{$hashArgOptLists->{"OptList"}},$options);
    7677   
    77     my $self = new UnknownPlug($pluginlist, $inputargs, $hashArgOptLists);
     78    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
    7879   
    7980    return bless $self, $class;
    8081}
    8182
    82 
    83 sub read
     83sub process
    8484{
    8585    my $self = shift (@_);
    86     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     86    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    8787
    88     my $outhandle = $self->{'outhandle'};
     88    my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
     89    # do something about OIDtype so no hashing
     90     
     91    # old code was in effect the following.
     92    if ($doc_obj->{'OIDtype'} =~ /^hash$/) {
     93    $doc_obj->set_OIDtype ("incremental");
     94    }
    8995
    90     #check process and block exps, smart block, etc
    91     my ($block_status,$filename) = $self->read_block(@_);   
    92     return $block_status if ((!defined $block_status) || ($block_status==0));
    93 
    94      # Report that we're processing the file
    95     print STDERR "<Processing n='$file' p='OggVorbisPlug'>\n" if ($gli);
    96     print $outhandle "OggVorbisPlug: processing $file\n"
    97     if ($self->{'verbosity'}) > 1;
    98    
    99     # file is just the name of the file (need to get rid off any leading directory names)
    100     $file =~ s/^.*[\/\\]//;
    101 
    102     # create a new index document
    103     my $doc_obj = new doc ($filename, "indexed_doc");
    104     if ($processor->{'OIDtype'} =~ /^(assigned|dirname)$/) {
    105     $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
    106     }
    107     else {
    108     $doc_obj->set_OIDtype ("incremental");    # this is done to avoid hashing content of file
    109     }
    110     my $section = $doc_obj->get_top_section();
    111    
    112     # replace spaces in filename with %20 in url for metadata entry
    113     my $url = $file;
    114     ##$url =~ s/ /%20/g;
    115 
    116     # Source (filename) to be consistent with other plugins
    117     $doc_obj->add_metadata ($section, "Source", $url);
    118 
     96    my $top_section = $doc_obj->get_top_section();
    11997    # Extract metadata
    120     my $ogg = Ogg::Vorbis::Header::PurePerl->new($filename);
     98    my $ogg = Ogg::Vorbis::Header::PurePerl->new($filename_full_path);
    12199
    122100    # Comments added to the file
     
    128106    {
    129107        if (defined $value && $value ne "") {
    130         $doc_obj->add_metadata($section, $keytc, $value);
     108        $doc_obj->add_metadata($top_section, $keytc, $value);
    131109        }
    132110    }
     
    141119        my $value = $ogg->info->{$key};
    142120        if (defined $value && $value ne "") {
    143         $doc_obj->add_metadata($section, $keytc, $value);
     121        $doc_obj->add_metadata($top_section, $keytc, $value);
    144122        }
    145123    }
    146124    }
    147125
    148     # srclink
    149     $doc_obj->add_metadata ($section, "FileFormat", "OggVorbis");
    150     $doc_obj->add_metadata ($section, "srclink", "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Source]\">");
    151     $doc_obj->add_metadata ($section, "/srclink", "</a>");
     126    $doc_obj->add_metadata ($top_section, "FileFormat", "OggVorbis");
     127    $doc_obj->add_metadata ($top_section, "srclink", "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Source]\">");
     128    $doc_obj->add_metadata ($top_section, "/srclink", "</a>");
    152129    # srcicon (need to include "iogg.gif" in the greenstone images directory
    153     $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/images/iogg.gif\" title=\"Download\" border=0>");
     130    $doc_obj->add_metadata ($top_section, "srcicon", "<img src=\"_httpprefix_/images/iogg.gif\" title=\"Download\" border=0>");
    154131
    155132    # add NoText metadata which can be used to suppress the dummy text
    156     $doc_obj->add_metadata ($section, "NoText", "1");
     133    $doc_obj->add_metadata ($top_section, "NoText", "1");
    157134
    158135    # Add the actual file as an associated file
    159     $doc_obj->associate_file($filename, $file, "VORBIS", $section);
     136    $doc_obj->associate_file($filename_full_path, $filename_no_path, "VORBIS", $top_section);
    160137
    161     # Create an empty text string so we don't break downstream plugins
    162      my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
    163 
    164     # include any metadata passed in from previous plugins
    165     $self->extra_metadata ($doc_obj, $section, $metadata);
    166 
    167     # do plugin specific processing of doc_obj
    168     return undef unless defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj));
    169 
    170     # do any automatic metadata extraction
    171     $self->auto_extract_metadata($doc_obj);
    172 
    173     # add an OID
    174     $doc_obj->set_OID();
    175     $doc_obj->add_utf8_text($section, $text);
    176 
    177     # process the document
    178     $processor->process($doc_obj);
    179 
    180     $self->{'num_processed'}++;
    181     return 1;
    182138}
    183139
  • gsdl/trunk/perllib/plugins/OpenDocumentPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # OpenDocumentPlug.pm -- The Open Document plugin
     3# OpenDocumentPlugin.pm -- The Open Document plugin
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    3333#This basically extracts any text out of the document, but not much else.
    3434
    35 package OpenDocumentPlug;
     35# this inherits ReadXMLFile, and therefore offers -xslt option, but does
     36# nothing with it.
     37
     38package OpenDocumentPlugin;
    3639
    3740use strict;
    3841no strict 'refs'; # allow filehandles to be variables and viceversa
    3942
    40 use XMLPlug;
     43use ReadXMLFile;
    4144use XML::XPath;
    4245use XML::XPath::XMLParser;
     
    4649
    4750sub BEGIN {
    48     @OpenDocumentPlug::ISA = ('XMLPlug');
    49 }
    50 
    51 
    52 #our @filesAssoc = ();
     51    @OpenDocumentPlugin::ISA = ('ReadXMLFile');
     52}
     53
    5354our @filesProcess = ( "content.xml" , "meta.xml" );
    54 #XML plug has this so we need it too
    55 our ($self);
    5655
    5756my $arguments = [
     
    6261         ];
    6362
    64 my $options = { 'name'     => "OpenDocumentPlug",
    65         'desc'     => "{OpenDocumentPlug.desc}",
     63my $options = { 'name'     => "OpenDocumentPlugin",
     64        'desc'     => "{OpenDocumentPlugin.desc}",
    6665        'abstract' => "no",
    6766        'inherits' => "yes",
     
    7574    push(@$pluginlist, $class);
    7675
    77     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    78     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    79 
    80     my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);
     76    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     77    push(@{$hashArgOptLists->{"OptList"}},$options);
     78
     79    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists);
    8180
    8281    $self->{'section'} = "";
     
    160159
    161160sub read {
    162     # $self must be global to work with XML callback routines.
    163     $self = shift (@_); 
     161    my $self = shift (@_); 
    164162   
    165163    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     
    188186   
    189187    $self->unzip ("\"$file_only\"");
    190     foreach my $xmlFile (@OpenDocumentPlug::filesProcess) {
     188    foreach my $xmlFile (@OpenDocumentPlugin::filesProcess) {
    191189        if (-e $xmlFile) {
    192         $self->parse_file($xmlFile);
     190        $self->{'parser'}->parsefile($xmlFile);
    193191        }
    194192    }
     
    203201
    204202    # parsefile may either croak somewhere in XML::Parser (e.g. because
    205     # the document is not well formed) or die somewhere in XMLPlug or a
     203    # the document is not well formed) or die somewhere in ReadXMLFile or a
    206204    # derived plugin (e.g. because we're attempting to process a
    207205    # document whose DOCTYPE is not meant for this plugin). For the
     
    255253    $doc_obj->add_utf8_metadata ("", "srcicon",  "<img border=\"0\" align=\"absmiddle\" src=\"_httpprefix_/collect/[collection]/index/assoc/[archivedir]/thumbnail.png\" alt=\"View the Open document\" title=\"View the Open document\">");
    256254    $doc_obj->add_utf8_metadata ("", "/srclink", "</a>");
    257     $doc_obj->add_utf8_metadata ("", "Source", &ghtml::dmsafe($file_only));
    258     $doc_obj->set_utf8_metadata_element("", "FileSize", (-s $filename));
     255    $self->set_Source_metadata($doc_obj, $file_only);
     256     $doc_obj->set_utf8_metadata_element("", "FileSize", (-s $filename));
    259257     
    260258    # include any metadata passed in from previous plugins
     
    268266   
    269267    # add an OID
    270     $doc_obj->set_OID();
     268    $self->add_OID($doc_obj);
    271269   
    272270    $doc_obj->add_utf8_metadata("", "Plugin", "$self->{'plugin_type'}");
  • gsdl/trunk/perllib/plugins/PDFPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # PDFPlug.pm -- reasonably with-it pdf plugin
     3# PDFPlugin.pm -- reasonably with-it pdf plugin
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2323#
    2424###########################################################################
    25 package PDFPlug;
    26 
    27 use ConvertToPlug;
     25package PDFPlugin;
     26
     27use ConvertBinaryFile;
     28use ReadTextFile;
    2829use unicode;
    2930use strict;
     
    3132
    3233sub BEGIN {
    33     @PDFPlug::ISA = ('ConvertToPlug');
     34    @PDFPlugin::ISA = ('ConvertBinaryFile', 'ReadTextFile');
    3435}
    3536
    3637my $convert_to_list =
    3738    [ { 'name' => "auto",
    38     'desc' => "{ConvertToPlug.convert_to.auto}" },
     39    'desc' => "{ConvertBinaryFile.convert_to.auto}" },
    3940      { 'name' => "html",
    40     'desc' => "{ConvertToPlug.convert_to.html}" },
     41    'desc' => "{ConvertBinaryFile.convert_to.html}" },
    4142      { 'name' => "text",
    42     'desc' => "{ConvertToPlug.convert_to.text}" },
     43    'desc' => "{ConvertBinaryFile.convert_to.text}" },
    4344      { 'name' => "pagedimg_jpg",
    44     'desc' => "{ConvertToPlug.convert_to.pagedimg_jpg}"},
     45    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_jpg}"},
    4546      { 'name' => "pagedimg_gif",
    46     'desc' => "{ConvertToPlug.convert_to.pagedimg_gif}"},
     47    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_gif}"},
    4748      { 'name' => "pagedimg_png",
    48     'desc' => "{ConvertToPlug.convert_to.pagedimg_png}"},
     49    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_png}"},
    4950      ];
    5051
     
    5354    [
    5455     { 'name' => "convert_to",
    55        'desc' => "{ConvertToPlug.convert_to}",
     56       'desc' => "{ConvertBinaryFile.convert_to}",
    5657       'type' => "enum",
    5758       'reqd' => "yes",
     
    5960       'deft' => "html" },   
    6061     { 'name' => "process_exp",
    61        'desc' => "{BasPlug.process_exp}",
     62       'desc' => "{BasePlugin.process_exp}",
    6263       'type' => "regexp",
    6364       'deft' => &get_default_process_exp(),
    6465       'reqd' => "no" },
    6566     { 'name' => "block_exp",
    66        'desc' => "{BasPlug.block_exp}",
     67       'desc' => "{BasePlugin.block_exp}",
    6768       'type' => "regexp",
    6869       'deft' => &get_default_block_exp() },
    6970     { 'name' => "metadata_fields",
    70        'desc' => "{HTMLPlug.metadata_fields}",
     71       'desc' => "{HTMLPlugin.metadata_fields}",
    7172       'type' => "string",
    7273       'deft' => "" },
    7374     { 'name' => "noimages",
    74        'desc' => "{PDFPlug.noimages}",
     75       'desc' => "{PDFPlugin.noimages}",
    7576       'type' => "flag" },
    7677     { 'name' => "allowimagesonly",
    77        'desc' => "{PDFPlug.allowimagesonly}",
     78       'desc' => "{PDFPlugin.allowimagesonly}",
    7879       'type' => "flag" },
    7980     { 'name' => "complex",
    80        'desc' => "{PDFPlug.complex}",
     81       'desc' => "{PDFPlugin.complex}",
    8182       'type' => "flag" },
    8283     { 'name' => "nohidden",
    83        'desc' => "{PDFPlug.nohidden}",
     84       'desc' => "{PDFPlugin.nohidden}",
    8485       'type' => "flag" },
    8586     { 'name' => "zoom",
    86        'desc' => "{PDFPlug.zoom}",
     87       'desc' => "{PDFPlugin.zoom}",
    8788       'deft' => "2",
    8889       'range' => "1,3", # actually the range is 0.5-3
    8990       'type' => "int" },
    9091     { 'name' => "use_sections",
    91        'desc' => "{PDFPlug.use_sections}",
     92       'desc' => "{PDFPlugin.use_sections}",
    9293       'type' => "flag" },
    9394     { 'name' => "description_tags",
    94        'desc' => "{HTMLPlug.description_tags}",
     95       'desc' => "{HTMLPlugin.description_tags}",
    9596       'type' => "flag" }
    9697     ];
    9798
    98 my $options = { 'name'     => "PDFPlug",
    99         'desc'     => "{PDFPlug.desc}",
     99my $options = { 'name'     => "PDFPlugin",
     100        'desc'     => "{PDFPlugin.desc}",
    100101        'abstract' => "no",
    101102        'inherits' => "yes",
     
    111112    push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
    112113
    113     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    114     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     114    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     115    push(@{$hashArgOptLists->{"OptList"}},$options);
    115116
    116117    my @arg_array = @$inputargs;
    117     my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists);
     118    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
    118119   
    119120    if ($self->{'info_only'}) {
     
    122123    }
    123124
    124     # these are passed through to gsConvert.pl by ConvertToPlug.pm
     125    $self->{'filename_extension'} = "pdf";
     126    $self->{'file_type'} = "PDF";
     127
     128    # these are passed through to gsConvert.pl by ConvertBinaryFile.pm
    125129    my $zoom = $self->{"zoom"};
    126130    $self->{'convert_options'} = "-pdf_zoom $zoom";
     
    132136    my $secondary_plugin_options = $self->{'secondary_plugin_options'};
    133137
    134     if (!defined $secondary_plugin_options->{'HTMLPlug'}) {
    135     $secondary_plugin_options->{'HTMLPlug'} = [];
    136     }
    137     if (!defined $secondary_plugin_options->{'TEXTPlug'}) {
    138     $secondary_plugin_options->{'TEXTPlug'} = [];
     138    if (!defined $secondary_plugin_options->{'HTMLPlugin'}) {
     139    $secondary_plugin_options->{'HTMLPlugin'} = [];
     140    }
     141    if (!defined $secondary_plugin_options->{'TextPlugin'}) {
     142    $secondary_plugin_options->{'TextPlugin'} = [];
    139143    }
    140144    if (defined $self->{'convert_to'} && $self->{'convert_to'} =~ /pagedimg.*/i) {
    141     if (!defined $secondary_plugin_options->{'PagedImgPlug'}){
    142         $secondary_plugin_options->{'PagedImgPlug'} = [];
    143         my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'};
     145    if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){
     146        $secondary_plugin_options->{'PagedImagePlugin'} = [];
     147        my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
    144148        push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    145149    }
    146150    }
    147     my $html_options = $secondary_plugin_options->{'HTMLPlug'};
    148     my $text_options = $secondary_plugin_options->{'TEXTPlug'};
    149     my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'};
     151    my $html_options = $secondary_plugin_options->{'HTMLPlugin'};
     152    my $text_options = $secondary_plugin_options->{'TextPlugin'};
     153    my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
    150154   
    151155    if ($self->{'input_encoding'} eq "auto") {
     
    227231    && $self->{'converted_to'} eq "HTML") {
    228232
    229     print $outhandle "PDFPlug: Calculating sections...\n";
     233    print $outhandle "PDFPlugin: Calculating sections...\n";
    230234
    231235    # we have "<a name=1></a>" etc for each page
     
    236240
    237241    if (scalar (@sections) == 1) { #only one section - no split!
    238         print $outhandle "PDFPlug: warning - no sections found\n";
     242        print $outhandle "PDFPlugin: warning - no sections found\n";
    239243    } else {
    240244        $top_section .= shift @sections; # keep HTML header etc as top_section
     
    274278        $title = " "; # get rid of the undefined warning in next line
    275279        }
    276         my $newsection = "<!-- from PDFPlug -->\n<!-- <Section>\n";
     280        my $newsection = "<!-- from PDFPlugin -->\n<!-- <Section>\n";
    277281        $newsection .= "<Metadata name=\"Title\">" . $title
    278282        . "</Metadata>\n--><p>\n";
     
    296300sub process {
    297301    my $self = shift (@_);
    298     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
     302    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    299303
    300304    my $result = $self->process_type("pdf",$base_dir,$file,$doc_obj);
  • gsdl/trunk/perllib/plugins/PPTPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # PPTPlug.pm -- plugin for importing Microsoft PowerPoint files.
     3# PPTPlugin.pm -- plugin for importing Microsoft PowerPoint files.
    44#  (currently only versions 95 and 97)
    55#
     
    2626###########################################################################
    2727
    28 package PPTPlug;
     28package PPTPlugin;
    2929
    30 use ConvertToPlug;
     30use ConvertBinaryFile;
     31use ReadTextFile; # for read_file in convert_post_process. do we need it?
     32
    3133use strict;
    3234no strict 'refs'; # allow filehandles to be variables and viceversa
    3335
    3436sub BEGIN {
    35     @PPTPlug::ISA = ('ConvertToPlug');
     37    @PPTPlugin::ISA = ('ConvertBinaryFile', 'ReadTextFile');
    3638}
    3739
    3840my $convert_to_list =
    3941    [ { 'name' => "auto",
    40     'desc' => "{ConvertToPlug.convert_to.auto}" },
     42    'desc' => "{ConvertBinaryFile.convert_to.auto}" },
    4143      { 'name' => "html",
    42     'desc' => "{ConvertToPlug.convert_to.html}" },
     44    'desc' => "{ConvertBinaryFile.convert_to.html}" },
    4345      { 'name' => "text",
    44     'desc' => "{ConvertToPlug.convert_to.text}" },
     46    'desc' => "{ConvertBinaryFile.convert_to.text}" },
    4547      { 'name' => "pagedimg_jpg",
    46     'desc' => "{ConvertToPlug.convert_to.pagedimg_jpg}" },
     48    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_jpg}" },
    4749      { 'name' => "pagedimg_gif",
    48     'desc' => "{ConvertToPlug.convert_to.pagedimg_gif}" },
     50    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_gif}" },
    4951      { 'name' => "pagedimg_png",
    50     'desc' => "{ConvertToPlug.convert_to.pagedimg_png}" }
     52    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_png}" }
    5153      ];
    5254
    5355my $arguments =
    5456    [ { 'name' => "process_exp",
    55     'desc' => "{BasPlug.process_exp}",
     57    'desc' => "{BasePlugin.process_exp}",
    5658    'type' => "regexp",
    5759    'reqd' => "no",
     
    5961      ];
    6062
    61 my $options = { 'name'     => "PPTPlug",
    62         'desc'     => "{PPTPlug.desc}",
     63my $options = { 'name'     => "PPTPlugin",
     64        'desc'     => "{PPTPlugin.desc}",
    6365        'abstract' => "no",
    6466        'inherits' => "yes",
     
    7375    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
    7476    my $ws_arg =[{ 'name' => "convert_to",
    75                'desc' => "{ConvertToPlug.convert_to}",
     77               'desc' => "{ConvertBinaryFile.convert_to}",
    7678               'type' => "enum",
    7779               'reqd' => "yes",
     
    7981               'deft' => "html" },
    8082             { 'name' => "windows_scripting",
    81                'desc' => "{PPTPlug.windows_scripting}",
     83               'desc' => "{PPTPlugin.windows_scripting}",
    8284               'type' => "flag",
    8385               'reqd' => "no" }
     
    8688    }
    8789   
    88     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    89     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     90    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     91    push(@{$hashArgOptLists->{"OptList"}},$options);
    9092
    9193
    92     my @arg_array = @$inputargs;
    93     my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists);
     94    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
    9495 
    9596    if ($self->{'info_only'}) {
     
    9899    }
    99100
     101    $self->{'filename_extension'} = "ppt";
     102    $self->{'file_type'} = "PPT";
     103
    100104    # ppthtml outputs utf-8 already.
    101     #these are passed through to gsConvert.pl by ConvertToPlug.pm
     105    #these are passed through to gsConvert.pl by ConvertBinaryFile.pm
    102106    $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'};
    103107    my $secondary_plugin_options = $self->{'secondary_plugin_options'};
    104108
    105109    if ($self->{'windows_scripting'} && ($self->{'convert_to'} eq "PagedImg")) {
    106     $secondary_plugin_options->{'PagedImgPlug'} = [];
     110    $secondary_plugin_options->{'PagedImagePlugin'} = [];
    107111    } else {
    108     $secondary_plugin_options->{'HTMLPlug'} = [];
     112    $secondary_plugin_options->{'HTMLPlugin'} = [];
    109113    }
    110     my $html_options = $secondary_plugin_options->{'HTMLPlug'};
    111     my $pageimg_options = $secondary_plugin_options->{'PagedImgPlug'};
     114    my $html_options = $secondary_plugin_options->{'HTMLPlugin'};
     115    my $pageimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
    112116
    113117    if ($self->{'input_encoding'} eq "auto") {
    114118    $self->{'input_encoding'} = "utf8";
    115     if (defined $secondary_plugin_options->{'HTMLPlug'}){
     119    if (defined $secondary_plugin_options->{'HTMLPlugin'}){
    116120        push(@$html_options,"-input_encoding", "utf8");
    117121        push(@$html_options,"-extract_language") if $self->{'extract_language'};
    118122
    119         # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
     123        # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj)
    120124        # to extract these metadata fields from the HEAD META fields
    121125        push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
    122126    }
    123     if (defined $secondary_plugin_options->{'PagedImgPlug'}){
     127    if (defined $secondary_plugin_options->{'PagedImagePlugin'}){
    124128        push(@$pageimg_options,"-input_encoding", "utf8");
    125129        push(@$pageimg_options,"-extract_language") if $self->{'extract_language'};
     
    138142}
    139143
    140 sub get_file_type {
    141     my $self = shift (@_);
    142     my $file_type = "PPT";
    143     return $file_type;
    144 }
    145 
     144# do we need this? above states that ppthtml produces utf8 text...
    146145sub convert_post_process
    147146{
     
    161160}
    162161
    163 sub process {
    164     my $self = shift (@_);
    165     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    166 
    167     return $self->process_type("ppt",$base_dir,$file,$doc_obj);
    168 }
    169162
    1701631;
  • gsdl/trunk/perllib/plugins/PSPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # PSPlug.pm -- this might look VERY similar to the PDF plugin...
     3# PSPlugin.pm -- this might look VERY similar to the PDF plugin...
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2626# 12/05/02 Added usage datastructure - John Thompson
    2727
    28 package PSPlug;
    29 
    30 use ConvertToPlug;
     28package PSPlugin;
     29
     30use ConvertBinaryFile;
     31use ReadTextFile; # for read_file in convert_post_process. do we need it?
    3132use sorttools;
    3233
     
    3536
    3637sub BEGIN {
    37     @PSPlug::ISA = ('ConvertToPlug');
     38    @PSPlugin::ISA = ('ConvertBinaryFile', 'ReadTextFile');
    3839}
    3940
    4041my $convert_to_list =
    4142    [ { 'name' => "auto",
    42     'desc' => "{ConvertToPlug.convert_to.auto}" },
     43    'desc' => "{ConvertBinaryFile.convert_to.auto}" },
    4344      { 'name' => "text",
    44     'desc' => "{ConvertToPlug.convert_to.text}" },
     45    'desc' => "{ConvertBinaryFile.convert_to.text}" },
    4546      { 'name' => "pagedimg_jpg",
    46     'desc' => "{ConvertToPlug.convert_to.pagedimg_jpg}" },
     47    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_jpg}" },
    4748      { 'name' => "pagedimg_gif",
    48     'desc' => "{ConvertToPlug.convert_to.pagedimg_gif}" },
     49    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_gif}" },
    4950      { 'name' => "pagedimg_png",
    50     'desc' => "{ConvertToPlug.convert_to.pagedimg_png}" }
     51    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_png}" }
    5152      ];
    5253
    5354my $arguments =
    5455    [ { 'name' => "convert_to",
    55     'desc' => "{ConvertToPlug.convert_to}",
     56    'desc' => "{ConvertBinaryFile.convert_to}",
    5657    'type' => "enum",
    5758    'reqd' => "yes",
     
    5960    'deft' => "text" },
    6061      { 'name' => "process_exp",
    61     'desc' => "{BasPlug.process_exp}",
     62    'desc' => "{BasePlugin.process_exp}",
    6263    'type' => "regexp",
    6364    'deft' => &get_default_process_exp(),
    6465    'reqd' => "no" },
    6566      { 'name' => "block_exp",
    66     'desc' => "{BasPlug.block_exp}",
     67    'desc' => "{BasePlugin.block_exp}",
    6768    'type' => 'regexp',
    6869    'deft' => &get_default_block_exp() },
    6970      { 'name' => "extract_date",
    70     'desc' => "{PSPlug.extract_date}",
     71    'desc' => "{PSPlugin.extract_date}",
    7172    'type' => "flag" },
    7273      { 'name' => "extract_pages",
    73     'desc' => "{PSPlug.extract_pages}",
     74    'desc' => "{PSPlugin.extract_pages}",
    7475    'type' => "flag" },
    7576      { 'name' => "extract_title",
    76     'desc' => "{PSPlug.extract_title}",
     77    'desc' => "{PSPlugin.extract_title}",
    7778    'type' => "flag" } ];
    7879
    79 my $options = { 'name'     => "PSPlug",
    80         'desc'     => "{PSPlug.desc}",
     80my $options = { 'name'     => "PSPlugin",
     81        'desc'     => "{PSPlugin.desc}",
    8182        'abstract' => "no",
    8283        'inherits' => "yes",
     
    9495    push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
    9596   
    96     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    97     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    98    
    99     my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists);
     97    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     98    push(@{$hashArgOptLists->{"OptList"}},$options);
     99   
     100    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
    100101
    101102    if ($self->{'info_only'}) {
     
    104105    }
    105106
     107    $self->{'filename_extension'} = "ps";
     108    $self->{'file_type'} = "PS";
     109
    106110    my $secondary_plugin_options = $self->{'secondary_plugin_options'};
    107111
    108     if (!defined $secondary_plugin_options->{'TEXTPlug'}) {
    109     $secondary_plugin_options->{'TEXTPlug'} = [];
    110     }
    111 
    112     my $text_options = $secondary_plugin_options->{'TEXTPlug'};
     112    if (!defined $secondary_plugin_options->{'TextPlugin'}) {
     113    $secondary_plugin_options->{'TextPlugin'} = [];
     114    }
     115
     116    my $text_options = $secondary_plugin_options->{'TextPlugin'};
    113117
    114118    if (defined $self->{'convert_to'} && $self->{'convert_to'} =~ /pagedimg.*/i) {
    115     if (!defined $secondary_plugin_options->{'PagedImgPlug'}){
    116         $secondary_plugin_options->{'PagedImgPlug'} = [];
    117         my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'};
     119    if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){
     120        $secondary_plugin_options->{'PagedImagePlugin'} = [];
     121        my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
    118122        push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    119123    }
     
    148152}
    149153
     154# this has been commented out in other plugins. do we need it here?
    150155sub convert_post_process
    151156{
     
    179184    my $date_found = 0;
    180185
    181     print STDERR "PSPlug: extracting PostScript metadata from \"$filename\"\n"
     186    print STDERR "PSPlugin: extracting PostScript metadata from \"$filename\"\n"
    182187    if $self->{'verbosity'} > 1;
    183188
     
    237242}
    238243
    239 # do plugin specific processing of doc_obj for HTML type
     244# do plugin specific processing of doc_obj
    240245sub process {
    241246    my $self = shift (@_);
    242     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    243 
    244 #    my $outhandle = $self->{'outhandle'};
    245 
    246 #    print $outhandle "PSPlug: passing $file on to $self->{'converted_to'}Plug\n"
    247 #   if $self->{'verbosity'} > 1;
    248 #    print STDERR "<Processing n='$file' p='PSPlug'>\n" if ($gli);
     247    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    249248
    250249    my $filename = &util::filename_cat($base_dir,$file);
    251250    $self->extract_metadata_from_postscript($filename, $doc_obj);
    252251
    253     return $self->process_type("ps",$base_dir,$file,$doc_obj);
     252    return $self->SUPER::process(@_);
     253
    254254}
    255255
  • gsdl/trunk/perllib/plugins/PagedImagePlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # PagedImgPlug.pm -- plugin for sets of images and OCR text that
     3# PagedImagePlugin.pm -- plugin for sets of images and OCR text that
    44#  make up a document
    55# A component of the Greenstone digital library software
     
    2525###########################################################################
    2626
    27 # PagedImgPlug
     27# PagedImagePlugin
    2828# processes sequences of images, with optional OCR text
    2929#
     
    116116# To have it create medium size images for display, use the '-screenview'
    117117# option. As usual, running
    118 # 'perl -S pluginfo.pl PagedImgPlug' will list all the options.
     118# 'perl -S pluginfo.pl PagedImagePlugin' will list all the options.
    119119
    120120# If you want the resulting documents to be presented with a table of
     
    132132# FileName (only for document level metadata).
    133133
    134 package PagedImgPlug;
    135 
    136 use XMLPlug;
     134package PagedImagePlugin;
     135
     136use ReadXMLFile;
     137use ReadTextFile;
     138use ImageConverter;
     139
    137140use strict;
    138141no strict 'refs'; # allow filehandles to be variables and viceversa
    139142
    140143sub BEGIN {
    141     @PagedImgPlug::ISA = ('XMLPlug');
     144    @PagedImagePlugin::ISA = ('ReadXMLFile', 'ReadTextFile', 'ImageConverter');
    142145}
    143146
    144147my $type_list =
    145148    [ { 'name' => "paged",
    146         'desc' => "{PagedImgPlug.documenttype.paged}" },
     149        'desc' => "{PagedImagePlugin.documenttype.paged}" },
    147150      { 'name' => "hierarchy",
    148         'desc' => "{PagedImgPlug.documenttype.hierarchy}" } ];
     151        'desc' => "{PagedImagePlugin.documenttype.hierarchy}" } ];
    149152
    150153my $arguments =
     
    163166    'type' => "string",
    164167    'deft' => "" },
    165       { 'name' => "noscaleup",
    166     'desc' => "{ImagePlug.noscaleup}",
    167     'type' => "flag",
    168     'reqd' => "no" },
    169       { 'name' => "thumbnail",
    170     'desc' => "{PagedImgPlug.thumbnail}",
    171     'type' => "flag",
    172     'reqd' => "no" },
    173       { 'name' => "thumbnailsize",
    174     'desc' => "{ImagePlug.thumbnailsize}",
    175     'type' => "int",
    176     'deft' => "100",
    177     'range' => "1,",
    178     'reqd' => "no" },
    179       { 'name' => "thumbnailtype",
    180     'desc' => "{ImagePlug.thumbnailtype}",
    181     'type' => "string",
    182     'deft' => "gif",
    183     'reqd' => "no" },
    184       { 'name' => "screenview",
    185     'desc' => "{PagedImgPlug.screenview}",
    186     'type' => "flag",
    187     'reqd' => "no" },
    188       { 'name' => "screenviewsize",
    189     'desc' => "{PagedImgPlug.screenviewsize}",
    190     'type' => "int",
    191         'deft' => "500",
    192         'range' => "1,",
    193     'reqd' => "no" },
    194       { 'name' => "screenviewtype",
    195     'desc' => "{PagedImgPlug.screenviewtype}",
    196     'type' => "string",
    197     'deft' => "jpg",
    198     'reqd' => "no" },
    199       { 'name' => "converttotype",
    200     'desc' => "{ImagePlug.converttotype}",
    201     'type' => "string",
    202     'deft' => "",
    203     'reqd' => "no" },
    204       { 'name' => "minimumsize",
    205     'desc' => "{ImagePlug.minimumsize}",
    206     'type' => "int",
    207     'deft' => "100",
    208     'range' => "1,",
    209     'reqd' => "no" },
    210168      { 'name' => "headerpage",
    211     'desc' => "{PagedImgPlug.headerpage}",
     169    'desc' => "{PagedImagePlugin.headerpage}",
    212170    'type' => "flag",
    213171    'reqd' => "no" },
    214172      { 'name' => "documenttype",
    215     'desc' => "{PagedImgPlug.documenttype}",
     173    'desc' => "{PagedImagePlugin.documenttype}",
    216174    'type' => "enum",
    217175    'list' => $type_list,
     
    220178
    221179
    222 my $options = { 'name'     => "PagedImgPlug",
    223         'desc'     => "{PagedImgPlug.desc}",
     180my $options = { 'name'     => "PagedImagePlugin",
     181        'desc'     => "{PagedImagePlugin.desc}",
    224182        'abstract' => "no",
    225183        'inherits' => "yes",
     
    231189    push(@$pluginlist, $class);
    232190
    233     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    234     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    235    
    236     my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);
     191    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     192    push(@{$hashArgOptLists->{"OptList"}},$options);
     193   
     194    new ImageConverter($pluginlist, $inputargs, $hashArgOptLists);
     195    new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
     196    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists);
    237197   
    238198    return bless $self, $class;
     199}
     200
     201
     202sub init {
     203    my $self = shift (@_);
     204    my ($verbosity, $outhandle, $failhandle) = @_;
     205
     206    $self->SUPER::init(@_);
     207    $self->ImageConverter::init();
    239208}
    240209
     
    260229}
    261230
    262 # Create the thumbnail and screenview images, and discover the Image's
    263 # size, width, and height using the convert utility.
     231sub rotate_image {
     232    my $self = shift (@_);
     233    my ($filename_full_path) = @_;
     234   
     235    my ($this_filetype) = $filename_full_path =~ /\.([^\.]*)$/;
     236    my $result = $self->convert($filename_full_path, $this_filetype, "-rotate 180", "ROTATE");
     237    my ($new_filename) = ($result =~ /=>(.*\.$this_filetype)/);
     238    if (-e "$new_filename") {
     239    return $new_filename;
     240    }
     241    # something has gone wrong
     242    return $filename_full_path;
     243
     244}
     245
    264246sub process_image {
    265     my $self = shift (@_);
    266     my $filename = shift (@_); # filename with full path
    267     my $srcfile = shift (@_); # filename without path
    268     my $doc_obj = shift (@_);
    269     my $section = shift (@_); #the current section
    270     my $rotate = shift (@_); # whether to rotate the image or not
    271     $rotate = 0 unless defined $rotate;
    272    
    273     # check that the image file exists!!
    274     if (!-f $filename) {
    275     print "PagedImgPlug: ERROR: File $filename does not exist, skipping\n";
    276     return 0;
    277     }
    278 
    279     my $top=0;
    280     if ($section eq $doc_obj->get_top_section()) {
    281     $top=1;
    282     }
    283     my $verbosity = $self->{'verbosity'};
    284     my $outhandle = $self->{'outhandle'};
    285 
    286     # check the filename is okay
    287     return 0 if ($srcfile eq "" || $filename eq "");
    288    
    289     my $minimumsize = $self->{'minimumsize'};
    290     if (defined $minimumsize && (-s $filename < $minimumsize)) {
    291         print $outhandle "PagedImgPlug: \"$filename\" too small, skipping\n"
    292         if ($verbosity > 1);
    293     }
    294 
    295     # Convert the image to a new type (if required), and rotate if required.
    296     my $converttotype = $self->{'converttotype'};
    297     my $originalfilename = "";  # only set if we do a conversion
    298     my $type = "unknown";
    299     my $converted = 0;
    300     my $rotated=0;
    301 
    302     if ($converttotype ne "" && $filename !~ /$converttotype$/) {
    303     $converted=1;
    304     $originalfilename = $filename;
    305     my $filehead = &util::get_tmp_filename();
    306     $filename = $filehead . ".$converttotype";
    307     my $n = 1;
    308     while (-e $filename) {
    309         $filename = "$filehead$n\.$converttotype";
    310         $n++;
    311     }
    312     $self->{'tmp_filename1'} = $filename;
    313 
    314     my $rotate_option = "";
    315     if ($rotate eq "r") {
    316         $rotate_option = "-rotate 180 ";
    317     }
    318    
    319     my $command = "convert -verbose \"$originalfilename\" $rotate_option \"$filename\"";
    320     print $outhandle "CONVERT: $command\n" if ($verbosity > 2);
    321     my $result = '';
    322     $result = `$command`;
    323     print $outhandle "CONVERT RESULT = $result\n" if ($verbosity > 2);
    324 
    325     $type = $converttotype;
    326     } elsif ($rotate eq "r") {
    327     $rotated=1;
    328     $originalfilename = $filename;
    329     $filename  = &util::get_tmp_filename();
    330    
    331     my $command = "convert \"$originalfilename\" -rotate 180 \"$filename\"";
    332     print $outhandle "ROTATE: $command\n" if ($verbosity > 2);
    333     my $result = '';
    334     $result = `$command`;
    335     print $outhandle "ROTATE RESULT = $result\n" if ($verbosity > 2);
    336    
    337     }
    338    
    339 
    340     # Add the image metadata
    341     my $file; # the new file name
    342     my $id = $srcfile;
    343     $id =~ s/\.([^\.]*)$//;  # the new file name without an extension
    344     if ($converted) {
    345     # we have converted the image
    346     # add on the new extension
    347     $file .= "$id.$converttotype";
    348     } else {
    349     $file = $srcfile;
    350     }
    351    
    352     my $url =$file; # the new file name prepared for a url
    353     my $srcurl = $srcfile;
    354     ##$url =~ s/ /%20/g;
    355     ##$srcurl =~ s/ /%20/g;
    356    
    357     $doc_obj->add_metadata ($section, "Image", $url);
    358 
    359     # Also want to set filename as 'Source' metadata to be
    360     # consistent with other plugins
    361     $doc_obj->add_metadata ($section, "Source", $srcurl);
    362 
    363     my ($image_type, $image_width, $image_height, $image_size)
    364     = &identify($filename, $outhandle, $verbosity);
    365 
    366     $doc_obj->add_metadata ($section, "ImageType",   $image_type);
    367     $doc_obj->add_metadata ($section, "ImageWidth",  $image_width);
    368     $doc_obj->add_metadata ($section, "ImageHeight", $image_height);
    369     $doc_obj->add_metadata ($section, "ImageSize",   $image_size);
    370     $doc_obj->add_metadata ($section, "FileFormat", "PagedImg");
    371     # add NoText metadata which can be used to suppress the dummy text
    372     $doc_obj->add_metadata ($section, "NoText", "1");
    373 
    374     if ($type eq "unknown" && $image_type) {
    375     $type = $image_type;
    376     }
    377 
    378     if ($top) {
    379     $doc_obj->add_metadata ($section, "srclink",
    380                 "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\">");
    381     $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\">");
    382 
    383     } else {
    384     $doc_obj->add_metadata ($section, "srclink",
    385                 "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[parent(Top):assocfilepath]/[Image]\">");
    386     $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[parent(Top):assocfilepath]/[Image]\">");
    387 
    388     }
    389     $doc_obj->add_metadata ($section, "/srclink", "</a>");
    390 
    391 
    392     # Add the image as an associated file
    393     $doc_obj->associate_file($filename,$file,"image/$type",$section);
    394     print $outhandle "associating file $filename as name $file\n" if ($verbosity > 2);
    395 
    396     if ($self->{'thumbnail'}) {
    397     # Make the thumbnail image
    398     my $thumbnailsize = $self->{'thumbnailsize'} || 100;
    399     my $thumbnailtype = $self->{'thumbnailtype'} || 'gif';
    400    
    401     my $filehead = &util::get_tmp_filename();
    402     my $thumbnailfile = $filehead . ".$thumbnailtype";
    403     my $n=1;
    404     while (-e $thumbnailfile) {
    405         $thumbnailfile = $filehead . $n . ".$thumbnailtype";
    406         $n++;
    407     }
    408    
    409     $self->{'tmp_filename2'} = $thumbnailfile;
    410    
    411     # Generate the thumbnail with convert
    412     my $command = "convert  -verbose -geometry $thumbnailsize"
    413         . "x$thumbnailsize \"$filename\" \"$thumbnailfile\"";
    414     print $outhandle "THUMBNAIL: $command\n" if ($verbosity > 2);
    415     my $result = '';
    416     $result = `$command 2>&1` ;
    417     print $outhandle "THUMB RESULT: $result\n" if ($verbosity > 2);
    418    
    419     # Add the thumbnail as an associated file ...
    420     if (-e "$thumbnailfile") {
    421         $doc_obj->associate_file("$thumbnailfile", $id."thumb.$thumbnailtype", "image/$thumbnailtype",$section);
    422         $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype);
    423         $doc_obj->add_metadata ($section, "Thumb", $id."thumb.$thumbnailtype");
    424         if ($top) {
    425         $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>");
    426         } else {
    427         $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[parent(Top):assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>");
    428         }
    429     }
    430    
    431     # Extract Thumnail metadata from convert output
    432     if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
    433         $doc_obj->add_metadata ($section, "ThumbWidth", $1);
    434         $doc_obj->add_metadata ($section, "ThumbHeight", $2);
    435     }
    436     }
    437     # Make a screen-sized version of the picture if requested
    438     if ($self->{'screenview'}) {
    439    
    440     # To do: if the actual image is smaller than the screenview size,
    441     # we should use the original !
    442 
    443     my $screenviewsize = $self->{'screenviewsize'} || 500;
    444     my $screenviewtype = $self->{'screenviewtype'} || 'jpeg';
    445     my $filehead = &util::get_tmp_filename();
    446     my $screenviewfilename = $filehead . ".$screenviewtype";
    447     my $n=1;
    448     while (-e $screenviewfilename) {
    449         $screenviewfilename = "$filehead$n\.$screenviewtype";
    450         $n++;
    451     }
    452     $self->{'tmp_filename3'} = $screenviewfilename;
    453 
    454     # make the screenview image
    455     my $command = "convert  -verbose -geometry $screenviewsize"
    456         . "x$screenviewsize \"$filename\" \"$screenviewfilename\"";
    457     print $outhandle "SCREENVIEW: $command\n" if ($verbosity > 2);
    458     my $result = "";
    459     $result = `$command  2>&1` ;
    460     print $outhandle "SCREENVIEW RESULT: $result\n" if ($verbosity > 3);
    461 
    462     # get screenview dimensions, size and type
    463         if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
    464         $doc_obj->add_metadata ($section, "ScreenWidth", $1);
    465         $doc_obj->add_metadata ($section, "ScreenHeight", $2);
    466     }elsif ($result =~ m/([0-9]+)x([0-9]+)/) {
    467         #if the image hasn't changed size,  the previous regex doesn't match
    468         $doc_obj->add_metadata ($section, "ScreenWidth", $1);
    469         $doc_obj->add_metadata ($section, "ScreenHeight", $2);
    470     }
    471    
    472     #add the screenview as an associated file ...
    473     if (-e "$screenviewfilename") {
    474         $doc_obj->associate_file("$screenviewfilename", $id."sv.$screenviewtype",
    475                      "image/$screenviewtype",$section);
    476         print $outhandle "associating screen file $screenviewfilename as name $id sv.$screenviewtype\n" if ($verbosity > 2);
    477        
    478         $doc_obj->add_metadata ($section, "ScreenType", $screenviewtype);
    479         $doc_obj->add_metadata ($section, "Screen", $id."sv.$screenviewtype");
    480 
    481         if ($top) {
    482         $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>");
    483         } else {
    484         $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[parent(Top):assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>");
    485 
    486         }
    487     } else {
    488         print $outhandle "PagedImgPlug: couldn't find \"$screenviewfilename\"\n";
    489     }
    490     }
    491 
    492     return $type;
    493 
    494 
    495 }
    496 
    497 
    498 
    499 # Discover the characteristics of an image file with the ImageMagick
    500 # "identify" command.
    501 
    502 sub identify {
    503     my ($image, $outhandle, $verbosity) = @_;
    504 
    505     # Use the ImageMagick "identify" command to get the file specs
    506     my $command = "identify \"$image\" 2>&1";
    507     print $outhandle "$command\n" if ($verbosity > 2);
    508     my $result = '';
    509     $result = `$command`;
    510     print $outhandle "$result\n" if ($verbosity > 3);
    511 
    512     # Read the type, width, and height
    513     my $type =   'unknown';
    514     my $width =  'unknown';
    515     my $height = 'unknown';
    516 
    517     my $image_safe = quotemeta $image;
    518     if ($result =~ /^$image_safe (\w+) (\d+)x(\d+)/) {
    519     $type = $1;
    520     $width = $2;
    521     $height = $3;
    522     }
    523    
    524     # Read the size
    525     my $size = "unknown";
    526     if ($result =~ m/^.* ([0-9]+)b/) {
    527     $size = $1;
    528     } elsif ($result =~ m/^.* ([0-9]+)kb/) {
    529     $size = 1024 * $1;
    530     }
    531 
    532     print $outhandle "file: $image:\t $type, $width, $height, $size\n"
    533     if ($verbosity > 3);
    534 
    535     # Return the specs
    536     return ($type, $width, $height, $size);
    537 }
    538 
    539 
    540 # The PagedImgPlug read() function. This function does all the right things
    541 # to make general options work for a given plugin. It calls the process()
    542 # function which does all the work specific to a plugin (like the old
    543 # read functions used to do). Most plugins should define their own
    544 # process() function and let this read() function keep control.
    545 #
    546 # PagedImgPlug overrides read() because there is no need to read the actual
    547 # text of the file in, because the contents of the file is not text...
    548 #
    549 # Return number of files processed, undef if can't process
    550 # Note that $base_dir might be "" and that $file might
    551 # include directories
    552 
    553 sub read_into_doc_obj {
    554     my $self = shift (@_);
    555     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    556     my $outhandle   = $self->{'outhandle'};
    557    
    558     #check process and block exps, smart block, etc
    559     my ($block_status,$filename) = $self->read_block(@_);   
    560     return $block_status if ((!defined $block_status) || ($block_status==0));
    561    
    562     print $outhandle "PagedImgPlug processing \"$filename\"\n"
    563     if $self->{'verbosity'} > 1;
    564     print STDERR "<Processing n='$file' p='PagedImgPlug'>\n" if ($gli);
    565    
    566     # here we need to decide if we have an old text .item file, or a new xml
    567     # .item file - for now the test is if the first non-empty line is
    568     # <PagedDocument> then its xml
    569     my $xml_version = 0;
    570     open (ITEMFILE, $filename) || die "couldn't open $filename\n";
    571    
    572     my $backup_filename = "backup.item";
    573     open (BACKUP,">$backup_filename")|| die "couldn't write to $backup_filename\n";
    574     my $line = "";
    575     my $num = 0;
    576     $line = <ITEMFILE>;
    577     while ($line !~ /\w/) {
    578     $line = <ITEMFILE>;
    579     }
    580     chomp $line;
    581     if ($line =~ /<PagedDocument/) {
    582     $xml_version = 1;
    583     }
    584     close ITEMFILE;
    585     open (ITEMFILE, $filename) || die "couldn't open $filename\n";
    586     $line = <ITEMFILE>;
    587     $line =~ s/^\xEF\xBB\xBF//; # strip BOM
    588     $line =~ s/\x0B+//ig;
    589     $line =~ s/&/&amp;/g;
    590     print BACKUP ($line);
    591     #Tidy up the item file some metadata title contains \vt-vertical tab
    592     while ($line = <ITEMFILE>) {
    593     $line =~ s/\x0B+//ig;
    594     $line =~ s/&/&amp;/g;
    595     print BACKUP ($line);
    596     }
    597     close ITEMFILE;
    598     close BACKUP;
    599     &File::Copy::copy ($backup_filename, $filename);
    600     &util::rm($backup_filename);
    601 
    602     my $doc_obj;
    603     if ($xml_version) {
    604     $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
    605     $self->{'file'} = $file;
    606     $self->{'filename'} = $filename;
    607     $self->{'processor'} = $processor;
    608     $self->{'metadata'} = $metadata;
    609 
     247    my $self = shift(@_);
     248    my ($filename_full_path, $filename_no_path, $doc_obj, $section, $rotate) = @_;
     249    # do rotation
     250    if ($rotate eq "r") {
     251    # check the filenames
     252    return 0 if ($filename_no_path eq "" || !-f $filename_full_path);
     253
     254    # we get a new temporary file which is rotated
     255    $filename_full_path = $self->rotate_image($filename_full_path);
     256    }
     257   
     258    # do generate images
     259    my $result = $self->generate_images($filename_full_path, $filename_no_path, $doc_obj, $section);
     260    # overwrite the one set in ImageConverter
     261    $doc_obj->set_metadata_element ($section, "FileFormat", "PagedImage");
     262    return $result;
     263}
     264
     265sub old_read_stuff_for_xml_version {
     266    my ($self, $filename, $file, $gli);
     267
     268    # this bit same as ReadXMLFile read
     269#   $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
     270#   $self->{'file'} = $file;
     271#   $self->{'filename'} = $filename_full_path;
     272#   $self->{'processor'} = $processor;
     273#   $self->{'metadata'} = $metadata;
     274#
    610275    eval {
    611276        $@ = "";
     
    616281       
    617282        # feed transformed file (now in memory as string) into XML parser
    618         #$self->{'parser'}->parse($transformed_xml);
    619         $self->parse_string($transformed_xml);
     283        $self->{'parser'}->parse($transformed_xml);
     284        ###$self->parse_string($transformed_xml);
    620285        }
    621286        else {
    622         #$self->{'parser'}->parsefile($filename);
    623         $self->parse_file($filename);
     287        $self->{'parser'}->parsefile($filename);
     288        #$self->parse_file($filename);
    624289        }
    625290    };
     
    650315        return -1; # error during processing
    651316    }
     317
     318}
     319
     320
     321# The PagedImagePlugin read() function. This function does all the right things
     322# to make general options work for a given plugin. It calls the process()
     323# function which does all the work specific to a plugin (like the old
     324# read functions used to do). Most plugins should define their own
     325# process() function and let this read() function keep control.
     326#
     327# PagedImagePlugin overrides read() because there is no need to read the actual
     328# text of the file in, because the contents of the file is not text...
     329#
     330# Return number of files processed, undef if can't process
     331# Note that $base_dir might be "" and that $file might
     332# include directories
     333
     334sub read_into_doc_obj {
     335    my $self = shift (@_);
     336    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     337    my $outhandle = $self->{'outhandle'};
     338   
     339    my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
     340
     341    print $outhandle "PagedImagePlugin processing \"$filename_full_path\"\n"
     342    if $self->{'verbosity'} > 1;
     343    print STDERR "<Processing n='$file' p='PagedImagePlugin'>\n" if ($gli);
     344   
     345   
     346    # here we need to decide if we have an old text .item file, or a new xml
     347    # .item file
     348    my $xml_version = $self->is_xml_item_file($filename_full_path);
     349
     350    $self->tidy_item_file($filename_full_path);
     351   
     352    my $doc_obj;
     353    if ($xml_version) {
     354    # careful checking needed here!! are we using local xml handlers or super ones
     355    $self->ReadXMLFile::read($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli);
    652356    $doc_obj = $self->{'doc_obj'};
    653357    } else {
    654358    my ($dir);
    655     ($dir, $file) = $filename =~ /^(.*?)([^\/\\]*)$/;
     359    ($dir, $file) = $filename_full_path =~ /^(.*?)([^\/\\]*)$/;
    656360
    657361    #process the .item file
    658     $doc_obj = $self->process_item($filename, $dir, $file, $processor);
     362    $doc_obj = $self->process_item($filename_full_path, $dir, $file, $processor);
    659363   
    660364    }
    661    
    662     if ($self->{'cover_image'}) {
    663     $self->associate_cover_image($doc_obj, $filename);
    664     }
     365
     366    my $section = $doc_obj->get_top_section();
     367       
     368    $doc_obj->add_utf8_metadata($section, "Plugin", "$self->{'plugin_type'}");
     369    $doc_obj->add_metadata($section, "FileFormat", "PagedImage");
    665370
    666371    # include any metadata passed in from previous plugins
    667372    # note that this metadata is associated with the top level section
    668     my $section = $doc_obj->get_top_section();
     373    $self->add_associated_files($doc_obj, $filename_full_path);
    669374    $self->extra_metadata ($doc_obj, $section, $metadata);
    670     #my $text="";
    671     # do plugin specific processing of doc_obj
    672     #unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
    673     #print STDERR "<ProcessingError n='$file'>\n" if ($gli);
    674     #return -1;
    675     #}
    676     # do any automatic metadata extraction
    677375    $self->auto_extract_metadata ($doc_obj);
    678376
    679     $self->{'num_processed'}++;
     377    # if we haven't found any Title so far, assign one
     378    $self->title_fallback($doc_obj,$section,$filename_no_path);
     379
     380    $self->add_OID($doc_obj);
    680381    return (1,$doc_obj);
    681382}
    682383
     384# for now, the test is: if the first non-empty line is <PagedDocument>, then it's XML
     385sub is_xml_item_file {
     386    my $self = shift(@_);
     387    my ($filename) = @_;
     388
     389    my $xml_version = 0;
     390    open (ITEMFILE, $filename) || die "couldn't open $filename\n";
     391   
     392    my $line = "";
     393    my $num = 0;
     394    $line = <ITEMFILE>;
     395    while ($line !~ /\w/) {
     396    $line = <ITEMFILE>;
     397    }
     398    chomp $line;
     399    if ($line =~ /<PagedDocument/) {
     400    $xml_version = 1;
     401    }
     402    close ITEMFILE;
     403    return $xml_version;
     404}
     405
     406sub tidy_item_file {
     407    my $self = shift(@_);
     408    my ($filename) = @_;
     409
     410    open (ITEMFILE, $filename) || die "couldn't open $filename\n";
     411    my $backup_filename = "backup.item";
     412    open (BACKUP,">$backup_filename")|| die "couldn't write to $backup_filename\n";
     413    my $line = "";
     414    $line = <ITEMFILE>;
     415    $line =~ s/^\xEF\xBB\xBF//; # strip BOM
     416    $line =~ s/\x0B+//ig;
     417    $line =~ s/&/&amp;/g;
     418    print BACKUP ($line);
     419    # Tidy up the item file: some metadata titles contain \vt (vertical tab)
     420    while ($line = <ITEMFILE>) {
     421    $line =~ s/\x0B+//ig;
     422    $line =~ s/&/&amp;/g;
     423    print BACKUP ($line);
     424    }
     425    close ITEMFILE;
     426    close BACKUP;
     427    &File::Copy::copy ($backup_filename, $filename);
     428    &util::rm($backup_filename);
     429
     430}
     431# do we need this? old read was the same as BasePlug read, not the same as ReadXMLFile read
    683432sub read
    684433{
    685434    my $self = shift (@_);
    686     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;    my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_);
    687 
    688     if ((defined $process_status) && ($process_status == 1)) {
    689     # process the document
    690     $processor->process($doc_obj);
    691 
    692     #if(defined($self->{'places_filename'})){
    693     #    &util::rm($self->{'places_filename'});
    694     #    $self->{'places_filename'} = undef;
    695     #}
    696     #$self->{'num_processed'} ++;
    697     undef $doc_obj;
    698     }
    699    
    700     # clean up temporary files - we do this here instead of in 
    701     # process_image becuase associated files aren't actually copied
    702     # until after process has been run.
    703     if (defined $self->{'tmp_filename1'} &&
    704     -e $self->{'tmp_filename1'}) {
    705     &util::rm($self->{'tmp_filename1'})
    706     }
    707     if (defined $self->{'tmp_filename2'} &&
    708     -e $self->{'tmp_filename2'}) {
    709     &util::rm($self->{'tmp_filename2'})
    710     }
    711     if (defined $self->{'tmp_filename3'} &&
    712     -e $self->{'tmp_filename3'}) {
    713       &util::rm($self->{'tmp_filename3'})
    714     }
    715     # if process_status == 1, then the file has been processed.
    716     return $process_status;
     435    $self->BasePlugin::read(@_);
    717436}
    718437
     
    741460    if (defined($txtfile)&& $txtfile ne "") {
    742461        $self->process_text ($self->{'base_dir'}.$txtfile, $txtfile, $doc_obj, $self->{'current_section'});
    743             $doc_obj->set_metadata_element($self->{'current_section'},"NoText","0");
    744     } else {
    745         # otherwise add in some dummy text
    746         #create an empty text string so we don't break downstream plugins
    747         my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
    748         $doc_obj->add_utf8_text($self->{'current_section'}, $text);
    749             $doc_obj->add_metadata($self->{'current_section'},"NoText","1");
     462    } else {
     463        $self->add_dummy_text($doc_obj, $self->{'current_section'});
    750464    }
    751465    } elsif ($element eq "Metadata") {
     
    794508    # create a new document
    795509    $self->{'doc_obj'} = new doc ($self->{'filename'}, "indexed_doc");
    796     my $doc_obj = $self->{'doc_obj'};
    797     $doc_obj->set_OIDtype ($self->{'processor'}->{'OIDtype'});
     510    # TODO is file filename_no_path??
     511    $self->set_initial_doc_fields($self->{'doc_obj'}, $self->{'file'}, $self->{'processor'});
     512
    798513    my ($dir, $file) = $self->{'filename'} =~ /^(.*?)([^\/\\]*)$/;
    799514    $self->{'base_dir'} = $dir;
    800515    $self->{'num_pages'} = 0;
    801     my $topsection = $doc_obj->get_top_section();
    802     if ($self->{'documenttype'} eq 'paged') {
    803     # set the gsdlthistype metadata to Paged - this ensures this document will
    804     # be treated as a Paged doc, even if Titles are not numeric
    805    
    806     $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Paged");
    807     } else {
    808     $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Hierarchy");
    809     }
    810 
    811     $doc_obj->add_metadata ($topsection, "Source", $file);
    812     if ($self->{'headerpage'}) {
    813     $doc_obj->add_text($topsection, &gsprintf::lookup_string("{BasPlug.dummy_text}"));
    814     }
    815516
    816517}
     
    819520    my $self = shift(@_);
    820521    my $doc_obj = $self->{'doc_obj'};
    821        
    822     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
    823     $doc_obj->add_metadata($doc_obj->get_top_section(), "FileFormat", "PagedImg");
    824522   
    825523    # add numpages metadata
    826524    $doc_obj->set_utf8_metadata_element ($doc_obj->get_top_section(), 'NumPages', $self->{'num_pages'});
    827525
    828     # add an OID
    829     $doc_obj->set_OID();
    830    
    831 }
    832 
    833 sub process_item {
    834     my $self = shift (@_);
    835     my ($filename, $dir, $file, $processor) = @_;
    836 
    837     my $doc_obj = new doc ($filename, "indexed_doc");
     526   
     527}
     528
     529
     530sub set_initial_doc_fields {
     531    my $self = shift(@_);
     532    my ($doc_obj, $filename_no_path, $processor) = @_;
     533
    838534    $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
    839535    my $topsection = $doc_obj->get_top_section();
    840     $doc_obj->add_utf8_metadata($topsection, "Plugin", "$self->{'plugin_type'}");
    841     $doc_obj->add_metadata($topsection, "FileFormat", "PagedImg");
    842536
    843537    if ($self->{'documenttype'} eq 'paged') {
     
    849543    }
    850544
    851     $doc_obj->add_metadata ($topsection, "Source", $file);
    852 
    853     open (ITEMFILE, $filename) || die "couldn't open $filename\n";
     545    $self->set_Source_metadata($doc_obj, $filename_no_path);
     546   
     547    # if we want a header page, we need to add some text into the top section, otherwise this section will become invisible
     548    if ($self->{'headerpage'}) {
     549    $doc_obj->add_text($topsection, &gsprintf::lookup_string("{BasePlugin.dummy_text}"));
     550    }
     551
     552
     553}
     554
     555
     556sub process_item {
     557    my $self = shift (@_);
     558    my ($filename_full_path, $dir, $filename_no_path, $processor) = @_;
     559
     560    my $doc_obj = new doc ($filename_full_path, "indexed_doc");
     561    $self->set_initial_doc_fields($doc_obj, $filename_no_path, $processor);
     562    my $topsection = $doc_obj->get_top_section();
     563    open (ITEMFILE, $filename_full_path) || die "couldn't open $filename_full_path\n";
    854564    my $line = "";
    855565    my $num = 0;
     
    879589        if (!defined $result1)
    880590        {
    881             print "PagedImgPlug: couldn't process image \"$dir.$imgname\" for item \"$filename\"\n";
     591            print "PagedImagePlugin: couldn't process image \"$dir.$imgname\" for item \"$filename_full_path\"\n";
    882592        }
    883593        }
     
    887597               
    888598        if (!defined $result2) {
    889             print "PagedImgPlug: couldn't process text file \"$dir.$txtname\" for item \"$filename\"\n";
    890         }
    891                 else{
    892             $doc_obj->set_metadata_element($cursection, "NoText", "0");
     599            print "PagedImagePlugin: couldn't process text file \"$dir.$txtname\" for item \"$filename_full_path\"\n";
     600            $self->add_dummy_text($doc_obj, $cursection);
    893601        }
    894602        } else {
    895603        # otherwise add in some dummy text
    896         $doc_obj->add_text($cursection, &gsprintf::lookup_string("{BasPlug.dummy_text}"));
    897                  # add NoText metadata which can be used to suppress the dummy text
    898          }
     604        $self->add_dummy_text($doc_obj, $cursection);
     605        }
    899606    }
    900607    }
     
    902609    close ITEMFILE;
    903610
    904     # if we want a header page, we need to add some text into the top section, otherwise this section will become invisible
    905     if ($self->{'headerpage'}) {
    906     $doc_obj->add_text($topsection, &gsprintf::lookup_string("{BasPlug.dummy_text}"));
    907     }
    908     $file =~ s/\.item//i;
    909     $doc_obj->set_OID ();
    910611    # add numpages metadata
    911612    $doc_obj->set_utf8_metadata_element ($topsection, 'NumPages', "$num");
     
    915616sub process_text {
    916617    my $self = shift (@_);
    917     my ($fullpath, $file, $doc_obj, $cursection) = @_;
     618    my ($filename_full_path, $file, $doc_obj, $cursection) = @_;
    918619   
    919620    # check that the text file exists!!
    920     if (!-f $fullpath) {
    921     print "PagedImgPlug: ERROR: File $fullpath does not exist, skipping\n";
     621    if (!-f $filename_full_path) {
     622    print "PagedImagePlugin: ERROR: File $filename_full_path does not exist, skipping\n";
    922623    return 0;
    923624    }
    924625
    925626    # Do encoding stuff
    926     my ($language, $encoding) = $self->textcat_get_language_encoding ($fullpath);
     627    my ($language, $encoding) = $self->textcat_get_language_encoding ($filename_full_path);
    927628
    928629    my $text="";
    929     &BasPlug::read_file($self, $fullpath, $encoding, $language, \$text);
     630    &ReadTextFile::read_file($self, $filename_full_path, $encoding, $language, \$text);
    930631    if (!length ($text)) {
    931632    # It's a bit unusual but not out of the question to have no text, so just give a warning
    932         print "PagedImgPlug: WARNING: $fullpath contains no text\n";
     633        print "PagedImagePlugin: WARNING: $filename_full_path contains no text\n";
    933634    }
    934635
     
    961662
    962663# do plugin specific processing of doc_obj
    963 sub process {
     664sub process_old {
    964665    my $self = shift (@_);
    965666    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
     
    969670}
    970671
     672sub clean_up_after_doc_obj_processing {
     673    my $self = shift(@_);
     674   
     675    $self->ImageConverter::clean_up_temporary_files();
     676}
     677
    9716781;
  • gsdl/trunk/perllib/plugins/ProCitePlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # ProCitePlug.pm -- A plugin for (exported) ProCite databases
     3# ProCitePlugin.pm -- A plugin for (exported) ProCite databases
    44#
    55# A component of the Greenstone digital library software
     
    2525###########################################################################
    2626
    27 package ProCitePlug;
     27package ProCitePlugin;
    2828
    2929
    3030use multiread;
    31 use SplitPlug;
     31use SplitTextFile;
    3232
    3333use strict;
    3434no strict 'refs'; # allow filehandles to be variables and viceversa
    3535
    36 # ProCitePlug is a sub-class of SplitPlug
     36# ProCitePlugin is a sub-class of SplitTextFile
    3737sub BEGIN {
    38     @ProCitePlug::ISA = ('SplitPlug');
     38    @ProCitePlugin::ISA = ('SplitTextFile');
    3939}
    4040
     
    4242my $arguments =
    4343    [ { 'name' => "process_exp",
    44     'desc' => "{BasPlug.process_exp}",
     44    'desc' => "{BasePlugin.process_exp}",
    4545    'type' => "regexp",
    4646    'reqd' => "no",
    4747    'deft' => &get_default_process_exp() },
    4848      { 'name' => "split_exp",
    49     'desc' => "{SplitPlug.split_exp}",
     49    'desc' => "{SplitTextFile.split_exp}",
    5050    'type' => "regexp",
    5151    'deft' => &get_default_split_exp(),
     
    5353      ];
    5454
    55 my $options = { 'name'     => "ProCitePlug",
    56         'desc'     => "{ProCitePlug.desc}",
     55my $options = { 'name'     => "ProCitePlugin",
     56        'desc'     => "{ProCitePlugin.desc}",
    5757        'abstract' => "no",
    5858        'inherits' => "yes",
     
    8181    push(@$pluginlist, $class);
    8282
    83     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    84     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    85 
    86     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists);
     83    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     84    push(@{$hashArgOptLists->{"OptList"}},$options);
     85
     86    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists);
    8787
    8888    return bless $self, $class;
     
    124124    open(PROCITE_FILE, "<$filename");
    125125    my $reader = new multiread();
    126     $reader->set_handle ('ProCitePlug::PROCITE_FILE');
     126    $reader->set_handle ('ProCitePlugin::PROCITE_FILE');
    127127    $reader->set_encoding ($encoding);
    128128    $reader->read_file ($textref);
     
    161161    my $cursection = $doc_obj->get_top_section();
    162162    # Report that we're processing the file
    163     print STDERR "<Processing n='$file' p='ProCitePlug'>\n" if ($gli);
    164     print $outhandle "ProCitePlug: processing $file\n"
     163    print STDERR "<Processing n='$file' p='ProCitePlugin'>\n" if ($gli);
     164    print $outhandle "ProCitePlugin: processing $file\n"
    165165    if ($self->{'verbosity'}) > 1;
    166166
  • gsdl/trunk/perllib/plugins/RTFPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # RTFPlug.pm -- plugin for importing Rich Text Format files.
     3# RTFPlugin.pm -- plugin for importing Rich Text Format files.
    44#
    55# A component of the Greenstone digital library software
     
    2525###########################################################################
    2626
    27 # 12/05/02 Added usage datastructure - John Thompson
     27package RTFPlugin;
    2828
    29 package RTFPlug;
    30 
    31 use ConvertToPlug;
     29use ConvertBinaryFile;
    3230use strict;
    3331no strict 'refs'; # allow filehandles to be variables and viceversa
    3432
    3533sub BEGIN {
    36     @RTFPlug::ISA = ('ConvertToPlug');
     34    @RTFPlugin::ISA = ('ConvertBinaryFile');
    3735}
    3836
    3937my $arguments =
    4038    [ { 'name' => "process_exp",
    41     'desc' => "{BasPlug.process_exp}",
     39    'desc' => "{BasePlugin.process_exp}",
    4240    'type' => "regexp",
    4341    'deft' => &get_default_process_exp(),
    4442    'reqd' => "no" },
    4543      { 'name' => "description_tags",
    46     'desc' => "{HTMLPlug.description_tags}",
     44    'desc' => "{HTMLPlugin.description_tags}",
    4745    'type' => "flag" }
    4846];
    4947
    50 my $options = { 'name'     => "RTFPlug",
    51         'desc'     => "{RTFPlug.desc}",
     48my $options = { 'name'     => "RTFPlugin",
     49        'desc'     => "{RTFPlugin.desc}",
    5250        'abstract' => "no",
    5351        'inherits' => "yes",
     
    6058    push(@$pluginlist, $class);
    6159
    62     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    63     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     60    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     61    push(@{$hashArgOptLists->{"OptList"}},$options);
    6462 
    65     my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists);
     63    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
    6664
    6765    if ($self->{'info_only'}) {
     
    7068    }
    7169
     70    $self->{'filename_extension'} = "rtf";
     71    $self->{'file_type'} = "RTF";
     72
    7273    my $secondary_plugin_options = $self->{'secondary_plugin_options'};
    73     if (!defined $secondary_plugin_options->{'TEXTPlug'}) {
    74     $secondary_plugin_options->{'TEXTPlug'} = [];
     74    if (!defined $secondary_plugin_options->{'TextPlugin'}) {
     75    $secondary_plugin_options->{'TextPlugin'} = [];
    7576    }
    76     if (!defined $secondary_plugin_options->{'HTMLPlug'}) {
    77     $secondary_plugin_options->{'HTMLPlug'} = [];
     77    if (!defined $secondary_plugin_options->{'HTMLPlugin'}) {
     78    $secondary_plugin_options->{'HTMLPlugin'} = [];
    7879    }
    79     my $text_options = $secondary_plugin_options->{'TEXTPlug'};
    80     my $html_options = $secondary_plugin_options->{'HTMLPlug'};
     80    my $text_options = $secondary_plugin_options->{'TextPlugin'};
     81    my $html_options = $secondary_plugin_options->{'HTMLPlugin'};
    8182   
    8283    #$self->{'input_encoding'} = "utf8";
     
    9899    return q^(?i)\.rtf$^;
    99100}
    100    
    101 sub process {
    102     my $self = shift (@_);
    103     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    104 
    105     return $self->process_type("rtf",$base_dir,$file,$doc_obj);
    106 }
    107101
    1081021;
  • gsdl/trunk/perllib/plugins/RealMediaPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # RealMediaPlug.pm -- Extract metadata from Real Media files
     3# RealMediaPlugin.pm -- Extract metadata from Real Media files
    44#
    55# Original code by Xin Gao
     
    2727###########################################################################
    2828
    29 package RealMediaPlug;
     29package RealMediaPlugin;
    3030
    3131
    32 use UnknownPlug;
     32use BasePlugin;
    3333use rm::Header::PurePerl;
    3434
    3535use strict;
    3636no strict 'refs'; # make an exception so we can use variables as filehandles
    37 
     37no strict 'subs';
    3838
    3939sub BEGIN {
    40     @RealMediaPlug::ISA = ('UnknownPlug');
     40    @RealMediaPlugin::ISA = ('BasePlugin');
    4141}
    4242
     
    4444my $arguments =
    4545    [ { 'name' => "process_exp",
    46     'desc' => "{BasPlug.process_exp}",
     46    'desc' => "{BasePlugin.process_exp}",
    4747    'type' => "regexp",
    4848    'deft' => &get_default_process_exp(),
    4949    'reqd' => "no" } ];
    5050
    51 my $options = { 'name'     => "RealMediaPlug",
    52         'desc'     => "{RealMediaPlug.desc}",
     51my $options = { 'name'     => "RealMediaPlugin",
     52        'desc'     => "{RealMediaPlugin.desc}",
    5353        'abstract' => "no",
    5454        'inherits' => "yes",
     
    6969    push(@$pluginlist, $class);
    7070
    71     if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}); }
    72     if (defined $options) { push(@{$hashArgOptLists->{"OptList"}}, $options); }
     71    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
     72    push(@{$hashArgOptLists->{"OptList"}}, $options);
    7373
    74     my $self = new UnknownPlug($pluginlist, $inputargs, $hashArgOptLists);
     74    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
    7575   
    7676    return bless $self, $class;
    7777}
    7878
    79 
    80 # do plugin specific processing of doc_obj
    81 sub read
     79sub process
    8280{
    8381    my $self = shift (@_);
    84     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
     82    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    8583
    86     my $outhandle = $self->{'outhandle'};
     84    my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
     85    my $top_section = $doc_obj->get_top_section();
     86    # prevent hashing: old code was in effect the following.
     87    if ($doc_obj->{'OIDtype'} =~ /^hash$/) {
     88    $doc_obj->set_OIDtype ("incremental");
     89    }
    8790
    88     #check process and block exps, smart block, etc
    89     my ($block_status,$filename) = $self->read_block(@_);   
    90     return $block_status if ((!defined $block_status) || ($block_status==0));
    91 
    92     # Report that we're processing the file
    93     print STDERR "<Processing n='$file' p='RealMediaPlug'>\n" if ($gli);
    94     print $outhandle "RealMediaPlug: processing $file\n"
    95     if ($self->{'verbosity'}) > 1;
    96 
    97     # create a new index document
    98     my $doc_obj = new doc ($filename, "indexed_doc");
    99     if ($processor->{'OIDtype'} =~ /^(assigned|dirname)$/) {
    100     $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
    101     }
    102     else {
    103     $doc_obj->set_OIDtype ("incremental");    # this is done to avoid hashing content of file
    104     }
    105     my $top_section = $doc_obj->get_top_section();
    106 
    107     #if there's a leading directory name, eat it...
    108     $file =~ s/^.*[\/\\]//;
    109          
    110     my $url = $file;
    111  
    112     # Source (filename) to be consistent with other plugins
    113     $doc_obj->add_metadata($top_section, "Source", $url);
    114  
    115  
    11691    my $text = "";
    117     my $real_media = rm::Header::PurePerl->new($filename);
     92    my $real_media = rm::Header::PurePerl->new($filename_full_path);
    11893    foreach my $key (keys %{$real_media->info})
    11994    {
     
    12499
    125100    $doc_obj->add_utf8_text($top_section, "<pre>\n$text\n</pre>");
     101    $doc_obj->add_metadata($top_section, "FileFormat", "RealMedia");
    126102
    127     # srclink
    128     $doc_obj->add_metadata($top_section, "FileFormat", "RealMedia");
    129103    $doc_obj->add_metadata($top_section, "srclink", "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Source]\">");
    130104    $doc_obj->add_metadata($top_section, "/srclink", "</a>");
     
    133107
    134108    # Add the actual file as an associated file
    135     $doc_obj->associate_file($filename, $file, "RealMedia", $top_section);
     109    $doc_obj->associate_file($filename_full_path, $filename_no_path, "RealMedia", $top_section);
    136110
    137     # include any metadata passed in from previous plugins
    138     my $section = $doc_obj->get_top_section();
    139     $self->extra_metadata ($doc_obj, $section, $metadata);
    140 
    141     # do plugin specific processing of doc_obj
    142     return undef unless defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj));
    143 
    144     # do any automatic metadata extraction
    145     $self->auto_extract_metadata($doc_obj);
    146    
    147     # have we found a Title?? is the Title empty??
    148     if(!defined $doc_obj->get_metadata_element($section, "Title") or $doc_obj->get_metadata_element($section, "Title") eq ""){         
    149         my $file_derived_title = &BasPlug::filename_based_title($self, $file);
    150         if(!defined $doc_obj->get_metadata_element($section, "Title")) {
    151             $doc_obj->add_metadata ($section, "Title", $file_derived_title);
    152         }
    153         else {
    154             $doc_obj->set_metadata_element ($section, "Title", $file_derived_title);
    155         }
    156     }
    157 
    158     # add an OID
    159     $doc_obj->set_OID();
    160 
    161     # process the document
    162     $processor->process($doc_obj);
    163 
    164     $self->{'num_processed'}++;
    165     return 1;
    166111}
    167112
  • gsdl/trunk/perllib/plugins/ReferPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # ReferPlug.pm - a plugin for bibliography records in Refer format
     3# ReferPlugin.pm - a plugin for bibliography records in Refer format
    44#
    55# A component of the Greenstone digital library software
     
    2626###########################################################################
    2727
    28 # ReferPlug reads bibliography files in Refer format.
     28# ReferPlugin reads bibliography files in Refer format.
    2929#
    3030# by Gordon W. Paynter ([email protected]), November 2000
     
    3636#
    3737#
    38 # ReferPlug creates a document object for every reference in the file.
    39 # It is a subclass of SplitPlug, so if there are multiple records, all
     38# ReferPlugin creates a document object for every reference in the file.
     39# It is a subclass of SplitTextFile, so if there are multiple records, all
    4040# are read.
    4141#
     
    6161#
    6262
    63 # 12/05/02 Added usage datastructure - John Thompson
    64 
    65 package ReferPlug;
    66 
    67 use SplitPlug;
     63package ReferPlugin;
     64
     65use SplitTextFile;
    6866use strict;
    6967no strict 'refs'; # allow filehandles to be variables and viceversa
    7068
    71 # ReferPlug is a sub-class of BasPlug.
     69# ReferPlugin is a sub-class of BasePlugin.
    7270sub BEGIN {
    73     @ReferPlug::ISA = ('SplitPlug');
     71    @ReferPlugin::ISA = ('SplitTextFile');
    7472}
    7573
    7674my $arguments =
    7775    [ { 'name' => "process_exp",
    78     'desc' => "{BasPlug.process_exp}",
     76    'desc' => "{BasePlugin.process_exp}",
    7977    'type' => "regexp",
    8078    'deft' => &get_default_process_exp(),
    8179    'reqd' => "no" },
    8280      { 'name' => "split_exp",
    83     'desc' => "{SplitPlug.split_exp}",
     81    'desc' => "{SplitTextFile.split_exp}",
    8482    'type' => "regexp",
    8583    'reqd' => "no",
     
    8785      ];
    8886
    89 my $options = { 'name'     => "ReferPlug",
    90         'desc'     => "{ReferPlug.desc}",
     87my $options = { 'name'     => "ReferPlugin",
     88        'desc'     => "{ReferPlugin.desc}",
    9189        'abstract' => "no",
    9290        'inherits' => "yes",
     
    109107    push(@$pluginlist, $class);
    110108
    111     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    112     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    113 
    114     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists);
     109    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     110    push(@{$hashArgOptLists->{"OptList"}},$options);
     111
     112    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists);
    115113
    116114    return bless $self, $class;
     
    130128    my $cursection = $doc_obj->get_top_section();
    131129    # Report that we're processing the file
    132     print STDERR "<Processing n='$file' p='ReferPlug'>\n" if ($gli);
    133     print $outhandle "ReferPlug: processing $file\n"
     130    print STDERR "<Processing n='$file' p='ReferPlugin'>\n" if ($gli);
     131    print $outhandle "ReferPlugin: processing $file\n"
    134132    if ($self->{'verbosity'}) > 1;
    135133
  • gsdl/trunk/perllib/plugins/RogPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # RogPlug.pm -- simple text plugin
     3# RogPlugin.pm -- simple text plugin
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2626# creates simple single-level document from .rog or .mdb files
    2727
    28 package RogPlug;
    29 
    30 use BasPlug;
     28package RogPlugin;
     29
     30use BasePlugin;
    3131use sorttools;
    3232use doc;
     
    3636
    3737sub BEGIN {
    38     @RogPlug::ISA = ('BasPlug');
     38    @RogPlugin::ISA = ('BasePlugin');
    3939}
    4040
    4141my $arguments =
    4242    [ { 'name' => "process_exp",
    43     'desc' => "{BasPlug.process_exp}",
     43    'desc' => "{BasePlugin.process_exp}",
    4444    'type' => "regexp",
    4545    'reqd' => "no",
     
    4747      ];
    4848
    49 my $options = { 'name'     => "RogPlug",
    50         'desc'     => "{RogPlug.desc}",
     49my $options = { 'name'     => "RogPlugin",
     50        'desc'     => "{RogPlugin.desc}",
    5151        'abstract' => "no",
    5252        'inherits' => "yes",
     
    5858    push(@$pluginlist, $class);
    5959
    60     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    61     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    62 
    63     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     60    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     61    push(@{$hashArgOptLists->{"OptList"}},$options);
     62
     63    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
    6464
    6565    return bless $self, $class;
     
    230230    my $gz = (defined $3) ? 1: 0;
    231231
    232         print STDERR "<Processing n='$file' p='RogPlug'>\n" if ($gli);
    233     print STDERR "RogPlug: processing $filename\n" if $processor->{'verbosity'};
     232    print STDERR "<Processing n='$file' p='RogPlugin'>\n" if ($gli);
     233    print STDERR "RogPlugin: processing $filename\n" if $processor->{'verbosity'};
    234234   
    235235    if ($gz) {
    236236    open (FILE, "zcat $filename |")
    237         || die "RogPlug::read - zcat can't open $filename\n";
     237        || die "RogPlugin::read - zcat can't open $filename\n";
    238238    } else {
    239239    open (FILE, $filename)
    240         || die "RogPlug::read - can't open $filename\n";
     240        || die "RogPlugin::read - can't open $filename\n";
    241241    }
    242242
  • gsdl/trunk/perllib/plugins/SourceCodePlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # SRCPlug.pm -- source code plugin
     3# SourceCodePlugin.pm -- source code plugin
    44#
    55# A component of the Greenstone digital library software
     
    3737# 12/05/02 Added usage datastructure - John Thompson
    3838
    39 package SRCPlug;
    40 
    41 use BasPlug;
     39package SourceCodePlugin;
     40
     41use ReadTextFile;
    4242
    4343use strict;
     
    4545
    4646sub BEGIN {
    47     @SRCPlug::ISA = ('BasPlug');
     47    @SourceCodePlugin::ISA = ('ReadTextFile');
    4848}
    4949
    5050my $arguments =
    5151    [ { 'name' => "process_exp",
    52     'desc' => "{BasPlug.process_exp}",
     52    'desc' => "{ReadTextFile.process_exp}",
    5353    'type' => "regexp",
    5454    'deft' => &get_default_process_exp(),
    5555    'reqd' => "no" } ,
    5656      { 'name' => "block_exp",
    57     'desc' => "{BasPlug.block_exp}",
     57    'desc' => "{ReadTextFile.block_exp}",
    5858    'type' => "regexp",
    5959    'deft' => &get_default_block_exp(),
    6060    'reqd' => "no" },
    6161      { 'name' => "remove_prefix",
    62     'desc' => "{SRCPlug.remove_prefix}",
     62    'desc' => "{SourceCodePlugin.remove_prefix}",
    6363    'type' => "regexp",
    6464    'deft' => "^.*[/\\]",
    6565    'reqd' => "no" } ];
    6666
    67 my $options = { 'name'     => "SRCPlug",
    68         'desc'     => "{SRCPlug.desc}",
     67my $options = { 'name'     => "SourceCodePlugin",
     68        'desc'     => "{SourceCodePlugin.desc}",
    6969        'abstract' => "no",
    7070        'inherits' => "yes",
     
    7777    push(@$pluginlist, $class);
    7878
    79     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    80     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    81 
    82     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     79    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     80    push(@{$hashArgOptLists->{"OptList"}},$options);
     81
     82    my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
    8383
    8484    return bless $self, $class;
     
    9494    my $self = shift (@_);
    9595
    96 #    return q^(?i)\.te?xt$^;
    9796    return q^(Makefile.*|README.*|(?i)\.(c|cc|cpp|C|h|hpp|pl|pm|sh))$^;
    9897}
     
    106105    my $outhandle = $self->{'outhandle'};
    107106   
    108         print STDERR "<Processing n='$file' p='SRCPlug'>\n" if ($gli);
    109     print $outhandle "SRCPlug: processing $file\n"
     107    print STDERR "<Processing n='$file' p='SourceCodePlugin'>\n" if ($gli);
     108    print $outhandle "SourceCodePlugin: processing $file\n"
    110109    if $self->{'verbosity'} > 1;
    111110   
  • gsdl/trunk/perllib/plugins/StructuredHTMLPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # StructuredHTMLPlug.pm -- html plugin with extra facilities for teasing out
     3# StructuredHTMLPlugin.pm -- html plugin with extra facilities for teasing out
    44# hierarchical structure (such as h1, h2, h3, or user-defined tags) in an
    55# HTML document
     
    3232# format:e.g. level1 (Abstract_title|ChapterTitle|Referencing Heading) level2(SectionHeading)...
    3333
    34 package StructuredHTMLPlug;
    35 
    36 use HTMLPlug;
    37 use ImagePlug;
    38 
    39 #use strict; # every perl program should have this!
    40 #no strict 'refs'; # make an exception so we can use variables as filehandles
     34package StructuredHTMLPlugin;
     35
     36use HTMLPlugin;
     37use ImageConverter; # want the identify method
     38
     39use strict; # every perl program should have this!
     40no strict 'refs'; # make an exception so we can use variables as filehandles
    4141
    4242sub BEGIN {
    43     @StructuredHTMLPlug::ISA = ('HTMLPlug');
     43    @StructuredHTMLPlugin::ISA = ('HTMLPlugin');
    4444}
    4545
     
    4747    [
    4848     { 'name' => "level1_header",
    49        'desc' => "{StructuredHTMLPlug.level1_header}",
     49       'desc' => "{StructuredHTMLPlugin.level1_header}",
    5050       'type' => "regexp",
    5151       'reqd' => "no",
    5252       'deft' => "" },
    5353     { 'name' => "level2_header",
    54        'desc' => "{StructuredHTMLPlug.level2_header}",
     54       'desc' => "{StructuredHTMLPlugin.level2_header}",
    5555       'type' => "regexp",
    5656       'reqd' => "no",
    5757       'deft' => "" },
    5858     { 'name' => "level3_header",
    59        'desc' => "{StructuredHTMLPlug.level3_header}",
     59       'desc' => "{StructuredHTMLPlugin.level3_header}",
    6060       'type' => "regexp",
    6161       'reqd' => "no",
    6262       'deft' => "" },
    6363     { 'name' => "title_header",
    64        'desc' => "{StructuredHTMLPlug.title_header}",
     64       'desc' => "{StructuredHTMLPlugin.title_header}",
    6565       'type' => "regexp",
    6666       'reqd' => "no",
    6767       'deft' => "" },
    6868     { 'name' => "delete_toc",
    69        'desc' => "{StructuredHTMLPlug.delete_toc}",
     69       'desc' => "{StructuredHTMLPlugin.delete_toc}",
    7070       'type' => "flag",
    7171       'reqd' => "no"},
    7272     { 'name' => "toc_header",
    73        'desc' => "{StructuredHTMLPlug.toc_header}",
     73       'desc' => "{StructuredHTMLPlugin.toc_header}",
    7474       'type' => "regexp",
    7575       'reqd' => "no",
     
    7777     ];
    7878
    79 my $options = { 'name'     => "StructuredHTMLPlug",
    80         'desc'     => "{StructuredHTMLPlug.desc}",
     79my $options = { 'name'     => "StructuredHTMLPlugin",
     80        'desc'     => "{StructuredHTMLPlugin.desc}",
    8181        'abstract' => "no",
    8282        'inherits' => "yes",
     
    8888    push(@$pluginlist, $class);
    8989   
    90     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    91     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    92    
    93     my $self = new HTMLPlug($pluginlist, $inputargs, $hashArgOptLists);
     90    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     91    push(@{$hashArgOptLists->{"OptList"}},$options);
     92   
     93    my $self = new HTMLPlugin($pluginlist, $inputargs, $hashArgOptLists);
    9494   
    9595    return bless $self, $class;
     
    102102    my $outhandle = $self->{'outhandle'};
    103103
    104     print $outhandle "StructuredHTMLPlug: processing $file\n"
     104    print $outhandle "StructuredHTMLPlugin: processing $file\n"
    105105        if $self->{'verbosity'} > 1;
    106106   
     
    161161    $body_text =~ s/(<p[^>]*><o:p>&nbsp;<\/o:p><\/p>)//isg;
    162162   
    163     $section_text .= "<!--\n<Section>\n-->\n";
     163    # What was the following line for? It is effectively unused -- do we need it?
     164    #$section_text .= "<!--\n<Section>\n-->\n";
    164165    #my $top_section_tag = "<!--\n<Section>\n-->\n";
    165166    #$body_text =~ s/(<div.*)/$top_section_text$doctitle$1/i;
     
    308309
    309310    my ($image_type, $actual_width, $actual_height, $image_size)
    310         = &ImagePlug::identify($img_filename, $outhandle, $verbosity);
     311        = &ImageConverter::identify($img_filename, $outhandle, $verbosity);
    311312   
    312313    #print STDERR "**** $actual_width x $actual_height";
     
    318319        # derive new image name based on current image
    319320        my ($tailname, $dirname, $suffix)
    320         = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
     321        = &File::Basename::fileparse($img_filename, "\\.[^\\.]+\$");
    321322       
    322323        my $resized_filename
     
    326327       
    327328        # Generate smaller image with convert
    328         my $newsize = "$img_widthx$image_height";
     329        my $newsize = "$img_width"."x$img_height";
    329330        my $command = "convert -interlace plane -verbose "
    330         ."-geometry $newsize \"img_$filename\" \"$resized_filename\"";
     331        ."-geometry $newsize \"$img_filename\" \"$resized_filename\"";
    331332        #print $outhandle "ImageResize: $command\n" if ($verbosity > 2);
    332333        #my $result = '';
     
    389390        $value = $1;
    390391        if (!defined $value || !defined $tag){
    391         #print $outhandle "StructuredHTMLPlug: can't find VALUE in \"$tag\"\n";
     392        #print $outhandle "StructuredHTMLPlugin: can't find VALUE in \"$tag\"\n";
    392393        next;
    393394        } else {
  • gsdl/trunk/perllib/plugins/TextPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # TEXTPlug.pm -- simple text plugin
     3# TextPlugin.pm -- simple text plugin
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2929# 12/05/02 Added usage datastructure - John Thompson
    3030
    31 package TEXTPlug;
    32 
    33 use BasPlug;
     31package TextPlugin;
     32
     33use ReadTextFile;
    3434
    3535use strict;
    3636no strict 'refs'; # allow filehandles to be variables and viceversa
     37no strict 'subs';
    3738
    3839sub BEGIN {
    39     @TEXTPlug::ISA = ('BasPlug');
     40    @TextPlugin::ISA = ('ReadTextFile');
    4041}
    4142
    4243my $arguments =
    4344    [ { 'name' => "process_exp",
    44     'desc' => "{BasPlug.process_exp}",
     45    'desc' => "{BasePlugin.process_exp}",
    4546    'type' => "regexp",
    4647    'deft' => &get_default_process_exp(),
    4748    'reqd' => "no" } ,
    4849      { 'name' => "title_sub",
    49     'desc' => "{TEXTPlug.title_sub}",
     50    'desc' => "{TextPlugin.title_sub}",
    5051    'type' => "regexp",
    5152    'deft' => "",
    5253    'reqd' => "no" } ];
    5354
    54 my $options = { 'name'     => "TEXTPlug",
    55         'desc'     => "{TEXTPlug.desc}",
     55my $options = { 'name'     => "TextPlugin",
     56        'desc'     => "{TextPlugin.desc}",
    5657        'abstract' => "no",
    5758        'inherits' => "yes",
     
    6566    push(@$pluginlist, $class);
    6667
    67     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    68     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    69 
    70     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     68    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     69    push(@{$hashArgOptLists->{"OptList"}},$options);
     70
     71    my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
    7172
    7273    return bless $self, $class;
     
    8586    my $outhandle = $self->{'outhandle'};
    8687
    87     print STDERR "<Processing n='$file' p='TEXTPlug'>\n" if ($gli);
    88     print $outhandle "TEXTPlug: processing $file\n"
     88    print STDERR "<Processing n='$file' p='TextPlugin'>\n" if ($gli);
     89    print $outhandle "TextPlugin xx: processing $file\n"
    8990    if $self->{'verbosity'} > 1;
    9091   
     
    109110    }
    110111    # Add FileFormat metadata
    111     $doc_obj->add_metadata($cursection, "FileFormat", "TEXT");
     112    $doc_obj->add_metadata($cursection, "FileFormat", "Text");
    112113
    113114    # insert preformat tags and add text to document object   
     
    136137# replace_srcdoc_with_html.pl requires all subroutines that support src_replaceable
    137138# to contain a method called tmp_area_convert_file - this is indeed the case with all
    138 # Perl modules that are subclasses of ConvertToPlug.pm, but as we want TEXTPlug to also
    139 # be srcreplaceable and because TEXTPlug does not inherit from ConvertToPlug.pm, we have
     139# Perl modules that are subclasses of ConvertToPlug.pm, but as we want TextPlugin to also
     140# be srcreplaceable and because TextPlugin does not inherit from ConvertToPlug.pm, we have
    140141# a similar subroutine with the same name here.
    141142sub tmp_area_convert_file {
     
    192193    # Recreate the original file for writing the updated contents
    193194    unless(open(TEXT, "<$tmp_filename")) { # open it as a new file for writing
    194     print STDERR "TEXTPlug.pm: Unable to open and read from $tmp_filename for converting to html...ERROR: $!\n";
     195    print STDERR "TextPlugin.pm: Unable to open and read from $tmp_filename for converting to html...ERROR: $!\n";
    195196    return ""; # no file name
    196197    }
     
    212213    # try creating this new file writing and try opening it for writing, else exit with error value
    213214    unless(open(HTML, ">$output_filename")) {  # open the new html file for writing
    214     print STDERR "TEXTPlug.pm: Unable to create $output_filename for writing $tailname$suffix txt converted to html...ERROR: $!\n";
     215    print STDERR "TextPlugin.pm: Unable to create $output_filename for writing $tailname$suffix txt converted to html...ERROR: $!\n";
    215216    return ""; # no filename
    216217    }
  • gsdl/trunk/perllib/plugins/UnknownPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # UnknownPlug.pm -- Plugin for files you know about but Greenstone doesn't
     3# UnknownPlugin.pm -- Plugin for files you know about but Greenstone doesn't
    44#
    55# A component of the Greenstone digital library software from the New
     
    2626###########################################################################
    2727
    28 # UnknownPlug - a plugin for unknown files
     28# UnknownPlugin - a plugin for unknown files
    2929
    3030# This is a simple Plugin for importing files in formats that
     
    3838# movies, I add this line to the collection configuration file:
    3939
    40 # plugin UnknownPlug -process_exp "*.MOV" -assoc_field "movie"
     40# plugin UnknownPlugin -process_exp "*.MOV" -assoc_field "movie"
    4141
    4242# A document is created for each movie, with the associated movie
     
    4949# You can also add extra metadata, such as the Title, Subject, and
    5050# Duration, with metadata.xml files and RecPlug.  (If you want to use
    51 # UnknownPlug with more than one type of file, you will have to add
     51# UnknownPlugin with more than one type of file, you will have to add
    5252# some sort of distinguishing metadata in this way.)
    5353
    5454
    5555
    56 package UnknownPlug;
     56package UnknownPlugin;
    5757
    58 use BasPlug;
     58use BasePlugin;
    5959
    6060use strict;
     
    6262
    6363sub BEGIN {
    64     @UnknownPlug::ISA = ('BasPlug');
     64    @UnknownPlugin::ISA = ('BasePlugin');
    6565}
    6666
    6767my $arguments =
    6868    [ { 'name' => "assoc_field",
    69     'desc' => "{UnknownPlug.assoc_field}",
     69    'desc' => "{UnknownPlugin.assoc_field}",
    7070    'type' => "string",
    7171    'deft' => "",
    7272    'reqd' => "no" },
    7373      { 'name' => "file_format",
    74     'desc' => "{UnknownPlug.file_format}",
     74    'desc' => "{UnknownPlugin.file_format}",
    7575    'type' => "string",
    7676    'deft' => "",
    7777    'reqd' => "no" },
    7878      { 'name' => "mime_type",
    79     'desc' => "{UnknownPlug.mime_type}",
     79    'desc' => "{UnknownPlugin.mime_type}",
    8080    'type' => "string",
    8181    'deft' => "",
    8282    'reqd' => "no" },
    8383      { 'name' => "srcicon",
    84     'desc' => "{UnknownPlug.srcicon}",
     84    'desc' => "{UnknownPlugin.srcicon}",
    8585    'type' => "string",
    8686    'deft' => "iconunknown",
    8787    'reqd' => "no" },
    8888      { 'name' => "process_extension",
    89     'desc' => "{UnknownPlug.process_extension}",
     89    'desc' => "{UnknownPlugin.process_extension}",
    9090    'type' => "string",
    9191    'deft' => "",
    9292    'reqd' => "no" } ];
    9393
    94 my $options = { 'name'     => "UnknownPlug",
    95         'desc'     => "{UnknownPlug.desc}",
     94my $options = { 'name'     => "UnknownPlugin",
     95        'desc'     => "{UnknownPlugin.desc}",
    9696        'abstract' => "no",
    9797        'inherits' => "yes",
     
    107107    if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    108108
    109     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     109    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
    110110
    111111    # "-process_extension" is a simpler alternative to -process_exp for non-regexp people
     
    117117}
    118118
    119 sub get_default_process_exp {
    120     return '';
    121 }
    122119
     120sub process {
     121    my $self = shift (@_);
     122    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    123123
    124 # Associate the unknown file with the new document
     124    my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
     125    my $outhandle = $self->{'outhandle'};
     126    my $verbosity = $self->{'verbosity'};
    125127
    126 sub associate_unknown_file {
    127     my $self = shift (@_);
    128     my $filename = shift (@_);   # filename with full path
    129     my $file = shift (@_);       # filename without path
    130     my $doc_obj = shift (@_);
    131    
    132     my $verbosity = $self->{'verbosity'};
    133     my $outhandle = $self->{'outhandle'};
    134 
    135     # check the filename is okay
    136     return 0 if ($file eq "" || $filename eq "");
    137 
    138    
    139     my $url = $file;
    140     ##$url =~ s/ /%20/g;
     128    # check the filename is okay - do we need this??
     129    if ($filename_full_path eq "" || $filename_no_path eq "") {
     130    print $outhandle "UnknownPlugin: couldn't process \"$filename_no_path\"\n";
     131    return undef;
     132    }
    141133
    142134    # Add the file as an associated file ...
     
    146138    my $assoc_field = $self->{'assoc_field'} || "unknown_file";
    147139
    148     $doc_obj->associate_file($filename, $file, $mime_type, $section);
     140    $doc_obj->associate_file($filename_full_path, $filename_no_path, $mime_type, $section);
    149141    $doc_obj->add_metadata ($section, "FileFormat", $file_format);
    150142    $doc_obj->add_metadata ($section, "MimeType", $mime_type);
    151     $doc_obj->add_metadata ($section, $assoc_field, $file);
     143    $doc_obj->add_metadata ($section, $assoc_field, $filename_full_path);
    152144   
    153145    $doc_obj->add_metadata ($section, "srclink",
    154146                "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[$assoc_field]\">");
    155     #$doc_obj->add_metadata ($section, "srcicon", "_iconunknown_");
    156147    $doc_obj->add_metadata ($section, "srcicon", "_".$self->{'srcicon'}."_");
    157148    $doc_obj->add_metadata ($section, "/srclink", "</a>");
    158149   
    159     # add NoText metadata which can be used to suppress the dummy text
    160     $doc_obj->add_metadata ($section, "NoText", "1");
     150    # we have no text - add dummy text and NoText metadata
     151    $self->add_dummy_text($doc_obj, $section);
    161152
    162     return 1;
    163 }
    164 
    165 
    166 
    167 # The UnknownPlug read() function. This function does all the right
    168 # things to make general options work for a given plugin.  UnknownPlug
    169 # overrides read() because there is no need to read the actual text of
    170 # the file in, because the contents of the file is not text...
    171 #
    172 #
    173 # Return number of files processed, undef if can't process
    174 #
    175 # Note that $base_dir might be "" and that $file might include directories
    176 
    177 sub read {
    178     my $self = shift (@_);
    179     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    180 
    181     my $outhandle = $self->{'outhandle'};
    182 
    183     # Make sure we're processing the correct file
    184     my ($block_status,$filename) = $self->read_block(@_);   
    185     return $block_status if ((!defined $block_status) || ($block_status==0));
    186 
    187     print STDERR "<Processing n='$file' p='UnknownPlug'>\n" if ($gli);
    188     print $outhandle "UnknownPlug processing \"$filename\"\n"
    189         if $self->{'verbosity'} > 1;
    190 
    191     #if there's a leading directory name, eat it...
    192     $file =~ s/^.*[\/\\]//;
    193    
    194     # create a new document
    195     my $doc_obj = new doc ($filename, "indexed_doc");
    196     my $top_section = $doc_obj->get_top_section();
    197 
    198     $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});   
    199     $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}");
    200     $doc_obj->add_metadata($top_section, "Source", $file); # set the filename as Source metadata to be consistent with other plugins
    201     $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename));
    202 
    203     # URL metadata (even invalid ones) are used to support internal
    204     # links, so even if 'file_is_url' is off, still need to store info
    205 
    206     my $web_url = "http://$file";
    207     $doc_obj->add_metadata($top_section, "URL", $web_url);
    208 
    209 
    210     # associate the file with the document
    211     if (associate_unknown_file($self, $filename, $file, $doc_obj) != 1)
    212     {
    213     if ($gli) {
    214         print STDERR "<ProcessingError n='$file'>\n";
    215     }
    216     print $outhandle "UnknownPlug: couldn't process \"$filename\"\n";
    217     return -1; # error during processing
    218     }
    219 
    220     #create an empty text string so we don't break downstream plugins
    221     my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
    222 
    223     # include any metadata passed in from previous plugins
    224     my $section = $doc_obj->get_top_section();
    225     $self->extra_metadata ($doc_obj, $section, $metadata);
    226 
    227     $self->title_fallback($doc_obj,$section,$file);
    228 
    229     # do plugin specific processing of doc_obj
    230     unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
    231     print STDERR "<ProcessingError n='$file'>\n" if ($gli);
    232     return -1;
    233     }
    234 
    235     # do any automatic metadata extraction
    236     $self->auto_extract_metadata ($doc_obj);
    237 
    238     # add an OID
    239     $doc_obj->set_OID();
    240     $doc_obj->add_utf8_text($section, $text);
    241 
    242     # process the document
    243     $processor->process($doc_obj);
    244 
    245     $self->{'num_processed'} ++;
    246     return 1;
    247 }
    248 
    249 
    250 # UnknownPlug processing of doc_obj.  In practice we don't need to do
    251 # anything here because the read function takes care of everything.
    252 
    253 sub process {
    254     my $self = shift (@_);
    255     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
    256     my $outhandle = $self->{'outhandle'};
    257    
    258153    return 1;
    259154}
  • gsdl/trunk/perllib/plugins/W3ImagePlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # W3ImgPlug.pm -- Context-based image indexing plugin for HTML documents
     3# W3ImagePlugin.pm -- Context-based image indexing plugin for HTML documents
    44#
    55# A component of the Greenstone digital library software
     
    3939#  collection builds at the import stage.
    4040#
    41 #  W3ImgPlug is a subclass of HTMLPlug (i.e. it will index pages also
    42 #  if required). It can be used in place of HTMLPlug to index both
     41#  W3ImagePlugin is a subclass of HTMLPlugin (i.e. it will index pages also
     42#  if required). It can be used in place of HTMLPlugin to index both
    4343#  pages and their images.
    4444#
     
    5555#    ImageMagick can be downloaded from the website above.
    5656#    Make sure the system path includes the ImageMagick binaries
    57 #    before using W3ImgPlug.
     57#    before using W3ImagePlugin.
    5858#
    5959#    NOTE: NT/2000/XP contain a filesystem utility 'convert.exe'
     
    9797#   ...
    9898#
    99 #   plugin W3ImgPlug -index_pages -aggressiveness 6
     99#   plugin W3ImagePlugin -index_pages -aggressiveness 6
    100100#
    101101#   ...
     
    110110#
    111111 
    112 package W3ImgPlug;
    113 
    114 use HTMLPlug;
     112package W3ImagePlugin;
     113
     114use HTMLPlugin;
    115115use ghtml;
    116116use unicode;
     
    120120
    121121sub BEGIN {
    122     @W3ImgPlug::ISA = qw( HTMLPlug );
     122    @W3ImagePlugin::ISA = qw( HTMLPlugin );
    123123}
    124124
    125125my $aggressiveness_list =
    126126    [ { 'name' => "1",
    127     'desc' => "{W3ImgPlug.aggressiveness.1}" },
     127    'desc' => "{W3ImagePlugin.aggressiveness.1}" },
    128128      { 'name' => "2",
    129     'desc' => "{W3ImgPlug.aggressiveness.2}" },
     129    'desc' => "{W3ImagePlugin.aggressiveness.2}" },
    130130      { 'name' => "3",
    131     'desc' => "{W3ImgPlug.aggressiveness.3}" },
     131    'desc' => "{W3ImagePlugin.aggressiveness.3}" },
    132132      { 'name' => "4",
    133     'desc' => "{W3ImgPlug.aggressiveness.4}" },
     133    'desc' => "{W3ImagePlugin.aggressiveness.4}" },
    134134      { 'name' => "5",
    135     'desc' => "{W3ImgPlug.aggressiveness.5}" },
     135    'desc' => "{W3ImagePlugin.aggressiveness.5}" },
    136136      { 'name' => "6",
    137     'desc' => "{W3ImgPlug.aggressiveness.6}" },
     137    'desc' => "{W3ImagePlugin.aggressiveness.6}" },
    138138      { 'name' => "7",
    139     'desc' => "{W3ImgPlug.aggressiveness.7}" },
     139    'desc' => "{W3ImagePlugin.aggressiveness.7}" },
    140140      { 'name' => "8",
    141     'desc' => "{W3ImgPlug.aggressiveness.8}" },
     141    'desc' => "{W3ImagePlugin.aggressiveness.8}" },
    142142      { 'name' => "9",
    143     'desc' => "{W3ImgPlug.aggressiveness.9}" } ];
     143    'desc' => "{W3ImagePlugin.aggressiveness.9}" } ];
    144144
    145145my $arguments =
    146146    [ { 'name' => "aggressiveness",
    147     'desc' => "{W3ImgPlug.aggressiveness}",
     147    'desc' => "{W3ImagePlugin.aggressiveness}",
    148148    'type' => "int",
    149149    'list' => $aggressiveness_list,
     
    151151    'reqd' => "no" },
    152152      { 'name' => "index_pages",
    153     'desc' => "{W3ImgPlug.index_pages}",
     153    'desc' => "{W3ImagePlugin.index_pages}",
    154154    'type' => "flag",
    155155    'reqd' => "no" },
    156156      { 'name' => "no_cache_images",
    157     'desc' => "{W3ImgPlug.no_cache_images}",
     157    'desc' => "{W3ImagePlugin.no_cache_images}",
    158158    'type' => "flag",
    159159    'reqd' => "no" },
    160160      { 'name' => "min_size",
    161     'desc' => "{W3ImgPlug.min_size}",
     161    'desc' => "{W3ImagePlugin.min_size}",
    162162    'type' => "int",
    163163    'deft' => "2000",
    164164    'reqd' => "no" },
    165165      { 'name' => "min_width",
    166     'desc' => "{W3ImgPlug.min_width}",
     166    'desc' => "{W3ImagePlugin.min_width}",
    167167    'type' => "int",
    168168    'deft' => "50",
    169169    'reqd' => "no" },
    170170      { 'name' => "min_height",
    171     'desc' => "{W3ImgPlug.min_height}",
     171    'desc' => "{W3ImagePlugin.min_height}",
    172172    'type' => "int",
    173173    'deft' => "50",
    174174    'reqd' => "no" },
    175175      { 'name' => "thumb_size",
    176     'desc' => "{W3ImgPlug.thumb_size}",
     176    'desc' => "{W3ImagePlugin.thumb_size}",
    177177    'type' => "int",
    178178    'deft' => "100",
    179179    'reqd' => "no" },
    180180      { 'name' => "convert_params",
    181     'desc' => "{W3ImgPlug.convert_params}",
     181    'desc' => "{W3ImagePlugin.convert_params}",
    182182    'type' => "string",
    183183    'deft' => "",
    184184    'reqd' => "no" },
    185185      { 'name' => "min_near_text",
    186     'desc' => "{W3ImgPlug.min_near_text}",
     186    'desc' => "{W3ImagePlugin.min_near_text}",
    187187    'type' => "int",
    188188    'deft' => "10",
    189189    'reqd' => "no" },
    190190      { 'name' => "max_near_text",
    191     'desc' => "{W3ImgPlug.max_near_text}",
     191    'desc' => "{W3ImagePlugin.max_near_text}",
    192192    'type' => "int",
    193193    'deft' => "400",
    194194    'reqd' => "no" },
    195195      { 'name' => "smallpage_threshold",
    196     'desc' => "{W3ImgPlug.smallpage_threshold}",
     196    'desc' => "{W3ImagePlugin.smallpage_threshold}",
    197197    'type' => "int",
    198198    'deft' => "2048",
    199199    'reqd' => "no" },
    200200      { 'name' => "textrefs_threshold",
    201     'desc' => "{W3ImgPlug.textrefs_threshold}",
     201    'desc' => "{W3ImagePlugin.textrefs_threshold}",
    202202    'type' => "int",
    203203    'deft' => "2",
    204204    'reqd' => "no" },
    205205      { 'name' => "caption_length",
    206     'desc' => "{W3ImgPlug.caption_length}",
     206    'desc' => "{W3ImagePlugin.caption_length}",
    207207    'type' => "int",
    208208    'deft' => "80",
    209209    'reqd' => "no" },
    210210      { 'name' => "neartext_length",
    211     'desc' => "{W3ImgPlug.neartext_length}",
     211    'desc' => "{W3ImagePlugin.neartext_length}",
    212212    'type' => "int",
    213213    'deft' => "300",
    214214    'reqd' => "no" },
    215215      { 'name' => "document_text",
    216     'desc' => "{W3ImgPlug.document_text}",
     216    'desc' => "{W3ImagePlugin.document_text}",
    217217    'type' => "flag",
    218218    'reqd' => "no" } ];
    219219
    220 my $options = { 'name'     => "W3ImgPlug",
    221         'desc'     => "{W3ImgPlug.desc}",
     220my $options = { 'name'     => "W3ImagePlugin",
     221        'desc'     => "{W3ImagePlugin.desc}",
    222222        'abstract' => "no",
    223223        'inherits' => "yes",
     
    229229    push(@$pluginlist, $class);
    230230
    231     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    232     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    233 
    234     my $self = new HTMLPlug($pluginlist, $inputargs, $hashArgOptLists);
     231    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     232    push(@{$hashArgOptLists->{"OptList"}},$options);
     233
     234    my $self = new HTMLPlugin($pluginlist, $inputargs, $hashArgOptLists);
    235235
    236236    # init class variables
     
    247247}
    248248
    249 # if indexing pages, let HTMLPlug do it's stuff
     249# if indexing pages, let HTMLPlugin do its stuff
    250250# image extraction done through read()
    251251sub process {
    252     my ($self, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
     252    my $self = shift(@_);
     253    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    253254    $self->{'imglist'} = ();
    254255    if ( $self->{'index_pages'} ) {
     
    280281
    281282# get complex configuration options from configuration files
    282 # -- $GSDLCOLLECTION/etc/W3ImgPlug.cfg (tag sets for aggr 2+)
     283# -- $GSDLCOLLECTION/etc/W3ImagePlugin.cfg (tag sets for aggr 2+)
    283284# -- $GSDLHOME/etc/packages/phind/stopword/en/brown.sw (stopwords for aggr 5+)
    284285
    285 # If there's no W3ImgPlug.cfg file we'll use the following default values
     286# If there's no W3ImagePlugin.cfg file we'll use the following default values
    286287my $defaultcfg = '
    287288<delimitertagset>
     
    322323    my ($filepath);
    323324
    324     print {$self->{'outhandle'}} "W3ImgPlug: Initialising\n"
     325    print {$self->{'outhandle'}} "W3ImagePlugin: Initialising\n"
    325326    if $self->{'verbosity'} > 1;
    326     # etc/W3ImgPlug.cfg (XML)
     327    # etc/W3ImagePlugin.cfg (XML)
    327328    # tag sets for captions and neartext
    328329    if ( $self->{'aggressiveness'} > 1 && $self->{'aggressiveness'} != 9 ) {
     
    331332    my ($cfg, @tagsets, $tagset, $type, @delims);
    332333
    333     $filepath = "$collpath/etc/W3ImgPlug.cfg";
     334    $filepath = "$collpath/etc/W3ImagePlugin.cfg";
    334335    if ( open CFG, "<$filepath" ) {
    335336        while (<CFG>) { $cfg .= $_ }
     
    353354    # output a warning if there seem to be no delimiters
    354355    if ( scalar(@{$self->{'cdelims'}} == 0)) {
    355         print {$self->{'outhandle'}} "W3ImgPlug: Warning: no caption delimiters found in $filepath\n";
     356        print {$self->{'outhandle'}} "W3ImagePlugin: Warning: no caption delimiters found in $filepath\n";
    356357    }
    357358    if ( scalar(@{$self->{'delims'}} == 0)) {
    358         print {$self->{'outhandle'}} "W3ImgPlug: Warning: no neartext delimiters found in $filepath\n";
     359        print {$self->{'outhandle'}} "W3ImagePlugin: Warning: no neartext delimiters found in $filepath\n";
    359360    }
    360361    }
     
    372373        close STOPWORDS;
    373374    } else {
    374         print {$self->{'outhandle'}} "W3ImgPlug: Warning: couldn't open stopwords file at $filepath ($!)\n";
     375        print {$self->{'outhandle'}} "W3ImagePlugin: Warning: couldn't open stopwords file at $filepath ($!)\n";
    375376    }
    376377   
     
    379380    if ( $self->{'neartext_length'} > $self->{'max_near_text'} ) {
    380381    $self->{'max_near_text'} = $self->{'neartext_length'} * 1.33;
    381     print {$self->{'outhandle'}} "W3ImgPlug: Warning: adjusted max_text to $self->{'max_near_text'}\n";
     382    print {$self->{'outhandle'}} "W3ImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n";
    382383    }
    383384    if ( $self->{'caption_length'} > $self->{'max_near_text'} ) {
    384385    $self->{'max_near_text'} = $self->{'caption_length'} * 1.33;
    385     print {$self->{'outhandle'}} "W3ImgPlug: Warning: adjusted max_text to $self->{'max_near_text'}\n";
     386    print {$self->{'outhandle'}} "W3ImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n";
    386387    }
    387388
     
    396397    my ($self, $pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = (@_);
    397398    my ($doc_obj, $section, $filepath, $imgtag, $pos, $context, $numdocs, $tndir, $imgs);
    398     # forward normal read (runs HTMLPlug if index_pages T)
     399    # forward normal read (runs HTMLPlugin if index_pages T)
    399400    my $ok =  $self->SUPER::read($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli);
    400401    if ( ! $ok ) { return $ok } # what is this returning??
     
    419420        ($imgtag) = ($context =~ /(<(?:img|a|body)\s[^>]*$filepath[^>]*>)/is );
    420421        if (! defined($imgtag)) { $imgtag = $filepath }
    421         print $outhandle "W3ImgPlug: extracting $filepath\n"
     422        print $outhandle "W3ImagePlugin: extracting $filepath\n"
    422423        if ( $self->{'verbosity'} > 1 );
    423424        $doc_obj = new doc ("", "indexed_doc");
     
    433434    return $numdocs;
    434435    } else {
    435     print $outhandle "W3ImgPlug: No images from $file indexed\n"
     436    print $outhandle "W3ImagePlugin: No images from $file indexed\n"
    436437        if ( $self->{'verbosity'} > 2 );
    437438    return 1;
     
    472473    `convert -flatten -filter Hanning $self->{'convert_params'} -geometry "$self->{'thumb_size'}x$self->{'thumb_size'}>" $filepath $thumbfp` unless -e $thumbfp;
    473474    if ( ! (-e $thumbfp) ) {
    474     print STDERR "W3ImgPlug: 'convert' failed. Check ImageMagicK binaries are installed and working correctly\n"; return 0;
     475    print STDERR "W3ImagePlugin: 'convert' failed. Check ImageMagicK binaries are installed and working correctly\n"; return 0;
    475476    }
    476477   
     
    853854    } elsif ( $bestlen[$best1] < $mintext ) {
    854855    # use plain text extraction if tags failed (e.g. usable tag outside context)
    855     print {$self->{'outhandle'}} "W3ImgPlug: Fallback to plain-text extraction for $tag\n"
     856    print {$self->{'outhandle'}} "W3ImagePlugin: Fallback to plain-text extraction for $tag\n"
    856857        if $self->{'verbosity'} > 2;
    857858    $neartext[0] = "<tr><td>RawNeartext</td><td>" . $self->extract_raw_neartext($tag, $textref) . "</td></tr>";
     
    985986        `identify $abspath -ping -format "%wx%h"` =~ /^(\d*)x(\d*)$/m;
    986987    if (! ($width && $height)) {
    987         print STDERR "W3ImgPlug: ($abspath) 'identify' failed. Check ImageMagicK binaries are installed and working correctly\n"; next;
     988        print STDERR "W3ImagePlugin: ($abspath) 'identify' failed. Check ImageMagicK binaries are installed and working correctly\n"; next;
    988989    }
    989990    $filesize = (-s $abspath);
     
    998999       $imgs->{$filepath}{'filesize'} = $filesize;
    9991000       } else {
    1000        print {$self->{'outhandle'}} "W3ImgPlug: skipping $self->{'base_path'}/$relpath: $filesize, $width x $height\n"
     1001       print {$self->{'outhandle'}} "W3ImagePlugin: skipping $self->{'base_path'}/$relpath: $filesize, $width x $height\n"
    10011002           if $self->{'verbosity'} > 2;
    10021003       }
     
    10291030}
    10301031
    1031 # HTMLPlug only extracts meta-data if it is specified in plugin options
     1032# HTMLPlugin only extracts meta-data if it is specified in plugin options
    10321033# hence a special function to do it here
    10331034sub get_meta_value {
     
    10481049# so we can go straight to the image
    10491050# within the cached version of the source page
    1050 # (augment's HTMLPlug sub)
     1051# (augment's HTMLPlugin sub)
    10511052sub replace_images {
    10521053    my $self = shift (@_);
  • gsdl/trunk/perllib/plugins/WordPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # WordPlug.pm -- plugin for importing Microsoft Word documents
     3# WordPlugin.pm -- plugin for importing Microsoft Word documents
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2525# 12/05/02 Added usage datastructure - John Thompson
    2626
    27 package WordPlug;
    28 
    29 use ConvertToPlug;
     27package WordPlugin;
     28
     29use ConvertBinaryFile;
    3030use strict;
    3131no strict 'refs'; # allow filehandles to be variables and viceversa
    3232
    3333sub BEGIN {
    34     @WordPlug::ISA = ('ConvertToPlug');
     34    @WordPlugin::ISA = ('ConvertBinaryFile');
    3535}
    3636
    3737my $arguments =
    3838    [ { 'name' => "process_exp",
    39     'desc' => "{BasPlug.process_exp}",
     39    'desc' => "{BasePlugin.process_exp}",
    4040    'type' => "regexp",
    4141    'deft' => &get_default_process_exp(),
    4242    'reqd' => "no" },
    4343      { 'name' => "description_tags",
    44     'desc' => "{HTMLPlug.description_tags}",
     44    'desc' => "{HTMLPlugin.description_tags}",
    4545    'type' => "flag" }
    4646      ];
    4747
    48 my $options = { 'name'     => "WordPlug",
    49         'desc'     => "{WordPlug.desc}",
     48my $options = { 'name'     => "WordPlugin",
     49        'desc'     => "{WordPlugin.desc}",
    5050        'abstract' => "no",
    5151        'inherits' => "yes",
     
    6060    if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
    6161    my $ws_arg = [ { 'name' => "windows_scripting",
    62              'desc' => "{WordPlug.windows_scripting}",
     62             'desc' => "{WordPlugin.windows_scripting}",
    6363             'type' => "flag",
    6464                 'reqd' => "no" },
     
    6767             'deft' => "Title" },
    6868               { 'name' => "level1_header",
    69              'desc' => "{StructuredHTMLPlug.level1_header}",
     69             'desc' => "{StructuredHTMLPlugin.level1_header}",
    7070             'type' => "regexp",
    7171             'reqd' => "no",
    7272             'deft' => "" },
    7373               { 'name' => "level2_header",
    74              'desc' => "{StructuredHTMLPlug.level2_header}",
     74             'desc' => "{StructuredHTMLPlugin.level2_header}",
    7575             'type' => "regexp",
    7676             'reqd' => "no",
    7777             'deft' => "" },
    7878               { 'name' => "level3_header",
    79              'desc' => "{StructuredHTMLPlug.level3_header}",
     79             'desc' => "{StructuredHTMLPlugin.level3_header}",
    8080             'type' => "regexp",
    8181             'reqd' => "no",
    8282             'deft' => "" },
    8383               { 'name' => "title_header",
    84              'desc' => "{StructuredHTMLPlug.title_header}",
     84             'desc' => "{StructuredHTMLPlugin.title_header}",
    8585             'type' => "regexp",
    8686             'reqd' => "no",
    8787             'deft' => "" },
    8888               { 'name' => "delete_toc",
    89              'desc' => "{StructuredHTMLPlug.delete_toc}",
     89             'desc' => "{StructuredHTMLPlugin.delete_toc}",
    9090             'type' => "flag",
    9191             'reqd' => "no",
     
    9494             'modegli' => "3"},
    9595               { 'name' => "toc_header",
    96              'desc' => "{StructuredHTMLPlug.toc_header}",
     96             'desc' => "{StructuredHTMLPlugin.toc_header}",
    9797             'type' => "regexp",
    9898             'reqd' => "no",
     
    103103    }
    104104
    105     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    106     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    107 
    108     my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists);
     105    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     106    push(@{$hashArgOptLists->{"OptList"}},$options);
     107
     108    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
    109109
    110110    if ($self->{'info_only'}) {
     
    113113    }
    114114
    115     #this is passed through to gsConvert.pl by ConvertToPlug.pm
     115    $self->{'filename_extension'} = "doc";
     116    $self->{'file_type'} = "Word";
     117
     118    #this is passed through to gsConvert.pl by ConvertBinaryFile.pm
    116119    $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'};
    117120
     
    123126    my $secondary_plugin_options = $self->{'secondary_plugin_options'};
    124127    if (defined $self->{'windows_scripting'}) {
    125     if (!defined $secondary_plugin_options->{'StructuredHTMLPlug'}){
    126         $secondary_plugin_options->{'StructuredHTMLPlug'} = [];
    127         my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlug'};
     128    if (!defined $secondary_plugin_options->{'StructuredHTMLPlugin'}){
     129        $secondary_plugin_options->{'StructuredHTMLPlugin'} = [];
     130        my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlugin'};
    128131       
    129         # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
     132        # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj)
    130133        # to extract these metadata fields from the HEAD META fields
    131134        push (@$structhtml_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
     
    142145    }
    143146    }
    144     if (!defined $secondary_plugin_options->{'HTMLPlug'}) {
    145     $secondary_plugin_options->{'HTMLPlug'} = [];
    146     }
    147     if (!defined $secondary_plugin_options->{'TEXTPlug'}) {
    148     $secondary_plugin_options->{'TEXTPlug'} = [];
    149     }
    150 
    151     my $html_options = $secondary_plugin_options->{'HTMLPlug'};
    152     my $text_options = $secondary_plugin_options->{'TextPlug'};
    153     my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlug'};   
    154     # wvWare will always produce html files encoded as utf-8, so make sure the secondary HTMLPlug knows this
     147    if (!defined $secondary_plugin_options->{'HTMLPlugin'}) {
     148    $secondary_plugin_options->{'HTMLPlugin'} = [];
     149    }
     150    if (!defined $secondary_plugin_options->{'TextPlugin'}) {
     151    $secondary_plugin_options->{'TextPlugin'} = [];
     152    }
     153
     154    my $html_options = $secondary_plugin_options->{'HTMLPlugin'};
     155    my $text_options = $secondary_plugin_options->{'TextPlugin'};
     156    my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlugin'};   
     157    # wvWare will always produce html files encoded as utf-8, so make sure the secondary HTMLPlugin knows this
    155158    push(@$html_options,"-input_encoding", "utf8");
    156159    push(@$html_options,"-extract_language") if $self->{'extract_language'};
    157160    push(@$html_options, "-description_tags") if $self->{'description_tags'};
    158161
    159     # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
     162    # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj)
    160163    # to extract these metadata fields from the HEAD META fields
    161164    push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
     
    181184}
    182185
    183 sub convert_post_process
     186sub convert_post_process_old
    184187{
    185188    my $self = shift (@_);
     
    199202    # Write it out again!
    200203    #$self->utf8_write_file (\$text, $conv_filename);
    201 }
    202 
    203 sub get_file_type {
    204     my $self = shift (@_);
    205     my $file_type = "Word";
    206     return $file_type;
    207204}
    208205
     
    230227}
    231228
    232 # do plugin specific processing of doc_obj for HTML type
    233 sub process {
    234     my $self = shift (@_);
    235     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    236 
    237     return $self->process_type("doc", $base_dir, $file, $doc_obj);
    238 }
    239229
    2402301;
  • gsdl/trunk/perllib/plugins/ZIPPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # ZIPPlug.pm --
     3# ZIPPlugin.pm --
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    4444
    4545
    46 package ZIPPlug;
     46package ZIPPlugin;
    4747
    48 use BasPlug;
     48use AbstractPlugin;
    4949use plugin;
    5050use util;
     
    5555
    5656BEGIN {
    57     @ZIPPlug::ISA = ('BasPlug');
     57    @ZIPPlugin::ISA = ('AbstractPlugin');
    5858}
    5959
    6060my $arguments =
    6161    [ { 'name' => "process_exp",
    62     'desc' => "{BasPlug.process_exp}",
     62    'desc' => "{BasePlugin.process_exp}",
    6363    'type' => "string",
    6464    'deft' => &get_default_process_exp(),
    6565    'reqd' => "no" } ];
    6666
    67 my $options = { 'name'     => "ZIPPlug",
    68         'desc'     => "{ZIPPlug.desc}",
     67my $options = { 'name'     => "ZIPPlugin",
     68        'desc'     => "{ZIPPlugin.desc}",
    6969        'abstract' => "no",
    7070        'inherits' => "yes",
     
    7777    push(@$pluginlist, $class);
    7878
    79     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    80     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     79    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     80    push(@{$hashArgOptLists->{"OptList"}},$options);
    8181
    82     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     82    my $self = new AbstractPlugin($pluginlist, $inputargs, $hashArgOptLists);
    8383
    8484    return bless $self, $class;
     
    112112    &util::mk_all_dir ($tmpdir);
    113113   
    114     print $outhandle "ZIPPlug: extracting $file_only to $tmpdir\n"
     114    print $outhandle "ZIPPlugin: extracting $file_only to $tmpdir\n"
    115115    if $self->{'verbosity'} > 1;
    116116   
Note: See TracChangeset for help on using the changeset viewer.