Changeset 15872

gsdl/trunk/perllib/plugins/BibTexPlugin.pm

-              r15864
+              r15872
 ###########################################################################
+#
 # BibTexPlug.pm - a plugin for bibliography records in BibTex format
+# BibTexPlugin.pm - a plugin for bibliography records in BibTex format
+#
 # A component of the Greenstone digital library software
 …
 # BibTexPlug reads bibliography files in BibTex format.
+# BibTexPlugin reads bibliography files in BibTex format.
+#
 # by Gordon W. Paynter ([email protected]), November 2000
 # Based on ReferPlug.  See ReferPlug for genealogy.
+#
 # BibTexPlug creates a document object for every reference a the file.
 # It is a subclass of SplitPlug, so if there are multiple records, all
+# BibTexPlugin creates a document object for every reference in the file.
+# It is a subclass of SplitTextFile, so if there are multiple records, all
 # are read.
+#
 …
 package BibTexPlug;
 use SplitPlug;
+package BibTexPlugin;
+use SplitTextFile;
 use strict;
 no strict 'refs'; # allow filehandles to be variables and viceversa
 # BibTexPlug is a sub-class of BasPlug.
+# BibTexPlugin is a sub-class of SplitTextFile.
 sub BEGIN {
     @BibTexPlug::ISA = ('SplitPlug');
+    @BibTexPlugin::ISA = ('SplitTextFile');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'reqd' => "no",
     'deft' => &get_default_process_exp() },
       { 'name' => "split_exp",
     'desc' => "{SplitPlug.split_exp}",
+    'desc' => "{SplitTextFile.split_exp}",
     'type' => "regexp",
     'deft' => &get_default_split_exp(),
 …
       ];
 my $options = { 'name'     => "BibTexPlug",
         'desc'     => "{BibTexPlug.desc}",
+my $options = { 'name'     => "BibTexPlugin",
+        'desc'     => "{BibTexPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     return q^\n+(?=@)^;
+}
 sub new {
     my ($class) = shift (@_);
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
     # Report that we're processing the file
     print STDERR "<Processing n='$file' p='BibTexPlug'>\n" if ($gli);
     print $outhandle "BibTexPlug: processing $file\n"
+    print STDERR "<Processing n='$file' p='BibTexPlugin'>\n" if ($gli);
+    print $outhandle "BibTexPlugin: processing $file\n"
     if ($self->{'verbosity'}) > 1;
 …
             $vonlast=shift @parts;
             if (scalar(@parts) > 0) {
             print $outhandle "BibTexPlug: couldn't parse name $a\n";
+            print $outhandle "BibTexPlugin: couldn't parse name $a\n";
             # but we continue anyway...
+            }
 …
             # some non-English names do start with lowercase
             # eg "Marie desJardins". Also we can get typos...
             print $outhandle "BibTexPlug: couldn't parse surname $vonlast\n";
+            print $outhandle "BibTexPlugin: couldn't parse surname $vonlast\n";
             $von="";
             if ($vonlast =~ /^[a-z]+$/) {
 …
         my $replacement=$utf8_chars{$tex};
         if (!defined($replacement)) {
         print STDERR "BibTexPlug: Warning: unknown latex accent \"$tex\" in \"$text\"\n";
+        print STDERR "BibTexPlugin: Warning: unknown latex accent \"$tex\" in \"$text\"\n";
         $replacement=$char;
+        }
 …
         my $replacement=$special_utf8_chars{$tex};
         if (!defined($replacement)) {
         print STDERR "BibTexPlug: Warning: unknown latex accent \"$tex\" in \"$text\"\n";
+        print STDERR "BibTexPlugin: Warning: unknown latex accent \"$tex\" in \"$text\"\n";
         $replacement=$tex;
+        }
 …
           my $replacement=$special_utf8_chars{$tex};
           if (!defined($replacement)) {
           print STDERR "BibTexPlug: Warning: unknown latex accent \"$tex\" in \"$text\"\n";
+          print STDERR "BibTexPlugin: Warning: unknown latex accent \"$tex\" in \"$text\"\n";
           $replacement=$char;
+      }

gsdl/trunk/perllib/plugins/BookPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # BookPlug.pm (formally called HBSPlug) -- plugin for processing simple
+# BookPlugin.pm (formerly called HBSPlug) -- plugin for processing simple
 # html (or text) books
+#
 …
 # taken as the cover image (jpg files are blocked by this plugin)
 # BookPlug is a simplification (and extension) of the HBPlug used
 # by the Humanity Library collections. BookPlug is faster as it expects
+# BookPlugin is a simplification (and extension) of the HBPlug used
+# by the Humanity Library collections. BookPlugin is faster as it expects
 # the input files to be cleaner (The input to the HDL collections
 # contains lots of excess html tags around <<TOC>> tags, uses <<I>>
 …
 # use this plugin instead of HBPlug.
+# 12/05/02 Added usage datastructure - John Thompson
+package BookPlug;
+use BasPlug;
+package BookPlugin;
+use AutoExtractMetadata;
 use util;
 use strict;
 …
 sub BEGIN {
     @BookPlug::ISA = ('BasPlug');
+    @BookPlugin::ISA = ('AutoExtractMetadata');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'reqd' => "no",
     'deft' => &get_default_process_exp() },
       { 'name' => "block_exp",
     'desc' => "{BasPlug.block_exp}",
+    'desc' => "{BasePlugin.block_exp}",
     'type' => "regexp",
     'reqd' => "no",
     'deft' => &get_default_block_exp() } ];
 my $options = { 'name'     => "BookPlug",
         'desc'     => "{BookPlug.desc}",
+my $options = { 'name'     => "BookPlugin",
+        'desc'     => "{BookPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new AutoExtractMetadata($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
     my $outhandle = $self->{'outhandle'};
     print STDERR "<Processing n='$file' p='BookPlug'>\n" if ($gli);
     print $outhandle "BookPlug: processing $file\n"
+    print STDERR "<Processing n='$file' p='BookPlugin'>\n" if ($gli);
+    print $outhandle "BookPlugin: processing $file\n"
     if $self->{'verbosity'} > 1;
 …
     if ($imagetype eq "jpg") {$imagetype = "jpeg";}
     if ($imagetype !~ /^(jpeg|gif|png)$/) {
     print $outhandle "BookPlug: Warning - unknown image type ($imagetype)\n";
+    print $outhandle "BookPlugin: Warning - unknown image type ($imagetype)\n";
+    }
     my ($imagefile) = $link =~ /([^\/]*)$/;
 …
         $foundimage = 1;
     } else {
         $error = "BookPlug: Warning - couldn't find image file $imagefile in either $filename or";
+        $error = "BookPlugin: Warning - couldn't find image file $imagefile in either $filename or";
+    }
+    }
 …
         print $outhandle "$error $filename\n";
     } else {
         print $outhandle "BookPlug: Warning - couldn't find image file $imagefile in $filename\n";
+        print $outhandle "BookPlugin: Warning - couldn't find image file $imagefile in $filename\n";
+    }
+    }

gsdl/trunk/perllib/plugins/CONTENTdmPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # CONTENTdmPlug.pm -- reasonably with-it pdf plugin
+# CONTENTdmPlugin.pm -- reasonably with-it pdf plugin
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
+#
 ###########################################################################
+package CONTENTdmPlug;
+use ConvertToPlug;
+package CONTENTdmPlugin;
+use ConvertBinaryFile;
+use ReadXMLFile;
 use unicode;
 use ghtml;
 …
 use XMLParser;
+# inherit ReadXMLFile for the apply_xslt method
 sub BEGIN {
     @CONTENTdmPlug::ISA = ('ConvertToPlug');
+    @CONTENTdmPlugin::ISA = ('ConvertBinaryFile', 'ReadXMLFile');
+}
 …
 my $convert_to_list =
     [ { 'name' => "auto",
     'desc' => "{ConvertToPlug.convert_to.auto}" },
+    'desc' => "{ConvertBinaryFile.convert_to.auto}" },
       { 'name' => "html",
     'desc' => "{ConvertToPlug.convert_to.html}" },
+    'desc' => "{ConvertBinaryFile.convert_to.html}" },
       { 'name' => "text",
     'desc' => "{ConvertToPlug.convert_to.text}" },
+    'desc' => "{ConvertBinaryFile.convert_to.text}" },
       { 'name' => "pagedimg",
     'desc' => "{ConvertToPlug.convert_to.pagedimg}"},
+    'desc' => "{ConvertBinaryFile.convert_to.pagedimg}"},
       ];
 …
+      [
        { 'name' => "convert_to",
     'desc' => "{ConvertToPlug.convert_to}",
+    'desc' => "{ConvertBinaryFile.convert_to}",
     'type' => "enum",
     'reqd' => "yes",
 …
     'deft' => "html" },
       { 'name' => "xslt",
     'desc' => "{XMLPlug.xslt}",
+    'desc' => "{ReadXMLFile.xslt}",
     'type' => "string",
     'deft' => "",
     'reqd' => "no" },
        { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" },
       { 'name' => "block_exp",
     'desc' => "{BasPlug.block_exp}",
+    'desc' => "{BasePlugin.block_exp}",
     'type' => "regexp",
     'deft' => &get_default_block_exp() }
 ];
 my $options = { 'name'     => "CONTENTdmPlug",
         'desc'     => "{CONTENTdmPlug.desc}",
+my $options = { 'name'     => "CONTENTdmPlugin",
+        'desc'     => "{CONTENTdmPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
         # CONTENTdmPlug is one of the few ConvertToPlug subclasses whose source doc can't be replaced by a GS-generated html
+        # CONTENTdmPlugin is one of the few ConvertBinaryFile subclasses whose source doc can't be replaced by a GS-generated html
         'srcreplaceable' => "no",
         'args'     => $arguments };
-our ($self);
 sub new {
 …
     push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
     my @arg_array = @$inputargs;
     $self = (defined $hashArgOptLists)? new ConvertToPlug($pluginlist,$inputargs,$hashArgOptLists): new ConvertToPlug($pluginlist,$inputargs);
+    my $self = new ConvertBinaryFile($pluginlist,$inputargs,$hashArgOptLists);
     if ($self->{'info_only'}) {
 …
     my $parser = new XML::Parser('Style' => 'Stream',
+                 'Pkg' => 'CONTENTdmPlugin',
+                 'PluginObj' => $self,
                  'Handlers' => {'Char' => \&Char,
                         'XMLDecl' => \&XMLDecl,
 …
     $self->{'metadata_value'} = undef;
     $self->{'convert_to'} = "PagedImg";
+    $self->{'convert_to'} = "PagedImage";
     my $secondary_plugin_options = $self->{'secondary_plugin_options'};
+    if (!defined $secondary_plugin_options->{'PagedImgPlug'}){
+    $secondary_plugin_options->{'PagedImgPlug'} = [];
+    my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'};
+    push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
+    push(@$pagedimg_options, "-thumbnail", "-screenview");
+    }
+    if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){
+    $secondary_plugin_options->{'PagedImagePlugin'} = [];
+    }
+    my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
+    push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
+    push(@$pagedimg_options, "-thumbnail", "-screenview");
     $self = bless $self, $class;
 …
+}
-# so we don't inherit HTMLPlug's block exp...
 sub get_default_block_exp {
     return q^(?i)\.(jpg|jpeg|gif)$^;
+}
-# A smarter (?) option would be to add XMLPlug into inheritence above
-# thereby avoiding a fair amount of code duplication
-sub apply_xslt
+{
-    my $self = shift @_;
-    my ($xslt,$filename) = @_;
-    my $outhandle = $self->{'outhandle'};
-    my $xslt_filename = $xslt;
-    if (! -e $xslt_filename) {
-    # Look in main site directory
-    my $gsdlhome = $ENV{'GSDLHOME'};
-    $xslt_filename = &util::filename_cat($gsdlhome,$xslt);
+    }
-    if (! -e $xslt_filename) {
-    # Look in collection directory
-    my $coldir = $ENV{'GSDLCOLLECTDIR'};
-    $xslt_filename = &util::filename_cat($coldir,$xslt);
+    }
-    if (! -e $xslt_filename) {
-    print $outhandle "Warning: Unable to find XSLT $xslt\n";
-    if (open(XMLIN,"<$filename")) {
-        my $untransformed_xml = "";
-        while (defined (my $line = <XMLIN>)) {
-        $untransformed_xml .= $line;
+        }
-        close(XMLIN);
-        return $untransformed_xml;
+    }
-    else {
-        print $outhandle "Error: Unable to open file $filename\n";
-        print $outhandle "       $!\n";
-        return "";
+    }
+    }
-    my $bin_java = &util::filename_cat($ENV{'GSDLHOME'},"bin","java");
-    my $jar_filename = &util::filename_cat($bin_java,"xalan.jar");
-    my $xslt_base_cmd = "java -jar $jar_filename";
-    my $xslt_cmd = "$xslt_base_cmd -IN \"$filename\" -XSL \"$xslt_filename\"";
-    my $transformed_xml = "";
-    if (open(XSLT_IN,"$xslt_cmd |")) {
-    while (defined (my $line = <XSLT_IN>)) {
-        $transformed_xml .= $line;
+    }
-    close(XSLT_IN);
+    }
-    else {
-    print $outhandle "Error: Unable to run command $xslt_cmd\n";
-    print $outhandle "       $!\n";
+    }
-    return $transformed_xml;
+}
 …
 # Override ConvertToPlug tmp_area_convert_file() to provide solution specific
+# Override ConvertBinaryFile tmp_area_convert_file() to provide solution specific
 # to CONTENTdm
+#
 …
     $self->{'converted_to'} = "HTML";
     } elsif ($output_type =~ /te?xt/i) {
     $self->{'converted_to'} = "TEXT";
+    $self->{'converted_to'} = "Text";
     } elsif ($output_type =~ /item/i){
     $self->{'converted_to'} = "PagedImg";
+    $self->{'converted_to'} = "PagedImage";
+    }
 …
 # Override ConvertToPlug read
+# Override ConvertBinaryFile read
 # Needed so that the multiple .item files generated are sent down to the secondary plugin
 …
     my ($block_status,$filename) = $self->read_block(@_);
     return $block_status if ((!defined $block_status) || ($block_status==0));
     $file = $self->read_tidy_file($file);
     # read() deviates at this point from ConvertToPlug
+    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
+    # read() deviates at this point from ConvertBinaryFile
     # Need to work with list of filename returned
 …
     my ($filemeta) = $file =~ /([^\\\/]+)$/;
     $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));
+    $self->set_Source_metadata($doc_obj, $filemeta);
     $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
     $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename));
 …
     # add an OID
     $doc_obj->set_OID();
+    $self->add_OID($doc_obj);
     # process the document
     $processor->process($doc_obj);
 …
+}
 sub process {
+    my $self = shift (@_);
+    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
+}
+# Do we need this? The secondary plugin's process() would already have been called as part of read_into_doc_obj()?
+sub process_old {
+    my $self = shift (@_);
+    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
 …
+}
-sub StartDocument {$self->xml_start_document(@_);}
-sub XMLDecl {$self->xml_xmldecl(@_);}
-sub Entity {$self->xml_entity(@_);}
-sub Doctype {$self->xml_doctype(@_);}
-sub StartTag {$self->xml_start_tag(@_);}
-sub EndTag {$self->xml_end_tag(@_);}
-sub Text {$self->xml_text(@_);}
-sub PI {$self->xml_pi(@_);}
-sub EndDocument {$self->xml_end_document(@_);}
-sub Default {$self->xml_default(@_);}
-# This Char function overrides the one in XML::Parser::Stream to overcome a
-# problem where $expat->{Text} is treated as the return value, slowing
-# things down significantly in some cases.
-sub Char {
-    use bytes;  # Necessary to prevent encoding issues with XML::Parser 2.31+
-    $_[0]->{'Text'} .= $_[1];
-    return undef;
+}
 # Called at the beginning of the XML document.
 …
+}
-# Called for XML declarations
-sub xml_xmldecl {
-    my $self = shift(@_);
-    my ($expat, $version, $encoding, $standalone) = @_;
+}
-# Called for XML entities
-sub xml_entity {
-  my $self = shift(@_);
-  my ($expat, $name, $val, $sysid, $pubid, $ndata) = @_;
+}
 # Called for DOCTYPE declarations - use die to bail out if this doctype
 …
     my $outhandle = $self->{'outhandle'};
     print $outhandle "CONTENTdmPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
+    print $outhandle "CONTENTdmPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
+}
 …
+}
-# Called for processing instructions. The $_ variable will contain a copy
-# of the pi.
-sub xml_pi {
-    my $self = shift(@_);
-    my ($expat, $target, $data) = @_;
+}
 # Called at the end of the XML document.
 sub xml_end_document {
 …
+}
-# Called for any characters not handled by the above functions.
-sub xml_default {
-    my $self = shift(@_);
-    my ($expat, $text) = @_;
+}
 ;

gsdl/trunk/perllib/plugins/CSVPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # CSVPlug.pm -- A plugin for files in comma-separated value format
+# CSVPlugin.pm -- A plugin for files in comma-separated value format
+#
 # A component of the Greenstone digital library software
 …
 ###########################################################################
 package CSVPlug;
+package CSVPlugin;
 use SplitPlug;
+use SplitTextFile;
 use strict;
 no strict 'refs'; # allow filehandles to be variables and viceversa
 # CSVPlug is a sub-class of SplitPlug.
+# CSVPlugin is a sub-class of SplitTextFile.
 sub BEGIN {
     @CSVPlug::ISA = ('SplitPlug');
+    @CSVPlugin::ISA = ('SplitTextFile');
+}
 …
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'reqd' => "no",
     'deft' => &get_default_process_exp() },
       { 'name' => "split_exp",
     'desc' => "{SplitPlug.split_exp}",
+    'desc' => "{SplitTextFile.split_exp}",
     'type' => "regexp",
     'reqd' => "no",
 …
 my $options = { 'name'     => "CSVPlug",
         'desc'     => "{CSVPlug.desc}",
+my $options = { 'name'     => "CSVPlugin",
+        'desc'     => "{CSVPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});}
     if (defined $options) { push(@{$hashArgOptLists->{"OptList"}}, $options)};
+    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}}, $options);
     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists);
+    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
     open(FILE, $filename);
     my $reader = new multiread();
     $reader->set_handle('CSVPlug::FILE');
+    $reader->set_handle('CSVPlugin::FILE');
     $reader->set_encoding($encoding);
     $reader->read_file($textref);
 …
     # Report that we're processing the file
     print STDERR "\n<Processing n='$file' p='CSVPlug'>\n" if ($gli);
     print $outhandle "CSVPlug: processing $file\n" if ($self->{'verbosity'}) > 1;
+    print STDERR "\n<Processing n='$file' p='CSVPlugin'>\n" if ($gli);
+    print $outhandle "CSVPlugin: processing $file\n" if ($self->{'verbosity'}) > 1;
     # Add the raw line as the document text

gsdl/trunk/perllib/plugins/ConvertToRogPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # ConvertToRogPlug.pm -- plugin that inherits from RogPlug
+# ConvertToRogPlugin.pm -- plugin that inherits from RogPlugin
+#
 # A component of the Greenstone digital library software
 …
+package ConvertToRogPlug;
+use BasPlug;
+use RogPlug;
+package ConvertToRogPlugin;
+use RogPlugin;
 use strict;
 no strict 'refs'; # allow filehandles to be variables and viceversa
 sub BEGIN {
     @ConvertToRogPlug::ISA = ('RogPlug');
+    @ConvertToRogPlugin::ISA = ('RogPlugin');
+}
 my $arguments = [
          ];
 my $options = { 'name'     => "ConvertToRogPlug",
         'desc'     => "{ConvertToRogPlug.desc}",
+my $options = { 'name'     => "ConvertToRogPlugin",
+        'desc'     => "{ConvertToRogPlugin.desc}",
         'abstract' => "yes",
         'inherits' => "yes" };
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new RogPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new RogPlugin($pluginlist, $inputargs, $hashArgOptLists);
     $self->{'convert_to'} = "Rog";
 …
 # Exact copy of read_rog_record from RogPlug
+# Exact copy of read_rog_record from RogPlugin
 # Needed for FILE in right scope
 …
+}
 # Override RogPlug function so rog files are stored as sections (not docs)
+# Override RogPlugin function so rog files are stored as sections (not docs)
 sub process_rog_record
 …
 # Override BasPlug read
+# Override BasePlugin read
 # We don't want to get language encoding stuff until after we've converted
 # our file to Rog format
 …
     $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
     my ($filemeta) = $file =~ /([^\\\/]+)$/;
+    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));
+    $self->set_Source_metadata($doc_obj, $filemeta);
     if ($self->{'cover_image'}) {
     $self->associate_cover_image($doc_obj, $filename);
 …
     my $ret_val = 1;
 #   $ret_val = &RogPlug::process($self, $textref, $pluginfo,
+#   $ret_val = &RogPlugin::process($self, $textref, $pluginfo,
 #                $tmp_dirname, $tmp_tailname,
 #                $metadata, $doc_obj);

gsdl/trunk/perllib/plugins/DBPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # DBPlug.pm -- plugin to import records from a database
+# DBPlugin.pm -- plugin to import records from a database
+#
 # A component of the Greenstone digital library software
 …
 # Mar, Apr 2003
 package DBPlug;
+package DBPlugin;
 use strict;
 no strict 'refs'; # allow variable as a filehandle
 use BasPlug;
+use AutoExtractMetadata;
 use unicode;
-#use DBI; # database independent stuff
 sub BEGIN {
     @DBPlug::ISA = ('BasPlug');
+    @DBPlugin::ISA = ('AutoExtractMetadata');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{AutoExtractMetadata.process_exp}",
     'type' => "regexp",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" }];
 my $options = { 'name'     => "DBPlug",
         'desc'     => "{DBPlug.desc}",
+my $options = { 'name'     => "DBPlugin",
+        'desc'     => "{DBPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new AutoExtractMetadata($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
     return q^(?i)\.dbi$^;
+}
-# we don't have a per-greenstone document process() function!
-sub process {
+}
 …
     my $verbosity = $self->{'verbosity'};
     print $outhandle "DBPlug: processing $file\n"
+    print $outhandle "DBPlugin: processing $file\n"
     if $self->{'verbosity'} > 1;
 …
     my $db=undef;
+# get id of pages from "nonempty", get latest version number from "recent", and
 # then get pagename from "page" and content from "version" !
+    # get id of pages from "nonempty", get latest version number from
+    # "recent", and then get pagename from "page" and content from "version" !
     my $sql_query_prime = undef ;
 …
     # read in config file.
     if (!open (CONF, $filename)) {
         print $outhandle "DBPlug: can't read $filename: $!\n";
+        print $outhandle "DBPlugin: can't read $filename: $!\n";
         return 0;
+    }
 …
         $callback =~ /[\`]|\|\-/) {
         # no backticks or functions that start new processes allowed
         print $outhandle "DBPlug: bad function in callback\n";
+        print $outhandle "DBPlugin: bad function in callback\n";
         return 0;
+        }
 …
         my $ret = eval "\$callbacks{'$fieldname'} = $callback ; 1";
         if (!defined($ret)) {
         print $outhandle "DBPlug: error eval'ing callback: $@\n";
+        print $outhandle "DBPlugin: error eval'ing callback: $@\n";
         exit(1);
+        }
         $callback="";
         print $outhandle "DBPlug: callback registered for '$fieldname'\n"
+        print $outhandle "DBPlugin: callback registered for '$fieldname'\n"
             if $dbplug_debug;
     } elsif ($callback) {
 …
             chomp $err;
             $err =~ s/\.$//; # remove a trailing .
             print $outhandle "DBPlug: error evaluating `$statement'\n";
+            print $outhandle "DBPlugin: error evaluating `$statement'\n";
             print $outhandle " $err (in $filename)\n";
             return 0; # there was an error reading the config file
 …
         $statement = "";
         } else {
         print $outhandle "DBPlug: skipping statement `$statement'\n";
+        print $outhandle "DBPlugin: skipping statement `$statement'\n";
+        }
         $statement = "";
 …
     if (!defined($db)) {
     print $outhandle "DBPlug: error: $filename does not specify a db!\n";
+    print $outhandle "DBPlugin: error: $filename does not specify a db!\n";
     return 0;
+    }
     if (!defined($sql_query)) {
         print $outhandle "DBPlug: error: no SQL query specified!\n";
+        print $outhandle "DBPlugin: error: no SQL query specified!\n";
     return 0;
+    }
 …
     if (!defined($dbhandle)) {
     die "DBPlug: could not connect to database, exiting.\n";
+    die "DBPlugin: could not connect to database, exiting.\n";
+    }
     if (defined($dbplug_debug) && $dbplug_debug==1) {
     print $outhandle "DBPlug (debug): connected ok\n";
+    print $outhandle "DBPlugin (debug): connected ok\n";
+    }
 …
     if (defined($db_to_greenstone_fields{$fieldname})) {
         if (defined($dbplug_debug) && $dbplug_debug==1) {
         print $outhandle "DBPlug (debug): mapping db field "
+        print $outhandle "DBPlugin (debug): mapping db field "
             . "'$fieldname' to "
             . $db_to_greenstone_fields{$fieldname} . "\n";
 …
     while (scalar(@row_array)) {
     if (defined($dbplug_debug) && $dbplug_debug==1) {
         print $outhandle "DBPlug (debug): retrieved a row from query\n";
+        print $outhandle "DBPlugin (debug): retrieved a row from query\n";
+    }
 …
     my $cursection = $doc_obj->get_top_section();
     # if $language not set in config file, will use BasPlug's default
+    # if $language not set in config file, will use BasePlugin's default
     if (defined($language)) {
         $doc_obj->add_utf8_metadata($cursection, "Language", $language);
+    }
     # if $encoding not set in config file, will use BasPlug's default
+    # if $encoding not set in config file, will use BasePlugin's default
     if (defined($encoding)) {
         # allow some common aliases
 …
         $doc_obj->add_utf8_metadata($cursection, "Encoding", $encoding);
+    }
     $doc_obj->add_utf8_metadata($cursection,
+                    "Source", &ghtml::dmsafe($db));
+    $self->set_Source_metadata($doc_obj, $db, $encoding);
     if ($self->{'cover_image'}) {
         $self->associate_cover_image($doc_obj, $filename);
 …
     # check "$sth->err" if empty array for error
     if ($statement_hand->err) {
     print $outhandle "DBPlug: received error: \"" .
+    print $outhandle "DBPlugin: received error: \"" .
         $statement_hand->errstr . "\"\n";
+    }
 …
     if (defined($dbplug_debug) && $dbplug_debug==1) {
         print $outhandle "DBPlug: imported $count DB records as documents.\n";
+        print $outhandle "DBPlugin: imported $count DB records as documents.\n";
+    }
     $count;

gsdl/trunk/perllib/plugins/DSpacePlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # DSpacePlug.pm -- plugin for import the collection from DSpace
+# DSpacePlugin.pm -- plugin for importing a collection from DSpace
+#
 # A component of the Greenstone digital library software
 …
 # University of Waikato, New Zealand.
+#
 # Copyright (C) 1999 New Zealand Digital Library Project
+# Copyright (C) 2004 New Zealand Digital Library Project
+#
 # This program is free software; you can redistribute it and/or modify
 …
 ###########################################################################
+# DSpace Plug - 10/2004
+#
+#
 # This plugin takes "contents" and dublin_core.xml file, which contain
 # Metadata and lists of associated files for a particular document
 …
+#
 package DSpacePlug;
 use BasPlug;
+package DSpacePlugin;
+use BasePlugin;
 use plugin;
-#use ghtml;
 use XMLParser;
 use strict;
 …
 sub BEGIN {
     @DSpacePlug::ISA = ('BasPlug');
+    @DSpacePlugin::ISA = ('BasePlugin');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "string",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" },
       { 'name' => "only_first_doc",
     'desc' => "{DSpacePlug.only_first_doc}",
+    'desc' => "{DSpacePlugin.only_first_doc}",
     'type' => "flag",
     'reqd' => "no" },
       { 'name' => "first_inorder_ext",
     'desc' => "{DSpacePlug.first_inorder_ext}",
+    'desc' => "{DSpacePlugin.first_inorder_ext}",
     'type' => "string",
     'reqd' => "no" },
       { 'name' => "first_inorder_mime",
     'desc' => "{DSpacePlug.first_inorder_mime}",
+    'desc' => "{DSpacePlugin.first_inorder_mime}",
     'type' => "flag",
     'reqd' => "no" },
       { 'name' => "block_exp",
     'desc' => "{BasPlug.block_exp}",
+    'desc' => "{BasePlugin.block_exp}",
     'type' => "regexp",
     'deft' => &get_default_block_exp(),
 …
 my $options = { 'name'     => "DSpacePlug",
         'desc'     => "{DSpacePlug.desc}",
+my $options = { 'name'     => "DSpacePlugin",
+        'desc'     => "{DSpacePlugin.desc}",
         'inherits' => "yes",
         'abstract' => "no",
 …
     push(@$pluginlist, $class);
+    if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
+    if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
+    $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
+    if ($self->{'info_only'}) {
+    # don't worry about creating the XML parser as all we want is the
+    # list of plugin options
+    return bless $self, $class;
+    }
     #create XML::Parser object for parsing dublin_core.xml files
     my $parser = new XML::Parser('Style' => 'Stream',
 …
+    }
     print $outhandle "DSpacePlug: extracting metadata from $file\n"
+    print $outhandle "DSpacePlugin: extracting metadata from $file\n"
     if $self->{'verbosity'} > 1;
 …
     if ($@) {
     die "DSpacePlug: ERROR $filename is not a well formed dublin_core.xml file ($@)\n";
+    die "DSpacePlugin: ERROR $filename is not a well formed dublin_core.xml file ($@)\n";
+    }
 …
     # Temporarily store associate file info in metadata table
     # This will be removed in 'extra_metadata' in BasPlug and used
+    # This will be removed in 'extra_metadata' in BasePlugin and used
     # to perform the actual file association (once the doc obj has
     # been formed
 …
+# The DSpacePlug read() function. This function does all the right things
+# to make general options work for a given plugin. It calls the process()
+# function which does all the work specific to a plugin (like the old
+# read functions used to do). Most plugins should define their own
+# process() function and let this read() function keep control.
+#
+# DSpace overrides read() because there is no need to read the actual
+# text of the file in, because the contents of the file is not text...
+#
+# Return number of files processed, undef if can't process
+# Note that $base_dir might be "" and that $file might
+# include directories
+# The DSpacePlugin read() function. We are not actually reading any documents
+# here, just blocking ones that have been processed by metadata read.
+#
+# Returns 0 for a file it is blocking, undef for any other file
 sub read {
     my $self = shift (@_);
 …
     return 0 if (defined $self->{'extra_blocks'}->{$filename});
     return undef;
+}
-# do plugin specific processing of doc_obj
-sub process {
-    my $self = shift (@_);
-    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
-    my $outhandle = $self->{'outhandle'};
-    return 1;
+}

gsdl/trunk/perllib/plugins/EmailPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # EMAILPlug.pm - a plugin for parsing email files
+# EmailPlugin.pm - a plugin for parsing email files
+#
 # A component of the Greenstone digital library software
 …
 # EMAILPlug
+# EmailPlugin
+#
 # by Gordon Paynter ([email protected])
 …
 # 12/05/02 Added usage datastructure - John Thompson
 package EMAILPlug;
+package EmailPlugin;
 use strict;
 …
 use SplitPlug;
+use SplitTextFile;
 use unicode;  # gs conv functions
 use gsprintf 'gsprintf'; # translations
 …
 sub BEGIN {
     @EMAILPlug::ISA = ('SplitPlug');
+    @EmailPlugin::ISA = ('SplitTextFile');
+}
 …
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'reqd' => "no",
     'deft' => &get_default_process_exp() },
       { 'name' => "no_attachments",
     'desc' => "{EMAILPlug.no_attachments}",
+    'desc' => "{EmailPlugin.no_attachments}",
     'type' => "flag",
     'reqd' => "no" },
       { 'name' => "headers",
     'desc' => "{EMAILPlug.headers}",
+    'desc' => "{EmailPlugin.headers}",
     'type' => "flag",
     'reqd' => "no" },
       { 'name' => "split_exp",
     'desc' => "{EMAILPlug.split_exp}",
+    'desc' => "{EmailPlugin.split_exp}",
     'type' => "regexp",
     'reqd' => "no",
 …
       ];
 my $options = { 'name'     => "EMAILPlug",
         'desc'     => "{EMAILPlug.desc}",
+my $options = { 'name'     => "EmailPlugin",
+        'desc'     => "{EmailPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
         'args'     => $arguments };
 # Create a new EMAILPlug object with which to parse a file.
 # Accomplished by creating a new BasPlug and using bless to
 # turn it into an EMAILPlug.
+# Create a new EmailPlugin object with which to parse a file.
+# Accomplished by creating a new BasePlugin and using bless to
+# turn it into an EmailPlugin.
 sub new {
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists);
     $self->{'assoc_filenames'} = {}; # to save attach names so we don't clobber
 …
     print STDERR "<Processing n='$file' p='EMAILPlug'>\n" if ($gli);
     gsprintf($outhandle, "EMAILPlug: {common.processing} $file\n")
+    print STDERR "<Processing n='$file' p='EmailPlugin'>\n" if ($gli);
+    gsprintf($outhandle, "EmailPlugin: {common.processing} $file\n")
     if $self->{'verbosity'} > 1;
 …
+        }
     } else {
         print $outhandle "EMAILPlug: (warning) couldn't parse MIME boundary\n";
+        print $outhandle "EmailPlugin: (warning) couldn't parse MIME boundary\n";
+    }
     # parts start with "--$boundary"
 …
     # make sure it is only -- and whitespace
     if ($last !~ /^\-\-\s*$/ms) {
         print $outhandle "EMAILPlug: (warning) last part of MIME message isn't empty\n";
+        print $outhandle "EmailPlugin: (warning) last part of MIME message isn't empty\n";
+    }
     foreach my $message_part (@message_parts) {
 …
         # or it was an empty message...
         # do nothing...
         gsprintf($outhandle, "{BasPlug.empty_file} - empty body?\n");
+        gsprintf($outhandle, "{BasePlugin.empty_file} - empty body?\n");
         } else {
         $text = $part_text;
 …
+        }
         open (SAVE, ">$tmpdir/$save_filename") ||
         warn "EMAILPlug: Can't save attachment as $tmpdir/$save_filename: $!";
+        warn "EmailPlugin: Can't save attachment as $tmpdir/$save_filename: $!";
         my $part_text = $message_part;
         $part_text =~ s/(.*?)\r?\n\r?\n//s; # remove header
 …
 #           &util::rm("$tmpdir/$save_filename");
         my $outhandle=$self->{'outhandle'};
         print $outhandle "EMAILPlug: saving attachment \"$filename\"\n"; #
+        print $outhandle "EmailPlugin: saving attachment \"$filename\"\n"; #
         # be nice if "download" was a translatable macro :(
 …
         # rfc2045 also allows binary, which we ignore (for now).
         my $outhandle=$self->{'outhandle'};
         print $outhandle "EMAILPlug: unknown transfer encoding: $encoding\n";
+        print $outhandle "EmailPlugin: unknown transfer encoding: $encoding\n";
         return "";
+    }
 …
       if ($badbytesfound==1) {
           # claims to be utf8, but it isn't!
           print $outhandle "EMAILPlug: Headers claim utf-8 but bad bytes "
+          print $outhandle "EmailPlugin: Headers claim utf-8 but bad bytes "
           . "detected and removed.\n";
 …
       # 1252 has characters between 0x80 and 0x9f, 8859-1 doesn't
       if ($$textref =~ m/[\x80-\x9f]/) {
       print $outhandle "EMAILPlug: Headers claim ISO charset but MS ";
+      print $outhandle "EmailPlugin: Headers claim ISO charset but MS ";
       print $outhandle "codepage 1252 detected.\n";
       $charset = "windows_1252";
 …
       # characters out here if this causes problems...
       my $outhandle=$self->{'outhandle'};
       print $outhandle "EMAILPlug: falling back to iso-8859-1\n";
+      print $outhandle "EmailPlugin: falling back to iso-8859-1\n";
       $$textref=&unicode::unicode2utf8(&unicode::convert2unicode("iso_8859_1",$textref));

gsdl/trunk/perllib/plugins/ExcelPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # ExcelPlug.pm -- plugin for importing Microsoft Excel files.
+# ExcelPlugin.pm -- plugin for importing Microsoft Excel files.
 #  (currently only versions 95 and 97)
+#
 …
 ###########################################################################
 package ExcelPlug;
+package ExcelPlugin;
 use ConvertToPlug;
+use ConvertBinaryFile;
 use strict;
 no strict 'refs'; # allow filehandles to be variables and viceversa
 sub BEGIN {
     @ExcelPlug::ISA = ('ConvertToPlug');
+    @ExcelPlugin::ISA = ('ConvertBinaryFile');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'reqd' => "no",
 …
       ];
 my $options = { 'name'     => "ExcelPlug",
         'desc'     => "{ExcelPlug.desc}",
+my $options = { 'name'     => "ExcelPlugin",
+        'desc'     => "{ExcelPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists);
+    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
     if ($self->{'info_only'}) {
 …
+    }
+    $self->{'filename_extension'} = "xls";
+    $self->{'file_type'} = "Excel";
     my $secondary_plugin_options = $self->{'secondary_plugin_options'};
     if (!defined $secondary_plugin_options->{'HTMLPlug'}) {
     $secondary_plugin_options->{'HTMLPlug'} = [];
+    if (!defined $secondary_plugin_options->{'HTMLPlugin'}) {
+    $secondary_plugin_options->{'HTMLPlugin'} = [];
+    }
     my $html_options = $secondary_plugin_options->{'HTMLPlug'};
+    my $html_options = $secondary_plugin_options->{'HTMLPlugin'};
-    #$self->{'input_encoding'} = "utf8";
-    #$self->{'extract_language'} = 1;
     push(@$html_options, "-input_encoding", "utf8");
     push(@$html_options,"-extract_language") if $self->{'extract_language'};
 …
+}
 sub convert_post_process
+sub convert_post_process_old
+{
     my $self = shift (@_);
 …
+}
-sub process {
-    my $self = shift (@_);
-    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
-    return $self->process_type("xls",$base_dir,$file,$doc_obj);
+}
 ;

gsdl/trunk/perllib/plugins/FOXPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # FOXPlug.pm
+# FOXPlugin.pm
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
 # the appropriate fields in the file.
+# 12/05/02 Added usage datastructure - John Thompson
+package FOXPlug;
+use BasPlug;
+package FOXPlugin;
+use BasePlugin;
 use util;
 use doc;
 use unicode;
-use cnseg;
-# use gb;
 use strict;
 …
 sub BEGIN {
     @FOXPlug::ISA = ('BasPlug');
+    @FOXPlugin::ISA = ('BasePlugin');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'reqd' => "no",
     'deft' => &get_default_process_exp() },
       { 'name' => "block_exp",
     'desc' => "{BasPlug.block_exp}",
+    'desc' => "{BasePlugin.block_exp}",
     'type' => "regexp",
     'reqd' => "no",
     'deft' => &get_default_block_exp() } ];
 my $options = { 'name'     => "FOXPlug",
         'desc'     => "{FOXPlug.desc}",
+my $options = { 'name'     => "FOXPlugin",
+        'desc'     => "{FOXPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
     return $block_status if ((!defined $block_status) || ($block_status==0));
     print STDERR "<Processing n='$file' p='FOXPlug'>\n" if ($gli);
     print STDERR "FOXPlug: processing $file\n" if $self->{'verbosity'} > 1;
+    print STDERR "<Processing n='$file' p='FOXPlugin'>\n" if ($gli);
+    print STDERR "FOXPlugin: processing $file\n" if $self->{'verbosity'} > 1;
     my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i;
 …
         print STDERR "<ProcessingError n='$file' r='Could not read $fullname'>\n";
+    }
     print STDERR "FOXPlug::read - couldn't read $fullname\n";
+    print STDERR "FOXPlugin::read - couldn't read $fullname\n";
     return -1; # error in processing
+    }
 …
         print STDERR "<ProcessingError n='$file' r='EOF while reading database header'>\n";
+    }
     print STDERR "FOXPlug::read - eof while reading database header\n";
+    print STDERR "FOXPlugin::read - eof while reading database header\n";
     close (FOXBASEIN);
     return -1;
 …
         print STDERR "<ProcessingError n='$file' r='Does not seem to be a Foxbase file'>\n";
+    }
     print STDERR "FOXPlug:read - $fullname doesn't seem to be a Foxbase file\n";
+    print STDERR "FOXPlugin:read - $fullname doesn't seem to be a Foxbase file\n";
     return -1;
+    }
 …
         print STDERR "<ProcessingError n='$file' r='Could not read $dbtfullname'>\n";
+    }
     print STDERR "FOXPlug::read - couldn't read $dbtfullname\n";
+    print STDERR "FOXPlugin::read - couldn't read $dbtfullname\n";
     close (FOXBASEIN);
     return -1;

gsdl/trunk/perllib/plugins/FavouritesPlugin.pm

-              r15865
+              r15872
 # especially SRCPlug by John McPherson Nov 2000
 package FavouritesPlug;
+package FavouritesPlugin;
 use BasPlug;
+use ReadTextFile;
 use strict;
 no strict 'refs'; # allow filehandles to be variables and viceversa
 sub BEGIN {
     @FavouritesPlug::ISA = ('BasPlug');
+    @FavouritesPlugin::ISA = ('ReadTextFile');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{ReadTextFile.process_exp}",
     'type' => "regexp",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" } ];
 my $options = { 'name'     => "FavouritesPlug",
         'desc'     => "FavouritesPlug imports Internet Explorer style Favourites. Favourites are often found in the \"C:\\Documents and Settings\\[your username]\\Favorites\" folder on your computer, but can also be made by dragging a bookmark or location from your browser (any) to the desktop.",
+my $options = { 'name'     => "FavouritesPlugin",
+        'desc'     => "{FavouritesPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
     my $section = $doc_obj->get_top_section();
     print STDERR "<Processing n='$file' p='FavouritesPlug'>\n" if ($gli);
     print $outhandle "FavouritesPlug: processing $file\n" if $self->{'verbosity'} > 1;
+    print STDERR "<Processing n='$file' p='FavouritesPlugin'>\n" if ($gli);
+    print $outhandle "FavouritesPlugin: processing $file\n" if $self->{'verbosity'} > 1;
     # don't want mg to turn escape chars into actual values

gsdl/trunk/perllib/plugins/GAPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # GAPlug.pm
+# GAPlugin.pm
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
 # to their DTD.
 package GAPlug;
 use XMLPlug;
+package GAPlugin;
+use ReadXMLFile;
 use strict;
 …
 sub BEGIN {
     @GAPlug::ISA = ('XMLPlug');
+    @GAPlugin::ISA = ('ReadXMLFile');
+}
 …
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" } ];
 my $options = { 'name'     => "GAPlug",
         'desc'     => "{GAPlug.desc}",
+my $options = { 'name'     => "GAPlugin",
+        'desc'     => "{GAPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists);
     $self->{'section'} = "";
 …
     my $outhandle = $self->{'outhandle'};
     print $outhandle "GAPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
     print STDERR "<Processing n='$self->{'file'}' p='GAPlug'>\n" if $self->{'gli'};
+    print $outhandle "GAPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
+    print STDERR "<Processing n='$self->{'file'}' p='GAPlugin'>\n" if $self->{'gli'};
+}

gsdl/trunk/perllib/plugins/GISExtractor.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # GISBasPlug.pm -- base class to enhance plugins with GIS capabilities
+# GISExtractor.pm -- extension base class to enhance plugins with GIS capabilities
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
 ###########################################################################
+package GISBasPlug;
+package GISExtractor;
+use PrintInfo;
 use util;
-use locale;
 use gsprintf 'gsprintf';
 …
 no strict 'refs'; # allow filehandles to be variables and viceversa
 no strict 'subs';
 #field categories in DataBase files
 #$LAT = 3;
 …
 BEGIN {
+    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
+}
+use BasPlug; # uses BasPlug, but is not inherited
+my $options = { 'name'     => "GISBasPlug",
+        'desc'     => "{GISBasPlug.desc}",
+    @GISExtractor::ISA = ('PrintInfo');
+}
+my $arguments =
+    [ { 'name' => "extract_placenames",
+    'desc' => "{GISExtractor.extract_placenames}",
+    'type' => "flag",
+    'reqd' => "no" },
+      { 'name' => "gazetteer",
+    'desc' => "{GISExtractor.gazetteer}",
+    'type' => "string",
+    'reqd' => "no" },
+      { 'name' => "place_list",
+    'desc' => "{GISExtractor.place_list}",
+    'type' => "flag",
+    'reqd' => "no" } ];
+my $options = { 'name'     => "GISExtractor",
+        'desc'     => "{GISExtractor.desc}",
         'abstract' => "yes",
+        'inherits' => "no" };
+        'inherits' => "yes",
+        'args' => $arguments };
 sub new {
+    my $class = shift (@_);
+    my $plugin_name = shift (@_);
+    my $self = {};
+    $self->{'plugin_type'} = "GISBasPlug";
+    $self->{'option_list'} = [ $options ];
+    my ($class) = shift (@_);
+    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
+    push(@$pluginlist, $class);
+    # can we indicate that these are not available if the map data is not there??
+    #if (has_mapdata()) {
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    #}
+    my $self = new PrintInfo($pluginlist, $inputargs, $hashArgOptLists);
+    if ($self->{'extract_placenames'}) {
+    my $outhandle = $self->{'outhandle'};
+    my $places_ref
+        = $self->loadGISDatabase($outhandle,$self->{'gazetteer'});
+    if (!defined $places_ref) {
+        print $outhandle "Warning: Error loading mapdata gazetteer \"$self->{'gazetteer'}\"\n";
+        print $outhandle "         No placename extraction will take place.\n";
+        $self->{'extract_placenames'} = undef;
+    }
+    else {
+        $self->{'places'} = $places_ref;
+    }
+    }
     return bless $self, $class;
+}
+sub init {
+}
+sub print_xml_usage
+}
+sub extract_gis_metadata
+{
+    BasPlug::print_xml_usage(@_);
+}
+sub print_xml
+{
+    BasPlug::print_xml(@_);
+}
+sub print_txt_usage
+{
+   BasPlug::print_txt_usage(@_);
+}
+sub determine_description_offset
+{
+    BasPlug::determine_description_offset(@_);
+}
+sub print_plugin_usage
+{
+    my $plugindesc = $options->{'desc'};
+    if (defined($plugindesc)) {
+    gsprintf(STDERR, "$plugindesc\n\n");
+    }
+}
+sub set_incremental
+{
+    BasPlug::set_incremental(@_);
+    my $self = shift (@_);
+    my ($doc_obj) = @_;
+    if ($self->{'extract_placenames'}) {
+    my $thissection = $doc_obj->get_top_section();
+    while (defined $thissection) {
+        my $text = $doc_obj->get_text($thissection);
+        $self->extract_placenames (\$text, $doc_obj, $thissection) if $text =~ /./;
+        $thissection = $doc_obj->get_next_section ($thissection);
+    }
+    }
+}
 …
     $doc_obj->associate_file($tempfile, "places.txt", "text/plain");
     $self->{'places_filename'} = $tempfile;
     my %countries = ();
 …
     #this line removes apostrophes from placenames (they break the javascript function)
     $$textref =~ s/(javascript:popUp.*?)(\w)'(\w)/$1$2$3/g;
+    $$textref =~ s/(javascript:popUp.*?)(\w)'(\w)/$1$2$3/g; #' (to get emacs colours back)
     #for displaying map of document, count num of places from each country
 …
     if ($self->{'verbosity'} > 2);
+}
+sub clean_up_temp_files { # Discards the temporary file whose path is recorded in $self->{'places_filename'}, if there is one.
+    my $self = shift(@_); # called as a method; takes no other arguments
+    if(defined($self->{'places_filename'}) && -e $self->{'places_filename'}){ # act only when a path is recorded and that path still exists on disk
+    &util::rm($self->{'places_filename'}); # presumably deletes the file -- util::rm is defined outside this file, confirm its semantics there
+    }
+    $self->{'places_filename'} = undef; # always forget the recorded path, whether or not anything was removed
+}

gsdl/trunk/perllib/plugins/GMLPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # GMLPlug.pm --
+# GMLPlugin.pm --
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
 # 12/05/02 Added usage datastructure - John Thompson
 package GMLPlug;
 use BasPlug;
+package GMLPlugin;
+use BasePlugin;
 use util;
 use doc;
 …
 sub BEGIN {
     @GMLPlug::ISA = ('BasPlug');
+    @GMLPlugin::ISA = ('BasePlugin');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'deft' =>  &get_default_process_exp() }
     ];
 my $options = { 'name'     => "GMLPlug",
         'desc'     => "{GMLPlug.desc}",
+my $options = { 'name'     => "GMLPlugin",
+        'desc'     => "{GMLPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
     $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
     print STDERR "<Processing n='$file' p='GMLPlug'>\n" if ($gli);
     print $outhandle "GMLPlug: processing $file\n";
+    print STDERR "<Processing n='$file' p='GMLPlugin'>\n" if ($gli);
+    print $outhandle "GMLPlugin: processing $file\n";
     my $parent_dir = $file;
 …
         print STDERR "<ProcessingError n='$file' r='Could not read $filename'>\n";
+    }
     print $outhandle "GMLPlug::read - couldn't read $filename\n";
+    print $outhandle "GMLPlugin::read - couldn't read $filename\n";
     return -1;
+    }
 …
         if ($gml =~ /^\s*([^>]*)>(.*)$/so) {
             $tags = $1 if defined $1;
             $text = &GMLPlug::_unescape_text($2);
+            $text = &GMLPlugin::_unescape_text($2);
         } else {
             print $outhandle "GMLPlug::read - error in file $filename\n";
+            print $outhandle "GMLPlugin::read - error in file $filename\n";
             print $outhandle "text: \"$gml\"\n";
             last;
 …
         # could be stored as either attributes or ....
         while ((defined $tags) && ($tags =~ s/^\s*(\S+)=\"([^\"]*)\"//o)) {
             $doc_obj->add_utf8_metadata($section, $1, &GMLPlug::_unescape_text($2))
+            $doc_obj->add_utf8_metadata($section, $1, &GMLPlugin::_unescape_text($2))
             if (defined $1 and defined $2);
 …
                 $tagname =~ s/^&\#47;/\//;
                 $doc_obj->add_utf8_metadata($section, $tagname, &GMLPlug::_unescape_text($tagvalue));
+                $doc_obj->add_utf8_metadata($section, $tagname, &GMLPlugin::_unescape_text($tagvalue));
+            }
+            }

gsdl/trunk/perllib/plugins/HBPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # HBPlug.pm --
+# HBPlugin.pm --
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
 # Humanity Library collections
 package HBPlug;
+package HBPlugin;
 use ghtml;
 use BasPlug;
+use BasePlugin;
 use unicode;
 use util;
 …
 sub BEGIN {
+    @HBPlug::ISA = ('BasPlug');
+}
+    @HBPlugin::ISA = ('BasePlugin');
+}
+my $encoding_list =
+    [ { 'name' => "ascii",
+    'desc' => "{ReadTextFile.input_encoding.ascii}" },
+      { 'name' => "iso_8859_1",
+    'desc' => "Latin1 (western languages)" } ];
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'reqd' => "no",
+    'deft' => &get_default_process_exp() }
+    'deft' => &get_default_process_exp() },
+      { 'name' => "input_encoding",
+    'desc' => "{ReadTextFile.input_encoding}",
+    'type' => "enum",
+    'deft' => "iso_8859_1",
+    'list' => $encoding_list,
+    'reqd' => "no" }
       ];
 my $options = { 'name'     => "HBPlug",
         'desc'     => "{HBPlug.desc}",
+my $options = { 'name'     => "HBPlugin",
+        'desc'     => "{HBPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
+}
-sub init {
-    my $self = shift (@_);
-    my ($verbosity, $outhandle) = @_;
-    $self->BasPlug::init($verbosity, $outhandle);
-    $self->{'input_encoding'} = "iso_8859_1";
-    # this plugin only handles ascii encodings
-    if ($self->{'input_encoding'} !~ /^(iso_8859_1|ascii)$/) {
-    die "ERROR: HBPlug can handle only iso_8859_1 or ascii encodings.\n" .
-        $self->{'input_encoding'} . " is not an acceptable input_encoding value\n";
+    }
+}
 # this is included only to prevent warnings being printed out
 # from BasPlug::init. The process_exp is not used by this plugin
+# from BasePlugin::init. The process_exp is not used by this plugin
 sub get_default_process_exp {
     my $self = shift (@_);
 …
     if ($line =~ /<font [^>]*?face\s*=\s*\"?(\w+)\"?/i) {
         my $font = $1;
         print $outhandle "HBPlug::HB_gettext - warning removed font $font\n"
+        print $outhandle "HBPlugin::HB_gettext - warning removed font $font\n"
         if ($font !~ /^arial$/i);
+    }
 …
+}
-# if input_encoding is ascii we can call add_utf8_metadata
-# directly but if it's iso_8859_1 (the default) we need to call
-# add_metadata so that the ascii2utf8 conversion is done first
-# this should speed things up a little if processing an ascii only
-# document with input_encoding set to ascii
-sub HB_add_metadata {
-    my $self = shift (@_);
-    my ($doc_obj, $cursection, $field, $value) = @_;
-# All text should now be in utf-8
-#    if ($self->{'input_encoding'} eq "ascii") {
-    $doc_obj->add_utf8_metadata ($cursection, $field, $value);
-#    } else {
-#   $doc_obj->add_metadata ($cursection, $field, $value);
-#    }
+}
 # return number of files processed, undef if can't process
 # Note that $base_dir might be "" and that $file might
 …
     return undef unless -e $htmlfile;
     print STDERR "<Processing n='$file' p='HBPlug'>\n" if ($gli);
     print $outhandle "HBPlug: processing $file\n";
+    print STDERR "<Processing n='$file' p='HBPlugin'>\n" if ($gli);
+    print $outhandle "HBPlugin: processing $file\n";
     # read in the file and do basic html cleaning (removing header etc)
 …
     # $metadata->{$field} may be an array reference
     if (ref ($metadata->{$field}) eq "ARRAY") {
         map {
         $self->HB_add_metadata ($doc_obj, $cursection, $field, $_);
+        map {
+        $doc_obj->add_utf8_metadata($cursection, $field, $_);
         } @{$metadata->{$field}};
     } else {
         $self->HB_add_metadata ($doc_obj, $cursection, $field, $metadata->{$field});
+        $doc_obj->add_utf8_metadata($cursection, $field, $metadata->{$field});
+    }
+    }
 …
         # add the metadata to this section
         $self->HB_add_metadata ($doc_obj, $cursection, "Title", $title);
+        $doc_obj->add_utf8_metadata($cursection, "Title", $title);
         # clean up the section html
 …
         # add the text for this section
-# All read text should now be in utf-8
-#       if ($self->{'input_encoding'} eq "ascii") {
         $doc_obj->add_utf8_text ($cursection, $sectiontext);
-#       } else {
-#       $doc_obj->add_text ($cursection, $sectiontext);
-#       }
     } else {
         print $outhandle "WARNING - leftover text\n" , $self->shorten($html),

gsdl/trunk/perllib/plugins/HTMLPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # HTMLPlug.pm -- basic html plugin
+# HTMLPlugin.pm -- basic html plugin
+#
 # A component of the Greenstone digital library software
 …
+#
+package HTMLPlug;
+use BasPlug;
+package HTMLPlugin;
+use ReadTextFile;
+use HBPlugin;
 use ghtml;
 use unicode;
 …
 sub BEGIN {
     @HTMLPlug::ISA = ('BasPlug');
+    @HTMLPlugin::ISA = ('ReadTextFile', 'HBPlugin');
+}
 …
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'deft' =>  &get_default_process_exp() },
       { 'name' => "block_exp",
     'desc' => "{BasPlug.block_exp}",
+    'desc' => "{BasePlugin.block_exp}",
     'type' => 'regexp',
     'deft' =>  &get_default_block_exp() },
       { 'name' => "nolinks",
     'desc' => "{HTMLPlug.nolinks}",
+    'desc' => "{HTMLPlugin.nolinks}",
     'type' => "flag" },
       { 'name' => "keep_head",
     'desc' => "{HTMLPlug.keep_head}",
+    'desc' => "{HTMLPlugin.keep_head}",
     'type' => "flag" },
       { 'name' => "no_metadata",
     'desc' => "{HTMLPlug.no_metadata}",
+    'desc' => "{HTMLPlugin.no_metadata}",
     'type' => "flag" },
       { 'name' => "metadata_fields",
     'desc' => "{HTMLPlug.metadata_fields}",
+    'desc' => "{HTMLPlugin.metadata_fields}",
     'type' => "string",
     'deft' => "Title" },
       { 'name' => "hunt_creator_metadata",
     'desc' => "{HTMLPlug.hunt_creator_metadata}",
+    'desc' => "{HTMLPlugin.hunt_creator_metadata}",
     'type' => "flag" },
       { 'name' => "file_is_url",
     'desc' => "{HTMLPlug.file_is_url}",
+    'desc' => "{HTMLPlugin.file_is_url}",
     'type' => "flag" },
       { 'name' => "assoc_files",
     'desc' => "{HTMLPlug.assoc_files}",
+    'desc' => "{HTMLPlugin.assoc_files}",
     'type' => "regexp",
     'deft' => &get_default_block_exp() },
       { 'name' => "rename_assoc_files",
     'desc' => "{HTMLPlug.rename_assoc_files}",
+    'desc' => "{HTMLPlugin.rename_assoc_files}",
     'type' => "flag" },
       { 'name' => "title_sub",
     'desc' => "{HTMLPlug.title_sub}",
+    'desc' => "{HTMLPlugin.title_sub}",
     'type' => "string",
     'deft' => "" },
       { 'name' => "description_tags",
     'desc' => "{HTMLPlug.description_tags}",
+    'desc' => "{HTMLPlugin.description_tags}",
     'type' => "flag" },
       # retain this for backward compatibility (w3mir option was replaced by
       # file_is_url)
       { 'name' => "w3mir",
 #   'desc' => "{HTMLPlug.w3mir}",
+#   'desc' => "{HTMLPlugin.w3mir}",
     'type' => "flag",
     'hiddengli' => "yes"},
       { 'name' => "no_strip_metadata_html",
     'desc' => "{HTMLPlug.no_strip_metadata_html}",
+    'desc' => "{HTMLPlugin.no_strip_metadata_html}",
     'type' => "string",
     'deft' => "",
     'reqd' => "no"},
       { 'name' => "sectionalise_using_h_tags",
     'desc' => "{HTMLPlug.sectionalise_using_h_tags}",
+    'desc' => "{HTMLPlugin.sectionalise_using_h_tags}",
     'type' => "flag" },
       { 'name' => "use_realistic_book",
         'desc' => "{HTMLPlug.tidy_html}",
+        'desc' => "{HTMLPlugin.tidy_html}",
     'type' => "flag"},
+      { 'name' => "is_old_HDL_tags",
+        'desc' => "{HTMLPlug.old_style_HDL}",
+    'type' => "flag"},
+      { 'name' => "no_image_links",            # in future think about removing this option,
+        'desc' => "{HTMLPlug.no_image_links}", # since it has become the default behaviour
+    'type' => "flag"},
+      { 'name' => "old_style_HDL",
+        'desc' => "{HTMLPlugin.old_style_HDL}",
+    'type' => "flag"}
       ];
 my $options = { 'name'     => "HTMLPlug",
         'desc'     => "{HTMLPlug.desc}",
+my $options = { 'name'     => "HTMLPlugin",
+        'desc'     => "{HTMLPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     if (($self->{'tidy_html'}) || ($self->{'old_style_HDL'}))
+    {
+        # because the document has to be sectionalized set the description tags
+        $self->{'description_tags'} = 1;
+        # set the file to be tidied
+            $input_filename = &util::filename_cat($base_dir,$file) if $base_dir =~ /\w/;
+            # get the tidied file
+            #my $tidy_filename = $self->tmp_tidy_file($input_filename);
+        my $tidy_filename = $self->convert_tidy_or_oldHDL_file($input_filename);
+            # derive tmp filename from input filename
+            my ($tailname, $dirname, $suffix) = &File::Basename::fileparse($tidy_filename, "\\.[^\\.]+\$");
+    # because the document has to be sectionalized set the description tags
+    $self->{'description_tags'} = 1;
+        # set the new input file and base_dir to be from the tidied file
+        $file = "$tailname$suffix";
+        $base_dir = $dirname;
+    # set the file to be tidied
+    $input_filename = &util::filename_cat($base_dir,$file) if $base_dir =~ /\w/;
+    # get the tidied file
+    #my $tidy_filename = $self->tmp_tidy_file($input_filename);
+    my $tidy_filename = $self->convert_tidy_or_oldHDL_file($input_filename);
+    # derive tmp filename from input filename
+    my ($tailname, $dirname, $suffix) = &File::Basename::fileparse($tidy_filename, "\\.[^\\.]+\$");
+    # set the new input file and base_dir to be from the tidied file
+    $file = "$tailname$suffix";
+    $base_dir = $dirname;
+    }
     # call the parent read_into_doc_obj
     my ($process_status,$doc_obj) = &BasPlug::read_into_doc_obj($self,$pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli);
+    my ($process_status,$doc_obj) = $self->SUPER::read_into_doc_obj($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli);
     return ($process_status,$doc_obj);
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
     my $self = (defined $hashArgOptLists)? new BasPlug($pluginlist,$inputargs,$hashArgOptLists): new BasPlug($pluginlist,$inputargs);
+    my $self = new ReadTextFile($pluginlist,$inputargs,$hashArgOptLists);
     if ($self->{'w3mir'}) {
 …
     my $outhandle = $self->{'outhandle'};
     print STDERR "<Processing n='$file' p='HTMLPlug'>\n" if ($gli);
     print $outhandle "HTMLPlug: processing $file\n"
+    print STDERR "<Processing n='$file' p='HTMLPlugin'>\n" if ($gli);
+    print $outhandle "HTMLPlugin: processing $file\n"
     if $self->{'verbosity'} > 1;
 …
     # URL metadata (even invalid ones) are used to support internal
     # links, so even if 'file_is_url' is off, still need to store info
     $file = &BasPlug::filename_to_metadata($self, $file); # ensures filename is in UTF8 character encoding
     my $web_url = "http://$file";
     $doc_obj->add_utf8_metadata($cursection, "URL", $web_url); # will eventually ensure it is utf8 anyway
+    my $utf8_file = $self->filename_to_utf8_metadata($file);
+    my $web_url = "http://$utf8_file";
+    $doc_obj->add_utf8_metadata($cursection, "URL", $web_url);
     if ($self->{'file_is_url'}) {
 …
+    }
     if ($cursection ne "") {
         print $outhandle "HTMLPlug: WARNING: $file contains unmatched <Section></Section> tags\n";
+        print $outhandle "HTMLPlugin: WARNING: $file contains unmatched <Section></Section> tags\n";
+    }
 …
         if (!$found_something) {
         if ($self->{'verbosity'} > 2) {
             print $outhandle "HTMLPlug: WARNING: $file appears to contain no Section tags so\n";
+            print $outhandle "HTMLPlugin: WARNING: $file appears to contain no Section tags so\n";
             print $outhandle "          will be processed as a single section document\n";
+        }
 …
         } else {
         print $outhandle "HTMLPlug: WARNING: $file contains the following text outside\n";
+        print $outhandle "HTMLPlugin: WARNING: $file contains the following text outside\n";
         print $outhandle "          of the final closing </Section> tag. This text will\n";
         print $outhandle "          be ignored.";
 …
         # been processed already but we should print the warning
         # as above and extract metadata
         print $outhandle "HTMLPlug: WARNING: $file appears to contain no Section tags and\n";
+        print $outhandle "HTMLPlugin: WARNING: $file appears to contain no Section tags and\n";
         print $outhandle "          is blank or empty.  Metadata will be assigned if present.\n";
+        }
 …
     # trap images
     # Previously, by default, HTMLPlug would embed <img> tags inside anchor tags
+    # Previously, by default, HTMLPlugin would embed <img> tags inside anchor tags
     # i.e. <a href="image"><img src="image"></a> in order to overcome a problem that
     # turned regular text succeeding images into links. That is, by embedding <imgs>
 …
     # If at any time, there is a need for having images embedded in <a> anchor tags,
     # then it might be better to turn that into an HTMLPlug option rather than make
+    # then it might be better to turn that into an HTMLPlugin option rather than make
     # it the default behaviour. Also, eventually, no_image_links needs to become
     # a deprecated option for HTMLPlug as it has now become the default behaviour.
+    # a deprecated option for HTMLPlugin as it has now become the default behaviour.
     #if(!$self->{'no_image_links'}){
     $$textref =~ s/(<(?:img|embed|table|tr|td)[^>]*?(?:src|background)\s*=\s*)([\"][^\"]+[\"]|[\'][^\']+[\']|[^\s\/>]+)([^>]*>)/
         $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge;
+    $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge;
     #}
 …
     $back="\"$back";
+    }
     $link =~ s/\n/ /g;
 …
     my ($before_hash, $hash_part) = $link =~ /^([^\#]*)(\#?.*)$/;
     $hash_part = "" if !defined $hash_part;
     if (!defined $before_hash || $before_hash !~ /[\w\.\/]/) {
     my $outhandle = $self->{'outhandle'};
     print $outhandle "HTMLPlug: ERROR - badly formatted tag ignored ($link)\n"
+    print $outhandle "HTMLPlugin: ERROR - badly formatted tag ignored ($link)\n"
         if $self->{'verbosity'};
     return ($link, "", 0);
 …
     if (!defined $tag) {
         print $outhandle "HTMLPlug: can't find NAME in \"$metatag\"\n";
+        print $outhandle "HTMLPlugin: can't find NAME in \"$metatag\"\n";
         next;
+    }
 …
+    }
     if (!defined $value) {
         print $outhandle "HTMLPlug: can't find VALUE in \"$metatag\"\n";
+        print $outhandle "HTMLPlugin: can't find VALUE in \"$metatag\"\n";
         next;
+    }
 …
 # Extend the BasPlug read_file so that strings like &eacute; are
+# Extend read_file so that strings like &eacute; are
 # converted to UTF8 internally.
+#
 …
 sub read_file {
+    my ($self, $filename, $encoding, $language, $textref) = @_;
+    &BasPlug::read_file($self, $filename, $encoding, $language, $textref);
+    my $self = shift(@_);
+    my ($filename, $encoding, $language, $textref) = @_;
+    $self->SUPER::read_file($filename, $encoding, $language, $textref);
     # Convert entities to their UTF8 equivalents

gsdl/trunk/perllib/plugins/ISISPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # ISISPlug.pm -- A plugin for CDS/ISIS databases
+# ISISPlugin.pm -- A plugin for CDS/ISIS databases
+#
 # A component of the Greenstone digital library software
 …
 ###########################################################################
 package ISISPlug;
+package ISISPlugin;
 use multiread;
 use SplitPlug;
+use SplitTextFile;
 use strict;
 no strict 'refs'; # allow filehandles to be variables and viceversa
 # ISISPlug is a sub-class of SplitPlug.
+# ISISPlugin is a sub-class of SplitTextFile.
 sub BEGIN {
     @ISISPlug::ISA = ('SplitPlug');
+    @ISISPlugin::ISA = ('SplitTextFile');
+}
 …
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'reqd' => "no",
     'deft' => &get_default_process_exp() },
       { 'name' => "block_exp",
     'desc' => "{BasPlug.block_exp}",
+    'desc' => "{BasePlugin.block_exp}",
     'type' => "regexp",
     'reqd' => "no",
 …
     'hiddengli' => "yes" },
       { 'name' => "split_exp",
     'desc' => "{SplitPlug.split_exp}",
+    'desc' => "{SplitTextFile.split_exp}",
     'type' => "regexp",
     'reqd' => "no",
 …
       # The interesting options
       { 'name' => "entry_separator",
     'desc' => "{ISISPlug.entry_separator}",
+    'desc' => "{ISISPlugin.entry_separator}",
     'type' => "string",
     'reqd' => "no",
     'deft' => "<br>" },
       { 'name' => "subfield_separator",
     'desc' => "{ISISPlug.subfield_separator}",
+    'desc' => "{ISISPlugin.subfield_separator}",
     'type' => "string",
     'reqd' => "no",
 …
       ];
 my $options = { 'name'     => "ISISPlug",
         'desc'     => "{ISISPlug.desc}",
+my $options = { 'name'     => "ISISPlugin",
+        'desc'     => "{ISISPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists);
     if ($self->{'info_only'}) {
 …
     my $reader = new multiread();
     $reader->set_handle('ISISPlug::FILE');
+    $reader->set_handle('ISISPlugin::FILE');
     $reader->set_encoding($encoding);
     $reader->read_file($textref);
 …
     # Report that we're processing the file
     print STDERR "\n<Processing n='$file' p='ISISPlug'>\n" if ($gli);
+    print STDERR "\n<Processing n='$file' p='ISISPlugin'>\n" if ($gli);
     print $outhandle "IsisPlug: processing $file\n" if ($self->{'verbosity'}) > 1;
 …
     my $fdtfiletext = "";
     my $reader = new multiread();
     $reader->set_handle('ISISPlug::FDT_FILE');
+    $reader->set_handle('ISISPlugin::FDT_FILE');
     $reader->set_encoding($encoding);
     $reader->read_file($fdtfiletext);

gsdl/trunk/perllib/plugins/ImagePlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # ImagePlug.pm -- simple text plugin
+# ImagePlugin.pm -- simple text plugin
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
 ###########################################################################
 package ImagePlug;
+package ImagePlugin;
+use BasPlug;
+use BasePlugin;
+use ImageConverter;
 use strict;
 …
 sub BEGIN {
     @ImagePlug::ISA = ('BasPlug');
+    @ImagePlugin::ISA = ('BasePlugin', 'ImageConverter');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" },
+      { 'name' => "cache_generated_images",
+    'desc' => "{ImagePlug.cache_generated_image}",
+    'type' => "flag",
+    'reqd' => "no" },
+      { 'name' => "noscaleup",
+    'desc' => "{ImagePlug.noscaleup}",
+    'type' => "flag",
+    'reqd' => "no" },
+      { 'name' => "nothumbnail",
+    'desc' => "{ImagePlug.generatethumbnail}",
+    'type' => "flag",
+    'reqd' => "no" },
+      { 'name' => "thumbnailsize",
+    'desc' => "{ImagePlug.thumbnailsize}",
+    'type' => "int",
+    'deft' => "100",
+    'range' => "1,",
+    'reqd' => "no" },
+      { 'name' => "thumbnailtype",
+    'desc' => "{ImagePlug.thumbnailtype}",
+    'type' => "string",
+    'deft' => "gif",
+    'reqd' => "no" },
+      { 'name' => "noscreenview",
+    'desc' => "{ImagePlug.generatescreenview}",
+    'type' => "flag",
+    'reqd' => "no" },
+      { 'name' => "screenviewsize",
+    'desc' => "{ImagePlug.screenviewsize}",
+    'type' => "int",
+    'deft' => "0",
+    'range' => "1,",
+    'reqd' => "no" },
+      { 'name' => "screenviewtype",
+    'desc' => "{ImagePlug.screenviewtype}",
+    'type' => "string",
+    'deft' => "jpg",
+    'reqd' => "no" },
+      { 'name' => "converttotype",
+    'desc' => "{ImagePlug.converttotype}",
+    'type' => "string",
+    'deft' => "",
+    'reqd' => "no" },
+      { 'name' => "minimumsize",
+    'desc' => "{ImagePlug.minimumsize}",
+    'type' => "int",
+    'deft' => "100",
+    'range' => "1,",
+    'reqd' => "no" } ];
+      ];
 my $options = { 'name'     => "ImagePlug",
         'desc'     => "{ImagePlug.desc}",
+my $options = { 'name'     => "ImagePlugin",
+        'desc'     => "{ImagePlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
     $self->{'tmp_file_paths'} = ();
+    new ImageConverter($pluginlist, $inputargs, $hashArgOptLists);
+    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
+    # Check that ImageMagick is installed and available on the path (except for Windows 95/98)
+    if (!($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT())) {
+    my $result = `identify 2>&1`;
+    if ($? == -1 || $? == 256) {  # Linux and Windows return different values for "program not found"
+        $self->{'imagemagick_not_installed'} = 1;
+    }
+    }
+    return bless $self, $class;
+}
+    return bless $self, $class;
+sub init {
+    my $self = shift (@_);
+    my ($verbosity, $outhandle, $failhandle) = @_;
+    $self->SUPER::init(@_);
+    $self->ImageConverter::init();
+}
 …
     return;
+}
-# Create the thumbnail and screenview images, and discover the Image's
-# size, width, and height using the convert utility.
-sub generate_images
+{
-    my $self = shift (@_);
-    my $filename = shift (@_);   # filename with full path
-    my $file = shift (@_);       # filename without path
-    my $doc_obj = shift (@_);
-    my $section = $doc_obj->get_top_section();
-    my $verbosity = $self->{'verbosity'};
-    my $outhandle = $self->{'outhandle'};
-    # check the filename is okay
-    return 0 if ($file eq "" || $filename eq "");
-#    Code now extended to quote filenames in 'convert' commnads
-#    Allows spaces in filenames, but note needs spaces to be escaped in URL as well
-#    if ($filename =~ m/ /) {
-#   print $outhandle "ImagePlug: \"$filename\" contains a space. choking.\n";
-#   return undef;
-#    }
-    my $minimumsize = $self->{'minimumsize'};
-    if (defined $minimumsize && (-s $filename < $minimumsize)) {
-        print $outhandle "ImagePlug: \"$filename\" too small, skipping\n"
-        if ($verbosity > 1);
+    }
-    # Convert the image to a new type (if required).
-    my $converttotype = $self->{'converttotype'};
-    my $originalfilename = "";  # only set if we do a conversion
-    my $type = "unknown";
-    if ($converttotype ne "" && $filename !~ m/$converttotype$/) {
-    $originalfilename = $filename;
-    my $result = $self->convert($originalfilename, $converttotype, "", "");
-    ($filename) = ($result =~ /=>(.*\.$converttotype)/);
-    $type = $converttotype;
-    $file =~ s/\..*$/\.$type/;
+    }
-    # Add the image metadata
-    my $url = $file;
-    ##not know why it is required at the first place, it seems all works fine without it, so I comment it out
-    ##$url =~ s/ /%20/g;
-    my $utf8_filename_meta = $self->filename_to_metadata($url);
-    $doc_obj->add_utf8_metadata ($section, "Image", $utf8_filename_meta);
-    # Also want to set filename as 'Source' metadata to be
-    # consistent with other plugins
-    $doc_obj->add_utf8_metadata ($section, "Source", $utf8_filename_meta);
-    my ($image_type, $image_width, $image_height, $image_size)
-    = &identify($filename, $outhandle, $verbosity);
-    if ($image_type ne " ") {
-    $type = $image_type;
+    }
-    $doc_obj->add_metadata ($section, "FileFormat", $type);
-    $doc_obj->add_metadata ($section, "FileSize",   $image_size);
-    $doc_obj->add_metadata ($section, "ImageType",   $image_type);
-    $doc_obj->add_metadata ($section, "ImageWidth",  $image_width);
-    $doc_obj->add_metadata ($section, "ImageHeight", $image_height);
-    $doc_obj->add_metadata ($section, "ImageSize",   $image_size);
-    $doc_obj->add_metadata ($section, "NoText",    "1");
-    $doc_obj->add_metadata ($section, "srclink",
-                "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\">");
-    $doc_obj->add_metadata ($section, "/srclink", "</a>");
-    $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\" width=100>");
-    # Add the image as an associated file
-    $doc_obj->associate_file($filename,$file,"image/$type",$section);
-    if (!$self->{'nothumbnail'}) {
-    # Make the thumbnail image
-    my $thumbnailsize = $self->{'thumbnailsize'} || 100;
-    my $thumbnailtype = $self->{'thumbnailtype'} || 'gif';
-    # Generate the thumbnail with convert
-    my $result = $self->convert($filename, $thumbnailtype, "-geometry $thumbnailsize" . "x$thumbnailsize", "THUMB");
-    my ($thumbnailfile) = ($result =~ /=>(.*\.$thumbnailtype)/);
-    # Add the thumbnail as an associated file ...
-    if (-e "$thumbnailfile") {
-        $doc_obj->associate_file("$thumbnailfile", "thumbnail.$thumbnailtype",
-                     "image/$thumbnailtype",$section);
-        $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype);
-        $doc_obj->add_metadata ($section, "Thumb", "thumbnail.$thumbnailtype");
-        $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>");
+    }
-    # Extract Thumnail metadata from convert output
-    if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
-        $doc_obj->add_metadata ($section, "ThumbWidth", $1);
-        $doc_obj->add_metadata ($section, "ThumbHeight", $2);
+    }
+    }
-    # Make a screen-sized version of the picture if requested
-    if (!$self->{'noscreenview'}) {
-    # To do: if the actual image smaller than the screenview size,
-    # we should use the original !
-    my $screenviewsize = $self->{'screenviewsize'};
-    my $screenviewtype = $self->{'screenviewtype'} || 'jpeg';
-    # make the screenview image
-    my $result = $self->convert($filename, $screenviewtype, "-geometry $screenviewsize" . "x$screenviewsize", "SCREEN");
-    my ($screenviewfilename) = ($result =~ /=>(.*\.$screenviewtype)/);
-    # get screenview dimensions, size and type
-        if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
-        $doc_obj->add_metadata ($section, "ScreenWidth", $1);
-        $doc_obj->add_metadata ($section, "ScreenHeight", $2);
+    }
-    else {
-        $doc_obj->add_metadata ($section, "ScreenWidth", $image_width);
-        $doc_obj->add_metadata ($section, "ScreenHeight", $image_height);
+    }
-    #add the screenview as an associated file ...
-    if (-e "$screenviewfilename") {
-        $doc_obj->associate_file("$screenviewfilename", "screenview.$screenviewtype",
-                     "image/$screenviewtype",$section);
-        $doc_obj->add_metadata ($section, "ScreenType", $screenviewtype);
-        $doc_obj->add_metadata ($section, "Screen", "screenview.$screenviewtype");
-        $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>");
-    } else {
-        print $outhandle "ImagePlug: couldn't find \"$screenviewfilename\"\n";
+    }
+    }
-    return $type;
+}
-# Discover the characteristics of an image file with the ImageMagick
-# "identify" command.
-sub identify {
-    my ($image, $outhandle, $verbosity) = @_;
-    # Use the ImageMagick "identify" command to get the file specs
-    my $command = "identify \"$image\" 2>&1";
-    print $outhandle "$command\n" if ($verbosity > 2);
-    my $result = '';
-    $result = `$command`;
-    print $outhandle "$result\n" if ($verbosity > 3);
-    # Read the type, width, and height
-    my $type =   'unknown';
-    my $width =  'unknown';
-    my $height = 'unknown';
-    my $image_safe = quotemeta $image;
-    if ($result =~ /^$image_safe (\w+) (\d+)x(\d+)/) {
-    $type = $1;
-    $width = $2;
-    $height = $3;
+    }
-    # Read the size
-    my $size = "unknown";
-    if ($result =~ m/^.* ([0-9]+)b/) {
-    $size = $1;
+    }
-    elsif ($result =~ m/^.* ([0-9]+)(\.([0-9]+))?kb?/) {
-    $size = 1024 * $1;
-    if (defined($2)) {
-        $size = $size + (1024 * $2);
-        # Truncate size (it isn't going to be very accurate anyway)
-        $size = int($size);
+    }
+    }
-    elsif ($result =~ m/^.* (([0-9]+)(\.([0-9]+))?e\+([0-9]+))(kb|b)?/) {
-    # Deals with file sizes on Linux of type "3.4e+02kb" where e+02 is 1*10^2.
-    # 3.4e+02 therefore evaluates to 3.4 x 1 x 10^2 = 340kb.
-    # Programming languages including Perl know how that 3.4e+02 is a number,
-    # so we don't need to do any calculations.
-    $size = $1*1; # turn the string into a number by multiplying it by 1
-           #if we did $size = $1; $size would be merely the string "3.4e+02"
-    $size = int($size); # truncate size
+    }
-    print $outhandle "file: $image:\t $type, $width, $height, $size\n"
-    if ($verbosity > 2);
-    # Return the specs
-    return ($type, $width, $height, $size);
+}
-sub convert
+{
-    my $self = shift(@_);
-    my $source_file_path = shift(@_);
-    my $target_file_type = shift(@_);
-    my $convert_options = shift(@_) || "";
-    my $convert_type = shift(@_) || "";
-    my $outhandle = $self->{'outhandle'};
-    my $verbosity = $self->{'verbosity'};
-    # Determine the full name and path of the output file
-    my $target_file_path = &util::get_tmp_filename() . "." . $target_file_type;
-    push(@{$self->{'tmp_file_paths'}}, $target_file_path);
-    # Generate and run the convert command
-    my $convert_command = "convert -interlace plane -verbose $convert_options \"$source_file_path\" \"$target_file_path\"";
-    print $outhandle "$convert_type $convert_command\n" if ($verbosity > 2);
-    my $result = `$convert_command 2>&1`;
-    print $outhandle "$convert_type RESULT = $result\n" if ($verbosity > 2);
-    return $result;
+}
-# The ImagePlug read() function.
-# ImagePlug overrides read() because there is no need to read the actual
-# text of the file in, because the contents of the file is not text...
+#
-# Return number of files processed, undef if can't process
-# Note that $base_dir might be "" and that $file might
-# include directories
-sub read {
-    my $self = shift (@_);
-    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
-    my $outhandle = $self->{'outhandle'};
-    #check process and block exps, smart block, etc
-    my ($block_status,$filename) = $self->read_block(@_);
-    return $block_status if ((!defined $block_status) || ($block_status==0));
-    print STDERR "<Processing n='$file' p='ImagePlug'>\n" if ($gli);
-    print $outhandle "ImagePlug processing $file\n"
-        if $self->{'verbosity'} > 1;
-    # None of this works very well on Windows 95/98...
-    if ($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT()) {
-    if ($gli) {
-        print STDERR "<ProcessingError n='$file' r='Windows 95/98 not supported'>\n";
+    }
-    print $outhandle "ImagePlug: Windows 95/98 not supported\n";
-    return -1;
+    }
-    # None of this is going to work very well without ImageMagick...
-    if ($self->{'imagemagick_not_installed'}) {
-    if ($gli) {
-        print STDERR "<ProcessingError n='$file' r='ImageMagick not installed'>\n";
+    }
-    print $outhandle "ImagePlug: ImageMagick not installed\n";
-    return -1;
+    }
-    #if there's a leading directory name, eat it...
-    $file =~ s/^.*[\/\\]//;
-    # create a new document
-    my $doc_obj = new doc ($filename, "indexed_doc");
-    $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
-    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
-    #run convert to get the thumbnail and extract size and type info
-    my $result = generate_images($self, $filename, $file, $doc_obj);
-    if (!defined $result)
+    {
-    if ($gli) {
-        print STDERR "<ProcessingError n='$file'>\n";
+    }
-    print $outhandle "ImagePlug: couldn't process \"$filename\"\n";
-    return -1; # error during processing
+    }
-    #create an empty text string so we don't break downstream plugins
-    my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
-    # include any metadata passed in from previous plugins
-    # note that this metadata is associated with the top level section
-    my $section = $doc_obj->get_top_section();
-    $self->extra_metadata ($doc_obj, $section, $metadata);
-    # do plugin specific processing of doc_obj
-    unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
-    print STDERR "<ProcessingError n='$file'>\n" if ($gli);
-    return -1;
+    }
-    # do any automatic metadata extraction
-    $self->auto_extract_metadata ($doc_obj);
-    # if we haven't found any Title so far, assign one
-    # this was shifted to here from inside read()
-    $self->title_fallback($doc_obj,$section,$file);
-    # add an OID
-    $doc_obj->set_OID();
-    $doc_obj->add_utf8_text($section, $text);
-    # process the document
-    $processor->process($doc_obj);
-    # clean up temporary files - we do this here instead of in
-    # generate_images becuase associated files aren't actually copied
-    # until after process has been run.
-    foreach my $tmp_file_path (@{$self->{'tmp_file_paths'}})
+    {
-    if (-e $tmp_file_path)
+    {
-        &util::rm($tmp_file_path);
+    }
+    }
-    $self->{'num_processed'}++;
-    return 1;
+}
 # do plugin specific processing of doc_obj
 sub process {
     my $self = shift (@_);
+    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
+    # options??
+    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
     my $outhandle = $self->{'outhandle'};
+    my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
+    if ($self->check_image_magick()) {
+    $self->generate_images($filename_full_path, $filename_no_path, $doc_obj, $doc_obj->get_top_section()); # should we check the return value?
+    } else {
+    # do some basic stuff
+    # associate the image, fileformat, mimetype, srclink, srcicon
+    # do this if image magick not installed. but also if generate hasn't worked?? what about images too small?
+    }
+    #we have no text - adds dummy text and NoText metadata
+    $self->add_dummy_text($doc_obj, $doc_obj->get_top_section());
+    return 1;
+}
+sub clean_up_after_doc_obj_processing {
+    my $self = shift(@_);
     return 1;
+    $self->ImageConverter::clean_up_temporary_files();
+}

gsdl/trunk/perllib/plugins/IndexPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # IndexPlug.pm --
+# IndexPlugin.pm --
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
 # named 'Subject'.
+# 12/05/02 Added usage datastructure - John Thompson
+package IndexPlug;
+package IndexPlugin;
 use plugin;
 use BasPlug;
+use BasePlugin;
 use doc;
 use util;
 …
 sub BEGIN {
     @IndexPlug::ISA = ('BasPlug');
+    @IndexPlugin::ISA = ('BasePlugin');
+}
 my $arguments = [
          ];
+#my $arguments = [
+#        ];
 my $options = { 'name'     => "IndexPlug",
         'desc'     => "{IndexPlug.desc}",
+my $options = { 'name'     => "IndexPlugin",
+        'desc'     => "{IndexPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes" };
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
+    #push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
     # found an index.txt file
     print STDERR "<Processing n='$file' p='IndexPlug'>\n" if ($gli);
     print $outhandle "IndexPlug: processing $indexfile\n";
+    print STDERR "<Processing n='$file' p='IndexPlugin'>\n" if ($gli);
+    print $outhandle "IndexPlugin: processing $indexfile\n";
     # read in the index.txt

gsdl/trunk/perllib/plugins/LOMPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # LOMPlug.pm -- plugin for import the collection from LOM
+# LOMPlugin.pm -- plugin for import the collection from LOM
+#
 # A component of the Greenstone digital library software
 …
 ### Note this plugin currently can't download source documents from outside if you are behind a firewall.
 package LOMPlug;
 use BasPlug;
+package LOMPlugin;
+use ReadTextFile;
 use MetadataPass;
 use XMLParser;
 …
 sub BEGIN {
     @ISA = ('BasPlug', 'MetadataPass');
+    @ISA = ('ReadTextFile', 'MetadataPass');
+}
 …
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{ReadTextFile.process_exp}",
     'type' => "string",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" },
       { 'name' => "root_tag",
     'desc' => "{LOMPlug.root_tag}",
+    'desc' => "{LOMPlugin.root_tag}",
     'type' => "regexp",
     'deft' => q/^(?i)lom$/,
     'reqd' => "no" },
       { 'name' => "check_timestamp",
     'desc' => "{LOMPlug.check_timestamp}",
+    'desc' => "{LOMPlugin.check_timestamp}",
     'type' => "flag" },
       { 'name' => "download_srcdocs",
     'desc' => "{LOMPlug.download_srcdocs}",
+    'desc' => "{LOMPlugin.download_srcdocs}",
     'type' => "regexp",
     'deft' => "",
     'reqd' => "no" }];
 my $options = { 'name'     => "LOMPlug",
         'desc'     => "{LOMPlug.desc}",
+my $options = { 'name'     => "LOMPlugin",
+        'desc'     => "{LOMPlugin.desc}",
         'inherits' => "yes",
         'args'     => $arguments };
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
+    if ($self->{'info_only'}) {
+    # don't worry about creating the XML parser as all we want is the
+    # list of plugin options
+    return bless $self, $class;
+    }
     #create XML::Parser object for parsing dublin_core.xml files
 …
     $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
     print $outhandle "LOMPlug: extracting metadata from $file\n"
+    print $outhandle "LOMPlugin: extracting metadata from $file\n"
     if $self->{'verbosity'} > 1;
 …
     if ($@) {
     print $outhandle "LOMPlug: skipping $filename as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1);
+    print $outhandle "LOMPlugin: skipping $filename as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1);
     print $outhandle "\n Perl Error:\n $@\n" if ($self->{'verbosity'}>2);
     return 0;
 …
     my $outhandle = $self->{'outhandle'};
     print STDERR "<Processing n='$file' p='LOMPlug'>\n" if ($gli);
+    print STDERR "<Processing n='$file' p='LOMPlugin'>\n" if ($gli);
     print $outhandle "LOMPLug: processing $file\n";

gsdl/trunk/perllib/plugins/LaTeXPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # LaTeXPlug.pm
+# LaTeXPlugin.pm
+#
 # A component of the Greenstone digital library software
 …
 #  parse/remove tex \if ... macros
 package LaTeXPlug;
+package LaTeXPlugin;
 # System complains about $arguments if the strict is set
 …
 # greenstone packages
 use BasPlug;
+use ReadTextFile;
 use unicode;
 use util;
 …
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{ReadTextFile.process_exp}",
     'type' => "regexp",
     'reqd' => "no",
     'deft' => &get_default_process_exp() } ];
 my $options = { 'name'     => 'LaTeXPlug',
         'desc'     => '{LaTeXPlug.desc}',
+my $options = { 'name'     => 'LaTeXPlugin',
+        'desc'     => '{LaTeXPlugin.desc}',
         'abstract' => 'no',
         'inherits' => 'yes',
 …
 sub BEGIN {
+    @LaTeXPlug::ISA = ('BasPlug');
+}
+sub print_usage {
+    print STDERR "\n  usage: plugin LaTeXPlug [options]\n\n";
+    @LaTeXPlugin::ISA = ('ReadTextFile');
+}
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
     $self->{'aux_files'} = {};
 …
     my $outhandle = $self->{'outhandle'};
     if ($gli) {
     print STDERR "<Processing n='$file' p='LaTeXPlug'>\n";
+    print STDERR "<Processing n='$file' p='LaTeXPlugin'>\n";
     } elsif ($self->{'verbosity'} > 1) {
     print $outhandle "LaTeXPlug: processing $file\n"
+    print $outhandle "LaTeXPlugin: processing $file\n"
+    }
     my $cursection = $doc_obj->get_top_section();

gsdl/trunk/perllib/plugins/MARCPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # MARCPlug.pm -- basic MARC plugin
+# MARCPlugin.pm -- basic MARC plugin
+#
 # A component of the Greenstone digital library software
 …
 ###########################################################################
 package MARCPlug;
 use SplitPlug;
+package MARCPlugin;
+use SplitTextFile;
 use unicode;
 …
 sub BEGIN {
     @MARCPlug::ISA = ('SplitPlug');
+    @MARCPlugin::ISA = ('SplitTextFile');
     unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
+}
 …
 my $arguments =
     [ { 'name' => "metadata_mapping",
     'desc' => "{MARCPlug.metadata_mapping}",
+    'desc' => "{MARCPlugin.metadata_mapping}",
     'type' => "string",
     'deft' => "marctodc.txt",
 …
     'reqd' => "no" },
       { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'reqd' => "no",
     'deft' => &get_default_process_exp() },
       { 'name' => "split_exp",
     'desc' => "{SplitPlug.split_exp}",
+    'desc' => "{SplitTextFile.split_exp}",
     'type' => "regexp",
     'reqd' => "no",
 …
       ];
 my $options = { 'name'     => "MARCPlug",
         'desc'     => "{MARCPlug.desc}",
+my $options = { 'name'     => "MARCPlugin",
+        'desc'     => "{MARCPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists);
     # 'metadata_mapping' was used in two ways in the plugin: as a plugin
 …
+    {
     my $msg = "MARCPlug ERROR: Can't locate mapping file \"" .
+    my $msg = "MARCPlugin ERROR: Can't locate mapping file \"" .
         $self->{'metadata_mapping_file'} . "\".\n" .
         "    No marc files can be processed.\n";
 …
         push(@marc_entries,$marc);
     $$textref .= $marc->as_formatted();
     $$textref .= "\n\n"; # for SplitPlug - see default_split_exp above...
+    $$textref .= "\n\n"; # for SplitTextFile - see default_split_exp above...
+    }
 …
 # do plugin specific processing of doc_obj
 # This gets done for each record found by SplitPlug in marc files.
+# This gets done for each record found by SplitTextFile in marc files.
 sub process {
     my $self = shift (@_);
 …
     if (! defined($self->{'metadata_mapping'}))
+    {
     print $outhandle "MARCPlug: no metadata file! Can't process $file\n";
+    print $outhandle "MARCPlugin: no metadata file! Can't process $file\n";
     return undef;
+    }
     print STDERR "<Processing n='$file' p='MARCPlug'>\n" if ($gli);
     print $outhandle "MARCPlug: processing $file\n"
+    print STDERR "<Processing n='$file' p='MARCPlugin'>\n" if ($gli);
+    print $outhandle "MARCPlugin: processing $file\n"
     if $self->{'verbosity'} > 1;

gsdl/trunk/perllib/plugins/MARCXMLPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # MARCXMLPlug.pm
+# MARCXMLPlugin.pm
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
 # well-formedness).
 package MARCXMLPlug;
 use XMLPlug;
+package MARCXMLPlugin;
+use ReadXMLFile;
 use strict;
 …
 sub BEGIN {
     @MARCXMLPlug::ISA = ('XMLPlug');
+    @MARCXMLPlugin::ISA = ('ReadXMLFile');
+}
 my $arguments = [{'name' => "metadata_mapping_file",
           'desc' => "{MARCXMLPlug.metadata_mapping_file}",
+          'desc' => "{MARCXMLPlugin.metadata_mapping_file}",
           'type' => "string",
           'deft' => "marctodc.txt",
           'reqd' => "no" }];
 my $options = { 'name'     => "MARCXMLPlug",
         'desc'     => "{MARCXMLPlug.desc}",
+my $options = { 'name'     => "MARCXMLPlugin",
+        'desc'     => "{MARCXMLPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists);
     $self->{'content'} = "";
 …
     if (scalar(@$mm_files)==0)
+    {
     my $msg = "MARCXMLPlug ERROR: Can't locate mapping file \"" .
+    my $msg = "MARCXMLPlugin ERROR: Can't locate mapping file \"" .
         $self->{'metadata_mapping_file'} . "\".\n " .
         "    No marc files can be processed.\n";
 …
     $self->{'indent'} = 0;
     my $outhandle = $self->{'outhandle'};
     print $outhandle "MARCXMLPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
     print STDERR "<Processing n='$self->{'file'}' p='MARCXMLPlug'>\n" if $self->{'gli'};
+    print $outhandle "MARCXMLPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
+    print STDERR "<Processing n='$self->{'file'}' p='MARCXMLPlugin'>\n" if $self->{'gli'};
+}
 …
     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Encoding", $encoding);
     my ($filemeta) = $file =~ /([^\\\/]+)$/;
     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));
+    $self->set_Source_metadata($doc_obj, $filemeta, $encoding);
     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "SourceSegment", "$self->{'record_count'}");
         if ($self->{'cover_image'}) {
 …
     my $outhandle = $self->{'outhandle'};
     print $outhandle "Record $self->{'record_count'} - MARCXMLPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
+    print $outhandle "Record $self->{'record_count'} - MARCXMLPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
         $self->{'record_count'}++;

gsdl/trunk/perllib/plugins/METSPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # METSPlug.pm
+# METSPlugin.pm
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
 package METSPlug;
+package METSPlugin;
 use ghtml;
 …
 no strict 'refs'; # allow filehandles to be variables and viceversa
 use XMLPlug;
+use ReadXMLFile;
 use XML::XPath;
 use XML::XPath::XMLParser;
 sub BEGIN {
     @METSPlug::ISA = ('XMLPlug');
+    @METSPlugin::ISA = ('ReadXMLFile');
+}
 my $arguments = [
          ];
 my $options = { 'name'     => "METSPlug",
         'desc'     => "{METSPlug.desc}",
+my $options = { 'name'     => "METSPlugin",
+        'desc'     => "{METSPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes" };
 …
     push(@$pluginlist, $class);
+    if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
+    if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
+    my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);
+    # have no args - do we still want this?
+    #push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists);
     $self->{'section'} = "";
 …
+    }
     my $outhandle = $self->{'outhandle'};
     print $outhandle "METSPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
     print STDERR "<Processing n='$self->{'file'}' p='METSPlug'>\n" if ($self->{'gli'});
+    print $outhandle "METSPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
+    print STDERR "<Processing n='$self->{'file'}' p='METSPlugin'>\n" if ($self->{'gli'});
+}

gsdl/trunk/perllib/plugins/MP3Plugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # MP3Plug.pm -- Plugin for MP3 files (MPEG audio layer 3).
+# MP3Plugin.pm -- Plugin for MP3 files (MPEG audio layer 3).
+#
 # A component of the Greenstone digital library software from the New
 …
 package MP3Plug;
 use UnknownPlug;
+package MP3Plugin;
+use BasePlugin;
 use strict;
 no strict 'refs'; # allow filehandles to be variables and viceversa
+no strict 'subs';
 use MP3::Info;
 …
 sub BEGIN {
     @MP3Plug::ISA = ('UnknownPlug');
+    @MP3Plugin::ISA = ('BasePlugin');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" },
       { 'name' => "assoc_images",
         'desc' => "{MP3Plug.assoc_images}",
+        'desc' => "{MP3Plugin.assoc_images}",
         'type' => "flag",
         'deft' => "",
         'reqd' => "no" },
       { 'name' => "applet_metadata",
     'desc' => "{MP3Plug.applet_metadata}",
+    'desc' => "{MP3Plugin.applet_metadata}",
     'type' => "flag",
     'deft' => "" },
       { 'name' => "metadata_fields",
     'desc' => "{MP3Plug.metadata_fields}",
+    'desc' => "{MP3Plugin.metadata_fields}",
     'type' => "string",
     'deft' => "Title,Artist,Genre" } ];
 my $options = { 'name'     => "MP3Plug",
         'desc'     => "{MP3Plug.desc}",
+my $options = { 'name'     => "MP3Plugin",
+        'desc'     => "{MP3Plugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new UnknownPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
 # Default value for the process_exp option: matches names ending in
 # ".mp3", case-insensitively.
 sub get_default_process_exp {
     my $mp3_suffix_exp = q^(?i)\.mp3$^;
     return $mp3_suffix_exp;
+}
+# Plugin-specific processing of the document object for one MP3 file.
+#
+# Arguments (after $self): $pluginfo, $base_dir, $file, $metadata,
+# $doc_obj, $gli.  Only $base_dir, $file and $doc_obj are used here.
+#
+# Returns 1 on success, or 0 (after printing a message) when
+# associate_mp3_file() does not return 1.
+sub process {
+    my $self = shift (@_);
+    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
+    my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
+    # Avoid hashing for the OID: a "hash" OIDtype is replaced with
+    # "incremental"; any other OIDtype is left untouched.
+    if ($doc_obj->{'OIDtype'} =~ /^hash$/) {
+        $doc_obj->set_OIDtype ("incremental");
+    }
+    # associate the file with the document
+    if ($self->associate_mp3_file($filename_full_path, $filename_no_path, $doc_obj) != 1)
+    {
+        print "MP3Plugin: couldn't process \"$filename_full_path\"\n";
+        return 0;
+    }
+    # Give the document the looked-up dummy text, followed by image
+    # placeholders when the assoc_images option is set.
+    my $text = &gsprintf::lookup_string("{BasePlugin.dummy_text}",1);
+    if ($self->{'assoc_images'}) {
+        $text .= "[img1]<br>";
+        $text .= "[img2]<br>";
+    }
+    $doc_obj->add_utf8_text($doc_obj->get_top_section(), $text);
+    # Report success explicitly rather than returning whatever
+    # add_utf8_text() happens to evaluate to.
+    return 1;
+}
 …
     $doc_obj->associate_file($filename, $dst_file, $mime_type, $section);
-    $doc_obj->add_metadata ($section, "Source", $file);
     $doc_obj->add_metadata ($section, $assoc_field, $assoc_name);
     $doc_obj->add_metadata ($section, "srcurl", $assoc_url);
 …
+# MP3Plug read(), based on UnknownPlug read(): creates a doc object for one MP3 file, associates the
+# file with it, gives it looked-up dummy text, runs process() and passes the result to $processor.
+# Returns 1 on success, 0 if associate_mp3_file() does not return 1, undef if process() returns undef, or read_block()'s status when that is undef or 0.
+my $mp3_doc_count = 0; # incremented once per doc object created in read(); never read in the code shown here (TODO confirm it is still needed)
+sub read {
+    my $self = shift (@_);
+    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; # $maxdocs/$total_count are not referenced below, only forwarded to read_block() via @_
+    my $outhandle = $self->{'outhandle'};
+    # read_block() checks for associate_ext, blocking etc and yields a status plus the filename to use
+    my ($block_status,$filename) = $self->read_block(@_);
+    return $block_status if ((!defined $block_status) || ($block_status==0)); # undef or 0 status is passed straight back to the caller
+    print STDERR "<Processing n='$file' p='MP3Plug'>\n" if ($gli);
+    print $outhandle "MP3Plug processing \"$filename\"\n"
+        if $self->{'verbosity'} > 1;
+    # strip everything up to the last / or \ so that $file no longer has a leading directory
+    $file =~ s/^.*[\/\\]//;
+    # create a new document
+    my $doc_obj = new doc ($filename, "indexed_doc");
+    $mp3_doc_count++;
+##    $doc_obj->set_OIDtype ($processor->{'OIDtype'});
+    if ($processor->{'OIDtype'} =~ /^(assigned|dirname)$/) { # the processor's OIDtype is kept only for these two values
+    $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
+    }
+    else {
+    $doc_obj->set_OIDtype ("incremental");    # this is done to avoid hashing content of file
+    }
+    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
+    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "FileSize", (-s $filename));
+    # associate the file with the document; any result other than 1 is treated as failure
+    if (associate_mp3_file($self, $filename, $file, $doc_obj) != 1)
+    {
+    print "MP3Plug: couldn't process \"$filename\"\n"; # NOTE(review): printed to the default output handle, not $outhandle
+    return 0;
+    }
+    # start from the looked-up dummy text so we don't break downstream plugins
+    my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
+    if ($self->{'assoc_images'}) { # image placeholders are appended only when assoc_images is set
+    $text .= "[img1]<br>";
+    $text .= "[img2]<br>";
+    }
+    # include any metadata passed in from previous plugins
+    my $section = $doc_obj->get_top_section();
+    $self->extra_metadata ($doc_obj, $section, $metadata);
+    $self->title_fallback($doc_obj,$section,$file);
+    # do plugin specific processing of doc_obj; an undef result aborts this read with undef
+    return undef unless defined ($self->process (\$text, $pluginfo, $base_dir,
+                         $file, $metadata, $doc_obj));
+    # do any automatic metadata extraction
+    $self->auto_extract_metadata ($doc_obj);
+    # add an OID
+    $doc_obj->set_OID();
+    $doc_obj->add_utf8_text($section, $text); # the text is added only after the OID has been set
+    # process the document
+    $processor->process($doc_obj);
+    $self->{'num_processed'} ++;
+    return 1;
+}
+# We want to use mp3:Title if it's there; otherwise we fall back to the BasePlugin method.
 sub title_fallback
+{
 …
+    }
     else {
         &BasPlug::title_fallback($self, $doc_obj, $section, $file);
+        $self->BasePlugin::title_fallback($doc_obj, $section, $file);
+    }
+    }

gsdl/trunk/perllib/plugins/MediaWikiPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # MediaWikiPlug.pm -- html plugin with extra facilities for wiki page
+# MediaWikiPlugin.pm -- html plugin with extra facilities for wiki page
+#
 # A component of the Greenstone digital library software
 …
 # collection's Home page.
 package MediaWikiPlug;
 use HTMLPlug;
 # use ImagePlug;
+package MediaWikiPlugin;
+use HTMLPlugin;
+# use ImagePlugin;
 # use File::Copy;
 use unicode;
 …
 sub BEGIN {
     @MediaWikiPlug::ISA = ('HTMLPlug');
+    @MediaWikiPlugin::ISA = ('HTMLPlugin');
+}
 …
      # show the table of contents on collection's home page
      { 'name' => "show_toc",
        'desc' => "{MediaWikiPlug.show_toc}",
+       'desc' => "{MediaWikiPlugin.show_toc}",
        'type' => "flag",
        'reqd' => "no"},
      # set to delete the table of contents section on each MediaWiki page
      { 'name' => "delete_toc",
        'desc' => "{MediaWikiPlug.delete_toc}",
+       'desc' => "{MediaWikiPlugin.delete_toc}",
        'type' => "flag",
        'reqd' => "no"},
      # regexp to match the table of contents
      { 'name' => "toc_exp",
        'desc' => "{MediaWikiPlug.toc_exp}",
+       'desc' => "{MediaWikiPlugin.toc_exp}",
        'type' => "regexp",
        'reqd' => "no",
 …
      # set to delete the navigation section
      { 'name' => "delete_nav",
        'desc' => "{MediaWikiPlug.delete_nav}",
+       'desc' => "{MediaWikiPlugin.delete_nav}",
        'type' => "flag",
        'reqd' => "no",
 …
      # regexp to match the navigation section
      { 'name' => "nav_div_exp",
        'desc' => "{MediaWikiPlug.nav_div_exp}",
+       'desc' => "{MediaWikiPlugin.nav_div_exp}",
        'type' => "regexp",
        'reqd' => "no",
 …
      # set to delete the searchbox section
      { 'name' => "delete_searchbox",
        'desc' => "{MediaWikiPlug.delete_searchbox}",
+       'desc' => "{MediaWikiPlugin.delete_searchbox}",
        'type' => "flag",
        'reqd' => "no",
 …
      # regexp to match the searchbox section
      { 'name' => "searchbox_div_exp",
        'desc' => "{MediaWikiPlug.searchbox_div_exp}",
+       'desc' => "{MediaWikiPlugin.searchbox_div_exp}",
        'type' => "regexp",
        'reqd' => "no",
        'deft' => "<div([^>]*)id=(\\\"|')p-search(\\\"|')(.|\\n)*?<\/div>"},
      # regexp to match title suffix
      # can't use the title_sub option in HTMLPlug instead
+     # can't use the title_sub option in HTMLPlugin instead
      # because title_sub always matches from the beginning
      { 'name' => "remove_title_suffix_exp",
        'desc' => "{MediaWikiPlug.remove_title_suffix_exp}",
+       'desc' => "{MediaWikiPlugin.remove_title_suffix_exp}",
        'type' => "regexp",
        'reqd' => "no",
 …
      ];
 my $options = { 'name'     => "MediaWikiPlug",
         'desc'     => "{MediaWikiPlug.desc}",
+my $options = { 'name'     => "MediaWikiPlugin",
+        'desc'     => "{MediaWikiPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new HTMLPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new HTMLPlugin($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
+}
 …
     my $outhandle = $self->{'outhandle'};
     print $outhandle "MediaWikiPlug: processing $file\n" if $self->{'verbosity'} > 1;
+    print $outhandle "MediaWikiPlugin: processing $file\n" if $self->{'verbosity'} > 1;
     my @head_and_body = split(/<body/i,$$textref);
 …
                                         # linux: /research/lh92/greenstone/greenstone2.73/collect/wiki/import
     # $file use different delimiters : forward slash for linux; backward slash for windows
     # print "\nfile : $file\n\n";         # windows: greenstone.sourceforge.net\wiki\index.php\Access_Processing_using_DBPlug.html
+    # print "\nfile : $file\n\n";         # windows: greenstone.sourceforge.net\wiki\index.php\Access_Processing_using_DBPlugin.html
                                         # linux: greenstone.sourceforge.net/wiki/index.php/Using_GreenstoneWiki.html
 …
         $value = $1;
         if (!defined $value || !defined $tag){
         #print $outhandle "StructuredHTMLPlug: can't find VALUE in \"$tag\"\n";
+        #print $outhandle "MediaWikiPlugin: can't find VALUE in \"$tag\"\n";
         next;
         } else {

gsdl/trunk/perllib/plugins/MetadataCSVPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # MetadataCSVPlug.pm -- A plugin for metadata in comma-separated value format
+# MetadataCSVPlugin.pm -- A plugin for metadata in comma-separated value format
+#
 # A component of the Greenstone digital library software
 …
 ###########################################################################
 package MetadataCSVPlug;
+package MetadataCSVPlugin;
 use BasPlug;
+use BasePlugin;
 use strict;
 sub BEGIN {
     @MetadataCSVPlug::ISA = ('BasPlug');
+    @MetadataCSVPlugin::ISA = ('BasePlugin');
+}
 …
 my $arguments =
     [ { 'name' => "block_exp",
     'desc' => "{BasPlug.block_exp}",
+    'desc' => "{BasePlugin.block_exp}",
     'type' => "regexp",
     'reqd' => "no",
 …
 my $options = { 'name'     => "MetadataCSVPlug",
         'desc'     => "{MetadataCSVPlug.desc}",
+my $options = { 'name'     => "MetadataCSVPlugin",
+        'desc'     => "{MetadataCSVPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
 # We don't want any other plugins to see .csv files
+# Used by BasePlugin read to block this file
 sub get_default_block_exp
+{
 …
     return undef;
+    }
     print STDERR "\n<Processing n='$file' p='MetadataCSVPlug'>\n" if ($gli);
     print STDERR "MetadataCSVPlug: processing $file\n" if ($self->{'verbosity'}) > 1;
+    print STDERR "\n<Processing n='$file' p='MetadataCSVPlugin'>\n" if ($gli);
+    print STDERR "MetadataCSVPlugin: processing $file\n" if ($self->{'verbosity'}) > 1;
     # Read the CSV file to get the metadata
 …
     open(CSV_FILE, "$filename");
     my $csv_file_reader = new multiread();
     $csv_file_reader->set_handle('MetadataCSVPlug::CSV_FILE');
+    $csv_file_reader->set_handle('MetadataCSVPlugin::CSV_FILE');
     $csv_file_reader->read_file(\$csv_file_content);
     close(CSV_FILE);
 …
     if (!$found_filename_field) {
     print STDERR "MetadataCSVPlug Error: No Filename field in CSV file: $filename\n";
+    print STDERR "MetadataCSVPlugin Error: No Filename field in CSV file: $filename\n";
     return -1; # error
+    }
 …
         # The line must be formatted incorrectly
         else {
         print STDERR "MetadataCSVPlug Error: Badly formatted CSV line: $csv_line.\n";
+        print STDERR "MetadataCSVPlugin Error: Badly formatted CSV line: $csv_line.\n";
         last;
+        }
 …
     my $csv_line_filename_array = $csv_line_metadata{"Filename"};
     if (!defined $csv_line_filename_array) {
         print STDERR "MetadataCSVPlug Error: No Filename metadata in CSV line: $orig_csv_line\n";
+        print STDERR "MetadataCSVPlugin Error: No Filename metadata in CSV line: $orig_csv_line\n";
         next;
+    }

gsdl/trunk/perllib/plugins/MetadataPass.pm

r12970	r15872
29	29	no strict 'refs'; # allow filehandles to be variables and viceversa
30	30
31		use ~~BasPlug; # uses BasPlug~~, but is not inherited
	31	use PrintInfo; # uses PrintInfo, but is not inherited
32	32
33	33
…	…
55	55	sub print_xml_usage
56	56	{
57		~~BasPlug~~::print_xml_usage(@_);
	57	PrintInfo::print_xml_usage(@_);
58	58	}
59	59
60	60	sub print_xml
61	61	{
62		~~BasPlug~~::print_xml(@_);
	62	PrintInfo::print_xml(@_);
63	63	}
64	64
65	65	sub set_incremental
66	66	{
67		~~BasPlug~~::set_incremental(@_);
	67	PrintInfo::set_incremental(@_);
68	68	}
69	69

gsdl/trunk/perllib/plugins/MetadataXMLPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # MetadataXMLPlug.pm --
+# MetadataXMLPlugin.pm --
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
 ###########################################################################
 # MetadataXMLPlug process metadata.xml files in a collection
+# MetadataXMLPlugin process metadata.xml files in a collection
 # Here's an example of a metadata file that uses three FileSet structures
 …
 # metadata is explictly overridden later in the import.
 package MetadataXMLPlug;
+package MetadataXMLPlugin;
 use strict;
 no strict 'refs';
 use BasPlug;
+use BasePlugin;
 use util;
 use metadatautil;
 sub BEGIN {
     @MetadataXMLPlug::ISA = ('BasPlug');
+    @MetadataXMLPlugin::ISA = ('BasePlugin');
     unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
+}
 …
 my $arguments = [
       { 'name' => "block_exp",
     'desc' => "{BasPlug.block_exp}",
+    'desc' => "{BasePlugin.block_exp}",
     'type' => "regexp",
     'reqd' => "no",
 …
 ];
 my $options = { 'name'     => "MetadataXMLPlug",
         'desc'     => "{MetadataXMLPlug.desc}",
+my $options = { 'name'     => "MetadataXMLPlugin",
+        'desc'     => "{MetadataXMLPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
     if ($self->{'info_only'}) {
 …
+    }
     print STDERR "\n<Processing n='$file' p='MetadataXMLPlug'>\n" if ($gli);
     print STDERR "MetadataXMLPlug: processing $file\n" if ($self->{'verbosity'})> 1;
+    print STDERR "\n<Processing n='$file' p='MetadataXMLPlugin'>\n" if ($gli);
+    print STDERR "MetadataXMLPlugin: processing $file\n" if ($self->{'verbosity'})> 1;
     $self->{'metadataref'} = $extrametadata;

gsdl/trunk/perllib/plugins/NulPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # NULPlug.pm -- Plugin for dummy (.nul) files
+# NulPlugin.pm -- Plugin for dummy (.nul) files
+#
 # A component of the Greenstone digital library software from the New
 …
 ###########################################################################
 # NULPlug - a plugin for dummy files
+# NulPlugin - a plugin for dummy files
 # This is a simple Plugin for importing dummy files, along with
 …
 # databases
 package NULPlug;
+package NulPlugin;
 use BasPlug;
+use BasePlugin;
 use strict;
 …
 sub BEGIN {
     @NULPlug::ISA = ('BasPlug');
+    @NulPlugin::ISA = ('BasePlugin');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'reqd' => "no",
     'deft' => &get_default_process_exp() },
       { 'name' => "assoc_field",
     'desc' => "{NULPlug.assoc_field}",
+    'desc' => "{NulPlugin.assoc_field}",
     'type' => "string",
     'deft' => "",
+    'deft' => "null_file",
     'reqd' => "no" },
       { 'name' => "add_metadata_as_text",
     'desc' => "{NULPlug.add_metadata_as_text}",
+    'desc' => "{NulPlugin.add_metadata_as_text}",
     'type' => "flag" },
       { 'name' => "remove_namespace_for_text",
     'desc' => "{NULPlug.remove_namespace_for_text}",
+    'desc' => "{NulPlugin.remove_namespace_for_text}",
     'type' => "flag" }
       ];
 my $options = { 'name'     => "NULPlug",
         'desc'     => "{NULPlug.desc}",
+my $options = { 'name'     => "NulPlugin",
+        'desc'     => "{NulPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
+}
 # The NULPlug read() function. This function does all the right
+# things to make general options work for a given plugin.  NULPlug
+# overrides read() because there is no need to read the actual text of
+# the file in, because the contents of the file are not text...
+#
+#
 # Return number of files processed, undef if can't process
+#
+# Note that $base_dir might be "" and that $file might include directories
+# NulPlugin specific processing of doc_obj.
+sub process {
+    my $self = shift (@_);
+    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
+    my $topsection = $doc_obj->get_top_section();
+    my $assoc_field = $self->{'assoc_field'}; # || "null_file"; TODO, check this
+    $doc_obj->add_metadata ($topsection, $assoc_field, $file);
-sub read {
-    my $self = shift (@_);
-    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
-    my $outhandle = $self->{'outhandle'};
-    #check for associate_ext, blocking etc
-    my ($block_status,$filename) = $self->read_block(@_);
-    return $block_status if ((!defined $block_status) || ($block_status==0));
-    print STDERR "<Processing n='$file' p='NULPlug'>\n" if ($gli);
-    print $outhandle "NULPlug processing \"$filename\"\n"
-        if $self->{'verbosity'} > 1;
-    #if there's a leading directory name, eat it...
-    $file =~ s/^.*[\/\\]//;
-    # create a new document
-    my $doc_obj = new doc ($filename, "indexed_doc");
-    my $top_section = $doc_obj->get_top_section();
-    $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
-    #$doc_obj->set_OIDtype ("incremental");
-    $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}");
-    $doc_obj->add_metadata($top_section, "Source", $file); # set the filename as Source metadata to be consistent with other plugins
-    $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename));
-    # the metadata NoText is used to suppress the dummy text 'This document has no text.'
-    $doc_obj->add_metadata ($top_section, "NoText",    "1");
-    my $assoc_field = $self->{'assoc_field'} || "null_file";
-    $doc_obj->add_metadata ($top_section, $assoc_field, $file);
-     if ($self->{'cover_image'}) {
-    $self->associate_cover_image($doc_obj, $filename);
+    }
-    # include any metadata passed in from previous plugins
-    my $section = $doc_obj->get_top_section();
-    $self->extra_metadata ($doc_obj, $section, $metadata);
     # format the metadata passed in (presumably from metadata.xml)
     my $text = "";
     if ($self->{'add_metadata_as_text'}) {
     $text = &metadatautil::format_metadata_as_table($metadata, $self->{'remove_namespace_for_text'});
+    $doc_obj->add_utf8_text($topsection, $text);
     } else {
+    #create an empty text string so we don't break downstream plugins
+    $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
+    $self->add_dummy_text($doc_obj, $topsection);
+    }
-    $self->title_fallback($doc_obj,$section,$file);
-    # do plugin specific processing of doc_obj
-    unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
-    print STDERR "<ProcessingError n='$file'>\n" if ($gli);
-    return -1;
+    }
-    # do any automatic metadata extraction
-    $self->auto_extract_metadata ($doc_obj);
-    # add an OID
-    $doc_obj->set_OID();
-    $doc_obj->add_utf8_text($section, $text);
-    # process the document
-    $processor->process($doc_obj);
-    $self->{'num_processed'} ++;
-    return 1;
+}
-# NULPlug processing of doc_obj.  In practice we don't need to do
-# anything here because the read function takes care of everything.
-sub process {
-    my $self = shift (@_);
-    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
-    my $outhandle = $self->{'outhandle'};
     return 1;

gsdl/trunk/perllib/plugins/OAIPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+package OAIPlug;
+use BasPlug;
+package OAIPlugin;
 use unicode;
 use util;
 …
 no strict 'refs'; # allow filehandles to be variables and viceversa
 use XMLPlug;
+use ReadXMLFile;
 sub BEGIN {
     @OAIPlug::ISA = ('XMLPlug');
+    @OAIPlugin::ISA = ('ReadXMLFile');
+}
 …
       ];
 my $options = { 'name'     => "OAIPlug",
         'desc'     => "{OAIPlug.desc}",
+my $options = { 'name'     => "OAIPlugin",
+        'desc'     => "{OAIPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
     my $outhandle = $self->{'outhandle'};
     print $outhandle "OAIPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
     print STDERR "<Processing n='$self->{'file'}' p='OAIPlug'>\n" if $self->{'gli'};
+    print $outhandle "OAIPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
+    print STDERR "<Processing n='$self->{'file'}' p='OAIPlugin'>\n" if $self->{'gli'};
+}
 …
     if ($srcdoc_exists)
+    {
         print $outhandle "OAIPlug: passing metadata on to $url_array->[0]\n"
+        print $outhandle "OAIPlugin: passing metadata on to $url_array->[0]\n"
         if ($self->{'verbosity'}>1);
 …
     my $outhandle = $self->{'outhandle'};
     print STDERR "<Processing n='$file' p='OAIPlug'>\n" if ($gli);
     print $outhandle "OAIPlug: processing $file\n"
+    print STDERR "<Processing n='$file' p='OAIPlugin'>\n" if ($gli);
+    print $outhandle "OAIPlugin: processing $file\n"
     if $self->{'verbosity'} > 1;
 …
     if ($top_level_prefix !~ /dc$/) {
         print $outhandle "Warning: OAIPlug currently only designed for Dublin Core (or variant) metadata\n";
+        print $outhandle "Warning: OAIPlugin currently only designed for Dublin Core (or variant) metadata\n";
         print $outhandle "         This recorded metadata section '$top_level_prefix' does not appear to match.\n";
         print $outhandle "         Metadata assumed to be in form: <prefix:tag>value</prefix:tag> and will be converted\n";

gsdl/trunk/perllib/plugins/OggVorbisPlugin.pm

-              r15865
+              r15872
 ###########################################################################
 package OggVorbisPlug;
+package OggVorbisPlugin;
 use UnknownPlug;
+use BasePlugin;
 use Ogg::Vorbis::Header::PurePerl;
 use strict;
 no strict 'refs'; # allow filehandles to be variables and viceversa
+no strict 'subs';
 sub BEGIN {
     @OggVorbisPlug::ISA = ('UnknownPlug');
+    @OggVorbisPlugin::ISA = ('BasePlugin');
+}
 …
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "string",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" },
       { 'name' => "add_technical_metadata",
     'desc' => "{OggVorbisPlug.add_technical_metadata}",
+    'desc' => "{OggVorbisPlugin.add_technical_metadata}",
     'type' => "flag",
     'deft' => "" } ];
 my $options = { 'name'     => "OggVorbisPlug",
         'desc'     => "{OggVorbisPlug.desc}",
+my $options = { 'name'     => "OggVorbisPlugin",
+        'desc'     => "{OggVorbisPlugin.desc}",
         'inherits' => "yes",
         'abstract' => "no",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
     my $self = new UnknownPlug($pluginlist, $inputargs, $hashArgOptLists);
+    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
+}
+sub read
+sub process
+{
     my $self = shift (@_);
     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
+    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
+    my $outhandle = $self->{'outhandle'};
+    my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
+    # do something about OIDtype so no hashing
+    # old code was in effect the following.
+    if ($doc_obj->{'OIDtype'} =~ /^hash$/) {
+    $doc_obj->set_OIDtype ("incremental");
+    }
+    #check process and block exps, smart block, etc
+    my ($block_status,$filename) = $self->read_block(@_);
+    return $block_status if ((!defined $block_status) || ($block_status==0));
+     # Report that we're processing the file
+    print STDERR "<Processing n='$file' p='OggVorbisPlug'>\n" if ($gli);
+    print $outhandle "OggVorbisPlug: processing $file\n"
+    if ($self->{'verbosity'}) > 1;
+    # file is just the name of the file (need to get rid of any leading directory names)
+    $file =~ s/^.*[\/\\]//;
+    # create a new index document
+    my $doc_obj = new doc ($filename, "indexed_doc");
+    if ($processor->{'OIDtype'} =~ /^(assigned|dirname)$/) {
+    $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
+    }
+    else {
+    $doc_obj->set_OIDtype ("incremental");    # this is done to avoid hashing content of file
+    }
+    my $section = $doc_obj->get_top_section();
+    # replace spaces in filename with %20 in url for metadata entry
+    my $url = $file;
+    ##$url =~ s/ /%20/g;
+    # Source (filename) to be consistent with other plugins
+    $doc_obj->add_metadata ($section, "Source", $url);
+    my $top_section = $doc_obj->get_top_section();
     # Extract metadata
     my $ogg = Ogg::Vorbis::Header::PurePerl->new($filename);
+    my $ogg = Ogg::Vorbis::Header::PurePerl->new($filename_full_path);
     # Comments added to the file
 …
+    {
         if (defined $value && $value ne "") {
         $doc_obj->add_metadata($section, $keytc, $value);
+        $doc_obj->add_metadata($top_section, $keytc, $value);
+        }
+    }
 …
         my $value = $ogg->info->{$key};
         if (defined $value && $value ne "") {
         $doc_obj->add_metadata($section, $keytc, $value);
+        $doc_obj->add_metadata($top_section, $keytc, $value);
+        }
+    }
+    }
+    # srclink
+    $doc_obj->add_metadata ($section, "FileFormat", "OggVorbis");
+    $doc_obj->add_metadata ($section, "srclink", "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Source]\">");
+    $doc_obj->add_metadata ($section, "/srclink", "</a>");
+    $doc_obj->add_metadata ($top_section, "FileFormat", "OggVorbis");
+    $doc_obj->add_metadata ($top_section, "srclink", "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Source]\">");
+    $doc_obj->add_metadata ($top_section, "/srclink", "</a>");
     # srcicon (need to include "iogg.gif" in the greenstone images directory)
     $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/images/iogg.gif\" title=\"Download\" border=0>");
+    $doc_obj->add_metadata ($top_section, "srcicon", "<img src=\"_httpprefix_/images/iogg.gif\" title=\"Download\" border=0>");
     # add NoText metadata which can be used to suppress the dummy text
     $doc_obj->add_metadata ($section, "NoText", "1");
+    $doc_obj->add_metadata ($top_section, "NoText", "1");
     # Add the actual file as an associated file
     $doc_obj->associate_file($filename, $file, "VORBIS", $section);
+    $doc_obj->associate_file($filename_full_path, $filename_no_path, "VORBIS", $top_section);
-    # Create an empty text string so we don't break downstream plugins
-     my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
-    # include any metadata passed in from previous plugins
-    $self->extra_metadata ($doc_obj, $section, $metadata);
-    # do plugin specific processing of doc_obj
-    return undef unless defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj));
-    # do any automatic metadata extraction
-    $self->auto_extract_metadata($doc_obj);
-    # add an OID
-    $doc_obj->set_OID();
-    $doc_obj->add_utf8_text($section, $text);
-    # process the document
-    $processor->process($doc_obj);
-    $self->{'num_processed'}++;
-    return 1;
+}

gsdl/trunk/perllib/plugins/OpenDocumentPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # OpenDocumentPlug.pm -- The Open Document plugin
+# OpenDocumentPlugin.pm -- The Open Document plugin
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
 #This basically extracts any text out of the document, but not much else.
+package OpenDocumentPlug;
+# this inherits ReadXMLFile, and therefore offers -xslt option, but does
+# nothing with it.
+package OpenDocumentPlugin;
 use strict;
 no strict 'refs'; # allow filehandles to be variables and viceversa
 use XMLPlug;
+use ReadXMLFile;
 use XML::XPath;
 use XML::XPath::XMLParser;
 …
 sub BEGIN {
+    @OpenDocumentPlug::ISA = ('XMLPlug');
+}
+#our @filesAssoc = ();
+    @OpenDocumentPlugin::ISA = ('ReadXMLFile');
+}
 our @filesProcess = ( "content.xml" , "meta.xml" );
-#XML plug has this so we need it too
-our ($self);
 my $arguments = [
 …
          ];
 my $options = { 'name'     => "OpenDocumentPlug",
         'desc'     => "{OpenDocumentPlug.desc}",
+my $options = { 'name'     => "OpenDocumentPlugin",
+        'desc'     => "{OpenDocumentPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists);
     $self->{'section'} = "";
 …
 sub read {
+    # $self must be global to work with XML callback routines.
+    $self = shift (@_);
+    my $self = shift (@_);
     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
 …
     $self->unzip ("\"$file_only\"");
     foreach my $xmlFile (@OpenDocumentPlug::filesProcess) {
+    foreach my $xmlFile (@OpenDocumentPlugin::filesProcess) {
         if (-e $xmlFile) {
         $self->parse_file($xmlFile);
+        $self->{'parser'}->parsefile($xmlFile);
+        }
+    }
 …
     # parsefile may either croak somewhere in XML::Parser (e.g. because
     # the document is not well formed) or die somewhere in XMLPlug or a
+    # the document is not well formed) or die somewhere in ReadXMLFile or a
     # derived plugin (e.g. because we're attempting to process a
     # document whose DOCTYPE is not meant for this plugin). For the
 …
     $doc_obj->add_utf8_metadata ("", "srcicon",  "<img border=\"0\" align=\"absmiddle\" src=\"_httpprefix_/collect/[collection]/index/assoc/[archivedir]/thumbnail.png\" alt=\"View the Open document\" title=\"View the Open document\">");
     $doc_obj->add_utf8_metadata ("", "/srclink", "</a>");
     $doc_obj->add_utf8_metadata ("", "Source", &ghtml::dmsafe($file_only));
     $doc_obj->set_utf8_metadata_element("", "FileSize", (-s $filename));
+    $self->set_Source_metadata($doc_obj, $file_only);
+     $doc_obj->set_utf8_metadata_element("", "FileSize", (-s $filename));
     # include any metadata passed in from previous plugins
 …
     # add an OID
     $doc_obj->set_OID();
+    $self->add_OID($doc_obj);
     $doc_obj->add_utf8_metadata("", "Plugin", "$self->{'plugin_type'}");

gsdl/trunk/perllib/plugins/PDFPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # PDFPlug.pm -- reasonably with-it pdf plugin
+# PDFPlugin.pm -- reasonably with-it pdf plugin
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
+#
 ###########################################################################
+package PDFPlug;
+use ConvertToPlug;
+package PDFPlugin;
+use ConvertBinaryFile;
+use ReadTextFile;
 use unicode;
 use strict;
 …
 sub BEGIN {
     @PDFPlug::ISA = ('ConvertToPlug');
+    @PDFPlugin::ISA = ('ConvertBinaryFile', 'ReadTextFile');
+}
 my $convert_to_list =
     [ { 'name' => "auto",
     'desc' => "{ConvertToPlug.convert_to.auto}" },
+    'desc' => "{ConvertBinaryFile.convert_to.auto}" },
       { 'name' => "html",
     'desc' => "{ConvertToPlug.convert_to.html}" },
+    'desc' => "{ConvertBinaryFile.convert_to.html}" },
       { 'name' => "text",
     'desc' => "{ConvertToPlug.convert_to.text}" },
+    'desc' => "{ConvertBinaryFile.convert_to.text}" },
       { 'name' => "pagedimg_jpg",
     'desc' => "{ConvertToPlug.convert_to.pagedimg_jpg}"},
+    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_jpg}"},
       { 'name' => "pagedimg_gif",
     'desc' => "{ConvertToPlug.convert_to.pagedimg_gif}"},
+    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_gif}"},
       { 'name' => "pagedimg_png",
     'desc' => "{ConvertToPlug.convert_to.pagedimg_png}"},
+    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_png}"},
       ];
 …
+    [
      { 'name' => "convert_to",
        'desc' => "{ConvertToPlug.convert_to}",
+       'desc' => "{ConvertBinaryFile.convert_to}",
        'type' => "enum",
        'reqd' => "yes",
 …
        'deft' => "html" },
      { 'name' => "process_exp",
        'desc' => "{BasPlug.process_exp}",
+       'desc' => "{BasePlugin.process_exp}",
        'type' => "regexp",
        'deft' => &get_default_process_exp(),
        'reqd' => "no" },
      { 'name' => "block_exp",
        'desc' => "{BasPlug.block_exp}",
+       'desc' => "{BasePlugin.block_exp}",
        'type' => "regexp",
        'deft' => &get_default_block_exp() },
      { 'name' => "metadata_fields",
        'desc' => "{HTMLPlug.metadata_fields}",
+       'desc' => "{HTMLPlugin.metadata_fields}",
        'type' => "string",
        'deft' => "" },
      { 'name' => "noimages",
        'desc' => "{PDFPlug.noimages}",
+       'desc' => "{PDFPlugin.noimages}",
        'type' => "flag" },
      { 'name' => "allowimagesonly",
        'desc' => "{PDFPlug.allowimagesonly}",
+       'desc' => "{PDFPlugin.allowimagesonly}",
        'type' => "flag" },
      { 'name' => "complex",
        'desc' => "{PDFPlug.complex}",
+       'desc' => "{PDFPlugin.complex}",
        'type' => "flag" },
      { 'name' => "nohidden",
        'desc' => "{PDFPlug.nohidden}",
+       'desc' => "{PDFPlugin.nohidden}",
        'type' => "flag" },
      { 'name' => "zoom",
        'desc' => "{PDFPlug.zoom}",
+       'desc' => "{PDFPlugin.zoom}",
        'deft' => "2",
        'range' => "1,3", # actually the range is 0.5-3
        'type' => "int" },
      { 'name' => "use_sections",
        'desc' => "{PDFPlug.use_sections}",
+       'desc' => "{PDFPlugin.use_sections}",
        'type' => "flag" },
      { 'name' => "description_tags",
        'desc' => "{HTMLPlug.description_tags}",
+       'desc' => "{HTMLPlugin.description_tags}",
        'type' => "flag" }
      ];
 my $options = { 'name'     => "PDFPlug",
         'desc'     => "{PDFPlug.desc}",
+my $options = { 'name'     => "PDFPlugin",
+        'desc'     => "{PDFPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
     my @arg_array = @$inputargs;
     my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists);
+    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
     if ($self->{'info_only'}) {
 …
+    }
+    # these are passed through to gsConvert.pl by ConvertToPlug.pm
+    $self->{'filename_extension'} = "pdf";
+    $self->{'file_type'} = "PDF";
+    # these are passed through to gsConvert.pl by ConvertBinaryFile.pm
     my $zoom = $self->{"zoom"};
     $self->{'convert_options'} = "-pdf_zoom $zoom";
 …
     my $secondary_plugin_options = $self->{'secondary_plugin_options'};
     if (!defined $secondary_plugin_options->{'HTMLPlug'}) {
     $secondary_plugin_options->{'HTMLPlug'} = [];
+    }
     if (!defined $secondary_plugin_options->{'TEXTPlug'}) {
     $secondary_plugin_options->{'TEXTPlug'} = [];
+    if (!defined $secondary_plugin_options->{'HTMLPlugin'}) {
+    $secondary_plugin_options->{'HTMLPlugin'} = [];
+    }
+    if (!defined $secondary_plugin_options->{'TextPlugin'}) {
+    $secondary_plugin_options->{'TextPlugin'} = [];
+    }
     if (defined $self->{'convert_to'} && $self->{'convert_to'} =~ /pagedimg.*/i) {
     if (!defined $secondary_plugin_options->{'PagedImgPlug'}){
         $secondary_plugin_options->{'PagedImgPlug'} = [];
         my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'};
+    if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){
+        $secondary_plugin_options->{'PagedImagePlugin'} = [];
+        my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
         push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
+    }
+    }
     my $html_options = $secondary_plugin_options->{'HTMLPlug'};
     my $text_options = $secondary_plugin_options->{'TEXTPlug'};
     my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'};
+    my $html_options = $secondary_plugin_options->{'HTMLPlugin'};
+    my $text_options = $secondary_plugin_options->{'TextPlugin'};
+    my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
     if ($self->{'input_encoding'} eq "auto") {
 …
     && $self->{'converted_to'} eq "HTML") {
     print $outhandle "PDFPlug: Calculating sections...\n";
+    print $outhandle "PDFPlugin: Calculating sections...\n";
     # we have "<a name=1></a>" etc for each page
 …
     if (scalar (@sections) == 1) { #only one section - no split!
         print $outhandle "PDFPlug: warning - no sections found\n";
+        print $outhandle "PDFPlugin: warning - no sections found\n";
     } else {
         $top_section .= shift @sections; # keep HTML header etc as top_section
 …
         $title = " "; # get rid of the undefined warning in next line
+        }
         my $newsection = "<!-- from PDFPlug -->\n<!-- <Section>\n";
+        my $newsection = "<!-- from PDFPlugin -->\n<!-- <Section>\n";
         $newsection .= "<Metadata name=\"Title\">" . $title
         . "</Metadata>\n--><p>\n";
 …
 sub process {
     my $self = shift (@_);
     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
+    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
     my $result = $self->process_type("pdf",$base_dir,$file,$doc_obj);

gsdl/trunk/perllib/plugins/PPTPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # PPTPlug.pm -- plugin for importing Microsoft PowerPoint files.
+# PPTPlugin.pm -- plugin for importing Microsoft PowerPoint files.
 #  (currently only versions 95 and 97)
+#
 …
 ###########################################################################
 package PPTPlug;
+package PPTPlugin;
+use ConvertToPlug;
+use ConvertBinaryFile;
+use ReadTextFile; # for read_file in convert_post_process. do we need it?
 use strict;
 no strict 'refs'; # allow filehandles to be variables and viceversa
 sub BEGIN {
     @PPTPlug::ISA = ('ConvertToPlug');
+    @PPTPlugin::ISA = ('ConvertBinaryFile', 'ReadTextFile');
+}
 my $convert_to_list =
     [ { 'name' => "auto",
     'desc' => "{ConvertToPlug.convert_to.auto}" },
+    'desc' => "{ConvertBinaryFile.convert_to.auto}" },
       { 'name' => "html",
     'desc' => "{ConvertToPlug.convert_to.html}" },
+    'desc' => "{ConvertBinaryFile.convert_to.html}" },
       { 'name' => "text",
     'desc' => "{ConvertToPlug.convert_to.text}" },
+    'desc' => "{ConvertBinaryFile.convert_to.text}" },
       { 'name' => "pagedimg_jpg",
     'desc' => "{ConvertToPlug.convert_to.pagedimg_jpg}" },
+    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_jpg}" },
       { 'name' => "pagedimg_gif",
     'desc' => "{ConvertToPlug.convert_to.pagedimg_gif}" },
+    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_gif}" },
       { 'name' => "pagedimg_png",
     'desc' => "{ConvertToPlug.convert_to.pagedimg_png}" }
+    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_png}" }
       ];
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'reqd' => "no",
 …
       ];
 my $options = { 'name'     => "PPTPlug",
         'desc'     => "{PPTPlug.desc}",
+my $options = { 'name'     => "PPTPlugin",
+        'desc'     => "{PPTPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
     my $ws_arg =[{ 'name' => "convert_to",
                'desc' => "{ConvertToPlug.convert_to}",
+               'desc' => "{ConvertBinaryFile.convert_to}",
                'type' => "enum",
                'reqd' => "yes",
 …
                'deft' => "html" },
              { 'name' => "windows_scripting",
                'desc' => "{PPTPlug.windows_scripting}",
+               'desc' => "{PPTPlugin.windows_scripting}",
                'type' => "flag",
                'reqd' => "no" }
 …
+    }
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my @arg_array = @$inputargs;
+    my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists);
+    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
     if ($self->{'info_only'}) {
 …
+    }
+    $self->{'filename_extension'} = "ppt";
+    $self->{'file_type'} = "PPT";
     # ppthtml outputs utf-8 already.
     #these are passed through to gsConvert.pl by ConvertToPlug.pm
+    #these are passed through to gsConvert.pl by ConvertBinaryFile.pm
     $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'};
     my $secondary_plugin_options = $self->{'secondary_plugin_options'};
     if ($self->{'windows_scripting'} && ($self->{'convert_to'} eq "PagedImg")) {
     $secondary_plugin_options->{'PagedImgPlug'} = [];
+    $secondary_plugin_options->{'PagedImagePlugin'} = [];
     } else {
     $secondary_plugin_options->{'HTMLPlug'} = [];
+    $secondary_plugin_options->{'HTMLPlugin'} = [];
+    }
     my $html_options = $secondary_plugin_options->{'HTMLPlug'};
     my $pageimg_options = $secondary_plugin_options->{'PagedImgPlug'};
+    my $html_options = $secondary_plugin_options->{'HTMLPlugin'};
+    my $pageimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
     if ($self->{'input_encoding'} eq "auto") {
     $self->{'input_encoding'} = "utf8";
     if (defined $secondary_plugin_options->{'HTMLPlug'}){
+    if (defined $secondary_plugin_options->{'HTMLPlugin'}){
         push(@$html_options,"-input_encoding", "utf8");
         push(@$html_options,"-extract_language") if $self->{'extract_language'};
         # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
+        # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj)
         # to extract these metadata fields from the HEAD META fields
         push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
+    }
     if (defined $secondary_plugin_options->{'PagedImgPlug'}){
+    if (defined $secondary_plugin_options->{'PagedImagePlugin'}){
         push(@$pageimg_options,"-input_encoding", "utf8");
         push(@$pageimg_options,"-extract_language") if $self->{'extract_language'};
 …
+}
+sub get_file_type {
+    my $self = shift (@_);
+    my $file_type = "PPT";
+    return $file_type;
+}
+# do we need this? above states that ppthtml produces utf8 text...
 sub convert_post_process
+{
 …
+}
-sub process {
-    my $self = shift (@_);
-    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
-    return $self->process_type("ppt",$base_dir,$file,$doc_obj);
+}
 ;

gsdl/trunk/perllib/plugins/PSPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # PSPlug.pm -- this might look VERY similar to the PDF plugin...
+# PSPlugin.pm -- this might look VERY similar to the PDF plugin...
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
 # 12/05/02 Added usage datastructure - John Thompson
+package PSPlug;
+use ConvertToPlug;
+package PSPlugin;
+use ConvertBinaryFile;
+use ReadTextFile; # for read_file in convert_post_process. do we need it?
 use sorttools;
 …
 sub BEGIN {
     @PSPlug::ISA = ('ConvertToPlug');
+    @PSPlugin::ISA = ('ConvertBinaryFile', 'ReadTextFile');
+}
 my $convert_to_list =
     [ { 'name' => "auto",
     'desc' => "{ConvertToPlug.convert_to.auto}" },
+    'desc' => "{ConvertBinaryFile.convert_to.auto}" },
       { 'name' => "text",
     'desc' => "{ConvertToPlug.convert_to.text}" },
+    'desc' => "{ConvertBinaryFile.convert_to.text}" },
       { 'name' => "pagedimg_jpg",
     'desc' => "{ConvertToPlug.convert_to.pagedimg_jpg}" },
+    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_jpg}" },
       { 'name' => "pagedimg_gif",
     'desc' => "{ConvertToPlug.convert_to.pagedimg_gif}" },
+    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_gif}" },
       { 'name' => "pagedimg_png",
     'desc' => "{ConvertToPlug.convert_to.pagedimg_png}" }
+    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_png}" }
       ];
 my $arguments =
     [ { 'name' => "convert_to",
     'desc' => "{ConvertToPlug.convert_to}",
+    'desc' => "{ConvertBinaryFile.convert_to}",
     'type' => "enum",
     'reqd' => "yes",
 …
     'deft' => "text" },
       { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" },
       { 'name' => "block_exp",
     'desc' => "{BasPlug.block_exp}",
+    'desc' => "{BasePlugin.block_exp}",
     'type' => 'regexp',
     'deft' => &get_default_block_exp() },
       { 'name' => "extract_date",
     'desc' => "{PSPlug.extract_date}",
+    'desc' => "{PSPlugin.extract_date}",
     'type' => "flag" },
       { 'name' => "extract_pages",
     'desc' => "{PSPlug.extract_pages}",
+    'desc' => "{PSPlugin.extract_pages}",
     'type' => "flag" },
       { 'name' => "extract_title",
     'desc' => "{PSPlug.extract_title}",
+    'desc' => "{PSPlugin.extract_title}",
     'type' => "flag" } ];
 my $options = { 'name'     => "PSPlug",
         'desc'     => "{PSPlug.desc}",
+my $options = { 'name'     => "PSPlugin",
+        'desc'     => "{PSPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
     if ($self->{'info_only'}) {
 …
+    }
+    $self->{'filename_extension'} = "ps";
+    $self->{'file_type'} = "PS";
     my $secondary_plugin_options = $self->{'secondary_plugin_options'};
     if (!defined $secondary_plugin_options->{'TEXTPlug'}) {
     $secondary_plugin_options->{'TEXTPlug'} = [];
+    }
     my $text_options = $secondary_plugin_options->{'TEXTPlug'};
+    if (!defined $secondary_plugin_options->{'TextPlugin'}) {
+    $secondary_plugin_options->{'TextPlugin'} = [];
+    }
+    my $text_options = $secondary_plugin_options->{'TextPlugin'};
     if (defined $self->{'convert_to'} && $self->{'convert_to'} =~ /pagedimg.*/i) {
     if (!defined $secondary_plugin_options->{'PagedImgPlug'}){
         $secondary_plugin_options->{'PagedImgPlug'} = [];
         my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'};
+    if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){
+        $secondary_plugin_options->{'PagedImagePlugin'} = [];
+        my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
         push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
+    }
 …
+}
+# this has been commented out in other plugins. do we need it here?
 sub convert_post_process
+{
 …
     my $date_found = 0;
     print STDERR "PSPlug: extracting PostScript metadata from \"$filename\"\n"
+    print STDERR "PSPlugin: extracting PostScript metadata from \"$filename\"\n"
     if $self->{'verbosity'} > 1;
 …
+}
 # do plugin specific processing of doc_obj for HTML type
+# do plugin specific processing of doc_obj
 sub process {
     my $self = shift (@_);
+    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
+#    my $outhandle = $self->{'outhandle'};
+#    print $outhandle "PSPlug: passing $file on to $self->{'converted_to'}Plug\n"
+#   if $self->{'verbosity'} > 1;
+#    print STDERR "<Processing n='$file' p='PSPlug'>\n" if ($gli);
+    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
     my $filename = &util::filename_cat($base_dir,$file);
     $self->extract_metadata_from_postscript($filename, $doc_obj);
+    return $self->process_type("ps",$base_dir,$file,$doc_obj);
+    return $self->SUPER::process(@_);
+}

gsdl/trunk/perllib/plugins/PagedImagePlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # PagedImgPlug.pm -- plugin for sets of images and OCR text that
+# PagedImagePlugin.pm -- plugin for sets of images and OCR text that
 #  make up a document
 # A component of the Greenstone digital library software
 …
 ###########################################################################
 # PagedImgPlug
+# PagedImagePlugin
 # processes sequences of images, with optional OCR text
+#
 …
 # To have it create medium size images for display, use the '-screenview'
 # option. As usual, running
 # 'perl -S pluginfo.pl PagedImgPlug' will list all the options.
+# 'perl -S pluginfo.pl PagedImagePlugin' will list all the options.
 # If you want the resulting documents to be presented with a table of
 …
 # FileName (only for document level metadata).
+package PagedImgPlug;
+use XMLPlug;
+package PagedImagePlugin;
+use ReadXMLFile;
+use ReadTextFile;
+use ImageConverter;
 use strict;
 no strict 'refs'; # allow filehandles to be variables and viceversa
 sub BEGIN {
     @PagedImgPlug::ISA = ('XMLPlug');
+    @PagedImagePlugin::ISA = ('ReadXMLFile', 'ReadTextFile', 'ImageConverter');
+}
 my $type_list =
     [ { 'name' => "paged",
         'desc' => "{PagedImgPlug.documenttype.paged}" },
+        'desc' => "{PagedImagePlugin.documenttype.paged}" },
       { 'name' => "hierarchy",
         'desc' => "{PagedImgPlug.documenttype.hierarchy}" } ];
+        'desc' => "{PagedImagePlugin.documenttype.hierarchy}" } ];
 my $arguments =
 …
     'type' => "string",
     'deft' => "" },
-      { 'name' => "noscaleup",
-    'desc' => "{ImagePlug.noscaleup}",
-    'type' => "flag",
-    'reqd' => "no" },
-      { 'name' => "thumbnail",
-    'desc' => "{PagedImgPlug.thumbnail}",
-    'type' => "flag",
-    'reqd' => "no" },
-      { 'name' => "thumbnailsize",
-    'desc' => "{ImagePlug.thumbnailsize}",
-    'type' => "int",
-    'deft' => "100",
-    'range' => "1,",
-    'reqd' => "no" },
-      { 'name' => "thumbnailtype",
-    'desc' => "{ImagePlug.thumbnailtype}",
-    'type' => "string",
-    'deft' => "gif",
-    'reqd' => "no" },
-      { 'name' => "screenview",
-    'desc' => "{PagedImgPlug.screenview}",
-    'type' => "flag",
-    'reqd' => "no" },
-      { 'name' => "screenviewsize",
-    'desc' => "{PagedImgPlug.screenviewsize}",
-    'type' => "int",
-        'deft' => "500",
-        'range' => "1,",
-    'reqd' => "no" },
-      { 'name' => "screenviewtype",
-    'desc' => "{PagedImgPlug.screenviewtype}",
-    'type' => "string",
-    'deft' => "jpg",
-    'reqd' => "no" },
-      { 'name' => "converttotype",
-    'desc' => "{ImagePlug.converttotype}",
-    'type' => "string",
-    'deft' => "",
-    'reqd' => "no" },
-      { 'name' => "minimumsize",
-    'desc' => "{ImagePlug.minimumsize}",
-    'type' => "int",
-    'deft' => "100",
-    'range' => "1,",
-    'reqd' => "no" },
       { 'name' => "headerpage",
     'desc' => "{PagedImgPlug.headerpage}",
+    'desc' => "{PagedImagePlugin.headerpage}",
     'type' => "flag",
     'reqd' => "no" },
       { 'name' => "documenttype",
     'desc' => "{PagedImgPlug.documenttype}",
+    'desc' => "{PagedImagePlugin.documenttype}",
     'type' => "enum",
     'list' => $type_list,
 …
 my $options = { 'name'     => "PagedImgPlug",
         'desc'     => "{PagedImgPlug.desc}",
+my $options = { 'name'     => "PagedImagePlugin",
+        'desc'     => "{PagedImagePlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
+    if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
+    if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
+    my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    new ImageConverter($pluginlist, $inputargs, $hashArgOptLists);
+    new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
+    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
+}
+sub init {
+    # Initialise the plugin: forward the caller's arguments (the original
+    # unpacked them as verbosity, outhandle, failhandle) to the inherited
+    # init, then run ImageConverter's init explicitly with no arguments.
+    # The value of the ImageConverter::init call is what gets returned.
+    my ($self, @init_args) = @_;
+
+    $self->SUPER::init(@init_args);
+    return $self->ImageConverter::init();
+}
 …
+}
+# Create the thumbnail and screenview images, and discover the Image's
+# size, width, and height using the convert utility.
+sub rotate_image {
+    # Rotate an image by 180 degrees via $self->convert and return the path
+    # of the rotated file. If the rotated file cannot be located, the
+    # original path is returned unchanged so callers can carry on.
+    #
+    # Argument: $filename_full_path - full path of the image to rotate.
+    my $self = shift (@_);
+    my ($filename_full_path) = @_;
+
+    # extension after the last dot; undef when the name has no dot at all
+    my ($this_filetype) = $filename_full_path =~ /\.([^\.]*)$/;
+    my $result = $self->convert($filename_full_path, $this_filetype, "-rotate 180", "ROTATE");
+
+    # The new file name is whatever follows "=>" in the convert result, up to
+    # the extension. Quote the extension so it is matched literally, and
+    # treat a missing extension as empty rather than interpolating undef.
+    my $type_pattern = defined $this_filetype ? quotemeta($this_filetype) : "";
+    my $new_filename;
+    if (defined $result && $result =~ /=>(.*\.$type_pattern)/) {
+        $new_filename = $1;
+    }
+    if (defined $new_filename && -e $new_filename) {
+        return $new_filename;
+    }
+    # something has gone wrong - fall back to the unrotated file
+    return $filename_full_path;
+}
 sub process_image {
+    # Process one page image: optionally convert it to another type and/or
+    # rotate it by 180 degrees with the "convert" command, add image metadata
+    # to $section of $doc_obj, associate the image with the document, and
+    # (when the thumbnail / screenview options are set) generate and
+    # associate those renditions too.
+    #
+    # Arguments (positional): full-path filename, filename without path,
+    # document object, current section, optional rotate flag ("r" = rotate).
+    # Returns 0 when the image is missing, has an empty name or is smaller
+    # than the minimumsize option; otherwise returns the image type string.
+    #
+    # NOTE(review): file names are interpolated into double-quoted shell
+    # commands below; names containing '"', '$' or backticks are not safe.
+    my $self = shift (@_);
+    my $filename = shift (@_); # filename with full path
+    my $srcfile = shift (@_); # filename without path
+    my $doc_obj = shift (@_);
+    my $section = shift (@_); # the current section
+    my $rotate = shift (@_); # whether to rotate the image or not
+    $rotate = 0 unless defined $rotate;
+
+    # check that the image file exists!!
+    if (!-f $filename) {
+        print "PagedImgPlug: ERROR: File $filename does not exist, skipping\n";
+        return 0;
+    }
+
+    my $top = ($section eq $doc_obj->get_top_section()) ? 1 : 0;
+    my $verbosity = $self->{'verbosity'};
+    my $outhandle = $self->{'outhandle'};
+
+    # check the filename is okay
+    return 0 if ($srcfile eq "" || $filename eq "");
+
+    my $minimumsize = $self->{'minimumsize'};
+    if (defined $minimumsize && (-s $filename < $minimumsize)) {
+        print $outhandle "PagedImgPlug: \"$filename\" too small, skipping\n"
+            if ($verbosity > 1);
+        # actually skip the image, as the message says (the original fell
+        # through and processed it anyway)
+        return 0;
+    }
+
+    # Convert the image to a new type (if required), and rotate if required.
+    my $converttotype = $self->{'converttotype'};
+    my $originalfilename = "";  # only set if we do a conversion
+    my $type = "unknown";
+    my $converted = 0;
+    if ($converttotype ne "" && $filename !~ /$converttotype$/) {
+        $converted = 1;
+        $originalfilename = $filename;
+
+        # pick a temporary output name that does not exist yet
+        my $filehead = &util::get_tmp_filename();
+        $filename = $filehead . ".$converttotype";
+        my $n = 1;
+        while (-e $filename) {
+            $filename = "$filehead$n\.$converttotype";
+            $n++;
+        }
+        $self->{'tmp_filename1'} = $filename;
+
+        my $rotate_option = "";
+        if ($rotate eq "r") {
+            $rotate_option = "-rotate 180 ";
+        }
+
+        my $command = "convert -verbose \"$originalfilename\" $rotate_option \"$filename\"";
+        print $outhandle "CONVERT: $command\n" if ($verbosity > 2);
+        my $result = `$command`;
+        print $outhandle "CONVERT RESULT = $result\n" if ($verbosity > 2);
+
+        $type = $converttotype;
+    } elsif ($rotate eq "r") {
+        $originalfilename = $filename;
+        $filename = &util::get_tmp_filename();
+        # remember the rotated copy under the same key the converted copy
+        # uses, so it is not left behind (the original never recorded it)
+        $self->{'tmp_filename1'} = $filename;
+
+        my $command = "convert \"$originalfilename\" -rotate 180 \"$filename\"";
+        print $outhandle "ROTATE: $command\n" if ($verbosity > 2);
+        my $result = `$command`;
+        print $outhandle "ROTATE RESULT = $result\n" if ($verbosity > 2);
+    }
+
+    # Add the image metadata
+    my $id = $srcfile;
+    $id =~ s/\.([^\.]*)$//;  # the source file name without an extension
+
+    # the name the image is associated under: new extension if converted
+    my $file = $converted ? "$id.$converttotype" : $srcfile;
+
+    $doc_obj->add_metadata ($section, "Image", $file);
+    # Also want to set filename as 'Source' metadata to be
+    # consistent with other plugins
+    $doc_obj->add_metadata ($section, "Source", $srcfile);
+
+    my ($image_type, $image_width, $image_height, $image_size)
+        = &identify($filename, $outhandle, $verbosity);
+    $doc_obj->add_metadata ($section, "ImageType",   $image_type);
+    $doc_obj->add_metadata ($section, "ImageWidth",  $image_width);
+    $doc_obj->add_metadata ($section, "ImageHeight", $image_height);
+    $doc_obj->add_metadata ($section, "ImageSize",   $image_size);
+    $doc_obj->add_metadata ($section, "FileFormat", "PagedImg");
+    # add NoText metadata which can be used to suppress the dummy text
+    $doc_obj->add_metadata ($section, "NoText", "1");
+
+    if ($type eq "unknown" && $image_type) {
+        $type = $image_type;
+    }
+
+    # URL prefix of the associated files: the top section refers to its own
+    # assocfilepath, any other section to that of its Top parent
+    my $assocpath = $top ? "[assocfilepath]" : "[parent(Top):assocfilepath]";
+    my $assoc_url = "_httpprefix_/collect/[collection]/index/assoc/$assocpath";
+
+    $doc_obj->add_metadata ($section, "srclink", "<a href=\"$assoc_url/[Image]\">");
+    $doc_obj->add_metadata ($section, "srcicon", "<img src=\"$assoc_url/[Image]\">");
+    $doc_obj->add_metadata ($section, "/srclink", "</a>");
+
+    # Add the image as an associated file
+    $doc_obj->associate_file($filename,$file,"image/$type",$section);
+    print $outhandle "associating file $filename as name $file\n" if ($verbosity > 2);
+
+    if ($self->{'thumbnail'}) {
+        # Make the thumbnail image
+        my $thumbnailsize = $self->{'thumbnailsize'} || 100;
+        my $thumbnailtype = $self->{'thumbnailtype'} || 'gif';
+
+        my $filehead = &util::get_tmp_filename();
+        my $thumbnailfile = $filehead . ".$thumbnailtype";
+        my $n = 1;
+        while (-e $thumbnailfile) {
+            $thumbnailfile = $filehead . $n . ".$thumbnailtype";
+            $n++;
+        }
+        $self->{'tmp_filename2'} = $thumbnailfile;
+
+        # Generate the thumbnail with convert
+        my $command = "convert  -verbose -geometry $thumbnailsize"
+            . "x$thumbnailsize \"$filename\" \"$thumbnailfile\"";
+        print $outhandle "THUMBNAIL: $command\n" if ($verbosity > 2);
+        my $result = `$command 2>&1`;
+        print $outhandle "THUMB RESULT: $result\n" if ($verbosity > 2);
+
+        # Add the thumbnail as an associated file ...
+        if (-e "$thumbnailfile") {
+            my $thumbname = $id . "thumb.$thumbnailtype";
+            $doc_obj->associate_file("$thumbnailfile", $thumbname, "image/$thumbnailtype",$section);
+            $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype);
+            $doc_obj->add_metadata ($section, "Thumb", $thumbname);
+            $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"$assoc_url/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>");
+        }
+
+        # Extract Thumbnail metadata from convert output
+        if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
+            $doc_obj->add_metadata ($section, "ThumbWidth", $1);
+            $doc_obj->add_metadata ($section, "ThumbHeight", $2);
+        }
+    }
+
+    # Make a screen-sized version of the picture if requested
+    if ($self->{'screenview'}) {
+        # To do: if the actual image is smaller than the screenview size,
+        # we should use the original !
+        my $screenviewsize = $self->{'screenviewsize'} || 500;
+        my $screenviewtype = $self->{'screenviewtype'} || 'jpeg';
+
+        my $filehead = &util::get_tmp_filename();
+        my $screenviewfilename = $filehead . ".$screenviewtype";
+        my $n = 1;
+        while (-e $screenviewfilename) {
+            $screenviewfilename = "$filehead$n\.$screenviewtype";
+            $n++;
+        }
+        $self->{'tmp_filename3'} = $screenviewfilename;
+
+        # make the screenview image
+        my $command = "convert  -verbose -geometry $screenviewsize"
+            . "x$screenviewsize \"$filename\" \"$screenviewfilename\"";
+        print $outhandle "SCREENVIEW: $command\n" if ($verbosity > 2);
+        my $result = `$command  2>&1`;
+        print $outhandle "SCREENVIEW RESULT: $result\n" if ($verbosity > 3);
+
+        # get screenview dimensions, size and type
+        if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {
+            $doc_obj->add_metadata ($section, "ScreenWidth", $1);
+            $doc_obj->add_metadata ($section, "ScreenHeight", $2);
+        } elsif ($result =~ m/([0-9]+)x([0-9]+)/) {
+            # if the image hasn't changed size, the previous regex doesn't match
+            $doc_obj->add_metadata ($section, "ScreenWidth", $1);
+            $doc_obj->add_metadata ($section, "ScreenHeight", $2);
+        }
+
+        # add the screenview as an associated file ...
+        if (-e "$screenviewfilename") {
+            my $screenname = $id . "sv.$screenviewtype";
+            $doc_obj->associate_file("$screenviewfilename", $screenname,
+                                     "image/$screenviewtype",$section);
+            # report the name actually used (the original printed "$id sv...")
+            print $outhandle "associating screen file $screenviewfilename as name $screenname\n" if ($verbosity > 2);
+            $doc_obj->add_metadata ($section, "ScreenType", $screenviewtype);
+            $doc_obj->add_metadata ($section, "Screen", $screenname);
+            $doc_obj->add_metadata ($section, "screenicon", "<img src=\"$assoc_url/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>");
+        } else {
+            print $outhandle "PagedImgPlug: couldn't find \"$screenviewfilename\"\n";
+        }
+    }
+
+    return $type;
+}
+# Discover the characteristics of an image file with the ImageMagick
+# "identify" command.
+#
+# Called as a plain function: identify($image, $outhandle, $verbosity).
+# Returns the list ($type, $width, $height, $size); every element that
+# cannot be parsed from the command output is the string 'unknown'.
+# Size is reported in bytes.
+#
+# NOTE(review): $image is interpolated into a double-quoted shell command;
+# names containing '"', '$' or backticks are not safe here.
+sub identify {
+    my ($image, $outhandle, $verbosity) = @_;
+
+    # Use the ImageMagick "identify" command to get the file specs
+    my $command = "identify \"$image\" 2>&1";
+    print $outhandle "$command\n" if ($verbosity > 2);
+    my $result = `$command`;
+    $result = '' unless defined $result;   # command could not be run at all
+    print $outhandle "$result\n" if ($verbosity > 3);
+
+    # Read the type, width, and height
+    my $type =   'unknown';
+    my $width =  'unknown';
+    my $height = 'unknown';
+
+    my $image_safe = quotemeta $image;
+    if ($result =~ /^$image_safe (\w+) (\d+)x(\d+)/) {
+        $type = $1;
+        $width = $2;
+        $height = $3;
+    }
+
+    # Read the size. Plain byte counts are tried first, as before; the kb
+    # form now also accepts a decimal value and either letter case (e.g.
+    # "12.3KB"), and an mb form is recognised as well.
+    my $size = "unknown";
+    if ($result =~ m/^.* ([0-9]+)b/i) {
+        $size = $1;
+    } elsif ($result =~ m/^.* ([0-9]+(?:\.[0-9]+)?)kb/i) {
+        $size = int(1024 * $1);
+    } elsif ($result =~ m/^.* ([0-9]+(?:\.[0-9]+)?)mb/i) {
+        $size = int(1024 * 1024 * $1);
+    }
+
+    print $outhandle "file: $image:\t $type, $width, $height, $size\n"
+        if ($verbosity > 3);
+
+    # Return the specs
+    return ($type, $width, $height, $size);
+}
+# The PagedImgPlug read() function. This function does all the right things
+# to make general options work for a given plugin. It calls the process()
+# function which does all the work specific to a plugin (like the old
+# read functions used to do). Most plugins should define their own
+# process() function and let this read() function keep control.
+#
+# PagedImgPlug overrides read() because there is no need to read the actual
+# text of the file in, because the contents of the file is not text...
+#
+# Return number of files processed, undef if can't process
+# Note that $base_dir might be "" and that $file might
+# include directories
+sub read_into_doc_obj {
+    my $self = shift (@_);
+    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
+    my $outhandle   = $self->{'outhandle'};
+    #check process and block exps, smart block, etc
+    my ($block_status,$filename) = $self->read_block(@_);
+    return $block_status if ((!defined $block_status) || ($block_status==0));
+    print $outhandle "PagedImgPlug processing \"$filename\"\n"
+    if $self->{'verbosity'} > 1;
+    print STDERR "<Processing n='$file' p='PagedImgPlug'>\n" if ($gli);
+    # here we need to decide if we have an old text .item file, or a new xml
+    # .item file - for now the test is if the first non-empty line is
+    # <PagedDocument> then its xml
+    my $xml_version = 0;
+    open (ITEMFILE, $filename) || die "couldn't open $filename\n";
+    my $backup_filename = "backup.item";
+    open (BACKUP,">$backup_filename")|| die "couldn't write to $backup_filename\n";
+    my $line = "";
+    my $num = 0;
+    $line = <ITEMFILE>;
+    while ($line !~ /\w/) {
+    $line = <ITEMFILE>;
+    }
+    chomp $line;
+    if ($line =~ /<PagedDocument/) {
+    $xml_version = 1;
+    }
+    close ITEMFILE;
+    open (ITEMFILE, $filename) || die "couldn't open $filename\n";
+    $line = <ITEMFILE>;
+    $line =~ s/^\xEF\xBB\xBF//; # strip BOM
+    $line =~ s/\x0B+//ig;
+    $line =~ s/&/&amp;/g;
+    print BACKUP ($line);
+    #Tidy up the item file some metadata title contains \vt-vertical tab
+    while ($line = <ITEMFILE>) {
+    $line =~ s/\x0B+//ig;
+    $line =~ s/&/&amp;/g;
+    print BACKUP ($line);
+    }
+    close ITEMFILE;
+    close BACKUP;
+    &File::Copy::copy ($backup_filename, $filename);
+    &util::rm($backup_filename);
+    my $doc_obj;
+    if ($xml_version) {
+    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
+    $self->{'file'} = $file;
+    $self->{'filename'} = $filename;
+    $self->{'processor'} = $processor;
+    $self->{'metadata'} = $metadata;
+    my $self = shift(@_);
+    my ($filename_full_path, $filename_no_path, $doc_obj, $section, $rotate) = @_;
+    # do rotation
+    if ($rotate eq "r") {
+    # check the filenames
+    return 0 if ($filename_no_path eq "" || !-f $filename_full_path);
+    # we get a new temporary file which is rotated
+    $filename_full_path = $self->rotate_image($filename_full_path);
+    }
+    # do generate images
+    my $result = $self->generate_images($filename_full_path, $filename_no_path, $doc_obj, $section);
+    #overwrite one set in ImageConverter
+    $doc_obj->set_metadata_element ($section, "FileFormat", "PagedImage");
+    return $result;
+}
+sub old_read_stuff_for_xml_version {
+    my ($self, $filename, $file, $gli);
+    # this bit same as ReadXMLFile read
+#   $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
+#   $self->{'file'} = $file;
+#   $self->{'filename'} = $filename_full_path;
+#   $self->{'processor'} = $processor;
+#   $self->{'metadata'} = $metadata;
+#
     eval {
         $@ = "";
 …
         # feed transformed file (now in memory as string) into XML parser
         #$self->{'parser'}->parse($transformed_xml);
         $self->parse_string($transformed_xml);
+        $self->{'parser'}->parse($transformed_xml);
+        ###$self->parse_string($transformed_xml);
+        }
         else {
         #$self->{'parser'}->parsefile($filename);
         $self->parse_file($filename);
+        $self->{'parser'}->parsefile($filename);
+        #$self->parse_file($filename);
+        }
     };
 …
         return -1; # error during processing
+    }
+}
+# The PagedImagePlugin read() function. This function does all the right things
+# to make general options work for a given plugin. It calls the process()
+# function which does all the work specific to a plugin (like the old
+# read functions used to do). Most plugins should define their own
+# process() function and let this read() function keep control.
+#
+# PagedImagePlugin overrides read() because there is no need to read the actual
+# text of the file in, because the contents of the file is not text...
+#
+# Return number of files processed, undef if can't process
+# Note that $base_dir might be "" and that $file might
+# include directories
+sub read_into_doc_obj {
+    my $self = shift (@_);
+    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
+    my $outhandle = $self->{'outhandle'};
+    my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
+    print $outhandle "PagedImagePlugin processing \"$filename_full_path\"\n"
+    if $self->{'verbosity'} > 1;
+    print STDERR "<Processing n='$file' p='PagedImagePlugin'>\n" if ($gli);
+    # here we need to decide if we have an old text .item file, or a new xml
+    # .item file
+    my $xml_version = $self->is_xml_item_file($filename_full_path);
+    $self->tidy_item_file($filename_full_path);
+    my $doc_obj;
+    if ($xml_version) {
+    # careful checking needed here!! are we using local xml handlers or super ones
+    $self->ReadXMLFile::read($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli);
     $doc_obj = $self->{'doc_obj'};
     } else {
     my ($dir);
     ($dir, $file) = $filename =~ /^(.*?)([^\/\\]*)$/;
+    ($dir, $file) = $filename_full_path =~ /^(.*?)([^\/\\]*)$/;
     #process the .item file
     $doc_obj = $self->process_item($filename, $dir, $file, $processor);
+    $doc_obj = $self->process_item($filename_full_path, $dir, $file, $processor);
+    }
+    if ($self->{'cover_image'}) {
+    $self->associate_cover_image($doc_obj, $filename);
+    }
+    my $section = $doc_obj->get_top_section();
+    $doc_obj->add_utf8_metadata($section, "Plugin", "$self->{'plugin_type'}");
+    $doc_obj->add_metadata($section, "FileFormat", "PagedImage");
     # include any metadata passed in from previous plugins
     # note that this metadata is associated with the top level section
     my $section = $doc_obj->get_top_section();
+    $self->add_associated_files($doc_obj, $filename_full_path);
     $self->extra_metadata ($doc_obj, $section, $metadata);
-    #my $text="";
-    # do plugin specific processing of doc_obj
-    #unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
-    #print STDERR "<ProcessingError n='$file'>\n" if ($gli);
-    #return -1;
-    #}
-    # do any automatic metadata extraction
     $self->auto_extract_metadata ($doc_obj);
+    $self->{'num_processed'}++;
+    # if we haven't found any Title so far, assign one
+    $self->title_fallback($doc_obj,$section,$filename_no_path);
+    $self->add_OID($doc_obj);
     return (1,$doc_obj);
+}
+# Decide whether an .item file is the new XML flavour or the old text one.
+# For now, the test is: if the first line containing a word character also
+# contains "<PagedDocument", then it's xml.
+#
+# Argument: $filename - path of the item file. Dies if it cannot be opened.
+# Returns 1 for an XML item file, 0 otherwise (including a file that is
+# empty or has no line with a word character).
+sub is_xml_item_file {
+    my $self = shift(@_);
+    my ($filename) = @_;
+
+    open (my $item_fh, '<', $filename) || die "couldn't open $filename\n";
+
+    my $xml_version = 0;
+    # Stop at end of file: the original kept re-reading past EOF and never
+    # terminated when no line contained a word character.
+    while (defined (my $line = <$item_fh>)) {
+        next unless $line =~ /\w/;
+        $xml_version = 1 if $line =~ /<PagedDocument/;
+        last;
+    }
+    close $item_fh;
+
+    return $xml_version;
+}
+# Clean up an .item file in place: strip a UTF-8 byte order mark from the
+# first line, remove vertical tabs (some metadata titles contain them) and
+# escape bare ampersands as &amp;. The cleaned text is written to a
+# temporary file which is then copied over $filename and removed.
+#
+# Argument: $filename - path of the item file. Dies if the item file cannot
+# be opened or the temporary file cannot be created.
+sub tidy_item_file {
+    my $self = shift(@_);
+    my ($filename) = @_;
+
+    open (my $item_fh, '<', $filename) || die "couldn't open $filename\n";
+    # a unique temporary name instead of a fixed "backup.item" in the
+    # current directory, which could collide or be unwritable
+    my $backup_filename = &util::get_tmp_filename();
+    open (my $backup_fh, '>', $backup_filename) || die "couldn't write to $backup_filename\n";
+
+    my $first_line = 1;
+    while (defined (my $line = <$item_fh>)) {
+        if ($first_line) {
+            $line =~ s/^\xEF\xBB\xBF//; # strip BOM
+            $first_line = 0;
+        }
+        $line =~ s/\x0B+//g;
+        # Leave an existing "&amp;" alone: the file is rewritten on every
+        # run, so escaping it again would turn it into "&amp;amp;".
+        $line =~ s/&(?!amp;)/&amp;/g;
+        print $backup_fh ($line);
+    }
+    close $item_fh;
+
+    # Only overwrite the original when the tidied copy was written out
+    # completely; otherwise keep the original untouched.
+    if (close $backup_fh) {
+        &File::Copy::copy ($backup_filename, $filename)
+            || print STDERR "PagedImagePlugin: couldn't copy $backup_filename to $filename\n";
+    } else {
+        print STDERR "PagedImagePlugin: couldn't finish writing $backup_filename, leaving $filename untouched\n";
+    }
+    &util::rm($backup_filename);
+}
+# do we need this? The old read was the same as the BasePlug read, not the same as the ReadXMLFile read
 sub read
+{
     my $self = shift (@_);
+    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;    my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_);
+    if ((defined $process_status) && ($process_status == 1)) {
+    # process the document
+    $processor->process($doc_obj);
+    #if(defined($self->{'places_filename'})){
+    #    &util::rm($self->{'places_filename'});
+    #    $self->{'places_filename'} = undef;
+    #}
+    #$self->{'num_processed'} ++;
+    undef $doc_obj;
+    }
+    # clean up temporary files - we do this here instead of in
+    # process_image becuase associated files aren't actually copied
+    # until after process has been run.
+    if (defined $self->{'tmp_filename1'} &&
+    -e $self->{'tmp_filename1'}) {
+    &util::rm($self->{'tmp_filename1'})
+    }
+    if (defined $self->{'tmp_filename2'} &&
+    -e $self->{'tmp_filename2'}) {
+    &util::rm($self->{'tmp_filename2'})
+    }
+    if (defined $self->{'tmp_filename3'} &&
+    -e $self->{'tmp_filename3'}) {
+      &util::rm($self->{'tmp_filename3'})
+    }
+    # if process_status == 1, then the file has been processed.
+    return $process_status;
+    $self->BasePlugin::read(@_);
+}
 …
     if (defined($txtfile)&& $txtfile ne "") {
         $self->process_text ($self->{'base_dir'}.$txtfile, $txtfile, $doc_obj, $self->{'current_section'});
+            $doc_obj->set_metadata_element($self->{'current_section'},"NoText","0");
+    } else {
+        # otherwise add in some dummy text
+        #create an empty text string so we don't break downstream plugins
+        my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
+        $doc_obj->add_utf8_text($self->{'current_section'}, $text);
+            $doc_obj->add_metadata($self->{'current_section'},"NoText","1");
+    } else {
+        $self->add_dummy_text($doc_obj, $self->{'current_section'});
+    }
     } elsif ($element eq "Metadata") {
 …
     # create a new document
     $self->{'doc_obj'} = new doc ($self->{'filename'}, "indexed_doc");
+    my $doc_obj = $self->{'doc_obj'};
+    $doc_obj->set_OIDtype ($self->{'processor'}->{'OIDtype'});
+    # TODO is file filenmae_no_path??
+    $self->set_initial_doc_fields($self->{'doc_obj'}, $self->{'file'}, $self->{'processor'});
     my ($dir, $file) = $self->{'filename'} =~ /^(.*?)([^\/\\]*)$/;
     $self->{'base_dir'} = $dir;
     $self->{'num_pages'} = 0;
-    my $topsection = $doc_obj->get_top_section();
-    if ($self->{'documenttype'} eq 'paged') {
-    # set the gsdlthistype metadata to Paged - this ensures this document will
-    # be treated as a Paged doc, even if Titles are not numeric
-    $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Paged");
-    } else {
-    $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Hierarchy");
+    }
-    $doc_obj->add_metadata ($topsection, "Source", $file);
-    if ($self->{'headerpage'}) {
-    $doc_obj->add_text($topsection, &gsprintf::lookup_string("{BasPlug.dummy_text}"));
+    }
+}
 …
     my $self = shift(@_);
     my $doc_obj = $self->{'doc_obj'};
-    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
-    $doc_obj->add_metadata($doc_obj->get_top_section(), "FileFormat", "PagedImg");
     # add numpages metadata
     $doc_obj->set_utf8_metadata_element ($doc_obj->get_top_section(), 'NumPages', $self->{'num_pages'});
+    # add an OID
+    $doc_obj->set_OID();
+}
+sub process_item {
+    my $self = shift (@_);
+    my ($filename, $dir, $file, $processor) = @_;
+    my $doc_obj = new doc ($filename, "indexed_doc");
+}
+sub set_initial_doc_fields {
+    my $self = shift(@_);
+    my ($doc_obj, $filename_no_path, $processor) = @_;
     $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
     my $topsection = $doc_obj->get_top_section();
-    $doc_obj->add_utf8_metadata($topsection, "Plugin", "$self->{'plugin_type'}");
-    $doc_obj->add_metadata($topsection, "FileFormat", "PagedImg");
     if ($self->{'documenttype'} eq 'paged') {
 …
+    }
+    $doc_obj->add_metadata ($topsection, "Source", $file);
+    open (ITEMFILE, $filename) || die "couldn't open $filename\n";
+    $self->set_Source_metadata($doc_obj, $filename_no_path);
+    # if we want a header page, we need to add some text into the top section, otherwise this section will become invisible
+    if ($self->{'headerpage'}) {
+    $doc_obj->add_text($topsection, &gsprintf::lookup_string("{BasePlugin.dummy_text}"));
+    }
+}
+sub process_item {
+    my $self = shift (@_);
+    my ($filename_full_path, $dir, $filename_no_path, $processor) = @_;
+    my $doc_obj = new doc ($filename_full_path, "indexed_doc");
+    $self->set_initial_doc_fields($doc_obj, $filename_no_path, $processor);
+    my $topsection = $doc_obj->get_top_section();
+    open (ITEMFILE, $filename_full_path) || die "couldn't open $filename_full_path\n";
     my $line = "";
     my $num = 0;
 …
         if (!defined $result1)
+        {
             print "PagedImgPlug: couldn't process image \"$dir.$imgname\" for item \"$filename\"\n";
+            print "PagedImagePlugin: couldn't process image \"$dir.$imgname\" for item \"$filename_full_path\"\n";
+        }
+        }
 …
         if (!defined $result2) {
+            print "PagedImgPlug: couldn't process text file \"$dir.$txtname\" for item \"$filename\"\n";
+        }
+                else{
+            $doc_obj->set_metadata_element($cursection, "NoText", "0");
+            print "PagedImagePlugin: couldn't process text file \"$dir.$txtname\" for item \"$filename_full_path\"\n";
+            $self->add_dummy_text($doc_obj, $cursection);
+        }
         } else {
         # otherwise add in some dummy text
+        $doc_obj->add_text($cursection, &gsprintf::lookup_string("{BasPlug.dummy_text}"));
+                 # add NoText metadata which can be used to suppress the dummy text
+         }
+        $self->add_dummy_text($doc_obj, $cursection);
+        }
+    }
+    }
 …
     close ITEMFILE;
-    # if we want a header page, we need to add some text into the top section, otherwise this section will become invisible
-    if ($self->{'headerpage'}) {
-    $doc_obj->add_text($topsection, &gsprintf::lookup_string("{BasPlug.dummy_text}"));
+    }
-    $file =~ s/\.item//i;
-    $doc_obj->set_OID ();
     # add numpages metadata
     $doc_obj->set_utf8_metadata_element ($topsection, 'NumPages', "$num");
 …
 sub process_text {
     my $self = shift (@_);
     my ($fullpath, $file, $doc_obj, $cursection) = @_;
+    my ($filename_full_path, $file, $doc_obj, $cursection) = @_;
     # check that the text file exists!!
     if (!-f $fullpath) {
     print "PagedImgPlug: ERROR: File $fullpath does not exist, skipping\n";
+    if (!-f $filename_full_path) {
+    print "PagedImagePlugin: ERROR: File $filename_full_path does not exist, skipping\n";
     return 0;
+    }
     # Do encoding stuff
     my ($language, $encoding) = $self->textcat_get_language_encoding ($fullpath);
+    my ($language, $encoding) = $self->textcat_get_language_encoding ($filename_full_path);
     my $text="";
     &BasPlug::read_file($self, $fullpath, $encoding, $language, \$text);
+    &ReadTextFile::read_file($self, $filename_full_path, $encoding, $language, \$text);
     if (!length ($text)) {
     # It's a bit unusual but not out of the question to have no text, so just give a warning
         print "PagedImgPlug: WARNING: $fullpath contains no text\n";
+        print "PagedImagePlugin: WARNING: $filename_full_path contains no text\n";
+    }
 …
 # do plugin specific processing of doc_obj
 sub process {
+sub process_old {
     my $self = shift (@_);
     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
 …
+}
+sub clean_up_after_doc_obj_processing {
+    my $self = shift(@_);
+    $self->ImageConverter::clean_up_temporary_files();
+}
 ;

gsdl/trunk/perllib/plugins/ProCitePlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # ProCitePlug.pm -- A plugin for (exported) ProCite databases
+# ProCitePlugin.pm -- A plugin for (exported) ProCite databases
+#
 # A component of the Greenstone digital library software
 …
 ###########################################################################
 package ProCitePlug;
+package ProCitePlugin;
 use multiread;
 use SplitPlug;
+use SplitTextFile;
 use strict;
 no strict 'refs'; # allow filehandles to be variables and viceversa
 # ProCitePlug is a sub-class of SplitPlug
+# ProCitePlugin is a sub-class of SplitTextFile
 sub BEGIN {
     @ProCitePlug::ISA = ('SplitPlug');
+    @ProCitePlugin::ISA = ('SplitTextFile');
+}
 …
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'reqd' => "no",
     'deft' => &get_default_process_exp() },
       { 'name' => "split_exp",
     'desc' => "{SplitPlug.split_exp}",
+    'desc' => "{SplitTextFile.split_exp}",
     'type' => "regexp",
     'deft' => &get_default_split_exp(),
 …
       ];
 my $options = { 'name'     => "ProCitePlug",
         'desc'     => "{ProCitePlug.desc}",
+my $options = { 'name'     => "ProCitePlugin",
+        'desc'     => "{ProCitePlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
     open(PROCITE_FILE, "<$filename");
     my $reader = new multiread();
     $reader->set_handle ('ProCitePlug::PROCITE_FILE');
+    $reader->set_handle ('ProCitePlugin::PROCITE_FILE');
     $reader->set_encoding ($encoding);
     $reader->read_file ($textref);
 …
     my $cursection = $doc_obj->get_top_section();
     # Report that we're processing the file
     print STDERR "<Processing n='$file' p='ProCitePlug'>\n" if ($gli);
     print $outhandle "ProCitePlug: processing $file\n"
+    print STDERR "<Processing n='$file' p='ProCitePlugin'>\n" if ($gli);
+    print $outhandle "ProCitePlugin: processing $file\n"
     if ($self->{'verbosity'}) > 1;

gsdl/trunk/perllib/plugins/RTFPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # RTFPlug.pm -- plugin for importing Rich Text Format files.
+# RTFPlugin.pm -- plugin for importing Rich Text Format files.
+#
 # A component of the Greenstone digital library software
 …
 ###########################################################################
+# 12/05/02 Added usage datastructure - John Thompson
+package RTFPlugin;
+package RTFPlug;
+use ConvertToPlug;
+use ConvertBinaryFile;
 use strict;
 no strict 'refs'; # allow filehandles to be variables and viceversa
 sub BEGIN {
     @RTFPlug::ISA = ('ConvertToPlug');
+    @RTFPlugin::ISA = ('ConvertBinaryFile');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" },
       { 'name' => "description_tags",
     'desc' => "{HTMLPlug.description_tags}",
+    'desc' => "{HTMLPlugin.description_tags}",
     'type' => "flag" }
 ];
 my $options = { 'name'     => "RTFPlug",
         'desc'     => "{RTFPlug.desc}",
+my $options = { 'name'     => "RTFPlugin",
+        'desc'     => "{RTFPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
     my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists);
+    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
     if ($self->{'info_only'}) {
 …
+    }
+    $self->{'filename_extension'} = "rtf";
+    $self->{'file_type'} = "RTF";
     my $secondary_plugin_options = $self->{'secondary_plugin_options'};
     if (!defined $secondary_plugin_options->{'TEXTPlug'}) {
     $secondary_plugin_options->{'TEXTPlug'} = [];
+    if (!defined $secondary_plugin_options->{'TextPlugin'}) {
+    $secondary_plugin_options->{'TextPlugin'} = [];
+    }
     if (!defined $secondary_plugin_options->{'HTMLPlug'}) {
     $secondary_plugin_options->{'HTMLPlug'} = [];
+    if (!defined $secondary_plugin_options->{'HTMLPlugin'}) {
+    $secondary_plugin_options->{'HTMLPlugin'} = [];
+    }
     my $text_options = $secondary_plugin_options->{'TEXTPlug'};
     my $html_options = $secondary_plugin_options->{'HTMLPlug'};
+    my $text_options = $secondary_plugin_options->{'TextPlugin'};
+    my $html_options = $secondary_plugin_options->{'HTMLPlugin'};
     #$self->{'input_encoding'} = "utf8";
 …
     return q^(?i)\.rtf$^;
+}
-sub process {
-    my $self = shift (@_);
-    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
-    return $self->process_type("rtf",$base_dir,$file,$doc_obj);
+}
 ;

gsdl/trunk/perllib/plugins/RealMediaPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # RealMediaPlug.pm -- Extract metadata from Real Media files
+# RealMediaPlugin.pm -- Extract metadata from Real Media files
+#
 # Original code by Xin Gao
 …
 ###########################################################################
 package RealMediaPlug;
+package RealMediaPlugin;
 use UnknownPlug;
+use BasePlugin;
 use rm::Header::PurePerl;
 use strict;
 no strict 'refs'; # make an exception so we can use variables as filehandles
+no strict 'subs';
 sub BEGIN {
     @RealMediaPlug::ISA = ('UnknownPlug');
+    @RealMediaPlugin::ISA = ('BasePlugin');
+}
 …
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" } ];
 my $options = { 'name'     => "RealMediaPlug",
         'desc'     => "{RealMediaPlug.desc}",
+my $options = { 'name'     => "RealMediaPlugin",
+        'desc'     => "{RealMediaPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}); }
     if (defined $options) { push(@{$hashArgOptLists->{"OptList"}}, $options); }
+    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}}, $options);
     my $self = new UnknownPlug($pluginlist, $inputargs, $hashArgOptLists);
+    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
+}
+# do plugin specific processing of doc_obj
+sub read
+sub process
+{
     my $self = shift (@_);
     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
+    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
+    my $outhandle = $self->{'outhandle'};
+    my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
+    my $top_section = $doc_obj->get_top_section();
+    # prevent hashing: old code was in effect the following.
+    if ($doc_obj->{'OIDtype'} =~ /^hash$/) {
+    $doc_obj->set_OIDtype ("incremental");
+    }
-    #check process and block exps, smart block, etc
-    my ($block_status,$filename) = $self->read_block(@_);
-    return $block_status if ((!defined $block_status) || ($block_status==0));
-    # Report that we're processing the file
-    print STDERR "<Processing n='$file' p='RealMediaPlug'>\n" if ($gli);
-    print $outhandle "RealMediaPlug: processing $file\n"
-    if ($self->{'verbosity'}) > 1;
-    # create a new index document
-    my $doc_obj = new doc ($filename, "indexed_doc");
-    if ($processor->{'OIDtype'} =~ /^(assigned|dirname)$/) {
-    $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
+    }
-    else {
-    $doc_obj->set_OIDtype ("incremental");    # this is done to avoid hashing content of file
+    }
-    my $top_section = $doc_obj->get_top_section();
-    #if there's a leading directory name, eat it...
-    $file =~ s/^.*[\/\\]//;
-    my $url = $file;
-    # Source (filename) to be consistent with other plugins
-    $doc_obj->add_metadata($top_section, "Source", $url);
     my $text = "";
     my $real_media = rm::Header::PurePerl->new($filename);
+    my $real_media = rm::Header::PurePerl->new($filename_full_path);
     foreach my $key (keys %{$real_media->info})
+    {
 …
     $doc_obj->add_utf8_text($top_section, "<pre>\n$text\n</pre>");
+    $doc_obj->add_metadata($top_section, "FileFormat", "RealMedia");
-    # srclink
-    $doc_obj->add_metadata($top_section, "FileFormat", "RealMedia");
     $doc_obj->add_metadata($top_section, "srclink", "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Source]\">");
     $doc_obj->add_metadata($top_section, "/srclink", "</a>");
 …
     # Add the actual file as an associated file
     $doc_obj->associate_file($filename, $file, "RealMedia", $top_section);
+    $doc_obj->associate_file($filename_full_path, $filename_no_path, "RealMedia", $top_section);
-    # include any metadata passed in from previous plugins
-    my $section = $doc_obj->get_top_section();
-    $self->extra_metadata ($doc_obj, $section, $metadata);
-    # do plugin specific processing of doc_obj
-    return undef unless defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj));
-    # do any automatic metadata extraction
-    $self->auto_extract_metadata($doc_obj);
-    # have we found a Title?? is the Title empty??
-    if(!defined $doc_obj->get_metadata_element($section, "Title") or $doc_obj->get_metadata_element($section, "Title") eq ""){
-        my $file_derived_title = &BasPlug::filename_based_title($self, $file);
-        if(!defined $doc_obj->get_metadata_element($section, "Title")) {
-            $doc_obj->add_metadata ($section, "Title", $file_derived_title);
+        }
-        else {
-            $doc_obj->set_metadata_element ($section, "Title", $file_derived_title);
+        }
+    }
-    # add an OID
-    $doc_obj->set_OID();
-    # process the document
-    $processor->process($doc_obj);
-    $self->{'num_processed'}++;
-    return 1;
+}

gsdl/trunk/perllib/plugins/ReferPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # ReferPlug.pm - a plugin for bibliography records in Refer format
+# ReferPlugin.pm - a plugin for bibliography records in Refer format
+#
 # A component of the Greenstone digital library software
 …
 ###########################################################################
 # ReferPlug reads bibliography files in Refer format.
+# ReferPlugin reads bibliography files in Refer format.
+#
 # by Gordon W. Paynter ([email protected]), November 2000
 …
+#
+#
 # ReferPlug creates a document object for every reference in the file.
 # It is a subclass of SplitPlug, so if there are multiple records, all
+# ReferPlugin creates a document object for every reference in the file.
+# It is a subclass of SplitTextFile, so if there are multiple records, all
 # are read.
+#
 …
+#
+# 12/05/02 Added usage datastructure - John Thompson
+package ReferPlug;
+use SplitPlug;
+package ReferPlugin;
+use SplitTextFile;
 use strict;
 no strict 'refs'; # allow filehandles to be variables and viceversa
 # ReferPlug is a sub-class of BasPlug.
+# ReferPlugin is a sub-class of SplitTextFile.
 sub BEGIN {
     @ReferPlug::ISA = ('SplitPlug');
+    @ReferPlugin::ISA = ('SplitTextFile');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" },
       { 'name' => "split_exp",
     'desc' => "{SplitPlug.split_exp}",
+    'desc' => "{SplitTextFile.split_exp}",
     'type' => "regexp",
     'reqd' => "no",
 …
       ];
 my $options = { 'name'     => "ReferPlug",
         'desc'     => "{ReferPlug.desc}",
+my $options = { 'name'     => "ReferPlugin",
+        'desc'     => "{ReferPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
     my $cursection = $doc_obj->get_top_section();
     # Report that we're processing the file
     print STDERR "<Processing n='$file' p='ReferPlug'>\n" if ($gli);
     print $outhandle "ReferPlug: processing $file\n"
+    print STDERR "<Processing n='$file' p='ReferPlugin'>\n" if ($gli);
+    print $outhandle "ReferPlugin: processing $file\n"
     if ($self->{'verbosity'}) > 1;

gsdl/trunk/perllib/plugins/RogPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # RogPlug.pm -- simple text plugin
+# RogPlugin.pm -- simple text plugin
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
 # creates simple single-level document from .rog or .mdb files
 package RogPlug;
 use BasPlug;
+package RogPlugin;
+use BasePlugin;
 use sorttools;
 use doc;
 …
 sub BEGIN {
     @RogPlug::ISA = ('BasPlug');
+    @RogPlugin::ISA = ('BasePlugin');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'reqd' => "no",
 …
       ];
 my $options = { 'name'     => "RogPlug",
         'desc'     => "{RogPlug.desc}",
+my $options = { 'name'     => "RogPlugin",
+        'desc'     => "{RogPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
     my $gz = (defined $3) ? 1: 0;
         print STDERR "<Processing n='$file' p='RogPlug'>\n" if ($gli);
     print STDERR "RogPlug: processing $filename\n" if $processor->{'verbosity'};
+    print STDERR "<Processing n='$file' p='RogPlugin'>\n" if ($gli);
+    print STDERR "RogPlugin: processing $filename\n" if $processor->{'verbosity'};
     if ($gz) {
     open (FILE, "zcat $filename |")
         || die "RogPlug::read - zcat can't open $filename\n";
+        || die "RogPlugin::read - zcat can't open $filename\n";
     } else {
     open (FILE, $filename)
         || die "RogPlug::read - can't open $filename\n";
+        || die "RogPlugin::read - can't open $filename\n";
+    }

gsdl/trunk/perllib/plugins/SourceCodePlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # SRCPlug.pm -- source code plugin
+# SourceCodePlugin.pm -- source code plugin
+#
 # A component of the Greenstone digital library software
 …
 # 12/05/02 Added usage datastructure - John Thompson
 package SRCPlug;
 use BasPlug;
+package SourceCodePlugin;
+use ReadTextFile;
 use strict;
 …
 sub BEGIN {
     @SRCPlug::ISA = ('BasPlug');
+    @SourceCodePlugin::ISA = ('ReadTextFile');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{ReadTextFile.process_exp}",
     'type' => "regexp",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" } ,
       { 'name' => "block_exp",
     'desc' => "{BasPlug.block_exp}",
+    'desc' => "{ReadTextFile.block_exp}",
     'type' => "regexp",
     'deft' => &get_default_block_exp(),
     'reqd' => "no" },
       { 'name' => "remove_prefix",
     'desc' => "{SRCPlug.remove_prefix}",
+    'desc' => "{SourceCodePlugin.remove_prefix}",
     'type' => "regexp",
     'deft' => "^.*[/\\]",
     'reqd' => "no" } ];
 my $options = { 'name'     => "SRCPlug",
         'desc'     => "{SRCPlug.desc}",
+my $options = { 'name'     => "SourceCodePlugin",
+        'desc'     => "{SourceCodePlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
     my $self = shift (@_);
-#    return q^(?i)\.te?xt$^;
     return q^(Makefile.*|README.*|(?i)\.(c|cc|cpp|C|h|hpp|pl|pm|sh))$^;
+}
 …
     my $outhandle = $self->{'outhandle'};
         print STDERR "<Processing n='$file' p='SRCPlug'>\n" if ($gli);
     print $outhandle "SRCPlug: processing $file\n"
+    print STDERR "<Processing n='$file' p='SourceCodePlugin'>\n" if ($gli);
+    print $outhandle "SourceCodePlugin: processing $file\n"
     if $self->{'verbosity'} > 1;

gsdl/trunk/perllib/plugins/StructuredHTMLPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # StructuredHTMLPlug.pm -- html plugin with extra facilities for teasing out
+# StructuredHTMLPlugin.pm -- html plugin with extra facilities for teasing out
 # hierarchical structure (such as h1, h2, h3, or user-defined tags) in an
 # HTML document
 …
 # format:e.g. level1 (Abstract_title|ChapterTitle|Referencing Heading) level2(SectionHeading)...
 package StructuredHTMLPlug;
 use HTMLPlug;
 use ImagePlug;
 #use strict; # every perl program should have this!
 #no strict 'refs'; # make an exception so we can use variables as filehandles
+package StructuredHTMLPlugin;
+use HTMLPlugin;
+use ImageConverter; # want the identify method
+use strict; # every perl program should have this!
+no strict 'refs'; # make an exception so we can use variables as filehandles
 sub BEGIN {
     @StructuredHTMLPlug::ISA = ('HTMLPlug');
+    @StructuredHTMLPlugin::ISA = ('HTMLPlugin');
+}
 …
+    [
      { 'name' => "level1_header",
        'desc' => "{StructuredHTMLPlug.level1_header}",
+       'desc' => "{StructuredHTMLPlugin.level1_header}",
        'type' => "regexp",
        'reqd' => "no",
        'deft' => "" },
      { 'name' => "level2_header",
        'desc' => "{StructuredHTMLPlug.level2_header}",
+       'desc' => "{StructuredHTMLPlugin.level2_header}",
        'type' => "regexp",
        'reqd' => "no",
        'deft' => "" },
      { 'name' => "level3_header",
        'desc' => "{StructuredHTMLPlug.level3_header}",
+       'desc' => "{StructuredHTMLPlugin.level3_header}",
        'type' => "regexp",
        'reqd' => "no",
        'deft' => "" },
      { 'name' => "title_header",
        'desc' => "{StructuredHTMLPlug.title_header}",
+       'desc' => "{StructuredHTMLPlugin.title_header}",
        'type' => "regexp",
        'reqd' => "no",
        'deft' => "" },
      { 'name' => "delete_toc",
        'desc' => "{StructuredHTMLPlug.delete_toc}",
+       'desc' => "{StructuredHTMLPlugin.delete_toc}",
        'type' => "flag",
        'reqd' => "no"},
      { 'name' => "toc_header",
        'desc' => "{StructuredHTMLPlug.toc_header}",
+       'desc' => "{StructuredHTMLPlugin.toc_header}",
        'type' => "regexp",
        'reqd' => "no",
 …
      ];
 my $options = { 'name'     => "StructuredHTMLPlug",
         'desc'     => "{StructuredHTMLPlug.desc}",
+my $options = { 'name'     => "StructuredHTMLPlugin",
+        'desc'     => "{StructuredHTMLPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new HTMLPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new HTMLPlugin($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
     my $outhandle = $self->{'outhandle'};
     print $outhandle "StructuredHTMLPlug: processing $file\n"
+    print $outhandle "StructuredHTMLPlugin: processing $file\n"
         if $self->{'verbosity'} > 1;
 …
     $body_text =~ s/(<p[^>]*><o:p>&nbsp;<\/o:p><\/p>)//isg;
+    $section_text .= "<!--\n<Section>\n-->\n";
+    # What was the following line for? It is effectively unused; do we still need it?
+    #$section_text .= "<!--\n<Section>\n-->\n";
     #my $top_section_tag = "<!--\n<Section>\n-->\n";
     #$body_text =~ s/(<div.*)/$top_section_text$doctitle$1/i;
 …
     my ($image_type, $actual_width, $actual_height, $image_size)
         = &ImagePlug::identify($img_filename, $outhandle, $verbosity);
+        = &ImageConverter::identify($img_filename, $outhandle, $verbosity);
     #print STDERR "**** $actual_width x $actual_height";
 …
         # derive new image name based on current image
         my ($tailname, $dirname, $suffix)
         = &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
+        = &File::Basename::fileparse($img_filename, "\\.[^\\.]+\$");
         my $resized_filename
 …
         # Generate smaller image with convert
         my $newsize = "$img_widthx$image_height";
+        my $newsize = "$img_width"."x$img_height";
         my $command = "convert -interlace plane -verbose "
         ."-geometry $newsize \"img_$filename\" \"$resized_filename\"";
+        ."-geometry $newsize \"$img_filename\" \"$resized_filename\"";
         #print $outhandle "ImageResize: $command\n" if ($verbosity > 2);
         #my $result = '';
 …
         $value = $1;
         if (!defined $value || !defined $tag){
         #print $outhandle "StructuredHTMLPlug: can't find VALUE in \"$tag\"\n";
+        #print $outhandle "StructuredHTMLPlugin: can't find VALUE in \"$tag\"\n";
         next;
         } else {

gsdl/trunk/perllib/plugins/TextPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # TEXTPlug.pm -- simple text plugin
+# TextPlugin.pm -- simple text plugin
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
 # 12/05/02 Added usage datastructure - John Thompson
 package TEXTPlug;
 use BasPlug;
+package TextPlugin;
+use ReadTextFile;
 use strict;
 no strict 'refs'; # allow filehandles to be variables and viceversa
+no strict 'subs';
 sub BEGIN {
     @TEXTPlug::ISA = ('BasPlug');
+    @TextPlugin::ISA = ('ReadTextFile');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" } ,
       { 'name' => "title_sub",
     'desc' => "{TEXTPlug.title_sub}",
+    'desc' => "{TextPlugin.title_sub}",
     'type' => "regexp",
     'deft' => "",
     'reqd' => "no" } ];
 my $options = { 'name'     => "TEXTPlug",
         'desc'     => "{TEXTPlug.desc}",
+my $options = { 'name'     => "TextPlugin",
+        'desc'     => "{TextPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
     my $outhandle = $self->{'outhandle'};
     print STDERR "<Processing n='$file' p='TEXTPlug'>\n" if ($gli);
     print $outhandle "TEXTPlug: processing $file\n"
+    print STDERR "<Processing n='$file' p='TextPlugin'>\n" if ($gli);
+    print $outhandle "TextPlugin xx: processing $file\n"
     if $self->{'verbosity'} > 1;
 …
+    }
     # Add FileFormat metadata
     $doc_obj->add_metadata($cursection, "FileFormat", "TEXT");
+    $doc_obj->add_metadata($cursection, "FileFormat", "Text");
     # insert preformat tags and add text to document object
 …
 # replace_srcdoc_with_html.pl requires all subroutines that support src_replaceable
 # to contain a method called tmp_area_convert_file - this is indeed the case with all
 # Perl modules that are subclasses of ConvertToPlug.pm, but as we want TEXTPlug to also
 # be srcreplaceable and because TEXTPlug does not inherit from ConvertToPlug.pm, we have
+# Perl modules that are subclasses of ConvertBinaryFile.pm, but as we want TextPlugin to also
+# be srcreplaceable and because TextPlugin does not inherit from ConvertBinaryFile.pm, we have
 # a similar subroutine with the same name here.
 sub tmp_area_convert_file {
 …
     # Recreate the original file for writing the updated contents
     unless(open(TEXT, "<$tmp_filename")) { # open it as a new file for writing
     print STDERR "TEXTPlug.pm: Unable to open and read from $tmp_filename for converting to html...ERROR: $!\n";
+    print STDERR "TextPlugin.pm: Unable to open and read from $tmp_filename for converting to html...ERROR: $!\n";
     return ""; # no file name
+    }
 …
     # try creating this new file writing and try opening it for writing, else exit with error value
     unless(open(HTML, ">$output_filename")) {  # open the new html file for writing
     print STDERR "TEXTPlug.pm: Unable to create $output_filename for writing $tailname$suffix txt converted to html...ERROR: $!\n";
+    print STDERR "TextPlugin.pm: Unable to create $output_filename for writing $tailname$suffix txt converted to html...ERROR: $!\n";
     return ""; # no filename
+    }

gsdl/trunk/perllib/plugins/UnknownPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # UnknownPlug.pm -- Plugin for files you know about but Greenstone doesn't
+# UnknownPlugin.pm -- Plugin for files you know about but Greenstone doesn't
+#
 # A component of the Greenstone digital library software from the New
 …
 ###########################################################################
 # UnknownPlug - a plugin for unknown files
+# UnknownPlugin - a plugin for unknown files
 # This is a simple Plugin for importing files in formats that
 …
 # movies, I add this line to the collection configuration file:
 # plugin UnknownPlug -process_exp "*.MOV" -assoc_field "movie"
+# plugin UnknownPlugin -process_exp "*.MOV" -assoc_field "movie"
 # A document is created for each movie, with the associated movie
 …
 # You can also add extra metadata, such as the Title, Subject, and
 # Duration, with metadata.xml files and RecPlug.  (If you want to use
 # UnknownPlug with more than one type of file, you will have to add
+# UnknownPlugin with more than one type of file, you will have to add
 # some sort of distinguishing metadata in this way.)
 package UnknownPlug;
+package UnknownPlugin;
 use BasPlug;
+use BasePlugin;
 use strict;
 …
 sub BEGIN {
     @UnknownPlug::ISA = ('BasPlug');
+    @UnknownPlugin::ISA = ('BasePlugin');
+}
 my $arguments =
     [ { 'name' => "assoc_field",
     'desc' => "{UnknownPlug.assoc_field}",
+    'desc' => "{UnknownPlugin.assoc_field}",
     'type' => "string",
     'deft' => "",
     'reqd' => "no" },
       { 'name' => "file_format",
     'desc' => "{UnknownPlug.file_format}",
+    'desc' => "{UnknownPlugin.file_format}",
     'type' => "string",
     'deft' => "",
     'reqd' => "no" },
       { 'name' => "mime_type",
     'desc' => "{UnknownPlug.mime_type}",
+    'desc' => "{UnknownPlugin.mime_type}",
     'type' => "string",
     'deft' => "",
     'reqd' => "no" },
       { 'name' => "srcicon",
     'desc' => "{UnknownPlug.srcicon}",
+    'desc' => "{UnknownPlugin.srcicon}",
     'type' => "string",
     'deft' => "iconunknown",
     'reqd' => "no" },
       { 'name' => "process_extension",
     'desc' => "{UnknownPlug.process_extension}",
+    'desc' => "{UnknownPlugin.process_extension}",
     'type' => "string",
     'deft' => "",
     'reqd' => "no" } ];
 my $options = { 'name'     => "UnknownPlug",
         'desc'     => "{UnknownPlug.desc}",
+my $options = { 'name'     => "UnknownPlugin",
+        'desc'     => "{UnknownPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
     # "-process_extension" is a simpler alternative to -process_exp for non-regexp people
 …
+}
-sub get_default_process_exp {
-    return '';
+}
+sub process {
+    my $self = shift (@_);
+    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
+# Associate the unknown file with the new document
+    my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file);
+    my $outhandle = $self->{'outhandle'};
+    my $verbosity = $self->{'verbosity'};
+sub associate_unknown_file {
+    my $self = shift (@_);
+    my $filename = shift (@_);   # filename with full path
+    my $file = shift (@_);       # filename without path
+    my $doc_obj = shift (@_);
+    my $verbosity = $self->{'verbosity'};
+    my $outhandle = $self->{'outhandle'};
+    # check the filename is okay
+    return 0 if ($file eq "" || $filename eq "");
+    my $url = $file;
+    ##$url =~ s/ /%20/g;
+    # check the filename is okay - do we need this??
+    if ($filename_full_path eq "" || $filename_no_path eq "") {
+    print $outhandle "UnknownPlugin: couldn't process \"$filename_no_path\"\n";
+    return undef;
+    }
     # Add the file as an associated file ...
 …
     my $assoc_field = $self->{'assoc_field'} || "unknown_file";
     $doc_obj->associate_file($filename, $file, $mime_type, $section);
+    $doc_obj->associate_file($filename_full_path, $filename_no_path, $mime_type, $section);
     $doc_obj->add_metadata ($section, "FileFormat", $file_format);
     $doc_obj->add_metadata ($section, "MimeType", $mime_type);
     $doc_obj->add_metadata ($section, $assoc_field, $file);
+    $doc_obj->add_metadata ($section, $assoc_field, $filename_full_path);
     $doc_obj->add_metadata ($section, "srclink",
                 "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[$assoc_field]\">");
-    #$doc_obj->add_metadata ($section, "srcicon", "_iconunknown_");
     $doc_obj->add_metadata ($section, "srcicon", "_".$self->{'srcicon'}."_");
     $doc_obj->add_metadata ($section, "/srclink", "</a>");
     # add NoText metadata which can be used to suppress the dummy text
     $doc_obj->add_metadata ($section, "NoText", "1");
+    # we have no text - add dummy text and NoText metadata
+    $self->add_dummy_text($doc_obj, $section);
-    return 1;
+}
-# The UnknownPlug read() function. This function does all the right
-# things to make general options work for a given plugin.  UnknownPlug
-# overrides read() because there is no need to read the actual text of
-# the file in, because the contents of the file is not text...
+#
+#
-# Return number of files processed, undef if can't process
+#
-# Note that $base_dir might be "" and that $file might include directories
-sub read {
-    my $self = shift (@_);
-    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
-    my $outhandle = $self->{'outhandle'};
-    # Make sure we're processing the correct file
-    my ($block_status,$filename) = $self->read_block(@_);
-    return $block_status if ((!defined $block_status) || ($block_status==0));
-    print STDERR "<Processing n='$file' p='UnknownPlug'>\n" if ($gli);
-    print $outhandle "UnknownPlug processing \"$filename\"\n"
-        if $self->{'verbosity'} > 1;
-    #if there's a leading directory name, eat it...
-    $file =~ s/^.*[\/\\]//;
-    # create a new document
-    my $doc_obj = new doc ($filename, "indexed_doc");
-    my $top_section = $doc_obj->get_top_section();
-    $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});
-    $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}");
-    $doc_obj->add_metadata($top_section, "Source", $file); # set the filename as Source metadata to be consistent with other plugins
-    $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename));
-    # URL metadata (even invalid ones) are used to support internal
-    # links, so even if 'file_is_url' is off, still need to store info
-    my $web_url = "http://$file";
-    $doc_obj->add_metadata($top_section, "URL", $web_url);
-    # associate the file with the document
-    if (associate_unknown_file($self, $filename, $file, $doc_obj) != 1)
+    {
-    if ($gli) {
-        print STDERR "<ProcessingError n='$file'>\n";
+    }
-    print $outhandle "UnknownPlug: couldn't process \"$filename\"\n";
-    return -1; # error during processing
+    }
-    #create an empty text string so we don't break downstream plugins
-    my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);
-    # include any metadata passed in from previous plugins
-    my $section = $doc_obj->get_top_section();
-    $self->extra_metadata ($doc_obj, $section, $metadata);
-    $self->title_fallback($doc_obj,$section,$file);
-    # do plugin specific processing of doc_obj
-    unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
-    print STDERR "<ProcessingError n='$file'>\n" if ($gli);
-    return -1;
+    }
-    # do any automatic metadata extraction
-    $self->auto_extract_metadata ($doc_obj);
-    # add an OID
-    $doc_obj->set_OID();
-    $doc_obj->add_utf8_text($section, $text);
-    # process the document
-    $processor->process($doc_obj);
-    $self->{'num_processed'} ++;
-    return 1;
+}
-# UnknownPlug processing of doc_obj.  In practice we don't need to do
-# anything here because the read function takes care of everything.
-sub process {
-    my $self = shift (@_);
-    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
-    my $outhandle = $self->{'outhandle'};
     return 1;
+}

gsdl/trunk/perllib/plugins/W3ImagePlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # W3ImgPlug.pm -- Context-based image indexing plugin for HTML documents
+# W3ImagePlugin.pm -- Context-based image indexing plugin for HTML documents
+#
 # A component of the Greenstone digital library software
 …
 #  collection builds at the import stage.
+#
 #  W3ImgPlug is a subclass of HTMLPlug (i.e. it will index pages also
 #  if required). It can be used in place of HTMLPlug to index both
+#  W3ImagePlugin is a subclass of HTMLPlugin (i.e. it will index pages also
+#  if required). It can be used in place of HTMLPlugin to index both
 #  pages and their images.
+#
 …
 #    ImageMagick can be downloaded from the website above.
 #    Make sure the system path includes the ImageMagick binaries
 #    before using W3ImgPlug.
+#    before using W3ImagePlugin.
+#
 #    NOTE: NT/2000/XP contain a filesystem utility 'convert.exe'
 …
 #   ...
+#
 #   plugin W3ImgPlug -index_pages -aggressiveness 6
+#   plugin W3ImagePlugin -index_pages -aggressiveness 6
+#
 #   ...
 …
+#
 package W3ImgPlug;
 use HTMLPlug;
+package W3ImagePlugin;
+use HTMLPlugin;
 use ghtml;
 use unicode;
 …
 sub BEGIN {
     @W3ImgPlug::ISA = qw( HTMLPlug );
+    @W3ImagePlugin::ISA = qw( HTMLPlugin );
+}
 my $aggressiveness_list =
     [ { 'name' => "1",
     'desc' => "{W3ImgPlug.aggressiveness.1}" },
+    'desc' => "{W3ImagePlugin.aggressiveness.1}" },
       { 'name' => "2",
     'desc' => "{W3ImgPlug.aggressiveness.2}" },
+    'desc' => "{W3ImagePlugin.aggressiveness.2}" },
       { 'name' => "3",
     'desc' => "{W3ImgPlug.aggressiveness.3}" },
+    'desc' => "{W3ImagePlugin.aggressiveness.3}" },
       { 'name' => "4",
     'desc' => "{W3ImgPlug.aggressiveness.4}" },
+    'desc' => "{W3ImagePlugin.aggressiveness.4}" },
       { 'name' => "5",
     'desc' => "{W3ImgPlug.aggressiveness.5}" },
+    'desc' => "{W3ImagePlugin.aggressiveness.5}" },
       { 'name' => "6",
     'desc' => "{W3ImgPlug.aggressiveness.6}" },
+    'desc' => "{W3ImagePlugin.aggressiveness.6}" },
       { 'name' => "7",
     'desc' => "{W3ImgPlug.aggressiveness.7}" },
+    'desc' => "{W3ImagePlugin.aggressiveness.7}" },
       { 'name' => "8",
     'desc' => "{W3ImgPlug.aggressiveness.8}" },
+    'desc' => "{W3ImagePlugin.aggressiveness.8}" },
       { 'name' => "9",
     'desc' => "{W3ImgPlug.aggressiveness.9}" } ];
+    'desc' => "{W3ImagePlugin.aggressiveness.9}" } ];
 my $arguments =
     [ { 'name' => "aggressiveness",
     'desc' => "{W3ImgPlug.aggressiveness}",
+    'desc' => "{W3ImagePlugin.aggressiveness}",
     'type' => "int",
     'list' => $aggressiveness_list,
 …
     'reqd' => "no" },
       { 'name' => "index_pages",
     'desc' => "{W3ImgPlug.index_pages}",
+    'desc' => "{W3ImagePlugin.index_pages}",
     'type' => "flag",
     'reqd' => "no" },
       { 'name' => "no_cache_images",
     'desc' => "{W3ImgPlug.no_cache_images}",
+    'desc' => "{W3ImagePlugin.no_cache_images}",
     'type' => "flag",
     'reqd' => "no" },
       { 'name' => "min_size",
     'desc' => "{W3ImgPlug.min_size}",
+    'desc' => "{W3ImagePlugin.min_size}",
     'type' => "int",
     'deft' => "2000",
     'reqd' => "no" },
       { 'name' => "min_width",
     'desc' => "{W3ImgPlug.min_width}",
+    'desc' => "{W3ImagePlugin.min_width}",
     'type' => "int",
     'deft' => "50",
     'reqd' => "no" },
       { 'name' => "min_height",
     'desc' => "{W3ImgPlug.min_height}",
+    'desc' => "{W3ImagePlugin.min_height}",
     'type' => "int",
     'deft' => "50",
     'reqd' => "no" },
       { 'name' => "thumb_size",
     'desc' => "{W3ImgPlug.thumb_size}",
+    'desc' => "{W3ImagePlugin.thumb_size}",
     'type' => "int",
     'deft' => "100",
     'reqd' => "no" },
       { 'name' => "convert_params",
     'desc' => "{W3ImgPlug.convert_params}",
+    'desc' => "{W3ImagePlugin.convert_params}",
     'type' => "string",
     'deft' => "",
     'reqd' => "no" },
       { 'name' => "min_near_text",
     'desc' => "{W3ImgPlug.min_near_text}",
+    'desc' => "{W3ImagePlugin.min_near_text}",
     'type' => "int",
     'deft' => "10",
     'reqd' => "no" },
       { 'name' => "max_near_text",
     'desc' => "{W3ImgPlug.max_near_text}",
+    'desc' => "{W3ImagePlugin.max_near_text}",
     'type' => "int",
     'deft' => "400",
     'reqd' => "no" },
       { 'name' => "smallpage_threshold",
     'desc' => "{W3ImgPlug.smallpage_threshold}",
+    'desc' => "{W3ImagePlugin.smallpage_threshold}",
     'type' => "int",
     'deft' => "2048",
     'reqd' => "no" },
       { 'name' => "textrefs_threshold",
     'desc' => "{W3ImgPlug.textrefs_threshold}",
+    'desc' => "{W3ImagePlugin.textrefs_threshold}",
     'type' => "int",
     'deft' => "2",
     'reqd' => "no" },
       { 'name' => "caption_length",
     'desc' => "{W3ImgPlug.caption_length}",
+    'desc' => "{W3ImagePlugin.caption_length}",
     'type' => "int",
     'deft' => "80",
     'reqd' => "no" },
       { 'name' => "neartext_length",
     'desc' => "{W3ImgPlug.neartext_length}",
+    'desc' => "{W3ImagePlugin.neartext_length}",
     'type' => "int",
     'deft' => "300",
     'reqd' => "no" },
       { 'name' => "document_text",
     'desc' => "{W3ImgPlug.document_text}",
+    'desc' => "{W3ImagePlugin.document_text}",
     'type' => "flag",
     'reqd' => "no" } ];
 my $options = { 'name'     => "W3ImgPlug",
         'desc'     => "{W3ImgPlug.desc}",
+my $options = { 'name'     => "W3ImagePlugin",
+        'desc'     => "{W3ImagePlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new HTMLPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new HTMLPlugin($pluginlist, $inputargs, $hashArgOptLists);
     # init class variables
 …
+}
 # if indexing pages, let HTMLPlug do it's stuff
+# if indexing pages, let HTMLPlugin do its stuff
 # image extraction done through read()
 sub process {
+    my ($self, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
+    my $self = shift(@_);
+    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
     $self->{'imglist'} = ();
     if ( $self->{'index_pages'} ) {
 …
 # get complex configuration options from configuration files
 # -- $GSDLCOLLECTION/etc/W3ImgPlug.cfg (tag sets for aggr 2+)
+# -- $GSDLCOLLECTION/etc/W3ImagePlugin.cfg (tag sets for aggr 2+)
 # -- $GSDLHOME/etc/packages/phind/stopword/en/brown.sw (stopwords for aggr 5+)
 # If there's no W3ImgPlug.cfg file we'll use the following default values
+# If there's no W3ImagePlugin.cfg file we'll use the following default values
 my $defaultcfg = '
 <delimitertagset>
 …
     my ($filepath);
     print {$self->{'outhandle'}} "W3ImgPlug: Initialising\n"
+    print {$self->{'outhandle'}} "W3ImagePlugin: Initialising\n"
     if $self->{'verbosity'} > 1;
     # etc/W3ImgPlug.cfg (XML)
+    # etc/W3ImagePlugin.cfg (XML)
     # tag sets for captions and neartext
     if ( $self->{'aggressiveness'} > 1 && $self->{'aggressiveness'} != 9 ) {
 …
     my ($cfg, @tagsets, $tagset, $type, @delims);
     $filepath = "$collpath/etc/W3ImgPlug.cfg";
+    $filepath = "$collpath/etc/W3ImagePlugin.cfg";
     if ( open CFG, "<$filepath" ) {
         while (<CFG>) { $cfg .= $_ }
 …
     # output a warning if there seem to be no delimiters
     if ( scalar(@{$self->{'cdelims'}} == 0)) {
         print {$self->{'outhandle'}} "W3ImgPlug: Warning: no caption delimiters found in $filepath\n";
+        print {$self->{'outhandle'}} "W3ImagePlugin: Warning: no caption delimiters found in $filepath\n";
+    }
     if ( scalar(@{$self->{'delims'}} == 0)) {
         print {$self->{'outhandle'}} "W3ImgPlug: Warning: no neartext delimiters found in $filepath\n";
+        print {$self->{'outhandle'}} "W3ImagePlugin: Warning: no neartext delimiters found in $filepath\n";
+    }
+    }
 …
         close STOPWORDS;
     } else {
         print {$self->{'outhandle'}} "W3ImgPlug: Warning: couldn't open stopwords file at $filepath ($!)\n";
+        print {$self->{'outhandle'}} "W3ImagePlugin: Warning: couldn't open stopwords file at $filepath ($!)\n";
+    }
 …
     if ( $self->{'neartext_length'} > $self->{'max_near_text'} ) {
     $self->{'max_near_text'} = $self->{'neartext_length'} * 1.33;
     print {$self->{'outhandle'}} "W3ImgPlug: Warning: adjusted max_text to $self->{'max_near_text'}\n";
+    print {$self->{'outhandle'}} "W3ImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n";
+    }
     if ( $self->{'caption_length'} > $self->{'max_near_text'} ) {
     $self->{'max_near_text'} = $self->{'caption_length'} * 1.33;
     print {$self->{'outhandle'}} "W3ImgPlug: Warning: adjusted max_text to $self->{'max_near_text'}\n";
+    print {$self->{'outhandle'}} "W3ImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n";
+    }
 …
     my ($self, $pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = (@_);
     my ($doc_obj, $section, $filepath, $imgtag, $pos, $context, $numdocs, $tndir, $imgs);
     # forward normal read (runs HTMLPlug if index_pages T)
+    # forward normal read (runs HTMLPlugin if index_pages T)
     my $ok =  $self->SUPER::read($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli);
     if ( ! $ok ) { return $ok } # what is this returning??
 …
         ($imgtag) = ($context =~ /(<(?:img|a|body)\s[^>]*$filepath[^>]*>)/is );
         if (! defined($imgtag)) { $imgtag = $filepath }
         print $outhandle "W3ImgPlug: extracting $filepath\n"
+        print $outhandle "W3ImagePlugin: extracting $filepath\n"
         if ( $self->{'verbosity'} > 1 );
         $doc_obj = new doc ("", "indexed_doc");
 …
     return $numdocs;
     } else {
     print $outhandle "W3ImgPlug: No images from $file indexed\n"
+    print $outhandle "W3ImagePlugin: No images from $file indexed\n"
         if ( $self->{'verbosity'} > 2 );
     return 1;
 …
     `convert -flatten -filter Hanning $self->{'convert_params'} -geometry "$self->{'thumb_size'}x$self->{'thumb_size'}>" $filepath $thumbfp` unless -e $thumbfp;
     if ( ! (-e $thumbfp) ) {
     print STDERR "W3ImgPlug: 'convert' failed. Check ImageMagicK binaries are installed and working correctly\n"; return 0;
+    print STDERR "W3ImagePlugin: 'convert' failed. Check ImageMagicK binaries are installed and working correctly\n"; return 0;
+    }
 …
     } elsif ( $bestlen[$best1] < $mintext ) {
     # use plain text extraction if tags failed (e.g. usable tag outside context)
     print {$self->{'outhandle'}} "W3ImgPlug: Fallback to plain-text extraction for $tag\n"
+    print {$self->{'outhandle'}} "W3ImagePlugin: Fallback to plain-text extraction for $tag\n"
         if $self->{'verbosity'} > 2;
     $neartext[0] = "<tr><td>RawNeartext</td><td>" . $self->extract_raw_neartext($tag, $textref) . "</td></tr>";
 …
         `identify $abspath -ping -format "%wx%h"` =~ /^(\d*)x(\d*)$/m;
     if (! ($width && $height)) {
         print STDERR "W3ImgPlug: ($abspath) 'identify' failed. Check ImageMagicK binaries are installed and working correctly\n"; next;
+        print STDERR "W3ImagePlugin: ($abspath) 'identify' failed. Check ImageMagicK binaries are installed and working correctly\n"; next;
+    }
     $filesize = (-s $abspath);
 …
        $imgs->{$filepath}{'filesize'} = $filesize;
        } else {
        print {$self->{'outhandle'}} "W3ImgPlug: skipping $self->{'base_path'}/$relpath: $filesize, $width x $height\n"
+       print {$self->{'outhandle'}} "W3ImagePlugin: skipping $self->{'base_path'}/$relpath: $filesize, $width x $height\n"
            if $self->{'verbosity'} > 2;
+       }
 …
+}
 # HTMLPlug only extracts meta-data if it is specified in plugin options
+# HTMLPlugin only extracts meta-data if it is specified in plugin options
 # hence a special function to do it here
 sub get_meta_value {
 …
 # so we can go straight to the image
 # within the cached version of the source page
 # (augment's HTMLPlug sub)
+# (augments HTMLPlugin sub)
 sub replace_images {
     my $self = shift (@_);

gsdl/trunk/perllib/plugins/WordPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # WordPlug.pm -- plugin for importing Microsoft Word documents
+# WordPlugin.pm -- plugin for importing Microsoft Word documents
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
 # 12/05/02 Added usage datastructure - John Thompson
 package WordPlug;
 use ConvertToPlug;
+package WordPlugin;
+use ConvertBinaryFile;
 use strict;
 no strict 'refs'; # allow filehandles to be variables and viceversa
 sub BEGIN {
     @WordPlug::ISA = ('ConvertToPlug');
+    @WordPlugin::ISA = ('ConvertBinaryFile');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "regexp",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" },
       { 'name' => "description_tags",
     'desc' => "{HTMLPlug.description_tags}",
+    'desc' => "{HTMLPlugin.description_tags}",
     'type' => "flag" }
       ];
 my $options = { 'name'     => "WordPlug",
         'desc'     => "{WordPlug.desc}",
+my $options = { 'name'     => "WordPlugin",
+        'desc'     => "{WordPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     if ($ENV{'GSDLOS'} =~ m/^windows$/i) {
     my $ws_arg = [ { 'name' => "windows_scripting",
              'desc' => "{WordPlug.windows_scripting}",
+             'desc' => "{WordPlugin.windows_scripting}",
              'type' => "flag",
                  'reqd' => "no" },
 …
              'deft' => "Title" },
                { 'name' => "level1_header",
              'desc' => "{StructuredHTMLPlug.level1_header}",
+             'desc' => "{StructuredHTMLPlugin.level1_header}",
              'type' => "regexp",
              'reqd' => "no",
              'deft' => "" },
                { 'name' => "level2_header",
              'desc' => "{StructuredHTMLPlug.level2_header}",
+             'desc' => "{StructuredHTMLPlugin.level2_header}",
              'type' => "regexp",
              'reqd' => "no",
              'deft' => "" },
                { 'name' => "level3_header",
              'desc' => "{StructuredHTMLPlug.level3_header}",
+             'desc' => "{StructuredHTMLPlugin.level3_header}",
              'type' => "regexp",
              'reqd' => "no",
              'deft' => "" },
                { 'name' => "title_header",
              'desc' => "{StructuredHTMLPlug.title_header}",
+             'desc' => "{StructuredHTMLPlugin.title_header}",
              'type' => "regexp",
              'reqd' => "no",
              'deft' => "" },
                { 'name' => "delete_toc",
              'desc' => "{StructuredHTMLPlug.delete_toc}",
+             'desc' => "{StructuredHTMLPlugin.delete_toc}",
              'type' => "flag",
              'reqd' => "no",
 …
              'modegli' => "3"},
                { 'name' => "toc_header",
              'desc' => "{StructuredHTMLPlug.toc_header}",
+             'desc' => "{StructuredHTMLPlugin.toc_header}",
              'type' => "regexp",
              'reqd' => "no",
 …
+    }
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists);
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
+    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
     if ($self->{'info_only'}) {
 …
+    }
+    #this is passed through to gsConvert.pl by ConvertToPlug.pm
+    $self->{'filename_extension'} = "doc";
+    $self->{'file_type'} = "Word";
+    #this is passed through to gsConvert.pl by ConvertBinaryFile.pm
     $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'};
 …
     my $secondary_plugin_options = $self->{'secondary_plugin_options'};
     if (defined $self->{'windows_scripting'}) {
     if (!defined $secondary_plugin_options->{'StructuredHTMLPlug'}){
         $secondary_plugin_options->{'StructuredHTMLPlug'} = [];
         my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlug'};
+    if (!defined $secondary_plugin_options->{'StructuredHTMLPlugin'}){
+        $secondary_plugin_options->{'StructuredHTMLPlugin'} = [];
+        my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlugin'};
         # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
+        # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj)
         # to extract these metadata fields from the HEAD META fields
         push (@$structhtml_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
 …
+    }
+    }
     if (!defined $secondary_plugin_options->{'HTMLPlug'}) {
     $secondary_plugin_options->{'HTMLPlug'} = [];
+    }
     if (!defined $secondary_plugin_options->{'TEXTPlug'}) {
     $secondary_plugin_options->{'TEXTPlug'} = [];
+    }
     my $html_options = $secondary_plugin_options->{'HTMLPlug'};
     my $text_options = $secondary_plugin_options->{'TextPlug'};
     my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlug'};
     # wvWare will always produce html files encoded as utf-8, so make sure the secondary HTMLPlug knows this
+    if (!defined $secondary_plugin_options->{'HTMLPlugin'}) {
+    $secondary_plugin_options->{'HTMLPlugin'} = [];
+    }
+    if (!defined $secondary_plugin_options->{'TextPlugin'}) {
+    $secondary_plugin_options->{'TextPlugin'} = [];
+    }
+    my $html_options = $secondary_plugin_options->{'HTMLPlugin'};
+    my $text_options = $secondary_plugin_options->{'TextPlugin'};
+    my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlugin'};
+    # wvWare will always produce html files encoded as utf-8, so make sure the secondary HTMLPlugin knows this
     push(@$html_options,"-input_encoding", "utf8");
     push(@$html_options,"-extract_language") if $self->{'extract_language'};
     push(@$html_options, "-description_tags") if $self->{'description_tags'};
     # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
+    # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj)
     # to extract these metadata fields from the HEAD META fields
     push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
 …
+}
 sub convert_post_process
+sub convert_post_process_old
+{
     my $self = shift (@_);
 …
     # Write it out again!
     #$self->utf8_write_file (\$text, $conv_filename);
+}
-sub get_file_type {
-    my $self = shift (@_);
-    my $file_type = "Word";
-    return $file_type;
+}
 …
+}
-# do plugin specific processing of doc_obj for HTML type
-sub process {
-    my $self = shift (@_);
-    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
-    return $self->process_type("doc", $base_dir, $file, $doc_obj);
+}
 ;

gsdl/trunk/perllib/plugins/ZIPPlugin.pm

-              r15865
+              r15872
 ###########################################################################
+#
 # ZIPPlug.pm --
+# ZIPPlugin.pm --
 # A component of the Greenstone digital library software
 # from the New Zealand Digital Library Project at the
 …
 package ZIPPlug;
+package ZIPPlugin;
 use BasPlug;
+use AbstractPlugin;
 use plugin;
 use util;
 …
 BEGIN {
     @ZIPPlug::ISA = ('BasPlug');
+    @ZIPPlugin::ISA = ('AbstractPlugin');
+}
 my $arguments =
     [ { 'name' => "process_exp",
     'desc' => "{BasPlug.process_exp}",
+    'desc' => "{BasePlugin.process_exp}",
     'type' => "string",
     'deft' => &get_default_process_exp(),
     'reqd' => "no" } ];
 my $options = { 'name'     => "ZIPPlug",
         'desc'     => "{ZIPPlug.desc}",
+my $options = { 'name'     => "ZIPPlugin",
+        'desc'     => "{ZIPPlugin.desc}",
         'abstract' => "no",
         'inherits' => "yes",
 …
     push(@$pluginlist, $class);
     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
+    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
+    push(@{$hashArgOptLists->{"OptList"}},$options);
     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);
+    my $self = new AbstractPlugin($pluginlist, $inputargs, $hashArgOptLists);
     return bless $self, $class;
 …
     &util::mk_all_dir ($tmpdir);
     print $outhandle "ZIPPlug: extracting $file_only to $tmpdir\n"
+    print $outhandle "ZIPPlugin: extracting $file_only to $tmpdir\n"
     if $self->{'verbosity'} > 1;

Context Navigation

Legend:

Download in other formats: