Changeset 15872

Show
Ignore:
Timestamp:
05.06.2008 09:29:32 (11 years ago)
Author:
kjdon
Message:

plugin overhaul: plugins renamed to xxPlugin, and in some cases the names are made more sensible. They now use the new base plugins. Hopefully we have better code reuse. Some of the plugins still need work done as I didn't want to spend another month doing this before committing it. Also, I haven't really tested anything yet...

Location:
gsdl/trunk/perllib/plugins
Files:
49 modified

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/BibTexPlugin.pm

    r15864 r15872  
    11########################################################################### 
    22# 
    3 # BibTexPlug.pm - a plugin for bibliography records in BibTex format 
     3# BibTexPlugin.pm - a plugin for bibliography records in BibTex format 
    44# 
    55# A component of the Greenstone digital library software 
     
    2727 
    2828 
    29 # BibTexPlug reads bibliography files in BibTex format. 
     29# BibTexPlugin reads bibliography files in BibTex format. 
    3030# 
    3131# by Gordon W. Paynter (gwp@cs.waikato.ac.nz), November 2000 
    3232# Based on ReferPlug.  See ReferPlug for geneology. 
    3333# 
    34 # BibTexPlug creates a document object for every reference a the file. 
    35 # It is a subclass of SplitPlug, so if there are multiple records, all 
     34# BibTexPlugin creates a document object for every reference a the file. 
     35# It is a subclass of SplitTextFile, so if there are multiple records, all 
    3636# are read. 
    3737# 
     
    4242 
    4343 
    44 package BibTexPlug; 
    45  
    46 use SplitPlug; 
     44package BibTexPlugin; 
     45 
     46use SplitTextFile; 
    4747use strict; 
    4848no strict 'refs'; # allow filehandles to be variables and viceversa 
    4949 
    50 # BibTexPlug is a sub-class of BasPlug. 
     50# BibTexPlugin is a sub-class of SplitTextFile. 
    5151sub BEGIN { 
    52     @BibTexPlug::ISA = ('SplitPlug'); 
     52    @BibTexPlugin::ISA = ('SplitTextFile'); 
    5353} 
    5454 
    5555my $arguments =  
    5656    [ { 'name' => "process_exp", 
    57     'desc' => "{BasPlug.process_exp}", 
     57    'desc' => "{BasePlugin.process_exp}", 
    5858    'type' => "regexp", 
    5959    'reqd' => "no", 
    6060    'deft' => &get_default_process_exp() }, 
    6161      { 'name' => "split_exp", 
    62     'desc' => "{SplitPlug.split_exp}", 
     62    'desc' => "{SplitTextFile.split_exp}", 
    6363    'type' => "regexp", 
    6464    'deft' => &get_default_split_exp(), 
     
    6666      ]; 
    6767 
    68 my $options = { 'name'     => "BibTexPlug", 
    69         'desc'     => "{BibTexPlug.desc}", 
     68my $options = { 'name'     => "BibTexPlugin", 
     69        'desc'     => "{BibTexPlugin.desc}", 
    7070        'abstract' => "no", 
    7171        'inherits' => "yes", 
     
    8282    return q^\n+(?=@)^; 
    8383} 
     84 
    8485sub new { 
    8586    my ($class) = shift (@_); 
     
    8788    push(@$pluginlist, $class); 
    8889 
    89     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    90     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    91  
    92     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists); 
     90    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     91    push(@{$hashArgOptLists->{"OptList"}},$options); 
     92 
     93    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists); 
    9394 
    9495    return bless $self, $class; 
     
    115116 
    116117    # Report that we're processing the file 
    117     print STDERR "<Processing n='$file' p='BibTexPlug'>\n" if ($gli); 
    118     print $outhandle "BibTexPlug: processing $file\n" 
     118    print STDERR "<Processing n='$file' p='BibTexPlugin'>\n" if ($gli); 
     119    print $outhandle "BibTexPlugin: processing $file\n" 
    119120    if ($self->{'verbosity'}) > 1; 
    120121 
     
    312313            $vonlast=shift @parts; 
    313314            if (scalar(@parts) > 0) { 
    314             print $outhandle "BibTexPlug: couldn't parse name $a\n"; 
     315            print $outhandle "BibTexPlugin: couldn't parse name $a\n"; 
    315316            # but we continue anyway... 
    316317            } 
     
    331332            # some non-English names do start with lowercase 
    332333            # eg "Marie desJardins". Also we can get typos... 
    333             print $outhandle "BibTexPlug: couldn't parse surname $vonlast\n"; 
     334            print $outhandle "BibTexPlugin: couldn't parse surname $vonlast\n"; 
    334335            $von=""; 
    335336            if ($vonlast =~ /^[a-z]+$/) { 
     
    724725        my $replacement=$utf8_chars{$tex}; 
    725726        if (!defined($replacement)) { 
    726         print STDERR "BibTexPlug: Warning: unknown latex accent \"$tex\" in \"$text\"\n"; 
     727        print STDERR "BibTexPlugin: Warning: unknown latex accent \"$tex\" in \"$text\"\n"; 
    727728        $replacement=$char; 
    728729        } 
     
    737738        my $replacement=$special_utf8_chars{$tex}; 
    738739        if (!defined($replacement)) { 
    739         print STDERR "BibTexPlug: Warning: unknown latex accent \"$tex\" in \"$text\"\n"; 
     740        print STDERR "BibTexPlugin: Warning: unknown latex accent \"$tex\" in \"$text\"\n"; 
    740741        $replacement=$tex; 
    741742        } 
     
    749750          my $replacement=$special_utf8_chars{$tex}; 
    750751          if (!defined($replacement)) { 
    751           print STDERR "BibTexPlug: Warning: unknown latex accent \"$tex\" in \"$text\"\n"; 
     752          print STDERR "BibTexPlugin: Warning: unknown latex accent \"$tex\" in \"$text\"\n"; 
    752753          $replacement=$char; 
    753754      } 
  • gsdl/trunk/perllib/plugins/BookPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # BookPlug.pm (formally called HBSPlug) -- plugin for processing simple 
     3# BookPlugin.pm (formally called HBSPlug) -- plugin for processing simple 
    44# html (or text) books 
    55# 
     
    4040# taken as the cover image (jpg files are blocked by this plugin) 
    4141 
    42 # BookPlug is a simplification (and extension) of the HBPlug used 
    43 # by the Humanity Library collections. BookPlug is faster as it expects 
     42# BookPlugin is a simplification (and extension) of the HBPlug used 
     43# by the Humanity Library collections. BookPlugin is faster as it expects 
    4444# the input files to be cleaner (The input to the HDL collections 
    4545# contains lots of excess html tags around <<TOC>> tags, uses <<I>> 
     
    4949# use this plugin instead of HBPlug. 
    5050 
    51 # 12/05/02 Added usage datastructure - John Thompson 
    52  
    53 package BookPlug; 
    54  
    55 use BasPlug; 
     51package BookPlugin; 
     52 
     53use AutoExtractMetadata; 
    5654use util; 
    5755use strict; 
     
    5957 
    6058sub BEGIN { 
    61     @BookPlug::ISA = ('BasPlug'); 
     59    @BookPlugin::ISA = ('AutoExtractMetadata'); 
    6260} 
    6361 
    6462my $arguments =  
    6563    [ { 'name' => "process_exp", 
    66     'desc' => "{BasPlug.process_exp}", 
     64    'desc' => "{BasePlugin.process_exp}", 
    6765    'type' => "regexp", 
    6866    'reqd' => "no", 
    6967    'deft' => &get_default_process_exp() }, 
    7068      { 'name' => "block_exp", 
    71     'desc' => "{BasPlug.block_exp}", 
     69    'desc' => "{BasePlugin.block_exp}", 
    7270    'type' => "regexp", 
    7371    'reqd' => "no", 
    7472    'deft' => &get_default_block_exp() } ]; 
    7573 
    76 my $options = { 'name'     => "BookPlug", 
    77         'desc'     => "{BookPlug.desc}", 
     74my $options = { 'name'     => "BookPlugin", 
     75        'desc'     => "{BookPlugin.desc}", 
    7876        'abstract' => "no", 
    7977        'inherits' => "yes", 
     
    8583    push(@$pluginlist, $class); 
    8684 
    87     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    88     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    89  
    90     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 
     85    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     86    push(@{$hashArgOptLists->{"OptList"}},$options); 
     87 
     88    my $self = new AutoExtractMetadata($pluginlist, $inputargs, $hashArgOptLists); 
    9189 
    9290    return bless $self, $class; 
     
    111109    my $outhandle = $self->{'outhandle'}; 
    112110 
    113     print STDERR "<Processing n='$file' p='BookPlug'>\n" if ($gli); 
    114     print $outhandle "BookPlug: processing $file\n"  
     111    print STDERR "<Processing n='$file' p='BookPlugin'>\n" if ($gli); 
     112    print $outhandle "BookPlugin: processing $file\n"  
    115113    if $self->{'verbosity'} > 1; 
    116114     
     
    211209    if ($imagetype eq "jpg") {$imagetype = "jpeg";} 
    212210    if ($imagetype !~ /^(jpeg|gif|png)$/) { 
    213     print $outhandle "BookPlug: Warning - unknown image type ($imagetype)\n"; 
     211    print $outhandle "BookPlugin: Warning - unknown image type ($imagetype)\n"; 
    214212    } 
    215213    my ($imagefile) = $link =~ /([^\/]*)$/; 
     
    223221        $foundimage = 1; 
    224222    } else { 
    225         $error = "BookPlug: Warning - couldn't find image file $imagefile in either $filename or"; 
     223        $error = "BookPlugin: Warning - couldn't find image file $imagefile in either $filename or"; 
    226224    } 
    227225    } 
     
    235233        print $outhandle "$error $filename\n"; 
    236234    } else { 
    237         print $outhandle "BookPlug: Warning - couldn't find image file $imagefile in $filename\n"; 
     235        print $outhandle "BookPlugin: Warning - couldn't find image file $imagefile in $filename\n"; 
    238236    } 
    239237    } 
  • gsdl/trunk/perllib/plugins/CONTENTdmPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # CONTENTdmPlug.pm -- reasonably with-it pdf plugin 
     3# CONTENTdmPlugin.pm -- reasonably with-it pdf plugin 
    44# A component of the Greenstone digital library software 
    55# from the New Zealand Digital Library Project at the  
     
    2323# 
    2424########################################################################### 
    25 package CONTENTdmPlug; 
    26  
    27 use ConvertToPlug; 
     25package CONTENTdmPlugin; 
     26 
     27use ConvertBinaryFile; 
     28use ReadXMLFile; 
     29 
    2830use unicode; 
    2931use ghtml; 
     
    3537use XMLParser; 
    3638 
     39# inherit ReadXMLFile for the apply_xslt method 
    3740sub BEGIN { 
    38     @CONTENTdmPlug::ISA = ('ConvertToPlug'); 
     41    @CONTENTdmPlugin::ISA = ('ConvertBinaryFile', 'ReadXMLFile'); 
    3942} 
    4043 
     
    4245my $convert_to_list = 
    4346    [ { 'name' => "auto", 
    44     'desc' => "{ConvertToPlug.convert_to.auto}" }, 
     47    'desc' => "{ConvertBinaryFile.convert_to.auto}" }, 
    4548      { 'name' => "html", 
    46     'desc' => "{ConvertToPlug.convert_to.html}" }, 
     49    'desc' => "{ConvertBinaryFile.convert_to.html}" }, 
    4750      { 'name' => "text", 
    48     'desc' => "{ConvertToPlug.convert_to.text}" }, 
     51    'desc' => "{ConvertBinaryFile.convert_to.text}" }, 
    4952      { 'name' => "pagedimg", 
    50     'desc' => "{ConvertToPlug.convert_to.pagedimg}"}, 
     53    'desc' => "{ConvertBinaryFile.convert_to.pagedimg}"}, 
    5154      ]; 
    5255 
     
    5659      [ 
    5760       { 'name' => "convert_to", 
    58     'desc' => "{ConvertToPlug.convert_to}", 
     61    'desc' => "{ConvertBinaryFile.convert_to}", 
    5962    'type' => "enum", 
    6063    'reqd' => "yes", 
     
    6265    'deft' => "html" },   
    6366      { 'name' => "xslt", 
    64     'desc' => "{XMLPlug.xslt}", 
     67    'desc' => "{ReadXMLFile.xslt}", 
    6568    'type' => "string", 
    6669    'deft' => "", 
    6770    'reqd' => "no" }, 
    6871       { 'name' => "process_exp", 
    69     'desc' => "{BasPlug.process_exp}", 
     72    'desc' => "{BasePlugin.process_exp}", 
    7073    'type' => "regexp", 
    7174    'deft' => &get_default_process_exp(), 
    7275    'reqd' => "no" }, 
    7376      { 'name' => "block_exp", 
    74     'desc' => "{BasPlug.block_exp}", 
     77    'desc' => "{BasePlugin.block_exp}", 
    7578    'type' => "regexp", 
    7679    'deft' => &get_default_block_exp() } 
    7780]; 
    7881 
    79 my $options = { 'name'     => "CONTENTdmPlug", 
    80         'desc'     => "{CONTENTdmPlug.desc}", 
     82my $options = { 'name'     => "CONTENTdmPlugin", 
     83        'desc'     => "{CONTENTdmPlugin.desc}", 
    8184        'abstract' => "no", 
    8285        'inherits' => "yes", 
    83         # CONTENTdmPlug is one of the few ConvertToPlug subclasses whose source doc can't be replaced by a GS-generated html 
     86        # CONTENTdmPlugin is one of the few ConvertBinaryFile subclasses whose source doc can't be replaced by a GS-generated html 
    8487        'srcreplaceable' => "no", 
    8588        'args'     => $arguments }; 
    86  
    87 our ($self); 
    8889 
    8990sub new { 
     
    9596    push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?'); 
    9697 
    97     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    98     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
     98    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     99    push(@{$hashArgOptLists->{"OptList"}},$options); 
    99100 
    100101    my @arg_array = @$inputargs; 
    101     $self = (defined $hashArgOptLists)? new ConvertToPlug($pluginlist,$inputargs,$hashArgOptLists): new ConvertToPlug($pluginlist,$inputargs); 
     102    my $self = new ConvertBinaryFile($pluginlist,$inputargs,$hashArgOptLists); 
    102103     
    103104    if ($self->{'info_only'}) { 
     
    107108 
    108109    my $parser = new XML::Parser('Style' => 'Stream', 
     110                 'Pkg' => 'CONTENTdmPlugin', 
     111                 'PluginObj' => $self, 
    109112                 'Handlers' => {'Char' => \&Char, 
    110113                        'XMLDecl' => \&XMLDecl, 
     
    119122    $self->{'metadata_value'} = undef; 
    120123 
    121     $self->{'convert_to'} = "PagedImg"; 
     124    $self->{'convert_to'} = "PagedImage"; 
    122125    my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 
    123126 
    124     if (!defined $secondary_plugin_options->{'PagedImgPlug'}){ 
    125     $secondary_plugin_options->{'PagedImgPlug'} = []; 
    126     my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'};  
    127     push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 
    128     push(@$pagedimg_options, "-thumbnail", "-screenview"); 
    129     } 
     127    if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){ 
     128    $secondary_plugin_options->{'PagedImagePlugin'} = []; 
     129    } 
     130    my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};  
     131    push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 
     132    push(@$pagedimg_options, "-thumbnail", "-screenview"); 
     133 
    130134 
    131135    $self = bless $self, $class; 
     
    140144} 
    141145 
    142 # so we don't inherit HTMLPlug's block exp... 
    143146sub get_default_block_exp { 
    144147    return q^(?i)\.(jpg|jpeg|gif)$^; 
    145 } 
    146  
    147  
    148  
    149  
    150 # A smarter (?) option would be to add XMLPlug into inheritence above 
    151 # thereby avoiding a fair amount of code duplication 
    152  
    153 sub apply_xslt 
    154 { 
    155     my $self = shift @_; 
    156     my ($xslt,$filename) = @_; 
    157      
    158     my $outhandle = $self->{'outhandle'}; 
    159  
    160     my $xslt_filename = $xslt; 
    161  
    162     if (! -e $xslt_filename) { 
    163     # Look in main site directory 
    164     my $gsdlhome = $ENV{'GSDLHOME'}; 
    165     $xslt_filename = &util::filename_cat($gsdlhome,$xslt); 
    166     } 
    167  
    168     if (! -e $xslt_filename) { 
    169     # Look in collection directory 
    170     my $coldir = $ENV{'GSDLCOLLECTDIR'}; 
    171     $xslt_filename = &util::filename_cat($coldir,$xslt); 
    172     } 
    173  
    174     if (! -e $xslt_filename) { 
    175     print $outhandle "Warning: Unable to find XSLT $xslt\n"; 
    176     if (open(XMLIN,"<$filename")) { 
    177  
    178         my $untransformed_xml = ""; 
    179         while (defined (my $line = <XMLIN>)) { 
    180  
    181         $untransformed_xml .= $line; 
    182         } 
    183         close(XMLIN); 
    184          
    185         return $untransformed_xml; 
    186     } 
    187     else { 
    188         print $outhandle "Error: Unable to open file $filename\n"; 
    189         print $outhandle "       $!\n"; 
    190         return ""; 
    191     } 
    192      
    193     } 
    194  
    195     my $bin_java = &util::filename_cat($ENV{'GSDLHOME'},"bin","java"); 
    196     my $jar_filename = &util::filename_cat($bin_java,"xalan.jar"); 
    197     my $xslt_base_cmd = "java -jar $jar_filename"; 
    198     my $xslt_cmd = "$xslt_base_cmd -IN \"$filename\" -XSL \"$xslt_filename\""; 
    199  
    200     my $transformed_xml = ""; 
    201  
    202     if (open(XSLT_IN,"$xslt_cmd |")) { 
    203     while (defined (my $line = <XSLT_IN>)) { 
    204  
    205         $transformed_xml .= $line; 
    206     } 
    207     close(XSLT_IN); 
    208     } 
    209     else { 
    210     print $outhandle "Error: Unable to run command $xslt_cmd\n"; 
    211     print $outhandle "       $!\n"; 
    212     } 
    213  
    214     return $transformed_xml; 
    215  
    216148} 
    217149 
     
    523455 
    524456 
    525 # Override ConvertToPlug tmp_area_convert_file() to provide solution specific  
     457# Override ConvertBinaryFile tmp_area_convert_file() to provide solution specific  
    526458# to CONTENTdm 
    527459# 
     
    612544    $self->{'converted_to'} = "HTML"; 
    613545    } elsif ($output_type =~ /te?xt/i) { 
    614     $self->{'converted_to'} = "TEXT"; 
     546    $self->{'converted_to'} = "Text"; 
    615547    } elsif ($output_type =~ /item/i){ 
    616     $self->{'converted_to'} = "PagedImg"; 
     548    $self->{'converted_to'} = "PagedImage"; 
    617549    } 
    618550 
     
    624556 
    625557 
    626 # Override ConvertToPlug read 
     558# Override ConvertBinaryFile read 
    627559# Needed so multiple .item files generate are sent down secondary plugin 
    628560 
     
    640572    my ($block_status,$filename) = $self->read_block(@_); 
    641573    return $block_status if ((!defined $block_status) || ($block_status==0)); 
    642     $file = $self->read_tidy_file($file); 
    643      
    644     # read() deviates at this point from ConvertToPlug 
     574    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 
     575        
     576    # read() deviates at this point from ConvertBinaryFile 
    645577    # Need to work with list of filename returned 
    646578 
     
    693625     
    694626    my ($filemeta) = $file =~ /([^\\\/]+)$/; 
    695     $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta)); 
     627    $self->set_Source_metadata($doc_obj, $filemeta); 
    696628    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 
    697629    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename)); 
     
    713645 
    714646    # add an OID 
    715     $doc_obj->set_OID(); 
     647    $self->add_OID($doc_obj); 
    716648    # process the document 
    717649    $processor->process($doc_obj); 
     
    723655} 
    724656 
    725  
    726  
    727657sub process { 
    728     my $self = shift (@_); 
    729     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
     658 
     659} 
     660# do we need this? sec pluginn process would have already been called as part of read_into_doc_obj?? 
     661sub process_old { 
     662    my $self = shift (@_); 
     663    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
    730664 
    731665     
     
    741675} 
    742676 
    743  
    744  
    745  
    746 sub StartDocument {$self->xml_start_document(@_);} 
    747 sub XMLDecl {$self->xml_xmldecl(@_);} 
    748 sub Entity {$self->xml_entity(@_);} 
    749 sub Doctype {$self->xml_doctype(@_);} 
    750 sub StartTag {$self->xml_start_tag(@_);} 
    751 sub EndTag {$self->xml_end_tag(@_);} 
    752 sub Text {$self->xml_text(@_);} 
    753 sub PI {$self->xml_pi(@_);} 
    754 sub EndDocument {$self->xml_end_document(@_);} 
    755 sub Default {$self->xml_default(@_);} 
    756  
    757 # This Char function overrides the one in XML::Parser::Stream to overcome a 
    758 # problem where $expat->{Text} is treated as the return value, slowing 
    759 # things down significantly in some cases. 
    760 sub Char { 
    761     use bytes;  # Necessary to prevent encoding issues with XML::Parser 2.31+ 
    762     $_[0]->{'Text'} .= $_[1]; 
    763     return undef; 
    764 } 
    765677 
    766678# Called at the beginning of the XML document. 
     
    772684} 
    773685 
    774 # Called for XML declarations 
    775 sub xml_xmldecl { 
    776     my $self = shift(@_); 
    777     my ($expat, $version, $encoding, $standalone) = @_; 
    778 } 
    779  
    780 # Called for XML entities 
    781 sub xml_entity { 
    782   my $self = shift(@_); 
    783   my ($expat, $name, $val, $sysid, $pubid, $ndata) = @_; 
    784 } 
    785686 
    786687# Called for DOCTYPE declarations - use die to bail out if this doctype 
     
    793694 
    794695    my $outhandle = $self->{'outhandle'}; 
    795     print $outhandle "CONTENTdmPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 
     696    print $outhandle "CONTENTdmPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 
    796697 
    797698} 
     
    873774} 
    874775 
    875 # Called for processing instructions. The $_ variable will contain a copy 
    876 # of the pi. 
    877 sub xml_pi { 
    878     my $self = shift(@_); 
    879     my ($expat, $target, $data) = @_; 
    880 } 
    881  
    882776# Called at the end of the XML document. 
    883777sub xml_end_document { 
     
    887781} 
    888782 
    889 # Called for any characters not handled by the above functions. 
    890 sub xml_default { 
    891     my $self = shift(@_); 
    892     my ($expat, $text) = @_; 
    893 } 
    894  
    895783 
    8967841; 
  • gsdl/trunk/perllib/plugins/CSVPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # CSVPlug.pm -- A plugin for files in comma-separated value format 
     3# CSVPlugin.pm -- A plugin for files in comma-separated value format 
    44# 
    55# A component of the Greenstone digital library software 
     
    2525########################################################################### 
    2626 
    27 package CSVPlug; 
     27package CSVPlugin; 
    2828 
    2929 
    30 use SplitPlug; 
     30use SplitTextFile; 
    3131use strict; 
    3232no strict 'refs'; # allow filehandles to be variables and viceversa 
    3333 
    3434 
    35 # CSVPlug is a sub-class of SplitPlug. 
     35# CSVPlugin is a sub-class of SplitTextFile. 
    3636sub BEGIN { 
    37     @CSVPlug::ISA = ('SplitPlug'); 
     37    @CSVPlugin::ISA = ('SplitTextFile'); 
    3838} 
    3939 
     
    4141my $arguments =  
    4242    [ { 'name' => "process_exp", 
    43     'desc' => "{BasPlug.process_exp}", 
     43    'desc' => "{BasePlugin.process_exp}", 
    4444    'type' => "regexp", 
    4545    'reqd' => "no", 
    4646    'deft' => &get_default_process_exp() }, 
    4747      { 'name' => "split_exp", 
    48     'desc' => "{SplitPlug.split_exp}", 
     48    'desc' => "{SplitTextFile.split_exp}", 
    4949    'type' => "regexp", 
    5050    'reqd' => "no", 
     
    5454 
    5555 
    56 my $options = { 'name'     => "CSVPlug", 
    57         'desc'     => "{CSVPlug.desc}", 
     56my $options = { 'name'     => "CSVPlugin", 
     57        'desc'     => "{CSVPlugin.desc}", 
    5858        'abstract' => "no", 
    5959        'inherits' => "yes", 
     
    8080    push(@$pluginlist, $class); 
    8181 
    82     if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});} 
    83     if (defined $options) { push(@{$hashArgOptLists->{"OptList"}}, $options)}; 
     82    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}); 
     83    push(@{$hashArgOptLists->{"OptList"}}, $options); 
    8484 
    85     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists); 
     85    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists); 
    8686 
    8787    return bless $self, $class; 
     
    9898    open(FILE, $filename); 
    9999    my $reader = new multiread(); 
    100     $reader->set_handle('CSVPlug::FILE'); 
     100    $reader->set_handle('CSVPlugin::FILE'); 
    101101    $reader->set_encoding($encoding); 
    102102    $reader->read_file($textref); 
     
    144144 
    145145    # Report that we're processing the file 
    146     print STDERR "\n<Processing n='$file' p='CSVPlug'>\n" if ($gli); 
    147     print $outhandle "CSVPlug: processing $file\n" if ($self->{'verbosity'}) > 1; 
     146    print STDERR "\n<Processing n='$file' p='CSVPlugin'>\n" if ($gli); 
     147    print $outhandle "CSVPlugin: processing $file\n" if ($self->{'verbosity'}) > 1; 
    148148 
    149149    # Add the raw line as the document text 
  • gsdl/trunk/perllib/plugins/ConvertToRogPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # ConvertToRogPlug.pm -- plugin that inherits from RogPlug 
     3# ConvertToRogPlugin.pm -- plugin that inherits from RogPlugin 
    44# 
    55# A component of the Greenstone digital library software 
     
    2626 
    2727 
    28 package ConvertToRogPlug; 
    29  
    30 use BasPlug; 
    31 use RogPlug; 
     28package ConvertToRogPlugin; 
     29 
     30use RogPlugin; 
    3231use strict; 
    3332no strict 'refs'; # allow filehandles to be variables and viceversa 
    3433 
    3534sub BEGIN { 
    36     @ConvertToRogPlug::ISA = ('RogPlug'); 
     35    @ConvertToRogPlugin::ISA = ('RogPlugin'); 
    3736} 
    3837 
    3938my $arguments = [ 
    4039         ]; 
    41 my $options = { 'name'     => "ConvertToRogPlug", 
    42         'desc'     => "{ConvertToRogPlug.desc}", 
     40my $options = { 'name'     => "ConvertToRogPlugin", 
     41        'desc'     => "{ConvertToRogPlugin.desc}", 
    4342        'abstract' => "yes", 
    4443        'inherits' => "yes" }; 
     
    4948    push(@$pluginlist, $class); 
    5049 
    51     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    52     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    53  
    54     my $self = new RogPlug($pluginlist, $inputargs, $hashArgOptLists); 
     50    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     51    push(@{$hashArgOptLists->{"OptList"}},$options); 
     52 
     53    my $self = new RogPlugin($pluginlist, $inputargs, $hashArgOptLists); 
    5554 
    5655    $self->{'convert_to'} = "Rog"; 
     
    171170 
    172171 
    173 # Exact copy of read_rog_record from RogPlug  
     172# Exact copy of read_rog_record from RogPlugin  
    174173# Needed for FILE in right scope 
    175174 
     
    271270} 
    272271 
    273 # Override RogPlug function so rog files are stored as sections (not docs) 
     272# Override RogPlugin function so rog files are stored as sections (not docs) 
    274273 
    275274sub process_rog_record 
     
    307306 
    308307 
    309 # Override BasPlug read 
     308# Override BasePlugin read 
    310309# We don't want to get language encoding stuff until after we've converted 
    311310# our file to Rog format 
     
    347346    $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'}); 
    348347    my ($filemeta) = $file =~ /([^\\\/]+)$/; 
    349     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta)); 
     348    $self->set_Source_metadata($doc_obj, $filemeta); 
     349     
    350350    if ($self->{'cover_image'}) { 
    351351    $self->associate_cover_image($doc_obj, $filename); 
     
    436436    my $ret_val = 1;     
    437437 
    438 #   $ret_val = &RogPlug::process($self, $textref, $pluginfo, 
     438#   $ret_val = &RogPlugin::process($self, $textref, $pluginfo, 
    439439#                $tmp_dirname, $tmp_tailname, 
    440440#                $metadata, $doc_obj); 
  • gsdl/trunk/perllib/plugins/DBPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # DBPlug.pm -- plugin to import records from a database 
     3# DBPlugin.pm -- plugin to import records from a database 
    44#  
    55# A component of the Greenstone digital library software 
     
    3434# Mar, Apr 2003 
    3535 
    36 package DBPlug; 
     36package DBPlugin; 
    3737 
    3838use strict; 
    3939no strict 'refs'; # allow variable as a filehandle 
    4040 
    41 use BasPlug; 
     41use AutoExtractMetadata; 
    4242use unicode; 
    4343 
    44 #use DBI; # database independent stuff 
    45  
    4644sub BEGIN { 
    47     @DBPlug::ISA = ('BasPlug'); 
     45    @DBPlugin::ISA = ('AutoExtractMetadata'); 
    4846} 
    4947 
    5048my $arguments = 
    5149    [ { 'name' => "process_exp", 
    52     'desc' => "{BasPlug.process_exp}", 
     50    'desc' => "{AutoExtractMetadata.process_exp}", 
    5351    'type' => "regexp", 
    5452    'deft' => &get_default_process_exp(), 
    5553    'reqd' => "no" }]; 
    5654 
    57 my $options = { 'name'     => "DBPlug", 
    58         'desc'     => "{DBPlug.desc}", 
     55my $options = { 'name'     => "DBPlugin", 
     56        'desc'     => "{DBPlugin.desc}", 
    5957        'abstract' => "no", 
    6058        'inherits' => "yes", 
     
    6664    push(@$pluginlist, $class); 
    6765 
    68     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    69     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    70  
    71     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 
     66    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     67    push(@{$hashArgOptLists->{"OptList"}},$options); 
     68 
     69    my $self = new AutoExtractMetadata($pluginlist, $inputargs, $hashArgOptLists); 
    7270 
    7371    return bless $self, $class; 
     
    7876 
    7977    return q^(?i)\.dbi$^; 
    80 } 
    81 # we don't have a per-greenstone document process() function! 
    82 sub process { 
    83  
    8478} 
    8579 
     
    9589    my $verbosity = $self->{'verbosity'}; 
    9690 
    97     print $outhandle "DBPlug: processing $file\n"  
     91    print $outhandle "DBPlugin: processing $file\n"  
    9892    if $self->{'verbosity'} > 1; 
    9993    
     
    114108    my $db=undef; 
    115109 
    116 # get id of pages from "nonempty", get latest version number from "recent", and 
    117 # then get pagename from "page" and content from "version" ! 
     110    # get id of pages from "nonempty", get latest version number from  
     111    # "recent", and then get pagename from "page" and content from "version" ! 
    118112 
    119113    my $sql_query_prime = undef ; 
     
    126120    # read in config file. 
    127121    if (!open (CONF, $filename)) { 
    128         print $outhandle "DBPlug: can't read $filename: $!\n"; 
     122        print $outhandle "DBPlugin: can't read $filename: $!\n"; 
    129123        return 0; 
    130124    }  
     
    145139        $callback =~ /[\`]|\|\-/) { 
    146140        # no backticks or functions that start new processes allowed 
    147         print $outhandle "DBPlug: bad function in callback\n"; 
     141        print $outhandle "DBPlugin: bad function in callback\n"; 
    148142        return 0; 
    149143        } 
     
    152146        my $ret = eval "\$callbacks{'$fieldname'} = $callback ; 1"; 
    153147        if (!defined($ret)) { 
    154         print $outhandle "DBPlug: error eval'ing callback: $@\n"; 
     148        print $outhandle "DBPlugin: error eval'ing callback: $@\n"; 
    155149        exit(1); 
    156150        } 
    157151        $callback=""; 
    158         print $outhandle "DBPlug: callback registered for '$fieldname'\n" 
     152        print $outhandle "DBPlugin: callback registered for '$fieldname'\n" 
    159153            if $dbplug_debug; 
    160154    } elsif ($callback) { 
     
    176170            chomp $err; 
    177171            $err =~ s/\.$//; # remove a trailing . 
    178             print $outhandle "DBPlug: error evaluating `$statement'\n"; 
     172            print $outhandle "DBPlugin: error evaluating `$statement'\n"; 
    179173            print $outhandle " $err (in $filename)\n"; 
    180174            return 0; # there was an error reading the config file 
     
    185179        $statement = ""; 
    186180        } else { 
    187         print $outhandle "DBPlug: skipping statement `$statement'\n"; 
     181        print $outhandle "DBPlugin: skipping statement `$statement'\n"; 
    188182        } 
    189183        $statement = ""; 
     
    194188     
    195189    if (!defined($db)) { 
    196     print $outhandle "DBPlug: error: $filename does not specify a db!\n"; 
     190    print $outhandle "DBPlugin: error: $filename does not specify a db!\n"; 
    197191    return 0; 
    198192    } 
    199193    if (!defined($sql_query)) { 
    200         print $outhandle "DBPlug: error: no SQL query specified!\n"; 
     194        print $outhandle "DBPlugin: error: no SQL query specified!\n"; 
    201195    return 0; 
    202196    } 
     
    205199 
    206200    if (!defined($dbhandle)) { 
    207     die "DBPlug: could not connect to database, exiting.\n"; 
     201    die "DBPlugin: could not connect to database, exiting.\n"; 
    208202    } 
    209203    if (defined($dbplug_debug) && $dbplug_debug==1) { 
    210     print $outhandle "DBPlug (debug): connected ok\n"; 
     204    print $outhandle "DBPlugin (debug): connected ok\n"; 
    211205    } 
    212206 
     
    238232    if (defined($db_to_greenstone_fields{$fieldname})) { 
    239233        if (defined($dbplug_debug) && $dbplug_debug==1) { 
    240         print $outhandle "DBPlug (debug): mapping db field " 
     234        print $outhandle "DBPlugin (debug): mapping db field " 
    241235            . "'$fieldname' to " 
    242236            . $db_to_greenstone_fields{$fieldname} . "\n"; 
     
    255249    while (scalar(@row_array)) { 
    256250    if (defined($dbplug_debug) && $dbplug_debug==1) { 
    257         print $outhandle "DBPlug (debug): retrieved a row from query\n"; 
     251        print $outhandle "DBPlugin (debug): retrieved a row from query\n"; 
    258252    } 
    259253 
     
    263257    my $cursection = $doc_obj->get_top_section(); 
    264258 
    265     # if $language not set in config file, will use BasPlug's default 
     259    # if $language not set in config file, will use BasePlugin's default 
    266260    if (defined($language)) { 
    267261        $doc_obj->add_utf8_metadata($cursection, "Language", $language); 
    268262    } 
    269     # if $encoding not set in config file, will use BasPlug's default 
     263    # if $encoding not set in config file, will use BasePlugin's default 
    270264    if (defined($encoding)) { 
    271265        # allow some common aliases 
     
    274268        $doc_obj->add_utf8_metadata($cursection, "Encoding", $encoding); 
    275269    } 
    276     $doc_obj->add_utf8_metadata($cursection, 
    277                     "Source", &ghtml::dmsafe($db)); 
     270    $self->set_Source_metadata($doc_obj, $db, $encoding); 
     271 
    278272    if ($self->{'cover_image'}) { 
    279273        $self->associate_cover_image($doc_obj, $filename); 
     
    358352    # check "$sth->err" if empty array for error 
    359353    if ($statement_hand->err) { 
    360     print $outhandle "DBPlug: received error: \"" . 
     354    print $outhandle "DBPlugin: received error: \"" . 
    361355        $statement_hand->errstr . "\"\n"; 
    362356    } 
     
    370364 
    371365    if (defined($dbplug_debug) && $dbplug_debug==1) { 
    372         print $outhandle "DBPlug: imported $count DB records as documents.\n"; 
     366        print $outhandle "DBPlugin: imported $count DB records as documents.\n"; 
    373367    } 
    374368    $count; 
  • gsdl/trunk/perllib/plugins/DSpacePlugin.pm

    r15865 r15872  
    22########################################################################### 
    33# 
    4 # DSpacePlug.pm -- plugin for import the collection from DSpace  
     4# DSpacePlugin.pm -- plugin for importing a collection from DSpace  
    55#  
    66# A component of the Greenstone digital library software 
     
    88# University of Waikato, New Zealand. 
    99# 
    10 # Copyright (C) 1999 New Zealand Digital Library Project 
     10# Copyright (C) 2004 New Zealand Digital Library Project 
    1111# 
    1212# This program is free software; you can redistribute it and/or modify 
     
    2626########################################################################### 
    2727 
    28 # DSpace Plug - 10/2004 
    29 #  
    30 # 
     28 
    3129# This plugin takes "contents" and dublin_core.xml file, which contain  
    3230# Metadata and lists of associated files for a particular document 
     
    4745# 
    4846 
    49 package DSpacePlug; 
    50  
    51 use BasPlug; 
     47package DSpacePlugin; 
     48 
     49use BasePlugin; 
    5250use plugin; 
    53 #use ghtml; 
    5451use XMLParser; 
    5552use strict; 
     
    5754 
    5855sub BEGIN { 
    59     @DSpacePlug::ISA = ('BasPlug'); 
     56    @DSpacePlugin::ISA = ('BasePlugin'); 
    6057} 
    6158 
    6259my $arguments = 
    6360    [ { 'name' => "process_exp", 
    64     'desc' => "{BasPlug.process_exp}", 
     61    'desc' => "{BasePlugin.process_exp}", 
    6562    'type' => "string", 
    6663    'deft' => &get_default_process_exp(), 
    6764    'reqd' => "no" }, 
    6865      { 'name' => "only_first_doc", 
    69     'desc' => "{DSpacePlug.only_first_doc}", 
     66    'desc' => "{DSpacePlugin.only_first_doc}", 
    7067    'type' => "flag", 
    7168    'reqd' => "no" }, 
    7269      { 'name' => "first_inorder_ext", 
    73     'desc' => "{DSpacePlug.first_inorder_ext}", 
     70    'desc' => "{DSpacePlugin.first_inorder_ext}", 
    7471    'type' => "string", 
    7572    'reqd' => "no" }, 
    7673      { 'name' => "first_inorder_mime", 
    77     'desc' => "{DSpacePlug.first_inorder_mime}", 
     74    'desc' => "{DSpacePlugin.first_inorder_mime}", 
    7875    'type' => "flag", 
    7976    'reqd' => "no" }, 
    8077      { 'name' => "block_exp", 
    81     'desc' => "{BasPlug.block_exp}", 
     78    'desc' => "{BasePlugin.block_exp}", 
    8279    'type' => "regexp", 
    8380    'deft' => &get_default_block_exp(), 
     
    8582 
    8683 
    87 my $options = { 'name'     => "DSpacePlug", 
    88         'desc'     => "{DSpacePlug.desc}", 
     84my $options = { 'name'     => "DSpacePlugin", 
     85        'desc'     => "{DSpacePlugin.desc}", 
    8986        'inherits' => "yes", 
    9087        'abstract' => "no", 
     
    104101    push(@$pluginlist, $class); 
    105102 
    106     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    107     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    108  
    109     $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 
    110      
     103    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     104    push(@{$hashArgOptLists->{"OptList"}},$options); 
     105 
     106    $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 
     107     
     108    if ($self->{'info_only'}) { 
     109    # don't worry about creating the XML parser as all we want is the  
     110    # list of plugin options 
     111    return bless $self, $class; 
     112    } 
     113 
    111114    #create XML::Parser object for parsing dublin_core.xml files 
    112115    my $parser = new XML::Parser('Style' => 'Stream', 
     
    252255    } 
    253256     
    254     print $outhandle "DSpacePlug: extracting metadata from $file\n" 
     257    print $outhandle "DSpacePlugin: extracting metadata from $file\n" 
    255258    if $self->{'verbosity'} > 1; 
    256259     
     
    262265     
    263266    if ($@) { 
    264     die "DSpacePlug: ERROR $filename is not a well formed dublin_core.xml file ($@)\n"; 
     267    die "DSpacePlugin: ERROR $filename is not a well formed dublin_core.xml file ($@)\n"; 
    265268    } 
    266269 
     
    287290 
    288291    # Temporarily store associate file info in metadata table 
    289     # This will be removed in 'extra_metadata' in BasPlug and used 
     292    # This will be removed in 'extra_metadata' in BasePlugin and used 
    290293    # to perform the actual file association (once the doc obj has 
    291294    # been formed) 
     
    313316 
    314317 
    315 # The DSpacePlug read() function. This function does all the right things 
    316 # to make general options work for a given plugin. It calls the process() 
    317 # function which does all the work specific to a plugin (like the old 
    318 # read functions used to do). Most plugins should define their own  
    319 # process() function and let this read() function keep control. 
    320 # 
    321 # DSpace overrides read() because there is no need to read the actual  
    322 # text of the file in, because the contents of the file is not text... 
    323 # 
    324 # Return number of files processed, undef if can't process 
    325 # Note that $base_dir might be "" and that $file might  
    326 # include directories 
    327  
     318# The DSpacePlugin read() function. We are not actually reading any documents  
     319# here, just blocking ones that have been processed by metadata read. 
     320# 
     321# Returns 0 for a file it's blocking, undef for any other 
    328322sub read { 
    329323    my $self = shift (@_); 
     
    340334    return 0 if (defined $self->{'extra_blocks'}->{$filename}); 
    341335    return undef; 
    342 } 
    343  
    344 # do plugin specific processing of doc_obj 
    345 sub process { 
    346     my $self = shift (@_); 
    347     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; 
    348     my $outhandle = $self->{'outhandle'}; 
    349      
    350     return 1; 
    351336} 
    352337 
  • gsdl/trunk/perllib/plugins/EmailPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # EMAILPlug.pm - a plugin for parsing email files 
     3# EmailPlugin.pm - a plugin for parsing email files 
    44# 
    55# A component of the Greenstone digital library software 
     
    2727 
    2828 
    29 # EMAILPlug 
     29# EmailPlugin 
    3030# 
    3131# by Gordon Paynter (gwp@cs.waikato.ac.nz) 
     
    6363 
    6464# 12/05/02 Added usage datastructure - John Thompson 
    65 package EMAILPlug; 
     65package EmailPlugin; 
    6666 
    6767use strict; 
     
    6969 
    7070 
    71 use SplitPlug; 
     71use SplitTextFile; 
    7272use unicode;  # gs conv functions 
    7373use gsprintf 'gsprintf'; # translations 
     
    7777 
    7878sub BEGIN { 
    79     @EMAILPlug::ISA = ('SplitPlug'); 
     79    @EmailPlugin::ISA = ('SplitTextFile'); 
    8080} 
    8181 
     
    8383my $arguments =  
    8484    [ { 'name' => "process_exp", 
    85     'desc' => "{BasPlug.process_exp}", 
     85    'desc' => "{BasePlugin.process_exp}", 
    8686    'type' => "regexp", 
    8787    'reqd' => "no", 
    8888    'deft' => &get_default_process_exp() }, 
    8989      { 'name' => "no_attachments", 
    90     'desc' => "{EMAILPlug.no_attachments}", 
     90    'desc' => "{EmailPlugin.no_attachments}", 
    9191    'type' => "flag", 
    9292    'reqd' => "no" }, 
    9393      { 'name' => "headers", 
    94     'desc' => "{EMAILPlug.headers}", 
     94    'desc' => "{EmailPlugin.headers}", 
    9595    'type' => "flag", 
    9696    'reqd' => "no" }, 
    9797      { 'name' => "split_exp", 
    98     'desc' => "{EMAILPlug.split_exp}", 
     98    'desc' => "{EmailPlugin.split_exp}", 
    9999    'type' => "regexp", 
    100100    'reqd' => "no", 
     
    102102      ]; 
    103103 
    104 my $options = { 'name'     => "EMAILPlug", 
    105         'desc'     => "{EMAILPlug.desc}", 
     104my $options = { 'name'     => "EmailPlugin", 
     105        'desc'     => "{EmailPlugin.desc}", 
    106106        'abstract' => "no", 
    107107        'inherits' => "yes", 
    108108        'args'     => $arguments }; 
    109109 
    110 # Create a new EMAILPlug object with which to parse a file. 
    111 # Accomplished by creating a new BasPlug and using bless to  
    112 # turn it into an EMAILPlug. 
     110# Create a new EmailPlugin object with which to parse a file. 
     111# Accomplished by creating a new BasePlugin and using bless to  
     112# turn it into an EmailPlugin. 
    113113 
    114114sub new { 
     
    117117    push(@$pluginlist, $class); 
    118118 
    119     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    120     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    121  
    122     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists); 
     119    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     120    push(@{$hashArgOptLists->{"OptList"}},$options); 
     121 
     122    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists); 
    123123 
    124124    $self->{'assoc_filenames'} = {}; # to save attach names so we don't clobber 
     
    166166 
    167167 
    168     print STDERR "<Processing n='$file' p='EMAILPlug'>\n" if ($gli); 
    169  
    170     gsprintf($outhandle, "EMAILPlug: {common.processing} $file\n") 
     168    print STDERR "<Processing n='$file' p='EmailPlugin'>\n" if ($gli); 
     169 
     170    gsprintf($outhandle, "EmailPlugin: {common.processing} $file\n") 
    171171    if $self->{'verbosity'} > 1; 
    172172 
     
    524524        } 
    525525    } else { 
    526         print $outhandle "EMAILPlug: (warning) couldn't parse MIME boundary\n"; 
     526        print $outhandle "EmailPlugin: (warning) couldn't parse MIME boundary\n"; 
    527527    } 
    528528    # parts start with "--$boundary" 
     
    540540    # make sure it is only -- and whitespace 
    541541    if ($last !~ /^\-\-\s*$/ms) { 
    542         print $outhandle "EMAILPlug: (warning) last part of MIME message isn't empty\n"; 
     542        print $outhandle "EmailPlugin: (warning) last part of MIME message isn't empty\n"; 
    543543    } 
    544544    foreach my $message_part (@message_parts) { 
     
    579579        # or it was an empty message... 
    580580        # do nothing... 
    581         gsprintf($outhandle, "{BasPlug.empty_file} - empty body?\n"); 
     581        gsprintf($outhandle, "{BasePlugin.empty_file} - empty body?\n"); 
    582582        } else { 
    583583        $text = $part_text; 
     
    814814        } 
    815815        open (SAVE, ">$tmpdir/$save_filename") || 
    816         warn "EMAILPlug: Can't save attachment as $tmpdir/$save_filename: $!"; 
     816        warn "EmailPlugin: Can't save attachment as $tmpdir/$save_filename: $!"; 
    817817        my $part_text = $message_part; 
    818818        $part_text =~ s/(.*?)\r?\n\r?\n//s; # remove header 
     
    834834#           &util::rm("$tmpdir/$save_filename"); 
    835835        my $outhandle=$self->{'outhandle'}; 
    836         print $outhandle "EMAILPlug: saving attachment \"$filename\"\n"; # 
     836        print $outhandle "EmailPlugin: saving attachment \"$filename\"\n"; # 
    837837         
    838838        # be nice if "download" was a translatable macro :( 
     
    905905        # rfc2045 also allows binary, which we ignore (for now). 
    906906        my $outhandle=$self->{'outhandle'}; 
    907         print $outhandle "EMAILPlug: unknown transfer encoding: $encoding\n"; 
     907        print $outhandle "EmailPlugin: unknown transfer encoding: $encoding\n"; 
    908908        return ""; 
    909909    } 
     
    10671067      if ($badbytesfound==1) { 
    10681068          # claims to be utf8, but it isn't! 
    1069           print $outhandle "EMAILPlug: Headers claim utf-8 but bad bytes " 
     1069          print $outhandle "EmailPlugin: Headers claim utf-8 but bad bytes " 
    10701070          . "detected and removed.\n"; 
    10711071 
     
    10921092      # 1252 has characters between 0x80 and 0x9f, 8859-1 doesn't 
    10931093      if ($$textref =~ m/[\x80-\x9f]/) { 
    1094       print $outhandle "EMAILPlug: Headers claim ISO charset but MS "; 
     1094      print $outhandle "EmailPlugin: Headers claim ISO charset but MS "; 
    10951095      print $outhandle "codepage 1252 detected.\n"; 
    10961096      $charset = "windows_1252"; 
     
    11061106      # characters out here if this causes problems... 
    11071107      my $outhandle=$self->{'outhandle'}; 
    1108       print $outhandle "EMAILPlug: falling back to iso-8859-1\n"; 
     1108      print $outhandle "EmailPlugin: falling back to iso-8859-1\n"; 
    11091109      $$textref=&unicode::unicode2utf8(&unicode::convert2unicode("iso_8859_1",$textref)); 
    11101110 
  • gsdl/trunk/perllib/plugins/ExcelPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # ExcelPlug.pm -- plugin for importing Microsoft Excel files. 
     3# ExcelPlugin.pm -- plugin for importing Microsoft Excel files. 
    44#  (currently only versions 95 and 97) 
    55# 
     
    2626########################################################################### 
    2727 
    28 package ExcelPlug; 
     28package ExcelPlugin; 
    2929 
    30 use ConvertToPlug; 
     30use ConvertBinaryFile; 
    3131use strict; 
    3232no strict 'refs'; # allow filehandles to be variables and vice versa 
    3333 
    3434sub BEGIN { 
    35     @ExcelPlug::ISA = ('ConvertToPlug'); 
     35    @ExcelPlugin::ISA = ('ConvertBinaryFile'); 
    3636} 
    3737 
    3838my $arguments =  
    3939    [ { 'name' => "process_exp", 
    40     'desc' => "{BasPlug.process_exp}", 
     40    'desc' => "{BasePlugin.process_exp}", 
    4141    'type' => "regexp", 
    4242    'reqd' => "no", 
     
    4444      ]; 
    4545 
    46 my $options = { 'name'     => "ExcelPlug", 
    47         'desc'     => "{ExcelPlug.desc}", 
     46my $options = { 'name'     => "ExcelPlugin", 
     47        'desc'     => "{ExcelPlugin.desc}", 
    4848        'abstract' => "no", 
    4949        'inherits' => "yes", 
     
    5959    if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    6060     
    61     my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists); 
     61    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 
    6262 
    6363    if ($self->{'info_only'}) { 
     
    6666    } 
    6767 
     68    $self->{'filename_extension'} = "xls"; 
     69    $self->{'file_type'} = "Excel"; 
     70 
    6871    my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 
    69     if (!defined $secondary_plugin_options->{'HTMLPlug'}) { 
    70     $secondary_plugin_options->{'HTMLPlug'} = []; 
     72    if (!defined $secondary_plugin_options->{'HTMLPlugin'}) { 
     73    $secondary_plugin_options->{'HTMLPlugin'} = []; 
    7174    } 
    72     my $html_options = $secondary_plugin_options->{'HTMLPlug'}; 
     75    my $html_options = $secondary_plugin_options->{'HTMLPlugin'}; 
    7376     
    74     #$self->{'input_encoding'} = "utf8"; 
    75     #$self->{'extract_language'} = 1; 
    7677    push(@$html_options, "-input_encoding", "utf8"); 
    7778    push(@$html_options,"-extract_language") if $self->{'extract_language'}; 
     
    8283} 
    8384 
    84 sub convert_post_process 
     85sub convert_post_process_old 
    8586{ 
    8687    my $self = shift (@_); 
     
    107108} 
    108109     
    109 sub process { 
    110     my $self = shift (@_); 
    111     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
    112  
    113     return $self->process_type("xls",$base_dir,$file,$doc_obj); 
    114 } 
    115110 
    1161111; 
  • gsdl/trunk/perllib/plugins/FOXPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # FOXPlug.pm 
     3# FOXPlugin.pm 
    44# A component of the Greenstone digital library software 
    55# from the New Zealand Digital Library Project at the  
     
    2929# the appropriate fields in the file. 
    3030 
    31 # 12/05/02 Added usage datastructure - John Thompson 
    32  
    33 package FOXPlug; 
    34  
    35 use BasPlug; 
     31package FOXPlugin; 
     32 
     33use BasePlugin; 
    3634use util; 
    3735use doc; 
    3836use unicode; 
    39 use cnseg; 
    40 # use gb; 
    4137 
    4238use strict; 
     
    4541 
    4642sub BEGIN { 
    47     @FOXPlug::ISA = ('BasPlug'); 
     43    @FOXPlugin::ISA = ('BasePlugin'); 
    4844} 
    4945 
    5046my $arguments = 
    5147    [ { 'name' => "process_exp", 
    52     'desc' => "{BasPlug.process_exp}", 
     48    'desc' => "{BasePlugin.process_exp}", 
    5349    'type' => "regexp", 
    5450    'reqd' => "no", 
    5551    'deft' => &get_default_process_exp() }, 
    5652      { 'name' => "block_exp", 
    57     'desc' => "{BasPlug.block_exp}", 
     53    'desc' => "{BasePlugin.block_exp}", 
    5854    'type' => "regexp", 
    5955    'reqd' => "no", 
    6056    'deft' => &get_default_block_exp() } ]; 
    6157 
    62 my $options = { 'name'     => "FOXPlug", 
    63         'desc'     => "{FOXPlug.desc}", 
     58my $options = { 'name'     => "FOXPlugin", 
     59        'desc'     => "{FOXPlugin.desc}", 
    6460        'abstract' => "no", 
    6561        'inherits' => "yes", 
     
    7167    push(@$pluginlist, $class); 
    7268 
    73     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    74     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    75  
    76     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 
     69    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     70    push(@{$hashArgOptLists->{"OptList"}},$options); 
     71 
     72    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 
    7773 
    7874    return bless $self, $class; 
     
    10399    return $block_status if ((!defined $block_status) || ($block_status==0)); 
    104100 
    105     print STDERR "<Processing n='$file' p='FOXPlug'>\n" if ($gli); 
    106     print STDERR "FOXPlug: processing $file\n" if $self->{'verbosity'} > 1; 
     101    print STDERR "<Processing n='$file' p='FOXPlugin'>\n" if ($gli); 
     102    print STDERR "FOXPlugin: processing $file\n" if $self->{'verbosity'} > 1; 
    107103 
    108104    my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i; 
     
    113109        print STDERR "<ProcessingError n='$file' r='Could not read $fullname'>\n"; 
    114110    } 
    115     print STDERR "FOXPlug::read - couldn't read $fullname\n"; 
     111    print STDERR "FOXPlugin::read - couldn't read $fullname\n"; 
    116112    return -1; # error in processing 
    117113    } 
     
    125121        print STDERR "<ProcessingError n='$file' r='EOF while reading database header'>\n"; 
    126122    } 
    127     print STDERR "FOXPlug::read - eof while reading database header\n"; 
     123    print STDERR "FOXPlugin::read - eof while reading database header\n"; 
    128124    close (FOXBASEIN); 
    129125    return -1; 
     
    145141        print STDERR "<ProcessingError n='$file' r='Does not seem to be a Foxbase file'>\n"; 
    146142    } 
    147     print STDERR "FOXPlug:read - $fullname doesn't seem to be a Foxbase file\n"; 
     143    print STDERR "FOXPlugin:read - $fullname doesn't seem to be a Foxbase file\n"; 
    148144    return -1; 
    149145    } 
     
    177173        print STDERR "<ProcessingError n='$file' r='Could not read $dbtfullname'>\n"; 
    178174    } 
    179     print STDERR "FOXPlug::read - couldn't read $dbtfullname\n"; 
     175    print STDERR "FOXPlugin::read - couldn't read $dbtfullname\n"; 
    180176    close (FOXBASEIN); 
    181177    return -1; 
  • gsdl/trunk/perllib/plugins/FavouritesPlugin.pm

    r15865 r15872  
    2828# especially SRCPlug by John McPherson Nov 2000 
    2929 
    30 package FavouritesPlug; 
     30package FavouritesPlugin; 
    3131 
    32 use BasPlug; 
     32use ReadTextFile; 
    3333use strict; 
    3434no strict 'refs'; # allow filehandles to be variables and vice versa 
    3535 
    3636sub BEGIN { 
    37     @FavouritesPlug::ISA = ('BasPlug'); 
     37    @FavouritesPlugin::ISA = ('ReadTextFile'); 
    3838} 
    3939 
    4040my $arguments = 
    4141    [ { 'name' => "process_exp", 
    42     'desc' => "{BasPlug.process_exp}", 
     42    'desc' => "{ReadTextFile.process_exp}", 
    4343    'type' => "regexp", 
    4444    'deft' => &get_default_process_exp(), 
    4545    'reqd' => "no" } ]; 
    4646 
    47 my $options = { 'name'     => "FavouritesPlug", 
    48         'desc'     => "FavouritesPlug imports Internet Explorer style Favourites. Favourites are often found in the \"C:\\Documents and Settings\\[your username]\\Favorites\" folder on your computer, but can also be made by dragging a bookmark or location from your browser (any) to the desktop.", 
     47my $options = { 'name'     => "FavouritesPlugin", 
     48        'desc'     => "{FavouritesPlugin.desc}", 
    4949        'abstract' => "no", 
    5050        'inherits' => "yes", 
     
    5757    push(@$pluginlist, $class); 
    5858 
    59     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    60     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
     59    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     60    push(@{$hashArgOptLists->{"OptList"}},$options); 
    6161 
    62     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 
     62    my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists); 
    6363 
    6464    return bless $self, $class; 
     
    8080 
    8181    my $section = $doc_obj->get_top_section(); 
    82     print STDERR "<Processing n='$file' p='FavouritesPlug'>\n" if ($gli); 
    83     print $outhandle "FavouritesPlug: processing $file\n" if $self->{'verbosity'} > 1; 
     82    print STDERR "<Processing n='$file' p='FavouritesPlugin'>\n" if ($gli); 
     83    print $outhandle "FavouritesPlugin: processing $file\n" if $self->{'verbosity'} > 1; 
    8484 
    8585    # don't want mg to turn escape chars into actual values 
  • gsdl/trunk/perllib/plugins/GAPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # GAPlug.pm 
     3# GAPlugin.pm 
    44# A component of the Greenstone digital library software 
    55# from the New Zealand Digital Library Project at the  
     
    2929# to their DTD. 
    3030 
    31 package GAPlug; 
    32  
    33 use XMLPlug; 
     31package GAPlugin; 
     32 
     33use ReadXMLFile; 
    3434 
    3535use strict; 
     
    3737 
    3838sub BEGIN { 
    39     @GAPlug::ISA = ('XMLPlug'); 
     39    @GAPlugin::ISA = ('ReadXMLFile'); 
    4040} 
    4141 
     
    4949my $arguments = 
    5050    [ { 'name' => "process_exp", 
    51     'desc' => "{BasPlug.process_exp}", 
     51    'desc' => "{BasePlugin.process_exp}", 
    5252    'type' => "regexp", 
    5353    'deft' => &get_default_process_exp(), 
    5454    'reqd' => "no" } ]; 
    5555 
    56 my $options = { 'name'     => "GAPlug", 
    57         'desc'     => "{GAPlug.desc}", 
     56my $options = { 'name'     => "GAPlugin", 
     57        'desc'     => "{GAPlugin.desc}", 
    5858        'abstract' => "no", 
    5959        'inherits' => "yes", 
     
    6565    push(@$pluginlist, $class); 
    6666 
    67     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    68     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    69  
    70     my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists); 
     67    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     68    push(@{$hashArgOptLists->{"OptList"}},$options); 
     69 
     70    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists); 
    7171 
    7272    $self->{'section'} = ""; 
     
    106106 
    107107    my $outhandle = $self->{'outhandle'}; 
    108     print $outhandle "GAPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 
    109     print STDERR "<Processing n='$self->{'file'}' p='GAPlug'>\n" if $self->{'gli'}; 
     108    print $outhandle "GAPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 
     109    print STDERR "<Processing n='$self->{'file'}' p='GAPlugin'>\n" if $self->{'gli'}; 
    110110 
    111111} 
  • gsdl/trunk/perllib/plugins/GISExtractor.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # GISBasPlug.pm -- base class to enhance plugins with GIS capabilities 
     3# GISExtractor.pm -- extension base class to enhance plugins with GIS capabilities 
    44# A component of the Greenstone digital library software 
    55# from the New Zealand Digital Library Project at the  
     
    2424########################################################################### 
    2525 
    26 package GISBasPlug; 
     26package GISExtractor; 
     27 
     28use PrintInfo; 
    2729 
    2830use util; 
    29 use locale; 
    3031 
    3132use gsprintf 'gsprintf'; 
     
    3334no strict 'refs'; # allow filehandles to be variables and vice versa 
    3435no strict 'subs'; 
     36 
    3537#field categories in DataBase files 
    3638#$LAT = 3; 
     
    4244 
    4345BEGIN { 
    44     die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'}; 
    45 } 
    46  
    47  
    48 use BasPlug; # uses BasPlug, but is not inherited 
    49  
    50  
    51 my $options = { 'name'     => "GISBasPlug", 
    52         'desc'     => "{GISBasPlug.desc}", 
     46    @GISExtractor::ISA = ('PrintInfo'); 
     47} 
     48 
     49 
     50my $arguments =  
     51    [ { 'name' => "extract_placenames", 
     52    'desc' => "{GISExtractor.extract_placenames}", 
     53    'type' => "flag", 
     54    'reqd' => "no" }, 
     55      { 'name' => "gazetteer", 
     56    'desc' => "{GISExtractor.gazetteer}", 
     57    'type' => "string", 
     58    'reqd' => "no" }, 
     59      { 'name' => "place_list", 
     60    'desc' => "{GISExtractor.place_list}", 
     61    'type' => "flag", 
     62    'reqd' => "no" } ]; 
     63 
     64 
     65my $options = { 'name'     => "GISExtractor", 
     66        'desc'     => "{GISExtractor.desc}", 
    5367        'abstract' => "yes", 
    54         'inherits' => "no" }; 
     68        'inherits' => "yes", 
     69        'args' => $arguments }; 
    5570 
    5671 
    5772sub new { 
    58     my $class = shift (@_); 
    59     my $plugin_name = shift (@_); 
    60  
    61     my $self = {}; 
    62     $self->{'plugin_type'} = "GISBasPlug"; 
    63  
    64     $self->{'option_list'} = [ $options ]; 
     73    my ($class) = shift (@_); 
     74    my ($pluginlist,$inputargs,$hashArgOptLists) = @_; 
     75    push(@$pluginlist, $class); 
     76 
     77    # can we indicate that these are not available if the map data is not there?? 
     78    #if (has_mapdata()) { 
     79    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     80    push(@{$hashArgOptLists->{"OptList"}},$options); 
     81    #} 
     82    my $self = new PrintInfo($pluginlist, $inputargs, $hashArgOptLists); 
     83 
     84    if ($self->{'extract_placenames'}) { 
     85 
     86    my $outhandle = $self->{'outhandle'}; 
     87     
     88    my $places_ref  
     89        = $self->loadGISDatabase($outhandle,$self->{'gazetteer'}); 
     90     
     91    if (!defined $places_ref) { 
     92        print $outhandle "Warning: Error loading mapdata gazetteer \"$self->{'gazetteer'}\"\n"; 
     93        print $outhandle "         No placename extraction will take place.\n"; 
     94        $self->{'extract_placenames'} = undef; 
     95    } 
     96    else { 
     97        $self->{'places'} = $places_ref; 
     98    } 
     99    } 
    65100 
    66101    return bless $self, $class; 
    67 } 
    68  
    69 sub init { 
    70 } 
    71  
    72 sub print_xml_usage 
     102 
     103} 
     104 
     105 
     106sub extract_gis_metadata 
    73107{ 
    74     BasPlug::print_xml_usage(@_); 
    75 } 
    76  
    77 sub print_xml 
    78 { 
    79     BasPlug::print_xml(@_); 
    80 } 
    81  
    82 sub print_txt_usage 
    83 { 
    84    BasPlug::print_txt_usage(@_); 
    85 } 
    86  
    87 sub determine_description_offset 
    88 { 
    89     BasPlug::determine_description_offset(@_); 
    90 } 
    91 sub print_plugin_usage 
    92 { 
    93     my $plugindesc = $options->{'desc'}; 
    94  
    95     if (defined($plugindesc)) { 
    96     gsprintf(STDERR, "$plugindesc\n\n"); 
    97     } 
    98   
    99 } 
    100  
    101 sub set_incremental 
    102 { 
    103     BasPlug::set_incremental(@_); 
     108    my $self = shift (@_); 
     109    my ($doc_obj) = @_; 
     110     
     111    if ($self->{'extract_placenames'}) { 
     112    my $thissection = $doc_obj->get_top_section(); 
     113    while (defined $thissection) { 
     114        my $text = $doc_obj->get_text($thissection); 
     115        $self->extract_placenames (\$text, $doc_obj, $thissection) if $text =~ /./; 
     116        $thissection = $doc_obj->get_next_section ($thissection); 
     117    } 
     118    }  
     119 
    104120} 
    105121 
     
    255271    $doc_obj->associate_file($tempfile, "places.txt", "text/plain"); 
    256272    $self->{'places_filename'} = $tempfile; 
     273     
    257274    my %countries = (); 
    258275     
     
    283300     
    284301    #this line removes apostrophes from placenames (they break the javascript function) 
    285     $$textref =~ s/(javascript:popUp.*?)(\w)'(\w)/$1$2$3/g; 
     302    $$textref =~ s/(javascript:popUp.*?)(\w)'(\w)/$1$2$3/g; #' (to get emacs colours back) 
    286303         
    287304    #for displaying map of document, count num of places from each country 
     
    314331    if ($self->{'verbosity'} > 2); 
    315332} 
     333 
     334sub clean_up_temp_files { 
     335    my $self = shift(@_); 
     336     
     337    if(defined($self->{'places_filename'}) && -e $self->{'places_filename'}){ 
     338    &util::rm($self->{'places_filename'}); 
     339    } 
     340    $self->{'places_filename'} = undef; 
     341 
     342} 
  • gsdl/trunk/perllib/plugins/GMLPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # GMLPlug.pm -- 
     3# GMLPlugin.pm -- 
    44# A component of the Greenstone digital library software 
    55# from the New Zealand Digital Library Project at the  
     
    2929# 12/05/02 Added usage datastructure - John Thompson 
    3030 
    31 package GMLPlug; 
    32  
    33 use BasPlug; 
     31package GMLPlugin; 
     32 
     33use BasePlugin; 
    3434use util; 
    3535use doc; 
     
    3939 
    4040sub BEGIN { 
    41     @GMLPlug::ISA = ('BasPlug'); 
     41    @GMLPlugin::ISA = ('BasePlugin'); 
    4242} 
    4343 
    4444my $arguments = 
    4545    [ { 'name' => "process_exp", 
    46     'desc' => "{BasPlug.process_exp}", 
     46    'desc' => "{BasePlugin.process_exp}", 
    4747    'type' => "regexp", 
    4848    'deft' =>  &get_default_process_exp() }  
    4949    ]; 
    5050 
    51 my $options = { 'name'     => "GMLPlug", 
    52         'desc'     => "{GMLPlug.desc}", 
     51my $options = { 'name'     => "GMLPlugin", 
     52        'desc'     => "{GMLPlugin.desc}", 
    5353        'abstract' => "no", 
    5454        'inherits' => "yes", 
     
    6060    push(@$pluginlist, $class); 
    6161 
    62     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    63     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    64  
    65     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 
     62    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     63    push(@{$hashArgOptLists->{"OptList"}},$options); 
     64 
     65    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 
    6666 
    6767    return bless $self, $class; 
     
    8888    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 
    8989 
    90     print STDERR "<Processing n='$file' p='GMLPlug'>\n" if ($gli); 
    91     print $outhandle "GMLPlug: processing $file\n"; 
     90    print STDERR "<Processing n='$file' p='GMLPlugin'>\n" if ($gli); 
     91    print $outhandle "GMLPlugin: processing $file\n"; 
    9292 
    9393    my $parent_dir = $file; 
     
    9999        print STDERR "<ProcessingError n='$file' r='Could not read $filename'>\n"; 
    100100    } 
    101     print $outhandle "GMLPlug::read - couldn't read $filename\n"; 
     101    print $outhandle "GMLPlugin::read - couldn't read $filename\n"; 
    102102    return -1; 
    103103    } 
     
    130130        if ($gml =~ /^\s*([^>]*)>(.*)$/so) { 
    131131            $tags = $1 if defined $1; 
    132             $text = &GMLPlug::_unescape_text($2); 
     132            $text = &GMLPlugin::_unescape_text($2); 
    133133 
    134134        } else { 
    135             print $outhandle "GMLPlug::read - error in file $filename\n"; 
     135            print $outhandle "GMLPlugin::read - error in file $filename\n"; 
    136136            print $outhandle "text: \"$gml\"\n"; 
    137137            last; 
     
    158158        # could be stored as either attributes or .... 
    159159        while ((defined $tags) && ($tags =~ s/^\s*(\S+)=\"([^\"]*)\"//o)) { 
    160             $doc_obj->add_utf8_metadata($section, $1, &GMLPlug::_unescape_text($2))  
     160            $doc_obj->add_utf8_metadata($section, $1, &GMLPlugin::_unescape_text($2))  
    161161            if (defined $1 and defined $2); 
    162162 
     
    183183                $tagname =~ s/^&\#47;/\//; 
    184184                 
    185                 $doc_obj->add_utf8_metadata($section, $tagname, &GMLPlug::_unescape_text($tagvalue));  
     185                $doc_obj->add_utf8_metadata($section, $tagname, &GMLPlugin::_unescape_text($tagvalue));  
    186186            } 
    187187            } 
  • gsdl/trunk/perllib/plugins/HBPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # HBPlug.pm -- 
     3# HBPlugin.pm -- 
    44# A component of the Greenstone digital library software 
    55# from the New Zealand Digital Library Project at the  
     
    3838# Humanity Library collections 
    3939 
    40 package HBPlug; 
     40package HBPlugin; 
    4141 
    4242use ghtml; 
    43 use BasPlug; 
     43use BasePlugin; 
    4444use unicode; 
    4545use util; 
     
    5050 
    5151sub BEGIN { 
    52     @HBPlug::ISA = ('BasPlug'); 
    53 } 
    54  
     52    @HBPlugin::ISA = ('BasePlugin'); 
     53} 
     54my $encoding_list =      
     55    [ { 'name' => "ascii", 
     56    'desc' => "{ReadTextFile.input_encoding.ascii}" }, 
     57      { 'name' => "iso_8859_1", 
     58    'desc' => "Latin1 (western languages)" } ]; 
     59  
    5560my $arguments =  
    5661    [ { 'name' => "process_exp", 
    57     'desc' => "{BasPlug.process_exp}", 
     62    'desc' => "{BasePlugin.process_exp}", 
    5863    'type' => "regexp", 
    5964    'reqd' => "no", 
    60     'deft' => &get_default_process_exp() } 
     65    'deft' => &get_default_process_exp() }, 
     66      { 'name' => "input_encoding", 
     67    'desc' => "{ReadTextFile.input_encoding}", 
     68    'type' => "enum", 
     69    'deft' => "iso_8859_1", 
     70    'list' => $encoding_list, 
     71    'reqd' => "no" } 
    6172      ]; 
    6273 
    63 my $options = { 'name'     => "HBPlug", 
    64         'desc'     => "{HBPlug.desc}", 
     74my $options = { 'name'     => "HBPlugin", 
     75        'desc'     => "{HBPlugin.desc}", 
    6576        'abstract' => "no", 
    6677        'inherits' => "yes", 
     
    7283    push(@$pluginlist, $class); 
    7384 
    74     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    75     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    76  
    77     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 
     85    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     86    push(@{$hashArgOptLists->{"OptList"}},$options); 
     87 
     88    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 
    7889 
    7990    return bless $self, $class; 
    8091} 
    8192 
    82 sub init { 
    83     my $self = shift (@_); 
    84     my ($verbosity, $outhandle) = @_; 
    85  
    86     $self->BasPlug::init($verbosity, $outhandle); 
    87     $self->{'input_encoding'} = "iso_8859_1"; 
    88  
    89     # this plugin only handles ascii encodings 
    90     if ($self->{'input_encoding'} !~ /^(iso_8859_1|ascii)$/) { 
    91     die "ERROR: HBPlug can handle only iso_8859_1 or ascii encodings.\n" . 
    92         $self->{'input_encoding'} . " is not an acceptable input_encoding value\n"; 
    93     }  
    94 } 
    95  
    9693# this is included only to prevent warnings being printed out  
    97 # from BasPlug::init. The process_exp is not used by this plugin 
     94# from BasePlugin::init. The process_exp is not used by this plugin 
    9895sub get_default_process_exp { 
    9996    my $self = shift (@_); 
     
    148145    if ($line =~ /<font [^>]*?face\s*=\s*\"?(\w+)\"?/i) { 
    149146        my $font = $1; 
    150         print $outhandle "HBPlug::HB_gettext - warning removed font $font\n"  
     147        print $outhandle "HBPlugin::HB_gettext - warning removed font $font\n"  
    151148        if ($font !~ /^arial$/i); 
    152149    } 
     
    217214} 
    218215 
    219 # if input_encoding is ascii we can call add_utf8_metadata 
    220 # directly but if it's iso_8859_1 (the default) we need to call 
    221 # add_metadata so that the ascii2utf8 conversion is done first 
    222 # this should speed things up a little if processing an ascii only 
    223 # document with input_encoding set to ascii 
    224 sub HB_add_metadata { 
    225     my $self = shift (@_); 
    226     my ($doc_obj, $cursection, $field, $value) = @_; 
    227  
    228 # All text should now be in utf-8 
    229 #    if ($self->{'input_encoding'} eq "ascii") { 
    230     $doc_obj->add_utf8_metadata ($cursection, $field, $value); 
    231 #    } else { 
    232 #   $doc_obj->add_metadata ($cursection, $field, $value); 
    233 #    } 
    234 } 
    235  
    236216# return number of files processed, undef if can't process 
    237217# Note that $base_dir might be "" and that $file might  
     
    251231    return undef unless -e $htmlfile; 
    252232 
    253     print STDERR "<Processing n='$file' p='HBPlug'>\n" if ($gli); 
    254     print $outhandle "HBPlug: processing $file\n"; 
     233    print STDERR "<Processing n='$file' p='HBPlugin'>\n" if ($gli); 
     234    print $outhandle "HBPlugin: processing $file\n"; 
    255235 
    256236    # read in the file and do basic html cleaning (removing header etc) 
     
    276256    # $metadata->{$field} may be an array reference 
    277257    if (ref ($metadata->{$field}) eq "ARRAY") { 
    278         map {  
    279         $self->HB_add_metadata ($doc_obj, $cursection, $field, $_);  
     258        map { 
     259        $doc_obj->add_utf8_metadata($cursection, $field, $_); 
    280260        } @{$metadata->{$field}}; 
    281261    } else { 
    282         $self->HB_add_metadata ($doc_obj, $cursection, $field, $metadata->{$field});  
     262        $doc_obj->add_utf8_metadata($cursection, $field, $metadata->{$field});  
    283263    } 
    284264    } 
     
    321301 
    322302        # add the metadata to this section 
    323         $self->HB_add_metadata ($doc_obj, $cursection, "Title", $title); 
     303        $doc_obj->add_utf8_metadata($cursection, "Title", $title); 
    324304 
    325305        # clean up the section html 
     
    332312 
    333313        # add the text for this section 
    334 # All read text should now be in utf-8 
    335 #       if ($self->{'input_encoding'} eq "ascii") { 
    336314        $doc_obj->add_utf8_text ($cursection, $sectiontext); 
    337 #       } else { 
    338 #       $doc_obj->add_text ($cursection, $sectiontext); 
    339 #       } 
    340315    } else { 
    341316        print $outhandle "WARNING - leftover text\n" , $self->shorten($html),  
  • gsdl/trunk/perllib/plugins/HTMLPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # HTMLPlug.pm -- basic html plugin 
     3# HTMLPlugin.pm -- basic html plugin 
    44# 
    55# A component of the Greenstone digital library software 
     
    3434# 
    3535 
    36 package HTMLPlug; 
    37  
    38 use BasPlug; 
     36package HTMLPlugin; 
     37 
     38use ReadTextFile; 
     39use HBPlugin; 
    3940use ghtml; 
    4041use unicode; 
     
    4647 
    4748sub BEGIN { 
    48     @HTMLPlug::ISA = ('BasPlug'); 
     49    @HTMLPlugin::ISA = ('ReadTextFile', 'HBPlugin'); 
    4950} 
    5051 
     
    5455my $arguments = 
    5556    [ { 'name' => "process_exp", 
    56     'desc' => "{BasPlug.process_exp}", 
     57    'desc' => "{BasePlugin.process_exp}", 
    5758    'type' => "regexp", 
    5859    'deft' =>  &get_default_process_exp() }, 
    5960      { 'name' => "block_exp", 
    60     'desc' => "{BasPlug.block_exp}", 
     61    'desc' => "{BasePlugin.block_exp}", 
    6162    'type' => 'regexp', 
    6263    'deft' =>  &get_default_block_exp() }, 
    6364      { 'name' => "nolinks", 
    64     'desc' => "{HTMLPlug.nolinks}", 
     65    'desc' => "{HTMLPlugin.nolinks}", 
    6566    'type' => "flag" }, 
    6667      { 'name' => "keep_head", 
    67     'desc' => "{HTMLPlug.keep_head}", 
     68    'desc' => "{HTMLPlugin.keep_head}", 
    6869    'type' => "flag" }, 
    6970      { 'name' => "no_metadata", 
    70     'desc' => "{HTMLPlug.no_metadata}", 
     71    'desc' => "{HTMLPlugin.no_metadata}", 
    7172    'type' => "flag" }, 
    7273      { 'name' => "metadata_fields", 
    73     'desc' => "{HTMLPlug.metadata_fields}", 
     74    'desc' => "{HTMLPlugin.metadata_fields}", 
    7475    'type' => "string", 
    7576    'deft' => "Title" }, 
    7677      { 'name' => "hunt_creator_metadata", 
    77     'desc' => "{HTMLPlug.hunt_creator_metadata}", 
     78    'desc' => "{HTMLPlugin.hunt_creator_metadata}", 
    7879    'type' => "flag" }, 
    7980      { 'name' => "file_is_url", 
    80     'desc' => "{HTMLPlug.file_is_url}", 
     81    'desc' => "{HTMLPlugin.file_is_url}", 
    8182    'type' => "flag" }, 
    8283      { 'name' => "assoc_files", 
    83     'desc' => "{HTMLPlug.assoc_files}", 
     84    'desc' => "{HTMLPlugin.assoc_files}", 
    8485    'type' => "regexp", 
    8586    'deft' => &get_default_block_exp() }, 
    8687      { 'name' => "rename_assoc_files", 
    87     'desc' => "{HTMLPlug.rename_assoc_files}", 
     88    'desc' => "{HTMLPlugin.rename_assoc_files}", 
    8889    'type' => "flag" }, 
    8990      { 'name' => "title_sub", 
    90     'desc' => "{HTMLPlug.title_sub}", 
     91    'desc' => "{HTMLPlugin.title_sub}", 
    9192    'type' => "string",  
    9293    'deft' => "" }, 
    9394      { 'name' => "description_tags", 
    94     'desc' => "{HTMLPlug.description_tags}", 
     95    'desc' => "{HTMLPlugin.description_tags}", 
    9596    'type' => "flag" }, 
    9697      # retain this for backward compatibility (w3mir option was replaced by 
    9798      # file_is_url) 
    9899      { 'name' => "w3mir", 
    99 #   'desc' => "{HTMLPlug.w3mir}", 
     100#   'desc' => "{HTMLPlugin.w3mir}", 
    100101    'type' => "flag", 
    101102    'hiddengli' => "yes"}, 
    102103      { 'name' => "no_strip_metadata_html", 
    103     'desc' => "{HTMLPlug.no_strip_metadata_html}", 
     104    'desc' => "{HTMLPlugin.no_strip_metadata_html}", 
    104105    'type' => "string", 
    105106    'deft' => "", 
    106107    'reqd' => "no"}, 
    107108      { 'name' => "sectionalise_using_h_tags", 
    108     'desc' => "{HTMLPlug.sectionalise_using_h_tags}", 
     109    'desc' => "{HTMLPlugin.sectionalise_using_h_tags}", 
    109110    'type' => "flag" }, 
    110111      { 'name' => "use_realistic_book", 
    111         'desc' => "{HTMLPlug.tidy_html}", 
     112        'desc' => "{HTMLPlugin.tidy_html}", 
    112113    'type' => "flag"}, 
    113       { 'name' => "is_old_HDL_tags", 
    114         'desc' => "{HTMLPlug.old_style_HDL}", 
    115     'type' => "flag"}, 
    116       { 'name' => "no_image_links",            # in future think about removing this option, 
    117         'desc' => "{HTMLPlug.no_image_links}", # since it has become the default behaviour 
    118     'type' => "flag"},   
     114      { 'name' => "old_style_HDL", 
     115        'desc' => "{HTMLPlugin.old_style_HDL}", 
     116    'type' => "flag"} 
    119117      ]; 
    120118 
    121 my $options = { 'name'     => "HTMLPlug", 
    122         'desc'     => "{HTMLPlug.desc}", 
     119my $options = { 'name'     => "HTMLPlugin", 
     120        'desc'     => "{HTMLPlugin.desc}", 
    123121        'abstract' => "no", 
    124122        'inherits' => "yes", 
     
    506504    if (($self->{'tidy_html'}) || ($self->{'old_style_HDL'})) 
    507505    { 
    508         # because the document has to be sectionalized set the description tags  
    509         $self->{'description_tags'} = 1; 
    510  
    511         # set the file to be tidied 
    512             $input_filename = &util::filename_cat($base_dir,$file) if $base_dir =~ /\w/; 
    513          
    514             # get the tidied file 
    515             #my $tidy_filename = $self->tmp_tidy_file($input_filename); 
    516         my $tidy_filename = $self->convert_tidy_or_oldHDL_file($input_filename); 
    517          
    518             # derive tmp filename from input filename 
    519             my ($tailname, $dirname, $suffix) = &File::Basename::fileparse($tidy_filename, "\\.[^\\.]+\$"); 
     506    # because the document has to be sectionalized set the description tags  
     507    $self->{'description_tags'} = 1; 
    520508     
    521         # set the new input file and base_dir to be from the tidied file 
    522         $file = "$tailname$suffix"; 
    523         $base_dir = $dirname; 
     509    # set the file to be tidied 
     510    $input_filename = &util::filename_cat($base_dir,$file) if $base_dir =~ /\w/; 
     511     
     512    # get the tidied file 
     513    #my $tidy_filename = $self->tmp_tidy_file($input_filename); 
     514    my $tidy_filename = $self->convert_tidy_or_oldHDL_file($input_filename); 
     515     
     516    # derive tmp filename from input filename 
     517    my ($tailname, $dirname, $suffix) = &File::Basename::fileparse($tidy_filename, "\\.[^\\.]+\$"); 
     518     
     519    # set the new input file and base_dir to be from the tidied file 
     520    $file = "$tailname$suffix"; 
     521    $base_dir = $dirname; 
    524522    } 
    525523     
    526524    # call the parent read_into_doc_obj 
    527     my ($process_status,$doc_obj) = &BasPlug::read_into_doc_obj($self,$pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli); 
     525    my ($process_status,$doc_obj) = $self->SUPER::read_into_doc_obj($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli); 
    528526     
    529527    return ($process_status,$doc_obj); 
     
    535533    push(@$pluginlist, $class); 
    536534     
    537     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    538     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
     535    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     536    push(@{$hashArgOptLists->{"OptList"}},$options); 
    539537    
    540538 
    541     my $self = (defined $hashArgOptLists)? new BasPlug($pluginlist,$inputargs,$hashArgOptLists): new BasPlug($pluginlist,$inputargs); 
     539    my $self = new ReadTextFile($pluginlist,$inputargs,$hashArgOptLists); 
    542540     
    543541    if ($self->{'w3mir'}) { 
     
    618616    my $outhandle = $self->{'outhandle'}; 
    619617 
    620     print STDERR "<Processing n='$file' p='HTMLPlug'>\n" if ($gli); 
    621  
    622     print $outhandle "HTMLPlug: processing $file\n" 
     618    print STDERR "<Processing n='$file' p='HTMLPlugin'>\n" if ($gli); 
     619 
     620    print $outhandle "HTMLPlugin: processing $file\n" 
    623621    if $self->{'verbosity'} > 1; 
    624622 
     
    669667    # URL metadata (even invalid ones) are used to support internal 
    670668    # links, so even if 'file_is_url' is off, still need to store info 
    671      
    672     $file = &BasPlug::filename_to_metadata($self, $file); # ensures filename is in UTF8 character encoding 
    673     my $web_url = "http://$file"; 
    674     $doc_obj->add_utf8_metadata($cursection, "URL", $web_url); # will eventually ensure it is utf8 anyway 
     669 
     670    my $utf8_file = $self->filename_to_utf8_metadata($file); 
     671    my $web_url = "http://$utf8_file"; 
     672    $doc_obj->add_utf8_metadata($cursection, "URL", $web_url); 
    675673 
    676674    if ($self->{'file_is_url'}) { 
     
    752750    } 
    753751    if ($cursection ne "") { 
    754         print $outhandle "HTMLPlug: WARNING: $file contains unmatched <Section></Section> tags\n"; 
     752        print $outhandle "HTMLPlugin: WARNING: $file contains unmatched <Section></Section> tags\n"; 
    755753    } 
    756754 
     
    760758        if (!$found_something) { 
    761759        if ($self->{'verbosity'} > 2) { 
    762             print $outhandle "HTMLPlug: WARNING: $file appears to contain no Section tags so\n"; 
     760            print $outhandle "HTMLPlugin: WARNING: $file appears to contain no Section tags so\n"; 
    763761            print $outhandle "          will be processed as a single section document\n"; 
    764762        } 
     
    775773 
    776774        } else { 
    777         print $outhandle "HTMLPlug: WARNING: $file contains the following text outside\n"; 
     775        print $outhandle "HTMLPlugin: WARNING: $file contains the following text outside\n"; 
    778776        print $outhandle "          of the final closing </Section> tag. This text will\n"; 
    779777        print $outhandle "          be ignored."; 
     
    795793        # been processed already but we should print the warning 
    796794        # as above and extract metadata 
    797         print $outhandle "HTMLPlug: WARNING: $file appears to contain no Section tags and\n"; 
     795        print $outhandle "HTMLPlugin: WARNING: $file appears to contain no Section tags and\n"; 
    798796        print $outhandle "          is blank or empty.  Metadata will be assigned if present.\n"; 
    799797        } 
     
    892890    # trap images 
    893891 
    894     # Previously, by default, HTMLPlug would embed <img> tags inside anchor tags 
     892    # Previously, by default, HTMLPlugin would embed <img> tags inside anchor tags 
    895893    # i.e. <a href="image"><img src="image"></a> in order to overcome a problem that 
    896894    # turned regular text succeeding images into links. That is, by embedding <imgs> 
     
    907905 
    908906    # If at any time, there is a need for having images embedded in <a> anchor tags, 
    909     # then it might be better to turn that into an HTMLPlug option rather than make 
     907    # then it might be better to turn that into an HTMLPlugin option rather than make 
    910908    # it the default behaviour. Also, eventually, no_image_links needs to become 
    911     # a deprecated option for HTMLPlug as it has now become the default behaviour. 
     909    # a deprecated option for HTMLPlugin as it has now become the default behaviour. 
    912910 
    913911    #if(!$self->{'no_image_links'}){ 
    914912    $$textref =~ s/(<(?:img|embed|table|tr|td)[^>]*?(?:src|background)\s*=\s*)([\"][^\"]+[\"]|[\'][^\']+[\']|[^\s\/>]+)([^>]*>)/ 
    915         $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge; 
     913    $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge; 
    916914    #} 
    917915 
     
    936934    $back="\"$back"; 
    937935    } 
     936 
    938937    $link =~ s/\n/ /g; 
    939938 
     
    10741073 
    10751074    my ($before_hash, $hash_part) = $link =~ /^([^\#]*)(\#?.*)$/; 
    1076  
     1075     
    10771076    $hash_part = "" if !defined $hash_part; 
    10781077    if (!defined $before_hash || $before_hash !~ /[\w\.\/]/) { 
    10791078    my $outhandle = $self->{'outhandle'}; 
    1080     print $outhandle "HTMLPlug: ERROR - badly formatted tag ignored ($link)\n" 
     1079    print $outhandle "HTMLPlugin: ERROR - badly formatted tag ignored ($link)\n" 
    10811080        if $self->{'verbosity'}; 
    10821081    return ($link, "", 0); 
     
    12571256 
    12581257    if (!defined $tag) { 
    1259         print $outhandle "HTMLPlug: can't find NAME in \"$metatag\"\n"; 
     1258        print $outhandle "HTMLPlugin: can't find NAME in \"$metatag\"\n"; 
    12601259        next; 
    12611260    } 
     
    12741273    } 
    12751274    if (!defined $value) { 
    1276         print $outhandle "HTMLPlug: can't find VALUE in \"$metatag\"\n"; 
     1275        print $outhandle "HTMLPlugin: can't find VALUE in \"$metatag\"\n"; 
    12771276        next; 
    12781277    } 
     
    14251424 
    14261425 
    1427 # Extend the BasPlug read_file so that strings like &eacute; are 
     1426# Extend read_file so that strings like &eacute; are 
    14281427# converted to UTF8 internally.   
    14291428# 
     
    14321431 
    14331432sub read_file { 
    1434     my ($self, $filename, $encoding, $language, $textref) = @_; 
    1435  
    1436     &BasPlug::read_file($self, $filename, $encoding, $language, $textref); 
     1433    my $self = shift(@_); 
     1434    my ($filename, $encoding, $language, $textref) = @_; 
     1435 
     1436    $self->SUPER::read_file($filename, $encoding, $language, $textref); 
    14371437 
    14381438    # Convert entities to their UTF8 equivalents 
  • gsdl/trunk/perllib/plugins/ISISPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # ISISPlug.pm -- A plugin for CDS/ISIS databases 
     3# ISISPlugin.pm -- A plugin for CDS/ISIS databases 
    44# 
    55# A component of the Greenstone digital library software 
     
    2525########################################################################### 
    2626 
    27 package ISISPlug; 
     27package ISISPlugin; 
    2828 
    2929 
    3030use multiread; 
    31 use SplitPlug; 
     31use SplitTextFile; 
    3232 
    3333use strict; 
    3434no strict 'refs'; # allow filehandles to be variables and viceversa 
    3535 
    36 # ISISPlug is a sub-class of SplitPlug. 
     36# ISISPlugin is a sub-class of SplitTextFile. 
    3737sub BEGIN { 
    38     @ISISPlug::ISA = ('SplitPlug'); 
     38    @ISISPlugin::ISA = ('SplitTextFile'); 
    3939} 
    4040 
     
    4242my $arguments =  
    4343    [ { 'name' => "process_exp", 
    44     'desc' => "{BasPlug.process_exp}", 
     44    'desc' => "{BasePlugin.process_exp}", 
    4545    'type' => "regexp", 
    4646    'reqd' => "no", 
    4747    'deft' => &get_default_process_exp() }, 
    4848      { 'name' => "block_exp", 
    49     'desc' => "{BasPlug.block_exp}", 
     49    'desc' => "{BasePlugin.block_exp}", 
    5050    'type' => "regexp", 
    5151    'reqd' => "no", 
     
    5353    'hiddengli' => "yes" }, 
    5454      { 'name' => "split_exp", 
    55     'desc' => "{SplitPlug.split_exp}", 
     55    'desc' => "{SplitTextFile.split_exp}", 
    5656    'type' => "regexp", 
    5757    'reqd' => "no", 
     
    6161      # The interesting options 
    6262      { 'name' => "entry_separator", 
    63     'desc' => "{ISISPlug.entry_separator}", 
     63    'desc' => "{ISISPlugin.entry_separator}", 
    6464    'type' => "string", 
    6565    'reqd' => "no", 
    6666    'deft' => "<br>" }, 
    6767      { 'name' => "subfield_separator", 
    68     'desc' => "{ISISPlug.subfield_separator}", 
     68    'desc' => "{ISISPlugin.subfield_separator}", 
    6969    'type' => "string", 
    7070    'reqd' => "no", 
     
    7272      ]; 
    7373 
    74 my $options = { 'name'     => "ISISPlug", 
    75         'desc'     => "{ISISPlug.desc}", 
     74my $options = { 'name'     => "ISISPlugin", 
     75        'desc'     => "{ISISPlugin.desc}", 
    7676        'abstract' => "no", 
    7777        'inherits' => "yes", 
     
    104104    push(@$pluginlist, $class); 
    105105 
    106     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    107     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    108  
    109     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists); 
     106    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     107    push(@{$hashArgOptLists->{"OptList"}},$options); 
     108 
     109    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists); 
    110110 
    111111    if ($self->{'info_only'}) { 
     
    157157 
    158158    my $reader = new multiread(); 
    159     $reader->set_handle('ISISPlug::FILE'); 
     159    $reader->set_handle('ISISPlugin::FILE'); 
    160160    $reader->set_encoding($encoding); 
    161161    $reader->read_file($textref); 
     
    186186 
    187187    # Report that we're processing the file 
    188     print STDERR "\n<Processing n='$file' p='ISISPlug'>\n" if ($gli); 
     188    print STDERR "\n<Processing n='$file' p='ISISPlugin'>\n" if ($gli); 
    189189    print $outhandle "IsisPlug: processing $file\n" if ($self->{'verbosity'}) > 1; 
    190190 
     
    348348    my $fdtfiletext = ""; 
    349349    my $reader = new multiread(); 
    350     $reader->set_handle('ISISPlug::FDT_FILE'); 
     350    $reader->set_handle('ISISPlugin::FDT_FILE'); 
    351351    $reader->set_encoding($encoding); 
    352352    $reader->read_file($fdtfiletext); 
  • gsdl/trunk/perllib/plugins/ImagePlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # ImagePlug.pm -- simple text plugin 
     3# ImagePlugin.pm -- simple text plugin 
    44# A component of the Greenstone digital library software 
    55# from the New Zealand Digital Library Project at the  
     
    2424########################################################################### 
    2525 
    26 package ImagePlug; 
     26package ImagePlugin; 
    2727 
    28 use BasPlug; 
     28use BasePlugin; 
     29use ImageConverter; 
    2930 
    3031use strict; 
     
    3233 
    3334sub BEGIN { 
    34     @ImagePlug::ISA = ('BasPlug'); 
     35    @ImagePlugin::ISA = ('BasePlugin', 'ImageConverter'); 
    3536} 
    3637 
    3738my $arguments = 
    3839    [ { 'name' => "process_exp", 
    39     'desc' => "{BasPlug.process_exp}", 
     40    'desc' => "{BasePlugin.process_exp}", 
    4041    'type' => "regexp", 
    4142    'deft' => &get_default_process_exp(), 
    4243    'reqd' => "no" }, 
    43       { 'name' => "cache_generated_images", 
    44     'desc' => "{ImagePlug.cache_generated_image}", 
    45     'type' => "flag", 
    46     'reqd' => "no" }, 
    47       { 'name' => "noscaleup", 
    48     'desc' => "{ImagePlug.noscaleup}", 
    49     'type' => "flag", 
    50     'reqd' => "no" }, 
    51       { 'name' => "nothumbnail", 
    52     'desc' => "{ImagePlug.generatethumbnail}", 
    53     'type' => "flag", 
    54     'reqd' => "no" }, 
    55       { 'name' => "thumbnailsize", 
    56     'desc' => "{ImagePlug.thumbnailsize}", 
    57     'type' => "int", 
    58     'deft' => "100", 
    59     'range' => "1,", 
    60     'reqd' => "no" }, 
    61       { 'name' => "thumbnailtype", 
    62     'desc' => "{ImagePlug.thumbnailtype}", 
    63     'type' => "string", 
    64     'deft' => "gif", 
    65     'reqd' => "no" }, 
    66       { 'name' => "noscreenview", 
    67     'desc' => "{ImagePlug.generatescreenview}", 
    68     'type' => "flag", 
    69     'reqd' => "no" }, 
    70       { 'name' => "screenviewsize", 
    71     'desc' => "{ImagePlug.screenviewsize}", 
    72     'type' => "int", 
    73     'deft' => "0", 
    74     'range' => "1,", 
    75     'reqd' => "no" }, 
    76       { 'name' => "screenviewtype", 
    77     'desc' => "{ImagePlug.screenviewtype}", 
    78     'type' => "string", 
    79     'deft' => "jpg", 
    80     'reqd' => "no" }, 
    81       { 'name' => "converttotype", 
    82     'desc' => "{ImagePlug.converttotype}", 
    83     'type' => "string", 
    84     'deft' => "", 
    85     'reqd' => "no" }, 
    86       { 'name' => "minimumsize", 
    87     'desc' => "{ImagePlug.minimumsize}", 
    88     'type' => "int", 
    89     'deft' => "100", 
    90     'range' => "1,", 
    91     'reqd' => "no" } ]; 
     44      ]; 
    9245 
    93 my $options = { 'name'     => "ImagePlug", 
    94         'desc'     => "{ImagePlug.desc}", 
     46my $options = { 'name'     => "ImagePlugin", 
     47        'desc'     => "{ImagePlugin.desc}", 
    9548        'abstract' => "no", 
    9649        'inherits' => "yes", 
     
    10457    push(@$pluginlist, $class); 
    10558 
    106     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    107     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
     59    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     60    push(@{$hashArgOptLists->{"OptList"}},$options); 
    10861 
    109     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 
    110     $self->{'tmp_file_paths'} = (); 
     62    new ImageConverter($pluginlist, $inputargs, $hashArgOptLists); 
     63    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 
    11164 
    112     # Check that ImageMagick is installed and available on the path (except for Windows 95/98) 
    113     if (!($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT())) { 
    114     my $result = `identify 2>&1`; 
    115     if ($? == -1 || $? == 256) {  # Linux and Windows return different values for "program not found" 
    116         $self->{'imagemagick_not_installed'} = 1; 
    117     } 
    118     } 
     65    return bless $self, $class; 
     66} 
    11967 
    120         
    121     return bless $self, $class; 
     68sub init { 
     69    my $self = shift (@_); 
     70    my ($verbosity, $outhandle, $failhandle) = @_; 
     71 
     72    $self->SUPER::init(@_); 
     73    $self->ImageConverter::init(); 
    12274} 
    12375 
     
    13688    return; 
    13789} 
    138 # Create the thumbnail and screenview images, and discover the Image's 
    139 # size, width, and height using the convert utility. 
    140  
    141 sub generate_images 
    142 { 
    143     my $self = shift (@_); 
    144     my $filename = shift (@_);   # filename with full path 
    145     my $file = shift (@_);       # filename without path 
    146     my $doc_obj = shift (@_); 
    147     my $section = $doc_obj->get_top_section(); 
    148      
    149     my $verbosity = $self->{'verbosity'}; 
    150     my $outhandle = $self->{'outhandle'}; 
    151  
    152     # check the filename is okay 
    153     return 0 if ($file eq "" || $filename eq ""); 
    154  
    155 #    Code now extended to quote filenames in 'convert' commands 
    156 #    Allows spaces in filenames, but note needs spaces to be escaped in URL as well 
    157 #    if ($filename =~ m/ /) { 
    158 #   print $outhandle "ImagePlug: \"$filename\" contains a space. choking.\n"; 
    159 #   return undef; 
    160 #    } 
    161  
    162     my $minimumsize = $self->{'minimumsize'}; 
    163     if (defined $minimumsize && (-s $filename < $minimumsize)) { 
    164         print $outhandle "ImagePlug: \"$filename\" too small, skipping\n" 
    165         if ($verbosity > 1); 
    166     } 
    167  
    168  
    169     # Convert the image to a new type (if required). 
    170     my $converttotype = $self->{'converttotype'}; 
    171     my $originalfilename = "";  # only set if we do a conversion 
    172     my $type = "unknown"; 
    173  
    174     if ($converttotype ne "" && $filename !~ m/$converttotype$/) { 
    175     $originalfilename = $filename; 
    176  
    177     my $result = $self->convert($originalfilename, $converttotype, "", ""); 
    178     ($filename) = ($result =~ /=>(.*\.$converttotype)/); 
    179  
    180     $type = $converttotype; 
    181     $file =~ s/\..*$/\.$type/; 
    182     } 
    183      
    184  
    185     # Add the image metadata 
    186     my $url = $file; 
    187      
    188     ##not know why it is required at the first place, it seems all works fine without it, so I comment it out  
    189     ##$url =~ s/ /%20/g; 
    190  
    191     my $utf8_filename_meta = $self->filename_to_metadata($url); 
    192     $doc_obj->add_utf8_metadata ($section, "Image", $utf8_filename_meta); 
    193  
    194     # Also want to set filename as 'Source' metadata to be 
    195     # consistent with other plugins 
    196     $doc_obj->add_utf8_metadata ($section, "Source", $utf8_filename_meta); 
    197  
    198     my ($image_type, $image_width, $image_height, $image_size)  
    199     = &identify($filename, $outhandle, $verbosity); 
    200  
    201     if ($image_type ne " ") { 
    202     $type = $image_type; 
    203     } 
    204      
    205     $doc_obj->add_metadata ($section, "FileFormat", $type); 
    206     $doc_obj->add_metadata ($section, "FileSize",   $image_size); 
    207  
    208     $doc_obj->add_metadata ($section, "ImageType",   $image_type); 
    209     $doc_obj->add_metadata ($section, "ImageWidth",  $image_width); 
    210     $doc_obj->add_metadata ($section, "ImageHeight", $image_height); 
    211     $doc_obj->add_metadata ($section, "ImageSize",   $image_size); 
    212     $doc_obj->add_metadata ($section, "NoText",    "1"); 
    213  
    214     $doc_obj->add_metadata ($section, "srclink",  
    215                 "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\">"); 
    216     $doc_obj->add_metadata ($section, "/srclink", "</a>"); 
    217  
    218     $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\" width=100>"); 
    219  
    220      
    221     # Add the image as an associated file 
    222     $doc_obj->associate_file($filename,$file,"image/$type",$section); 
    223  
    224  
    225     if (!$self->{'nothumbnail'}) { 
    226  
    227     # Make the thumbnail image 
    228     my $thumbnailsize = $self->{'thumbnailsize'} || 100; 
    229     my $thumbnailtype = $self->{'thumbnailtype'} || 'gif'; 
    230  
    231     # Generate the thumbnail with convert 
    232     my $result = $self->convert($filename, $thumbnailtype, "-geometry $thumbnailsize" . "x$thumbnailsize", "THUMB"); 
    233     my ($thumbnailfile) = ($result =~ /=>(.*\.$thumbnailtype)/); 
    234      
    235     # Add the thumbnail as an associated file ... 
    236     if (-e "$thumbnailfile") {  
    237         $doc_obj->associate_file("$thumbnailfile", "thumbnail.$thumbnailtype",  
    238                      "image/$thumbnailtype",$section); 
    239         $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype); 
    240         $doc_obj->add_metadata ($section, "Thumb", "thumbnail.$thumbnailtype"); 
    241          
    242         $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>"); 
    243     } 
    244  
    245     # Extract Thumbnail metadata from convert output 
    246     if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) { 
    247         $doc_obj->add_metadata ($section, "ThumbWidth", $1); 
    248         $doc_obj->add_metadata ($section, "ThumbHeight", $2); 
    249     } 
    250  
    251     } 
    252  
    253  
    254     # Make a screen-sized version of the picture if requested 
    255     if (!$self->{'noscreenview'}) { 
    256  
    257     # To do: if the actual image smaller than the screenview size, 
    258     # we should use the original ! 
    259  
    260     my $screenviewsize = $self->{'screenviewsize'}; 
    261     my $screenviewtype = $self->{'screenviewtype'} || 'jpeg'; 
    262  
    263     # make the screenview image 
    264     my $result = $self->convert($filename, $screenviewtype, "-geometry $screenviewsize" . "x$screenviewsize", "SCREEN"); 
    265     my ($screenviewfilename) = ($result =~ /=>(.*\.$screenviewtype)/); 
    266  
    267     # get screenview dimensions, size and type 
    268         if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) { 
    269         $doc_obj->add_metadata ($section, "ScreenWidth", $1); 
    270         $doc_obj->add_metadata ($section, "ScreenHeight", $2); 
    271     } 
    272     else { 
    273         $doc_obj->add_metadata ($section, "ScreenWidth", $image_width); 
    274         $doc_obj->add_metadata ($section, "ScreenHeight", $image_height); 
    275     } 
    276  
    277     #add the screenview as an associated file ... 
    278     if (-e "$screenviewfilename") {  
    279         $doc_obj->associate_file("$screenviewfilename", "screenview.$screenviewtype", 
    280                      "image/$screenviewtype",$section); 
    281         $doc_obj->add_metadata ($section, "ScreenType", $screenviewtype); 
    282         $doc_obj->add_metadata ($section, "Screen", "screenview.$screenviewtype"); 
    283  
    284         $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>"); 
    285     } else { 
    286         print $outhandle "ImagePlug: couldn't find \"$screenviewfilename\"\n"; 
    287     } 
    288     } 
    289  
    290     return $type; 
    291  
    292  
    293 } 
    294  
    295  
    296  
    297 # Discover the characteristics of an image file with the ImageMagick 
    298 # "identify" command. 
    299  
    300 sub identify {  
    301     my ($image, $outhandle, $verbosity) = @_; 
    302  
    303     # Use the ImageMagick "identify" command to get the file specs 
    304     my $command = "identify \"$image\" 2>&1"; 
    305     print $outhandle "$command\n" if ($verbosity > 2); 
    306     my $result = ''; 
    307     $result = `$command`; 
    308     print $outhandle "$result\n" if ($verbosity > 3); 
    309  
    310     # Read the type, width, and height 
    311     my $type =   'unknown'; 
    312     my $width =  'unknown'; 
    313     my $height = 'unknown'; 
    314  
    315     my $image_safe = quotemeta $image; 
    316     if ($result =~ /^$image_safe (\w+) (\d+)x(\d+)/) { 
    317     $type = $1; 
    318     $width = $2; 
    319     $height = $3; 
    320     } 
    321  
    322     # Read the size 
    323     my $size = "unknown"; 
    324     if ($result =~ m/^.* ([0-9]+)b/) { 
    325     $size = $1; 
    326     } 
    327     elsif ($result =~ m/^.* ([0-9]+)(\.([0-9]+))?kb?/) { 
    328     $size = 1024 * $1; 
    329     if (defined($2)) { 
    330         $size = $size + (1024 * $2); 
    331         # Truncate size (it isn't going to be very accurate anyway) 
    332         $size = int($size); 
    333     } 
    334     } 
    335     elsif ($result =~ m/^.* (([0-9]+)(\.([0-9]+))?e\+([0-9]+))(kb|b)?/) { 
    336     # Deals with file sizes on Linux of type "3.4e+02kb" where e+02 is 1*10^2. 
    337     # 3.4e+02 therefore evaluates to 3.4 x 1 x 10^2 = 340kb. 
    338     # Programming languages including Perl know that 3.4e+02 is a number, 
    339     # so we don't need to do any calculations. 
    340     $size = $1*1; # turn the string into a number by multiplying it by 1 
    341            #if we did $size = $1; $size would be merely the string "3.4e+02" 
    342     $size = int($size); # truncate size 
    343     } 
    344     print $outhandle "file: $image:\t $type, $width, $height, $size\n"  
    345     if ($verbosity > 2); 
    346  
    347     # Return the specs 
    348     return ($type, $width, $height, $size); 
    349 } 
    350  
    351  
    352 sub convert 
    353 { 
    354     my $self = shift(@_); 
    355     my $source_file_path = shift(@_); 
    356     my $target_file_type = shift(@_); 
    357     my $convert_options = shift(@_) || ""; 
    358     my $convert_type = shift(@_) || ""; 
    359  
    360     my $outhandle = $self->{'outhandle'}; 
    361     my $verbosity = $self->{'verbosity'}; 
    362  
    363     # Determine the full name and path of the output file 
    364     my $target_file_path = &util::get_tmp_filename() . "." . $target_file_type; 
    365     push(@{$self->{'tmp_file_paths'}}, $target_file_path); 
    366  
    367     # Generate and run the convert command 
    368     my $convert_command = "convert -interlace plane -verbose $convert_options \"$source_file_path\" \"$target_file_path\""; 
    369     print $outhandle "$convert_type $convert_command\n" if ($verbosity > 2); 
    370     my $result = `$convert_command 2>&1`; 
    371     print $outhandle "$convert_type RESULT = $result\n" if ($verbosity > 2); 
    372  
    373     return $result; 
    374 } 
    375  
    376  
    377 # The ImagePlug read() function.  
    378 # ImagePlug overrides read() because there is no need to read the actual  
    379 # text of the file in, because the contents of the file is not text... 
    380 # 
    381 # Return number of files processed, undef if can't process 
    382 # Note that $base_dir might be "" and that $file might  
    383 # include directories 
    384  
    385 sub read { 
    386     my $self = shift (@_); 
    387     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    388  
    389     my $outhandle = $self->{'outhandle'}; 
    390  
    391     #check process and block exps, smart block, etc 
    392     my ($block_status,$filename) = $self->read_block(@_);     
    393     return $block_status if ((!defined $block_status) || ($block_status==0)); 
    394  
    395     print STDERR "<Processing n='$file' p='ImagePlug'>\n" if ($gli); 
    396     print $outhandle "ImagePlug processing $file\n" 
    397         if $self->{'verbosity'} > 1; 
    398  
    399     # None of this works very well on Windows 95/98... 
    400     if ($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT()) { 
    401     if ($gli) { 
    402         print STDERR "<ProcessingError n='$file' r='Windows 95/98 not supported'>\n"; 
    403     } 
    404     print $outhandle "ImagePlug: Windows 95/98 not supported\n"; 
    405     return -1; 
    406     } 
    407  
    408     # None of this is going to work very well without ImageMagick... 
    409     if ($self->{'imagemagick_not_installed'}) { 
    410     if ($gli) { 
    411         print STDERR "<ProcessingError n='$file' r='ImageMagick not installed'>\n"; 
    412     } 
    413     print $outhandle "ImagePlug: ImageMagick not installed\n"; 
    414     return -1; 
    415     } 
    416  
    417     #if there's a leading directory name, eat it... 
    418     $file =~ s/^.*[\/\\]//; 
    419      
    420     # create a new document 
    421     my $doc_obj = new doc ($filename, "indexed_doc"); 
    422     $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});     
    423     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 
    424  
    425     #run convert to get the thumbnail and extract size and type info 
    426     my $result = generate_images($self, $filename, $file, $doc_obj); 
    427      
    428     if (!defined $result) 
    429     { 
    430     if ($gli) { 
    431         print STDERR "<ProcessingError n='$file'>\n"; 
    432     } 
    433     print $outhandle "ImagePlug: couldn't process \"$filename\"\n"; 
    434     return -1; # error during processing 
    435     } 
    436  
    437       
    438     #create an empty text string so we don't break downstream plugins 
    439     my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1); 
    440     
    441     # include any metadata passed in from previous plugins  
    442     # note that this metadata is associated with the top level section 
    443     my $section = $doc_obj->get_top_section(); 
    444     $self->extra_metadata ($doc_obj, $section, $metadata); 
    445  
    446     # do plugin specific processing of doc_obj 
    447     unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) { 
    448     print STDERR "<ProcessingError n='$file'>\n" if ($gli); 
    449     return -1; 
    450     } 
    451  
    452     # do any automatic metadata extraction 
    453     $self->auto_extract_metadata ($doc_obj); 
    454  
    455     # if we haven't found any Title so far, assign one 
    456     # this was shifted to here from inside read() 
    457     $self->title_fallback($doc_obj,$section,$file); 
    458     # add an OID 
    459     $doc_obj->set_OID(); 
    460     $doc_obj->add_utf8_text($section, $text); 
    461  
    462     # process the document 
    463     $processor->process($doc_obj); 
    464  
    465     # clean up temporary files - we do this here instead of in   
    466     # generate_images becuase associated files aren't actually copied  
    467     # until after process has been run. 
    468     foreach my $tmp_file_path (@{$self->{'tmp_file_paths'}}) 
    469     { 
    470     if (-e $tmp_file_path) 
    471     { 
    472         &util::rm($tmp_file_path); 
    473     } 
    474     } 
    475  
    476     $self->{'num_processed'}++; 
    477  
    478     return 1; 
    479 } 
    48090 
    48191# do plugin specific processing of doc_obj 
    48292sub process { 
    48393    my $self = shift (@_); 
    484     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; 
     94    # options?? 
     95    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
     96 
    48597    my $outhandle = $self->{'outhandle'}; 
     98    my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 
     99    if ($self->check_image_magick()) { 
     100    $self->generate_images($filename_full_path, $filename_no_path, $doc_obj, $doc_obj->get_top_section()); # should we check the return value? 
     101    } else { 
     102    # do some basic stuff 
     103    # associate the image, fileformat, mimetype, srclink, srcicon 
     104    # do this if image magick not installed. but also if generate hasn't worked?? what about images too small? 
     105    } 
     106    #we have no text - adds dummy text and NoText metadata 
     107    $self->add_dummy_text($doc_obj, $doc_obj->get_top_section()); 
     108 
     109    return 1; 
     110 
     111} 
     112 
     113sub clean_up_after_doc_obj_processing { 
     114    my $self = shift(@_); 
    486115     
    487     return 1; 
     116    $self->ImageConverter::clean_up_temporary_files(); 
    488117} 
    489118 
  • gsdl/trunk/perllib/plugins/IndexPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # IndexPlug.pm -- 
     3# IndexPlugin.pm -- 
    44# A component of the Greenstone digital library software 
    55# from the New Zealand Digital Library Project at the  
     
    5050# named 'Subject'. 
    5151 
    52 # 12/05/02 Added usage datastructure - John Thompson 
    53  
    54 package IndexPlug; 
     52package IndexPlugin; 
    5553 
    5654use plugin; 
    57 use BasPlug; 
     55use BasePlugin; 
    5856use doc; 
    5957use util; 
     
    6462 
    6563sub BEGIN { 
    66     @IndexPlug::ISA = ('BasPlug'); 
     64    @IndexPlugin::ISA = ('BasePlugin'); 
    6765} 
    6866 
    69 my $arguments = [ 
    70          ]; 
     67#my $arguments = [ 
     68#        ]; 
    7169 
    72 my $options = { 'name'     => "IndexPlug", 
    73         'desc'     => "{IndexPlug.desc}", 
     70my $options = { 'name'     => "IndexPlugin", 
     71        'desc'     => "{IndexPlugin.desc}", 
    7472        'abstract' => "no", 
    7573        'inherits' => "yes" }; 
     
    8078    push(@$pluginlist, $class); 
    8179 
    82     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    83     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
     80    #push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     81    push(@{$hashArgOptLists->{"OptList"}},$options); 
    8482 
    85     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 
     83    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 
    8684 
    8785    return bless $self, $class; 
     
    110108 
    111109    # found an index.txt file 
    112     print STDERR "<Processing n='$file' p='IndexPlug'>\n" if ($gli); 
    113     print $outhandle "IndexPlug: processing $indexfile\n"; 
     110    print STDERR "<Processing n='$file' p='IndexPlugin'>\n" if ($gli); 
     111    print $outhandle "IndexPlugin: processing $indexfile\n"; 
    114112 
    115113    # read in the index.txt 
  • gsdl/trunk/perllib/plugins/LOMPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # LOMPlug.pm -- plugin for importing a collection from LOM
     3# LOMPlugin.pm -- plugin for importing a collection from LOM
    44#  
    55# A component of the Greenstone digital library software 
     
    2727### Note this plugin currently can't download source documents from outside if you are behind a firewall. 
    2828 
    29 package LOMPlug; 
    30  
    31 use BasPlug; 
     29package LOMPlugin; 
     30 
     31use ReadTextFile; 
    3232use MetadataPass; 
    3333use XMLParser; 
     
    3535 
    3636sub BEGIN { 
    37     @ISA = ('BasPlug', 'MetadataPass'); 
     37    @ISA = ('ReadTextFile', 'MetadataPass'); 
    3838} 
    3939 
     
    4444my $arguments = 
    4545    [ { 'name' => "process_exp", 
    46     'desc' => "{BasPlug.process_exp}", 
     46    'desc' => "{ReadTextFile.process_exp}", 
    4747    'type' => "string", 
    4848    'deft' => &get_default_process_exp(), 
    4949    'reqd' => "no" }, 
    5050      { 'name' => "root_tag", 
    51     'desc' => "{LOMPlug.root_tag}", 
     51    'desc' => "{LOMPlugin.root_tag}", 
    5252    'type' => "regexp", 
    5353    'deft' => q/^(?i)lom$/, 
    5454    'reqd' => "no" }, 
    5555      { 'name' => "check_timestamp", 
    56     'desc' => "{LOMPlug.check_timestamp}", 
     56    'desc' => "{LOMPlugin.check_timestamp}", 
    5757    'type' => "flag" }, 
    5858      { 'name' => "download_srcdocs", 
    59     'desc' => "{LOMPlug.download_srcdocs}", 
     59    'desc' => "{LOMPlugin.download_srcdocs}", 
    6060    'type' => "regexp", 
    6161    'deft' => "", 
    6262    'reqd' => "no" }]; 
    6363 
    64 my $options = { 'name'     => "LOMPlug", 
    65         'desc'     => "{LOMPlug.desc}", 
     64my $options = { 'name'     => "LOMPlugin", 
     65        'desc'     => "{LOMPlugin.desc}", 
    6666        'inherits' => "yes", 
    6767        'args'     => $arguments }; 
     
    7575    push(@$pluginlist, $class); 
    7676     
    77     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    78     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
     77    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     78    push(@{$hashArgOptLists->{"OptList"}},$options); 
    7979    
    80     $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 
     80    $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists); 
     81 
     82    if ($self->{'info_only'}) { 
     83    # don't worry about creating the XML parser as all we want is the  
     84    # list of plugin options 
     85    return bless $self, $class; 
     86    } 
    8187 
    8288    #create XML::Parser object for parsing dublin_core.xml files 
     
    120126    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 
    121127     
    122     print $outhandle "LOMPlug: extracting metadata from $file\n" 
     128    print $outhandle "LOMPlugin: extracting metadata from $file\n" 
    123129    if $self->{'verbosity'} > 1; 
    124130 
     
    131137     
    132138    if ($@) { 
    133     print $outhandle "LOMPlug: skipping $filename as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1); 
     139    print $outhandle "LOMPlugin: skipping $filename as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1); 
    134140    print $outhandle "\n Perl Error:\n $@\n" if ($self->{'verbosity'}>2); 
    135141    return 0; 
     
    262268    my $outhandle = $self->{'outhandle'}; 
    263269 
    264     print STDERR "<Processing n='$file' p='LOMPlug'>\n" if ($gli); 
     270    print STDERR "<Processing n='$file' p='LOMPlugin'>\n" if ($gli); 
    265271 
    266272    print $outhandle "LOMPLug: processing $file\n"; 
  • gsdl/trunk/perllib/plugins/LaTeXPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # LaTeXPlug.pm 
     3# LaTeXPlugin.pm 
    44# 
    55# A component of the Greenstone digital library software 
     
    2626#  parse/remove tex \if ... macros 
    2727 
    28 package LaTeXPlug; 
     28package LaTeXPlugin; 
    2929 
    3030# System complains about $arguments if the strict is set 
     
    3333 
    3434# greenstone packages 
    35 use BasPlug; 
     35use ReadTextFile; 
    3636use unicode; 
    3737use util; 
     
    3939my $arguments = 
    4040    [ { 'name' => "process_exp", 
    41     'desc' => "{BasPlug.process_exp}", 
     41    'desc' => "{ReadTextFile.process_exp}", 
    4242    'type' => "regexp", 
    4343    'reqd' => "no", 
    4444    'deft' => &get_default_process_exp() } ]; 
    4545 
    46 my $options = { 'name'     => 'LaTeXPlug', 
    47         'desc'     => '{LaTeXPlug.desc}', 
     46my $options = { 'name'     => 'LaTeXPlugin', 
     47        'desc'     => '{LaTeXPlugin.desc}', 
    4848        'abstract' => 'no', 
    4949        'inherits' => 'yes', 
     
    5151 
    5252sub BEGIN { 
    53     @LaTeXPlug::ISA = ('BasPlug'); 
    54 } 
    55  
    56 sub print_usage { 
    57     print STDERR "\n  usage: plugin LaTeXPlug [options]\n\n"; 
     53    @LaTeXPlugin::ISA = ('ReadTextFile'); 
    5854} 
    5955 
     
    6359    push(@$pluginlist, $class); 
    6460 
    65     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    66     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    67  
    68     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 
     61    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     62    push(@{$hashArgOptLists->{"OptList"}},$options); 
     63 
     64    my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists); 
    6965 
    7066    $self->{'aux_files'} = {}; 
     
    9995    my $outhandle = $self->{'outhandle'}; 
    10096    if ($gli) { 
    101     print STDERR "<Processing n='$file' p='LaTeXPlug'>\n"; 
     97    print STDERR "<Processing n='$file' p='LaTeXPlugin'>\n"; 
    10298    } elsif ($self->{'verbosity'} > 1) { 
    103     print $outhandle "LaTeXPlug: processing $file\n"  
     99    print $outhandle "LaTeXPlugin: processing $file\n"  
    104100    } 
    105101    my $cursection = $doc_obj->get_top_section(); 
  • gsdl/trunk/perllib/plugins/MARCPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # MARCPlug.pm -- basic MARC plugin 
     3# MARCPlugin.pm -- basic MARC plugin 
    44# 
    55# A component of the Greenstone digital library software 
     
    2525########################################################################### 
    2626 
    27 package MARCPlug; 
    28  
    29 use SplitPlug; 
     27package MARCPlugin; 
     28 
     29use SplitTextFile; 
    3030 
    3131use unicode; 
     
    3636 
    3737sub BEGIN { 
    38     @MARCPlug::ISA = ('SplitPlug'); 
     38    @MARCPlugin::ISA = ('SplitTextFile'); 
    3939    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan"); 
    4040} 
     
    4242my $arguments =  
    4343    [ { 'name' => "metadata_mapping", 
    44     'desc' => "{MARCPlug.metadata_mapping}", 
     44    'desc' => "{MARCPlugin.metadata_mapping}", 
    4545    'type' => "string", 
    4646    'deft' => "marctodc.txt", 
     
    5353    'reqd' => "no" }, 
    5454      { 'name' => "process_exp", 
    55     'desc' => "{BasPlug.process_exp}", 
     55    'desc' => "{BasePlugin.process_exp}", 
    5656    'type' => "regexp", 
    5757    'reqd' => "no", 
    5858    'deft' => &get_default_process_exp() }, 
    5959      { 'name' => "split_exp", 
    60     'desc' => "{SplitPlug.split_exp}", 
     60    'desc' => "{SplitTextFile.split_exp}", 
    6161    'type' => "regexp", 
    6262    'reqd' => "no", 
     
    6464      ]; 
    6565 
    66 my $options = { 'name'     => "MARCPlug", 
    67         'desc'     => "{MARCPlug.desc}", 
     66my $options = { 'name'     => "MARCPlugin", 
     67        'desc'     => "{MARCPlugin.desc}", 
    6868        'abstract' => "no", 
    6969        'inherits' => "yes", 
     
    8181    push(@$pluginlist, $class); 
    8282 
    83     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    84     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    85  
    86     my $self = new SplitPlug($pluginlist, $inputargs, $hashArgOptLists); 
     83    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     84    push(@{$hashArgOptLists->{"OptList"}},$options); 
     85 
     86    my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists); 
    8787 
    8888    # 'metadata_mapping' was used in two ways in the plugin: as a plugin 
     
    119119    { 
    120120 
    121     my $msg = "MARCPlug ERROR: Can't locate mapping file \"" . 
     121    my $msg = "MARCPlugin ERROR: Can't locate mapping file \"" . 
    122122        $self->{'metadata_mapping_file'} . "\".\n" . 
    123123        "    No marc files can be processed.\n"; 
     
    245245        push(@marc_entries,$marc); 
    246246    $$textref .= $marc->as_formatted(); 
    247     $$textref .= "\n\n"; # for SplitPlug - see default_split_exp above... 
     247    $$textref .= "\n\n"; # for SplitTextFile - see default_split_exp above... 
    248248    } 
    249249 
     
    254254 
    255255# do plugin specific processing of doc_obj 
    256 # This gets done for each record found by SplitPlug in marc files. 
     256# This gets done for each record found by SplitTextFile in marc files. 
    257257sub process { 
    258258    my $self = shift (@_); 
     
    264264    if (! defined($self->{'metadata_mapping'})) 
    265265    { 
    266     print $outhandle "MARCPlug: no metadata file! Can't process $file\n"; 
     266    print $outhandle "MARCPlugin: no metadata file! Can't process $file\n"; 
    267267    return undef; 
    268268    } 
    269269 
    270     print STDERR "<Processing n='$file' p='MARCPlug'>\n" if ($gli); 
    271     print $outhandle "MARCPlug: processing $file\n" 
     270    print STDERR "<Processing n='$file' p='MARCPlugin'>\n" if ($gli); 
     271    print $outhandle "MARCPlugin: processing $file\n" 
    272272    if $self->{'verbosity'} > 1; 
    273273 
  • gsdl/trunk/perllib/plugins/MARCXMLPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # MARCXMLPlug.pm 
     3# MARCXMLPlugin.pm 
    44# A component of the Greenstone digital library software 
    55# from the New Zealand Digital Library Project at the  
     
    2828# well-formedness). 
    2929 
    30 package MARCXMLPlug; 
    31  
    32 use XMLPlug; 
     30package MARCXMLPlugin; 
     31 
     32use ReadXMLFile; 
    3333 
    3434use strict; 
     
    3636 
    3737sub BEGIN { 
    38     @MARCXMLPlug::ISA = ('XMLPlug'); 
     38    @MARCXMLPlugin::ISA = ('ReadXMLFile'); 
    3939} 
    4040 
    4141my $arguments = [{'name' => "metadata_mapping_file", 
    42           'desc' => "{MARCXMLPlug.metadata_mapping_file}", 
     42          'desc' => "{MARCXMLPlugin.metadata_mapping_file}", 
    4343          'type' => "string", 
    4444          'deft' => "marctodc.txt", 
    4545          'reqd' => "no" }]; 
    4646 
    47 my $options = { 'name'     => "MARCXMLPlug", 
    48         'desc'     => "{MARCXMLPlug.desc}", 
     47my $options = { 'name'     => "MARCXMLPlugin", 
     48        'desc'     => "{MARCXMLPlugin.desc}", 
    4949        'abstract' => "no", 
    5050        'inherits' => "yes", 
     
    5757    push(@$pluginlist, $class); 
    5858 
    59     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    60     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    61      
    62     my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists); 
     59    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     60    push(@{$hashArgOptLists->{"OptList"}},$options); 
     61     
     62    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists); 
    6363     
    6464    $self->{'content'} = ""; 
     
    222222    if (scalar(@$mm_files)==0) 
    223223    { 
    224     my $msg = "MARCXMLPlug ERROR: Can't locate mapping file \"" . 
     224    my $msg = "MARCXMLPlugin ERROR: Can't locate mapping file \"" . 
    225225        $self->{'metadata_mapping_file'} . "\".\n " . 
    226226        "    No marc files can be processed.\n"; 
     
    269269    $self->{'indent'} = 0; 
    270270    my $outhandle = $self->{'outhandle'}; 
    271     print $outhandle "MARCXMLPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 
    272     print STDERR "<Processing n='$self->{'file'}' p='MARCXMLPlug'>\n" if $self->{'gli'}; 
     271    print $outhandle "MARCXMLPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 
     272    print STDERR "<Processing n='$self->{'file'}' p='MARCXMLPlugin'>\n" if $self->{'gli'}; 
    273273  
    274274} 
     
    310310    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Encoding", $encoding); 
    311311    my ($filemeta) = $file =~ /([^\\\/]+)$/; 
    312     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta)); 
     312    $self->set_Source_metadata($doc_obj, $filemeta, $encoding); 
    313313    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "SourceSegment", "$self->{'record_count'}"); 
    314314        if ($self->{'cover_image'}) { 
     
    319319 
    320320    my $outhandle = $self->{'outhandle'}; 
    321     print $outhandle "Record $self->{'record_count'} - MARCXMLPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 
     321    print $outhandle "Record $self->{'record_count'} - MARCXMLPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 
    322322 
    323323        $self->{'record_count'}++; 
  • gsdl/trunk/perllib/plugins/METSPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # METSPlug.pm 
     3# METSPlugin.pm 
    44# A component of the Greenstone digital library software 
    55# from the New Zealand Digital Library Project at the  
     
    3030 
    3131 
    32 package METSPlug; 
     32package METSPlugin; 
    3333 
    3434use ghtml; 
     
    3737no strict 'refs'; # allow filehandles to be variables and viceversa 
    3838 
    39 use XMLPlug; 
     39use ReadXMLFile; 
    4040use XML::XPath; 
    4141use XML::XPath::XMLParser; 
    4242 
    4343sub BEGIN { 
    44     @METSPlug::ISA = ('XMLPlug'); 
     44    @METSPlugin::ISA = ('ReadXMLFile'); 
    4545} 
    4646 
    4747my $arguments = [ 
    4848         ]; 
    49 my $options = { 'name'     => "METSPlug", 
    50         'desc'     => "{METSPlug.desc}", 
     49my $options = { 'name'     => "METSPlugin", 
     50        'desc'     => "{METSPlugin.desc}", 
    5151        'abstract' => "no", 
    5252        'inherits' => "yes" }; 
     
    6464    push(@$pluginlist, $class); 
    6565 
    66     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    67     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    68  
    69     my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists); 
     66    # have no args - do we still want this? 
     67    #push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     68    push(@{$hashArgOptLists->{"OptList"}},$options); 
     69 
     70    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists); 
    7071 
    7172    $self->{'section'} = ""; 
     
    117118    } 
    118119    my $outhandle = $self->{'outhandle'}; 
    119     print $outhandle "METSPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 
    120     print STDERR "<Processing n='$self->{'file'}' p='METSPlug'>\n" if ($self->{'gli'}); 
     120    print $outhandle "METSPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 
     121    print STDERR "<Processing n='$self->{'file'}' p='METSPlugin'>\n" if ($self->{'gli'}); 
    121122 
    122123} 
  • gsdl/trunk/perllib/plugins/MP3Plugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # MP3Plug.pm -- Plugin for MP3 files (MPEG audio layer 3). 
     3# MP3Plugin.pm -- Plugin for MP3 files (MPEG audio layer 3). 
    44# 
    55# A component of the Greenstone digital library software from the New 
     
    2626 
    2727 
    28 package MP3Plug; 
    29  
    30 use UnknownPlug; 
     28package MP3Plugin; 
     29 
     30use BasePlugin; 
    3131 
    3232use strict; 
    3333no strict 'refs'; # allow filehandles to be variables and viceversa 
     34no strict 'subs'; 
    3435 
    3536use MP3::Info; 
     
    3839 
    3940sub BEGIN { 
    40     @MP3Plug::ISA = ('UnknownPlug'); 
     41    @MP3Plugin::ISA = ('BasePlugin'); 
    4142} 
    4243 
    4344my $arguments = 
    4445    [ { 'name' => "process_exp", 
    45     'desc' => "{BasPlug.process_exp}", 
     46    'desc' => "{BasePlugin.process_exp}", 
    4647    'type' => "regexp", 
    4748    'deft' => &get_default_process_exp(), 
    4849    'reqd' => "no" }, 
    4950      { 'name' => "assoc_images", 
    50         'desc' => "{MP3Plug.assoc_images}", 
     51        'desc' => "{MP3Plugin.assoc_images}", 
    5152        'type' => "flag", 
    5253        'deft' => "", 
    5354        'reqd' => "no" }, 
    5455      { 'name' => "applet_metadata", 
    55     'desc' => "{MP3Plug.applet_metadata}", 
     56    'desc' => "{MP3Plugin.applet_metadata}", 
    5657    'type' => "flag", 
    5758    'deft' => "" }, 
    5859      { 'name' => "metadata_fields", 
    59     'desc' => "{MP3Plug.metadata_fields}", 
     60    'desc' => "{MP3Plugin.metadata_fields}", 
    6061    'type' => "string", 
    6162    'deft' => "Title,Artist,Genre" } ]; 
    6263 
    63 my $options = { 'name'     => "MP3Plug", 
    64         'desc'     => "{MP3Plug.desc}", 
     64my $options = { 'name'     => "MP3Plugin", 
     65        'desc'     => "{MP3Plugin.desc}", 
    6566        'abstract' => "no", 
    6667        'inherits' => "yes", 
     
    7273    push(@$pluginlist, $class); 
    7374 
    74     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    75     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    76  
    77     my $self = new UnknownPlug($pluginlist, $inputargs, $hashArgOptLists); 
     75    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     76    push(@{$hashArgOptLists->{"OptList"}},$options); 
     77 
     78    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 
    7879 
    7980    return bless $self, $class; 
     
    8283sub get_default_process_exp { 
    8384    return q^(?i)\.mp3$^; 
     85} 
     86 
     87sub process { 
     88    my $self = shift (@_); 
     89    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
     90 
     91    my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 
     92    # do something about OIDtype so no hashing 
     93      
     94    # old code was in effect the following.  
     95    if ($doc_obj->{'OIDtype'} =~ /^hash$/) { 
     96    $doc_obj->set_OIDtype ("incremental"); 
     97    } 
     98 
     99 
     100    # associate the file with the document 
     101    if ($self->associate_mp3_file($filename_full_path, $filename_no_path, $doc_obj) != 1) 
     102    { 
     103    print "MP3Plugin: couldn't process \"$filename_full_path\"\n"; 
     104    return 0; 
     105    } 
     106    
     107    #whats this crap? 
     108   my $text = &gsprintf::lookup_string("{BasePlugin.dummy_text}",1); 
     109    if ($self->{'assoc_images'}) { 
     110    $text .= "[img1]<br>"; 
     111    $text .= "[img2]<br>"; 
     112    } 
     113    $doc_obj->add_utf8_text($doc_obj->get_top_section(), $text); 
     114 
    84115} 
    85116 
     
    155186 
    156187    $doc_obj->associate_file($filename, $dst_file, $mime_type, $section); 
    157     $doc_obj->add_metadata ($section, "Source", $file); 
    158188    $doc_obj->add_metadata ($section, $assoc_field, $assoc_name); 
    159189    $doc_obj->add_metadata ($section, "srcurl", $assoc_url); 
     
    295325 
    296326 
    297  
    298 # The MP3Plug read() function is based on UnknownPlug read().  This 
    299 # function does all the right things to make general options work for 
    300 # a given plugin.  
    301  
    302 my $mp3_doc_count = 0; ## is this used anywhere now !!??? 
    303  
    304 sub read { 
    305     my $self = shift (@_); 
    306     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    307  
    308     my $outhandle = $self->{'outhandle'}; 
    309  
    310     #check for associate_ext, blocking etc 
    311     my ($block_status,$filename) = $self->read_block(@_);     
    312     return $block_status if ((!defined $block_status) || ($block_status==0)); 
    313  
    314     print STDERR "<Processing n='$file' p='MP3Plug'>\n" if ($gli); 
    315     print $outhandle "MP3Plug processing \"$filename\"\n" 
    316         if $self->{'verbosity'} > 1; 
    317  
    318     #if there's a leading directory name, eat it... 
    319     $file =~ s/^.*[\/\\]//; 
    320      
    321     # create a new document 
    322     my $doc_obj = new doc ($filename, "indexed_doc"); 
    323     $mp3_doc_count++; 
    324      
    325 ##    $doc_obj->set_OIDtype ($processor->{'OIDtype'});   
    326     if ($processor->{'OIDtype'} =~ /^(assigned|dirname)$/) { 
    327     $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'}); 
    328     } 
    329     else { 
    330     $doc_obj->set_OIDtype ("incremental");    # this is done to avoid hashing content of file 
    331     } 
    332     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 
    333     $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "FileSize", (-s $filename)); 
    334  
    335     # associate the file with the document 
    336     if (associate_mp3_file($self, $filename, $file, $doc_obj) != 1) 
    337     { 
    338     print "MP3Plug: couldn't process \"$filename\"\n"; 
    339     return 0; 
    340     } 
    341  
    342     #create an empty text string so we don't break downstream plugins  
    343     my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1); 
    344     if ($self->{'assoc_images'}) { 
    345     $text .= "[img1]<br>"; 
    346     $text .= "[img2]<br>"; 
    347     } 
    348     # include any metadata passed in from previous plugins  
    349     my $section = $doc_obj->get_top_section(); 
    350     $self->extra_metadata ($doc_obj, $section, $metadata); 
    351  
    352     $self->title_fallback($doc_obj,$section,$file); 
    353  
    354     # do plugin specific processing of doc_obj 
    355     return undef unless defined ($self->process (\$text, $pluginfo, $base_dir,  
    356                          $file, $metadata, $doc_obj)); 
    357  
    358     # do any automatic metadata extraction 
    359     $self->auto_extract_metadata ($doc_obj); 
    360  
    361     # add an OID 
    362     $doc_obj->set_OID(); 
    363     $doc_obj->add_utf8_text($section, $text); 
    364  
    365     # process the document 
    366     $processor->process($doc_obj); 
    367  
    368     $self->{'num_processed'} ++; 
    369     return 1; 
    370 } 
    371  
    372  
     327# we want to use mp3:Title if its there, otherwise we'll use BasePlugin method 
    373328sub title_fallback 
    374329{ 
     
    382337    } 
    383338    else { 
    384         &BasPlug::title_fallback($self, $doc_obj, $section, $file); 
     339        $self->BasePlugin::title_fallback($doc_obj, $section, $file); 
    385340    } 
    386341    } 
  • gsdl/trunk/perllib/plugins/MediaWikiPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # MediaWikiPlug.pm -- html plugin with extra facilities for wiki page  
     3# MediaWikiPlugin.pm -- html plugin with extra facilities for wiki page  
    44# 
    55# A component of the Greenstone digital library software 
     
    3131# collection's Home page.  
    3232 
    33 package MediaWikiPlug; 
    34  
    35 use HTMLPlug; 
    36 # use ImagePlug; 
     33package MediaWikiPlugin; 
     34 
     35use HTMLPlugin; 
     36# use ImagePlugin; 
    3737# use File::Copy; 
    3838use unicode; 
     
    4343 
    4444sub BEGIN { 
    45     @MediaWikiPlug::ISA = ('HTMLPlug');         
     45    @MediaWikiPlugin::ISA = ('HTMLPlugin');         
    4646} 
    4747 
     
    5050     # show the table of contents on collection's home page 
    5151     { 'name' => "show_toc", 
    52        'desc' => "{MediaWikiPlug.show_toc}", 
     52       'desc' => "{MediaWikiPlugin.show_toc}", 
    5353       'type' => "flag", 
    5454       'reqd' => "no"}, 
    5555     # set to delete the table of contents section on each MediaWiki page 
    5656     { 'name' => "delete_toc", 
    57        'desc' => "{MediaWikiPlug.delete_toc}", 
     57       'desc' => "{MediaWikiPlugin.delete_toc}", 
    5858       'type' => "flag", 
    5959       'reqd' => "no"}, 
    6060     # regexp to match the table of contents 
    6161     { 'name' => "toc_exp", 
    62        'desc' => "{MediaWikiPlug.toc_exp}", 
     62       'desc' => "{MediaWikiPlugin.toc_exp}", 
    6363       'type' => "regexp", 
    6464       'reqd' => "no", 
     
    6666     # set to delete the navigation section 
    6767     { 'name' => "delete_nav", 
    68        'desc' => "{MediaWikiPlug.delete_nav}", 
     68       'desc' => "{MediaWikiPlugin.delete_nav}", 
    6969       'type' => "flag", 
    7070       'reqd' => "no", 
     
    7272     # regexp to match the navigation section     
    7373     { 'name' => "nav_div_exp", 
    74        'desc' => "{MediaWikiPlug.nav_div_exp}", 
     74       'desc' => "{MediaWikiPlugin.nav_div_exp}", 
    7575       'type' => "regexp", 
    7676       'reqd' => "no", 
     
    7878     # set to delete the searchbox section 
    7979     { 'name' => "delete_searchbox", 
    80        'desc' => "{MediaWikiPlug.delete_searchbox}", 
     80       'desc' => "{MediaWikiPlugin.delete_searchbox}", 
    8181       'type' => "flag", 
    8282       'reqd' => "no", 
     
    8484     # regexp to match the searchbox section 
    8585     { 'name' => "searchbox_div_exp", 
    86        'desc' => "{MediaWikiPlug.searchbox_div_exp}", 
     86       'desc' => "{MediaWikiPlugin.searchbox_div_exp}", 
    8787       'type' => "regexp", 
    8888       'reqd' => "no", 
    8989       'deft' => "<div([^>]*)id=(\\\"|')p-search(\\\"|')(.|\\n)*?<\/div>"},      
    9090     # regexp to match title suffix 
    91      # can't use the title_sub option in HTMLPlug instead 
     91     # can't use the title_sub option in HTMLPlugin instead 
    9292     # because title_sub always matches from the begining       
    9393     { 'name' => "remove_title_suffix_exp", 
    94        'desc' => "{MediaWikiPlug.remove_title_suffix_exp}", 
     94       'desc' => "{MediaWikiPlugin.remove_title_suffix_exp}", 
    9595       'type' => "regexp", 
    9696       'reqd' => "no", 
     
    9898     ]; 
    9999 
    100 my $options = { 'name'     => "MediaWikiPlug", 
    101         'desc'     => "{MediaWikiPlug.desc}", 
     100my $options = { 'name'     => "MediaWikiPlugin", 
     101        'desc'     => "{MediaWikiPlugin.desc}", 
    102102        'abstract' => "no", 
    103103        'inherits' => "yes", 
     
    109109    push(@$pluginlist, $class); 
    110110     
    111     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    112     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    113      
    114     my $self = new HTMLPlug($pluginlist, $inputargs, $hashArgOptLists);     
     111    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     112    push(@{$hashArgOptLists->{"OptList"}},$options); 
     113     
     114    my $self = new HTMLPlugin($pluginlist, $inputargs, $hashArgOptLists);     
    115115    return bless $self, $class; 
    116116} 
     
    123123    my $outhandle = $self->{'outhandle'}; 
    124124 
    125     print $outhandle "MediaWikiPlug: processing $file\n" if $self->{'verbosity'} > 1; 
     125    print $outhandle "MediaWikiPlugin: processing $file\n" if $self->{'verbosity'} > 1; 
    126126           
    127127    my @head_and_body = split(/<body/i,$$textref); 
     
    205205                                        # linux: /research/lh92/greenstone/greenstone2.73/collect/wiki/import 
    206206    # $file use different delimiters : forward slash for linux; backward slash for windows 
    207     # print "\nfile : $file\n\n";         # windows: greenstone.sourceforge.net\wiki\index.php\Access_Processing_using_DBPlug.html     
     207    # print "\nfile : $file\n\n";         # windows: greenstone.sourceforge.net\wiki\index.php\Access_Processing_using_DBPlugin.html     
    208208                                        # linux: greenstone.sourceforge.net/wiki/index.php/Using_GreenstoneWiki.html 
    209209     
     
    618618        $value = $1; 
    619619        if (!defined $value || !defined $tag){ 
    620         #print $outhandle "StructuredHTMLPlug: can't find VALUE in \"$tag\"\n"; 
     620        #print $outhandle "MediaWikiPlugin: can't find VALUE in \"$tag\"\n"; 
    621621        next; 
    622622        } else { 
  • gsdl/trunk/perllib/plugins/MetadataCSVPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # MetadataCSVPlug.pm -- A plugin for metadata in comma-separated value format 
     3# MetadataCSVPlugin.pm -- A plugin for metadata in comma-separated value format 
    44# 
    55# A component of the Greenstone digital library software 
     
    2525########################################################################### 
    2626 
    27 package MetadataCSVPlug; 
     27package MetadataCSVPlugin; 
    2828 
    2929 
    30 use BasPlug; 
     30use BasePlugin; 
    3131use strict; 
    3232 
    3333 
    3434sub BEGIN { 
    35     @MetadataCSVPlug::ISA = ('BasPlug'); 
     35    @MetadataCSVPlugin::ISA = ('BasePlugin'); 
    3636} 
    3737 
     
    3939my $arguments = 
    4040    [ { 'name' => "block_exp", 
    41     'desc' => "{BasPlug.block_exp}", 
     41    'desc' => "{BasePlugin.block_exp}", 
    4242    'type' => "regexp", 
    4343    'reqd' => "no", 
     
    4545 
    4646 
    47 my $options = { 'name'     => "MetadataCSVPlug", 
    48         'desc'     => "{MetadataCSVPlug.desc}", 
     47my $options = { 'name'     => "MetadataCSVPlugin", 
     48        'desc'     => "{MetadataCSVPlugin.desc}", 
    4949        'abstract' => "no", 
    5050        'inherits' => "yes", 
     
    5858    push(@$pluginlist, $class); 
    5959 
    60     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    61     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
     60    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     61    push(@{$hashArgOptLists->{"OptList"}},$options); 
    6262 
    63     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 
     63    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 
    6464 
    6565    return bless $self, $class; 
     
    7474 
    7575 
    76 # We don't want any other plugins to see .csv files 
     76# Used by BasePlugin read to block this file 
    7777sub get_default_block_exp 
    7878{ 
     
    9191    return undef; 
    9292    } 
    93     print STDERR "\n<Processing n='$file' p='MetadataCSVPlug'>\n" if ($gli); 
    94     print STDERR "MetadataCSVPlug: processing $file\n" if ($self->{'verbosity'}) > 1; 
     93    print STDERR "\n<Processing n='$file' p='MetadataCSVPlugin'>\n" if ($gli); 
     94    print STDERR "MetadataCSVPlugin: processing $file\n" if ($self->{'verbosity'}) > 1; 
    9595 
    9696    # Read the CSV file to get the metadata 
     
    9898    open(CSV_FILE, "$filename"); 
    9999    my $csv_file_reader = new multiread(); 
    100     $csv_file_reader->set_handle('MetadataCSVPlug::CSV_FILE'); 
     100    $csv_file_reader->set_handle('MetadataCSVPlugin::CSV_FILE'); 
    101101    $csv_file_reader->read_file(\$csv_file_content); 
    102102    close(CSV_FILE); 
     
    118118 
    119119    if (!$found_filename_field) { 
    120     print STDERR "MetadataCSVPlug Error: No Filename field in CSV file: $filename\n"; 
     120    print STDERR "MetadataCSVPlugin Error: No Filename field in CSV file: $filename\n"; 
    121121    return -1; # error 
    122122    } 
     
    153153        # The line must be formatted incorrectly 
    154154        else { 
    155         print STDERR "MetadataCSVPlug Error: Badly formatted CSV line: $csv_line.\n"; 
     155        print STDERR "MetadataCSVPlugin Error: Badly formatted CSV line: $csv_line.\n"; 
    156156        last; 
    157157        } 
     
    163163    my $csv_line_filename_array = $csv_line_metadata{"Filename"}; 
    164164    if (!defined $csv_line_filename_array) { 
    165         print STDERR "MetadataCSVPlug Error: No Filename metadata in CSV line: $orig_csv_line\n"; 
     165        print STDERR "MetadataCSVPlugin Error: No Filename metadata in CSV line: $orig_csv_line\n"; 
    166166        next; 
    167167    } 
  • gsdl/trunk/perllib/plugins/MetadataPass.pm

    r12970 r15872  
    2929no strict 'refs'; # allow filehandles to be variables and viceversa 
    3030 
    31 use BasPlug; # uses BasPlug, but is not inherited 
     31use PrintInfo; # uses PrintInfo, but is not inherited 
    3232 
    3333 
     
    5555sub print_xml_usage 
    5656{ 
    57     BasPlug::print_xml_usage(@_); 
     57    PrintInfo::print_xml_usage(@_); 
    5858} 
    5959 
    6060sub print_xml 
    6161{ 
    62     BasPlug::print_xml(@_); 
     62    PrintInfo::print_xml(@_); 
    6363} 
    6464 
    6565sub set_incremental 
    6666{ 
    67     BasPlug::set_incremental(@_); 
     67    PrintInfo::set_incremental(@_); 
    6868} 
    6969 
  • gsdl/trunk/perllib/plugins/MetadataXMLPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # MetadataXMLPlug.pm -- 
     3# MetadataXMLPlugin.pm -- 
    44# A component of the Greenstone digital library software 
    55# from the New Zealand Digital Library Project at the  
     
    2424########################################################################### 
    2525 
    26 # MetadataXMLPlug process metadata.xml files in a collection 
     26# MetadataXMLPlugin process metadata.xml files in a collection 
    2727 
    2828# Here's an example of a metadata file that uses three FileSet structures 
     
    8585# metadata is explictly overridden later in the import. 
    8686 
    87 package MetadataXMLPlug; 
     87package MetadataXMLPlugin; 
    8888 
    8989use strict; 
    9090no strict 'refs'; 
    91 use BasPlug; 
     91use BasePlugin; 
    9292use util; 
    9393use metadatautil; 
    9494 
    9595sub BEGIN { 
    96     @MetadataXMLPlug::ISA = ('BasPlug'); 
     96    @MetadataXMLPlugin::ISA = ('BasePlugin'); 
    9797    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan"); 
    9898} 
     
    102102my $arguments = [ 
    103103      { 'name' => "block_exp", 
    104     'desc' => "{BasPlug.block_exp}", 
     104    'desc' => "{BasePlugin.block_exp}", 
    105105    'type' => "regexp", 
    106106    'reqd' => "no", 
     
    108108]; 
    109109 
    110 my $options = { 'name'     => "MetadataXMLPlug", 
    111         'desc'     => "{MetadataXMLPlug.desc}", 
     110my $options = { 'name'     => "MetadataXMLPlugin", 
     111        'desc'     => "{MetadataXMLPlugin.desc}", 
    112112        'abstract' => "no", 
    113113        'inherits' => "yes", 
     
    121121    push(@$pluginlist, $class); 
    122122 
    123     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    124     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    125  
    126     $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 
     123    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     124    push(@{$hashArgOptLists->{"OptList"}},$options); 
     125 
     126    $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 
    127127 
    128128    if ($self->{'info_only'}) { 
     
    180180    } 
    181181 
    182     print STDERR "\n<Processing n='$file' p='MetadataXMLPlug'>\n" if ($gli); 
    183     print STDERR "MetadataXMLPlug: processing $file\n" if ($self->{'verbosity'})> 1; 
     182    print STDERR "\n<Processing n='$file' p='MetadataXMLPlugin'>\n" if ($gli); 
     183    print STDERR "MetadataXMLPlugin: processing $file\n" if ($self->{'verbosity'})> 1; 
    184184 
    185185    $self->{'metadataref'} = $extrametadata; 
  • gsdl/trunk/perllib/plugins/NulPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # NULPlug.pm -- Plugin for dummy (.nul) files 
     3# NulPlugin.pm -- Plugin for dummy (.nul) files 
    44# 
    55# A component of the Greenstone digital library software from the New 
     
    2626########################################################################### 
    2727 
    28 # NULPlug - a plugin for dummy files 
     28# NulPlugin - a plugin for dummy files 
    2929 
    3030# This is a simple Plugin for importing dummy files, along with 
     
    3535# databases 
    3636 
    37 package NULPlug; 
     37package NulPlugin; 
    3838 
    39 use BasPlug; 
     39use BasePlugin; 
    4040 
    4141use strict; 
     
    4343 
    4444sub BEGIN { 
    45     @NULPlug::ISA = ('BasPlug'); 
     45    @NulPlugin::ISA = ('BasePlugin'); 
    4646} 
    4747 
    4848my $arguments =  
    4949    [ { 'name' => "process_exp", 
    50     'desc' => "{BasPlug.process_exp}", 
     50    'desc' => "{BasePlugin.process_exp}", 
    5151    'type' => "regexp", 
    5252    'reqd' => "no", 
    5353    'deft' => &get_default_process_exp() }, 
    5454      { 'name' => "assoc_field", 
    55     'desc' => "{NULPlug.assoc_field}", 
     55    'desc' => "{NulPlugin.assoc_field}", 
    5656    'type' => "string", 
    57     'deft' => "", 
     57    'deft' => "null_file", 
    5858    'reqd' => "no" }, 
    5959      { 'name' => "add_metadata_as_text", 
    60     'desc' => "{NULPlug.add_metadata_as_text}", 
     60    'desc' => "{NulPlugin.add_metadata_as_text}", 
    6161    'type' => "flag" }, 
    6262      { 'name' => "remove_namespace_for_text", 
    63     'desc' => "{NULPlug.remove_namespace_for_text}", 
     63    'desc' => "{NulPlugin.remove_namespace_for_text}", 
    6464    'type' => "flag" } 
    6565      ]; 
    6666 
    67 my $options = { 'name'     => "NULPlug", 
    68         'desc'     => "{NULPlug.desc}", 
     67my $options = { 'name'     => "NulPlugin", 
     68        'desc'     => "{NulPlugin.desc}", 
    6969        'abstract' => "no", 
    7070        'inherits' => "yes", 
     
    7777    push(@$pluginlist, $class); 
    7878 
    79     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    80     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
     79    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     80    push(@{$hashArgOptLists->{"OptList"}},$options); 
    8181 
    82     my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 
     82    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 
    8383     
    8484    return bless $self, $class; 
     
    8989} 
    9090 
    91 # The NULPlug read() function. This function does all the right 
    92 # things to make general options work for a given plugin.  NULPlug 
    93 # overrides read() because there is no need to read the actual text of 
    94 # the file in, because the contents of the file is not text... 
    95 # 
    96 # 
    97 # Return number of files processed, undef if can't process  
    98 # 
    99 # Note that $base_dir might be "" and that $file might include directories 
     91# NulPlugin specific processing of doc_obj.  
     92sub process { 
     93    my $self = shift (@_); 
     94    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
     95     
     96    my $topsection = $doc_obj->get_top_section(); 
     97       
     98    my $assoc_field = $self->{'assoc_field'}; # || "null_file"; TODO, check this 
     99    $doc_obj->add_metadata ($topsection, $assoc_field, $file); 
    100100 
    101 sub read { 
    102     my $self = shift (@_); 
    103     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
    104  
    105     my $outhandle = $self->{'outhandle'}; 
    106  
    107     #check for associate_ext, blocking etc 
    108     my ($block_status,$filename) = $self->read_block(@_);     
    109     return $block_status if ((!defined $block_status) || ($block_status==0)); 
    110  
    111     print STDERR "<Processing n='$file' p='NULPlug'>\n" if ($gli); 
    112     print $outhandle "NULPlug processing \"$filename\"\n" 
    113         if $self->{'verbosity'} > 1; 
    114  
    115     #if there's a leading directory name, eat it... 
    116     $file =~ s/^.*[\/\\]//; 
    117      
    118     # create a new document 
    119     my $doc_obj = new doc ($filename, "indexed_doc"); 
    120     my $top_section = $doc_obj->get_top_section(); 
    121  
    122     $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});     
    123     #$doc_obj->set_OIDtype ("incremental"); 
    124     $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}"); 
    125     $doc_obj->add_metadata($top_section, "Source", $file); # set the filename as Source metadata to be consistent with other plugins 
    126  
    127     $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename)); 
    128  
    129     # the metadata NoText is used to suppress the dummy text 'This document has no text.' 
    130     $doc_obj->add_metadata ($top_section, "NoText",    "1"); 
    131  
    132     my $assoc_field = $self->{'assoc_field'} || "null_file"; 
    133     $doc_obj->add_metadata ($top_section, $assoc_field, $file); 
    134      
    135      if ($self->{'cover_image'}) { 
    136     $self->associate_cover_image($doc_obj, $filename); 
    137     } 
    138  
    139     # include any metadata passed in from previous plugins  
    140     my $section = $doc_obj->get_top_section(); 
    141     $self->extra_metadata ($doc_obj, $section, $metadata); 
    142      
    143101    # format the metadata passed in (presumably from metadata.xml) 
    144102    my $text = ""; 
    145103    if ($self->{'add_metadata_as_text'}) { 
    146104    $text = &metadatautil::format_metadata_as_table($metadata, $self->{'remove_namespace_for_text'}); 
     105    $doc_obj->add_utf8_text($topsection, $text); 
    147106    } else { 
    148     #create an empty text string so we don't break downstream plugins  
    149     $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1); 
     107    $self->add_dummy_text($doc_obj, $topsection); 
    150108    } 
    151     $self->title_fallback($doc_obj,$section,$file); 
    152      
    153     # do plugin specific processing of doc_obj 
    154     unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) { 
    155     print STDERR "<ProcessingError n='$file'>\n" if ($gli); 
    156     return -1; 
    157     } 
    158  
    159     # do any automatic metadata extraction 
    160     $self->auto_extract_metadata ($doc_obj); 
    161  
    162     # add an OID 
    163     $doc_obj->set_OID(); 
    164     $doc_obj->add_utf8_text($section, $text); 
    165      
    166     # process the document 
    167     $processor->process($doc_obj); 
    168  
    169     $self->{'num_processed'} ++; 
    170     return 1; 
    171 } 
    172  
    173  
    174 # NULPlug processing of doc_obj.  In practice we don't need to do 
    175 # anything here because the read function takes care of everything. 
    176  
    177 sub process { 
    178     my $self = shift (@_); 
    179     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; 
    180     my $outhandle = $self->{'outhandle'}; 
    181109     
    182110    return 1; 
  • gsdl/trunk/perllib/plugins/OAIPlugin.pm

    r15865 r15872  
    2525########################################################################### 
    2626 
    27 package OAIPlug; 
    28  
    29 use BasPlug; 
     27package OAIPlugin; 
     28 
    3029use unicode; 
    3130use util; 
     
    3433no strict 'refs'; # allow filehandles to be variables and viceversa 
    3534 
    36 use XMLPlug; 
     35use ReadXMLFile; 
    3736 
    3837sub BEGIN { 
    39     @OAIPlug::ISA = ('XMLPlug'); 
     38    @OAIPlugin::ISA = ('ReadXMLFile'); 
    4039} 
    4140 
     
    4948      ]; 
    5049 
    51 my $options = { 'name'     => "OAIPlug", 
    52         'desc'     => "{OAIPlug.desc}", 
     50my $options = { 'name'     => "OAIPlugin", 
     51        'desc'     => "{OAIPlugin.desc}", 
    5352        'abstract' => "no", 
    5453        'inherits' => "yes", 
     
    6160    push(@$pluginlist, $class); 
    6261 
    63     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    64     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    65  
    66     my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists); 
     62    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     63    push(@{$hashArgOptLists->{"OptList"}},$options); 
     64 
     65    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists); 
    6766 
    6867    return bless $self, $class; 
     
    9897 
    9998    my $outhandle = $self->{'outhandle'}; 
    100     print $outhandle "OAIPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 
    101     print STDERR "<Processing n='$self->{'file'}' p='OAIPlug'>\n" if $self->{'gli'}; 
     99    print $outhandle "OAIPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 
     100    print STDERR "<Processing n='$self->{'file'}' p='OAIPlugin'>\n" if $self->{'gli'}; 
    102101 
    103102} 
     
    199198    if ($srcdoc_exists) 
    200199    { 
    201         print $outhandle "OAIPlug: passing metadata on to $url_array->[0]\n" 
     200        print $outhandle "OAIPlugin: passing metadata on to $url_array->[0]\n" 
    202201        if ($self->{'verbosity'}>1); 
    203202         
     
    265264    my $outhandle = $self->{'outhandle'}; 
    266265 
    267     print STDERR "<Processing n='$file' p='OAIPlug'>\n" if ($gli); 
    268     print $outhandle "OAIPlug: processing $file\n" 
     266    print STDERR "<Processing n='$file' p='OAIPlugin'>\n" if ($gli); 
     267    print $outhandle "OAIPlugin: processing $file\n" 
    269268    if $self->{'verbosity'} > 1; 
    270269 
     
    411410 
    412411    if ($top_level_prefix !~ /dc$/) { 
    413         print $outhandle "Warning: OAIPlug currently only designed for Dublin Core (or variant) metadata\n"; 
     412        print $outhandle "Warning: OAIPlugin currently only designed for Dublin Core (or variant) metadata\n"; 
    414413        print $outhandle "         This recorded metadata section '$top_level_prefix' does not appear to match.\n"; 
    415414        print $outhandle "         Metadata assumed to be in form: <prefix:tag>value</prefix:tag> and will be converted\n"; 
  • gsdl/trunk/perllib/plugins/OggVorbisPlugin.pm

    r15865 r15872  
    2727########################################################################### 
    2828 
    29 package OggVorbisPlug; 
     29package OggVorbisPlugin; 
    3030 
    3131 
    32 use UnknownPlug; 
     32use BasePlugin; 
    3333use Ogg::Vorbis::Header::PurePerl; 
    3434 
    3535use strict; 
    3636no strict 'refs'; # allow filehandles to be variables and viceversa 
     37no strict 'subs'; 
    3738 
    3839sub BEGIN { 
    39     @OggVorbisPlug::ISA = ('UnknownPlug'); 
     40    @OggVorbisPlugin::ISA = ('BasePlugin'); 
    4041} 
    4142 
     
    4344my $arguments = 
    4445    [ { 'name' => "process_exp", 
    45     'desc' => "{BasPlug.process_exp}", 
     46    'desc' => "{BasePlugin.process_exp}", 
    4647    'type' => "string", 
    4748    'deft' => &get_default_process_exp(), 
    4849    'reqd' => "no" }, 
    4950      { 'name' => "add_technical_metadata", 
    50     'desc' => "{OggVorbisPlug.add_technical_metadata}", 
     51    'desc' => "{OggVorbisPlugin.add_technical_metadata}", 
    5152    'type' => "flag", 
    5253    'deft' => "" } ]; 
    5354 
    54 my $options = { 'name'     => "OggVorbisPlug", 
    55         'desc'     => "{OggVorbisPlug.desc}", 
     55my $options = { 'name'     => "OggVorbisPlugin", 
     56        'desc'     => "{OggVorbisPlugin.desc}", 
    5657        'inherits' => "yes", 
    5758        'abstract' => "no", 
     
    7273    push(@$pluginlist, $class); 
    7374     
    74     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    75     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
     75    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     76    push(@{$hashArgOptLists->{"OptList"}},$options); 
    7677     
    77     my $self = new UnknownPlug($pluginlist, $inputargs, $hashArgOptLists); 
     78    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 
    7879     
    7980    return bless $self, $class; 
    8081} 
    8182 
    82  
    83 sub read 
     83sub process 
    8484{ 
    8585    my $self = shift (@_); 
    86     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     86    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
    8787 
    88     my $outhandle = $self->{'outhandle'}; 
     88    my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 
     89    # switch a "hash" OIDtype to "incremental" so the file content is not hashed 
     90      
     91    # old code was in effect the following.  
     92    if ($doc_obj->{'OIDtype'} =~ /^hash$/) { 
     93    $doc_obj->set_OIDtype ("incremental"); 
     94    } 
    8995 
    90     #check process and block exps, smart block, etc 
    91     my ($block_status,$filename) = $self->read_block(@_);     
    92     return $block_status if ((!defined $block_status) || ($block_status==0)); 
    93  
    94      # Report that we're processing the file 
    95     print STDERR "<Processing n='$file' p='OggVorbisPlug'>\n" if ($gli); 
    96     print $outhandle "OggVorbisPlug: processing $file\n" 
    97     if ($self->{'verbosity'}) > 1; 
    98      
    99     # file is just the name of the file (need to get rid off any leading directory names) 
    100     $file =~ s/^.*[\/\\]//; 
    101  
    102     # create a new index document 
    103     my $doc_obj = new doc ($filename, "indexed_doc"); 
    104     if ($processor->{'OIDtype'} =~ /^(assigned|dirname)$/) { 
    105     $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'}); 
    106     } 
    107     else { 
    108     $doc_obj->set_OIDtype ("incremental");    # this is done to avoid hashing content of file 
    109     } 
    110     my $section = $doc_obj->get_top_section(); 
    111      
    112     # replace spaces in filename with %20 in url for metadata entry 
    113     my $url = $file; 
    114     ##$url =~ s/ /%20/g; 
    115  
    116     # Source (filename) to be consistent with other plugins 
    117     $doc_obj->add_metadata ($section, "Source", $url); 
    118  
     96    my $top_section = $doc_obj->get_top_section(); 
    11997    # Extract metadata 
    120     my $ogg = Ogg::Vorbis::Header::PurePerl->new($filename); 
     98    my $ogg = Ogg::Vorbis::Header::PurePerl->new($filename_full_path); 
    12199 
    122100    # Comments added to the file 
     
    128106    { 
    129107        if (defined $value && $value ne "") { 
    130         $doc_obj->add_metadata($section, $keytc, $value); 
     108        $doc_obj->add_metadata($top_section, $keytc, $value); 
    131109        } 
    132110    } 
     
    141119        my $value = $ogg->info->{$key}; 
    142120        if (defined $value && $value ne "") { 
    143         $doc_obj->add_metadata($section, $keytc, $value); 
     121        $doc_obj->add_metadata($top_section, $keytc, $value); 
    144122        } 
    145123    } 
    146124    } 
    147125 
    148     # srclink 
    149     $doc_obj->add_metadata ($section, "FileFormat", "OggVorbis"); 
    150     $doc_obj->add_metadata ($section, "srclink", "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Source]\">"); 
    151     $doc_obj->add_metadata ($section, "/srclink", "</a>"); 
     126    $doc_obj->add_metadata ($top_section, "FileFormat", "OggVorbis"); 
     127    $doc_obj->add_metadata ($top_section, "srclink", "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Source]\">"); 
     128    $doc_obj->add_metadata ($top_section, "/srclink", "</a>"); 
    152129    # srcicon (need to include "iogg.gif" in the greenstone images directory) 
    153     $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/images/iogg.gif\" title=\"Download\" border=0>"); 
     130    $doc_obj->add_metadata ($top_section, "srcicon", "<img src=\"_httpprefix_/images/iogg.gif\" title=\"Download\" border=0>"); 
    154131 
    155132    # add NoText metadata which can be used to suppress the dummy text 
    156     $doc_obj->add_metadata ($section, "NoText", "1"); 
     133    $doc_obj->add_metadata ($top_section, "NoText", "1"); 
    157134 
    158135    # Add the actual file as an associated file 
    159     $doc_obj->associate_file($filename, $file, "VORBIS", $section); 
     136    $doc_obj->associate_file($filename_full_path, $filename_no_path, "VORBIS", $top_section); 
    160137 
    161     # Create an empty text string so we don't break downstream plugins  
    162      my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1); 
    163  
    164     # include any metadata passed in from previous plugins 
    165     $self->extra_metadata ($doc_obj, $section, $metadata); 
    166  
    167     # do plugin specific processing of doc_obj 
    168     return undef unless defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj)); 
    169  
    170     # do any automatic metadata extraction 
    171     $self->auto_extract_metadata($doc_obj); 
    172  
    173     # add an OID 
    174     $doc_obj->set_OID(); 
    175     $doc_obj->add_utf8_text($section, $text); 
    176  
    177     # process the document 
    178     $processor->process($doc_obj); 
    179  
    180     $self->{'num_processed'}++; 
    181     return 1; 
    182138} 
    183139 
  • gsdl/trunk/perllib/plugins/OpenDocumentPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # OpenDocumentPlug.pm -- The Open Document plugin 
     3# OpenDocumentPlugin.pm -- The Open Document plugin 
    44# A component of the Greenstone digital library software 
    55# from the New Zealand Digital Library Project at the  
     
    3333#This basically extracts any text out of the document, but not much else.  
    3434 
    35 package OpenDocumentPlug; 
     35# this inherits ReadXMLFile, and therefore offers -xslt option, but does 
     36# nothing with it. 
     37 
     38package OpenDocumentPlugin; 
    3639 
    3740use strict; 
    3841no strict 'refs'; # allow filehandles to be variables and viceversa 
    3942 
    40 use XMLPlug; 
     43use ReadXMLFile; 
    4144use XML::XPath; 
    4245use XML::XPath::XMLParser; 
     
    4649 
    4750sub BEGIN { 
    48     @OpenDocumentPlug::ISA = ('XMLPlug'); 
    49 } 
    50  
    51  
    52 #our @filesAssoc = (); 
     51    @OpenDocumentPlugin::ISA = ('ReadXMLFile'); 
     52} 
     53 
    5354our @filesProcess = ( "content.xml" , "meta.xml" ); 
    54 #XML plug has this so we need it too 
    55 our ($self); 
    5655 
    5756my $arguments = [ 
     
    6261         ]; 
    6362 
    64 my $options = { 'name'     => "OpenDocumentPlug", 
    65         'desc'     => "{OpenDocumentPlug.desc}", 
     63my $options = { 'name'     => "OpenDocumentPlugin", 
     64        'desc'     => "{OpenDocumentPlugin.desc}", 
    6665        'abstract' => "no", 
    6766        'inherits' => "yes", 
     
    7574    push(@$pluginlist, $class); 
    7675 
    77     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    78     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    79  
    80     my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists); 
     76    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     77    push(@{$hashArgOptLists->{"OptList"}},$options); 
     78 
     79    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists); 
    8180 
    8281    $self->{'section'} = ""; 
     
    160159 
    161160sub read { 
    162     # $self must be global to work with XML callback routines. 
    163     $self = shift (@_);   
     161    my $self = shift (@_);   
    164162    
    165163    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 
     
    188186     
    189187    $self->unzip ("\"$file_only\""); 
    190     foreach my $xmlFile (@OpenDocumentPlug::filesProcess) { 
     188    foreach my $xmlFile (@OpenDocumentPlugin::filesProcess) { 
    191189        if (-e $xmlFile) { 
    192         $self->parse_file($xmlFile); 
     190        $self->{'parser'}->parsefile($xmlFile); 
    193191        } 
    194192    } 
     
    203201 
    204202    # parsefile may either croak somewhere in XML::Parser (e.g. because 
    205     # the document is not well formed) or die somewhere in XMLPlug or a 
     203    # the document is not well formed) or die somewhere in ReadXMLFile or a 
    206204    # derived plugin (e.g. because we're attempting to process a 
    207205    # document whose DOCTYPE is not meant for this plugin). For the 
     
    255253    $doc_obj->add_utf8_metadata ("", "srcicon",  "<img border=\"0\" align=\"absmiddle\" src=\"_httpprefix_/collect/[collection]/index/assoc/[archivedir]/thumbnail.png\" alt=\"View the Open document\" title=\"View the Open document\">");  
    256254    $doc_obj->add_utf8_metadata ("", "/srclink", "</a>");  
    257     $doc_obj->add_utf8_metadata ("", "Source", &ghtml::dmsafe($file_only)); 
    258     $doc_obj->set_utf8_metadata_element("", "FileSize", (-s $filename)); 
     255    $self->set_Source_metadata($doc_obj, $file_only); 
     256     $doc_obj->set_utf8_metadata_element("", "FileSize", (-s $filename)); 
    259257      
    260258    # include any metadata passed in from previous plugins  
     
    268266    
    269267    # add an OID 
    270     $doc_obj->set_OID(); 
     268    $self->add_OID($doc_obj); 
    271269     
    272270    $doc_obj->add_utf8_metadata("", "Plugin", "$self->{'plugin_type'}"); 
  • gsdl/trunk/perllib/plugins/PDFPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # PDFPlug.pm -- reasonably with-it pdf plugin 
     3# PDFPlugin.pm -- reasonably with-it pdf plugin 
    44# A component of the Greenstone digital library software 
    55# from the New Zealand Digital Library Project at the  
     
    2323# 
    2424########################################################################### 
    25 package PDFPlug; 
    26  
    27 use ConvertToPlug; 
     25package PDFPlugin; 
     26 
     27use ConvertBinaryFile; 
     28use ReadTextFile; 
    2829use unicode; 
    2930use strict; 
     
    3132 
    3233sub BEGIN { 
    33     @PDFPlug::ISA = ('ConvertToPlug'); 
     34    @PDFPlugin::ISA = ('ConvertBinaryFile', 'ReadTextFile'); 
    3435} 
    3536 
    3637my $convert_to_list = 
    3738    [ { 'name' => "auto", 
    38     'desc' => "{ConvertToPlug.convert_to.auto}" }, 
     39    'desc' => "{ConvertBinaryFile.convert_to.auto}" }, 
    3940      { 'name' => "html", 
    40     'desc' => "{ConvertToPlug.convert_to.html}" }, 
     41    'desc' => "{ConvertBinaryFile.convert_to.html}" }, 
    4142      { 'name' => "text", 
    42     'desc' => "{ConvertToPlug.convert_to.text}" }, 
     43    'desc' => "{ConvertBinaryFile.convert_to.text}" }, 
    4344      { 'name' => "pagedimg_jpg", 
    44     'desc' => "{ConvertToPlug.convert_to.pagedimg_jpg}"}, 
     45    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_jpg}"}, 
    4546      { 'name' => "pagedimg_gif", 
    46     'desc' => "{ConvertToPlug.convert_to.pagedimg_gif}"}, 
     47    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_gif}"}, 
    4748      { 'name' => "pagedimg_png", 
    48     'desc' => "{ConvertToPlug.convert_to.pagedimg_png}"},  
     49    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_png}"},  
    4950      ]; 
    5051 
     
    5354    [ 
    5455     { 'name' => "convert_to", 
    55        'desc' => "{ConvertToPlug.convert_to}", 
     56       'desc' => "{ConvertBinaryFile.convert_to}", 
    5657       'type' => "enum", 
    5758       'reqd' => "yes", 
     
    5960       'deft' => "html" },    
    6061     { 'name' => "process_exp", 
    61        'desc' => "{BasPlug.process_exp}", 
     62       'desc' => "{BasePlugin.process_exp}", 
    6263       'type' => "regexp", 
    6364       'deft' => &get_default_process_exp(), 
    6465       'reqd' => "no" }, 
    6566     { 'name' => "block_exp", 
    66        'desc' => "{BasPlug.block_exp}", 
     67       'desc' => "{BasePlugin.block_exp}", 
    6768       'type' => "regexp", 
    6869       'deft' => &get_default_block_exp() }, 
    6970     { 'name' => "metadata_fields", 
    70        'desc' => "{HTMLPlug.metadata_fields}", 
     71       'desc' => "{HTMLPlugin.metadata_fields}", 
    7172       'type' => "string", 
    7273       'deft' => "" }, 
    7374     { 'name' => "noimages", 
    74        'desc' => "{PDFPlug.noimages}", 
     75       'desc' => "{PDFPlugin.noimages}", 
    7576       'type' => "flag" }, 
    7677     { 'name' => "allowimagesonly", 
    77        'desc' => "{PDFPlug.allowimagesonly}", 
     78       'desc' => "{PDFPlugin.allowimagesonly}", 
    7879       'type' => "flag" }, 
    7980     { 'name' => "complex", 
    80        'desc' => "{PDFPlug.complex}", 
     81       'desc' => "{PDFPlugin.complex}", 
    8182       'type' => "flag" }, 
    8283     { 'name' => "nohidden", 
    83        'desc' => "{PDFPlug.nohidden}", 
     84       'desc' => "{PDFPlugin.nohidden}", 
    8485       'type' => "flag" }, 
    8586     { 'name' => "zoom", 
    86        'desc' => "{PDFPlug.zoom}", 
     87       'desc' => "{PDFPlugin.zoom}", 
    8788       'deft' => "2", 
    8889       'range' => "1,3", # actually the range is 0.5-3  
    8990       'type' => "int" }, 
    9091     { 'name' => "use_sections", 
    91        'desc' => "{PDFPlug.use_sections}", 
     92       'desc' => "{PDFPlugin.use_sections}", 
    9293       'type' => "flag" }, 
    9394     { 'name' => "description_tags", 
    94        'desc' => "{HTMLPlug.description_tags}", 
     95       'desc' => "{HTMLPlugin.description_tags}", 
    9596       'type' => "flag" } 
    9697     ]; 
    9798 
    98 my $options = { 'name'     => "PDFPlug", 
    99         'desc'     => "{PDFPlug.desc}", 
     99my $options = { 'name'     => "PDFPlugin", 
     100        'desc'     => "{PDFPlugin.desc}", 
    100101        'abstract' => "no", 
    101102        'inherits' => "yes", 
     
    111112    push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?'); 
    112113 
    113     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    114     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
     114    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     115    push(@{$hashArgOptLists->{"OptList"}},$options); 
    115116 
    116117    my @arg_array = @$inputargs; 
    117     my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists); 
     118    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 
    118119     
    119120    if ($self->{'info_only'}) { 
     
    122123    } 
    123124 
    124     # these are passed through to gsConvert.pl by ConvertToPlug.pm 
     125    $self->{'filename_extension'} = "pdf"; 
     126    $self->{'file_type'} = "PDF"; 
     127 
     128    # these are passed through to gsConvert.pl by ConvertBinaryFile.pm 
    125129    my $zoom = $self->{"zoom"}; 
    126130    $self->{'convert_options'} = "-pdf_zoom $zoom"; 
     
    132136    my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 
    133137 
    134     if (!defined $secondary_plugin_options->{'HTMLPlug'}) { 
    135     $secondary_plugin_options->{'HTMLPlug'} = []; 
    136     } 
    137     if (!defined $secondary_plugin_options->{'TEXTPlug'}) { 
    138     $secondary_plugin_options->{'TEXTPlug'} = []; 
     138    if (!defined $secondary_plugin_options->{'HTMLPlugin'}) { 
     139    $secondary_plugin_options->{'HTMLPlugin'} = []; 
     140    } 
     141    if (!defined $secondary_plugin_options->{'TextPlugin'}) { 
     142    $secondary_plugin_options->{'TextPlugin'} = []; 
    139143    } 
    140144    if (defined $self->{'convert_to'} && $self->{'convert_to'} =~ /pagedimg.*/i) { 
    141     if (!defined $secondary_plugin_options->{'PagedImgPlug'}){ 
    142         $secondary_plugin_options->{'PagedImgPlug'} = []; 
    143         my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'};  
     145    if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){ 
     146        $secondary_plugin_options->{'PagedImagePlugin'} = []; 
     147        my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};  
    144148        push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 
    145149    } 
    146150    } 
    147     my $html_options = $secondary_plugin_options->{'HTMLPlug'}; 
    148     my $text_options = $secondary_plugin_options->{'TEXTPlug'}; 
    149     my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'}; 
     151    my $html_options = $secondary_plugin_options->{'HTMLPlugin'}; 
     152    my $text_options = $secondary_plugin_options->{'TextPlugin'}; 
     153    my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'}; 
    150154    
    151155    if ($self->{'input_encoding'} eq "auto") { 
     
    227231    && $self->{'converted_to'} eq "HTML") { 
    228232 
    229     print $outhandle "PDFPlug: Calculating sections...\n"; 
     233    print $outhandle "PDFPlugin: Calculating sections...\n"; 
    230234 
    231235    # we have "<a name=1></a>" etc for each page 
     
    236240 
    237241    if (scalar (@sections) == 1) { #only one section - no split! 
    238         print $outhandle "PDFPlug: warning - no sections found\n"; 
     242        print $outhandle "PDFPlugin: warning - no sections found\n"; 
    239243    } else { 
    240244        $top_section .= shift @sections; # keep HTML header etc as top_section 
     
    274278        $title = " "; # get rid of the undefined warning in next line 
    275279        } 
    276         my $newsection = "<!-- from PDFPlug -->\n<!-- <Section>\n"; 
     280        my $newsection = "<!-- from PDFPlugin -->\n<!-- <Section>\n"; 
    277281        $newsection .= "<Metadata name=\"Title\">" . $title 
    278282        . "</Metadata>\n--><p>\n"; 
     
    296300sub process { 
    297301    my $self = shift (@_); 
    298     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
     302    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
    299303 
    300304    my $result = $self->process_type("pdf",$base_dir,$file,$doc_obj); 
  • gsdl/trunk/perllib/plugins/PPTPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # PPTPlug.pm -- plugin for importing Microsoft PowerPoint files. 
     3# PPTPlugin.pm -- plugin for importing Microsoft PowerPoint files. 
    44#  (currently only versions 95 and 97) 
    55# 
     
    2626########################################################################### 
    2727 
    28 package PPTPlug; 
     28package PPTPlugin; 
    2929 
    30 use ConvertToPlug; 
     30use ConvertBinaryFile; 
     31use ReadTextFile; # for read_file in convert_post_process. do we need it? 
     32 
    3133use strict; 
    3234no strict 'refs'; # allow filehandles to be variables and viceversa 
    3335 
    3436sub BEGIN { 
    35     @PPTPlug::ISA = ('ConvertToPlug'); 
     37    @PPTPlugin::ISA = ('ConvertBinaryFile', 'ReadTextFile'); 
    3638} 
    3739 
    3840my $convert_to_list = 
    3941    [ { 'name' => "auto", 
    40     'desc' => "{ConvertToPlug.convert_to.auto}" }, 
     42    'desc' => "{ConvertBinaryFile.convert_to.auto}" }, 
    4143      { 'name' => "html", 
    42     'desc' => "{ConvertToPlug.convert_to.html}" }, 
     44    'desc' => "{ConvertBinaryFile.convert_to.html}" }, 
    4345      { 'name' => "text", 
    44     'desc' => "{ConvertToPlug.convert_to.text}" }, 
     46    'desc' => "{ConvertBinaryFile.convert_to.text}" }, 
    4547      { 'name' => "pagedimg_jpg", 
    46     'desc' => "{ConvertToPlug.convert_to.pagedimg_jpg}" }, 
     48    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_jpg}" }, 
    4749      { 'name' => "pagedimg_gif", 
    48     'desc' => "{ConvertToPlug.convert_to.pagedimg_gif}" }, 
     50    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_gif}" }, 
    4951      { 'name' => "pagedimg_png", 
    50     'desc' => "{ConvertToPlug.convert_to.pagedimg_png}" } 
     52    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_png}" } 
    5153      ]; 
    5254 
    5355my $arguments =  
    5456    [ { 'name' => "process_exp", 
    55     'desc' => "{BasPlug.process_exp}", 
     57    'desc' => "{BasePlugin.process_exp}", 
    5658    'type' => "regexp", 
    5759    'reqd' => "no", 
     
    5961      ]; 
    6062 
    61 my $options = { 'name'     => "PPTPlug", 
    62         'desc'     => "{PPTPlug.desc}", 
     63my $options = { 'name'     => "PPTPlugin", 
     64        'desc'     => "{PPTPlugin.desc}", 
    6365        'abstract' => "no", 
    6466        'inherits' => "yes", 
     
    7375    if ($ENV{'GSDLOS'} =~ m/^windows$/i) { 
    7476    my $ws_arg =[{ 'name' => "convert_to", 
    75                'desc' => "{ConvertToPlug.convert_to}", 
     77               'desc' => "{ConvertBinaryFile.convert_to}", 
    7678               'type' => "enum", 
    7779               'reqd' => "yes", 
     
    7981               'deft' => "html" }, 
    8082             { 'name' => "windows_scripting", 
    81                'desc' => "{PPTPlug.windows_scripting}", 
     83               'desc' => "{PPTPlugin.windows_scripting}", 
    8284               'type' => "flag", 
    8385               'reqd' => "no" } 
     
    8688    } 
    8789     
    88     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    89     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
     90    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     91    push(@{$hashArgOptLists->{"OptList"}},$options); 
    9092 
    9193 
    92     my @arg_array = @$inputargs; 
    93     my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists); 
     94    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 
    9495  
    9596    if ($self->{'info_only'}) { 
     
    9899    } 
    99100 
     101    $self->{'filename_extension'} = "ppt"; 
     102    $self->{'file_type'} = "PPT"; 
     103 
    100104    # ppthtml outputs utf-8 already. 
    101     #these are passed through to gsConvert.pl by ConvertToPlug.pm 
     105    #these are passed through to gsConvert.pl by ConvertBinaryFile.pm 
    102106    $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'}; 
    103107    my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 
    104108 
    105109    if ($self->{'windows_scripting'} && ($self->{'convert_to'} eq "PagedImg")) { 
    106     $secondary_plugin_options->{'PagedImgPlug'} = []; 
     110    $secondary_plugin_options->{'PagedImagePlugin'} = []; 
    107111    } else { 
    108     $secondary_plugin_options->{'HTMLPlug'} = []; 
     112    $secondary_plugin_options->{'HTMLPlugin'} = []; 
    109113    } 
    110     my $html_options = $secondary_plugin_options->{'HTMLPlug'}; 
    111     my $pageimg_options = $secondary_plugin_options->{'PagedImgPlug'}; 
     114    my $html_options = $secondary_plugin_options->{'HTMLPlugin'}; 
     115    my $pageimg_options = $secondary_plugin_options->{'PagedImagePlugin'}; 
    112116 
    113117    if ($self->{'input_encoding'} eq "auto") { 
    114118    $self->{'input_encoding'} = "utf8"; 
    115     if (defined $secondary_plugin_options->{'HTMLPlug'}){ 
     119    if (defined $secondary_plugin_options->{'HTMLPlugin'}){ 
    116120        push(@$html_options,"-input_encoding", "utf8"); 
    117121        push(@$html_options,"-extract_language") if $self->{'extract_language'}; 
    118122 
    119         # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)  
     123        # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj)  
    120124        # to extract these metadata fields from the HEAD META fields 
    121125        push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>"); 
    122126    }  
    123     if (defined $secondary_plugin_options->{'PagedImgPlug'}){ 
     127    if (defined $secondary_plugin_options->{'PagedImagePlugin'}){ 
    124128        push(@$pageimg_options,"-input_encoding", "utf8"); 
    125129        push(@$pageimg_options,"-extract_language") if $self->{'extract_language'}; 
     
    138142} 
    139143 
    140 sub get_file_type { 
    141     my $self = shift (@_); 
    142     my $file_type = "PPT"; 
    143     return $file_type; 
    144 } 
    145  
     144# do we need this? above states that ppthtml produces utf8 text... 
    146145sub convert_post_process 
    147146{ 
     
    161160} 
    162161 
    163 sub process { 
    164     my $self = shift (@_); 
    165     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
    166  
    167     return $self->process_type("ppt",$base_dir,$file,$doc_obj); 
    168 } 
    169162 
    1701631; 
  • gsdl/trunk/perllib/plugins/PSPlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # PSPlug.pm -- this might look VERY similar to the PDF plugin... 
     3# PSPlugin.pm -- this might look VERY similar to the PDF plugin... 
    44# A component of the Greenstone digital library software 
    55# from the New Zealand Digital Library Project at the  
     
    2626# 12/05/02 Added usage datastructure - John Thompson 
    2727 
    28 package PSPlug; 
    29  
    30 use ConvertToPlug; 
     28package PSPlugin; 
     29 
     30use ConvertBinaryFile; 
     31use ReadTextFile; # for read_file in convert_post_process. do we need it? 
    3132use sorttools; 
    3233 
     
    3536 
    3637sub BEGIN { 
    37     @PSPlug::ISA = ('ConvertToPlug'); 
     38    @PSPlugin::ISA = ('ConvertBinaryFile', 'ReadTextFile'); 
    3839} 
    3940 
    4041my $convert_to_list = 
    4142    [ { 'name' => "auto", 
    42     'desc' => "{ConvertToPlug.convert_to.auto}" }, 
     43    'desc' => "{ConvertBinaryFile.convert_to.auto}" }, 
    4344      { 'name' => "text", 
    44     'desc' => "{ConvertToPlug.convert_to.text}" }, 
     45    'desc' => "{ConvertBinaryFile.convert_to.text}" }, 
    4546      { 'name' => "pagedimg_jpg", 
    46     'desc' => "{ConvertToPlug.convert_to.pagedimg_jpg}" }, 
     47    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_jpg}" }, 
    4748      { 'name' => "pagedimg_gif", 
    48     'desc' => "{ConvertToPlug.convert_to.pagedimg_gif}" }, 
     49    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_gif}" }, 
    4950      { 'name' => "pagedimg_png", 
    50     'desc' => "{ConvertToPlug.convert_to.pagedimg_png}" } 
     51    'desc' => "{ConvertBinaryFile.convert_to.pagedimg_png}" } 
    5152      ]; 
    5253 
    5354my $arguments = 
    5455    [ { 'name' => "convert_to", 
    55     'desc' => "{ConvertToPlug.convert_to}", 
     56    'desc' => "{ConvertBinaryFile.convert_to}", 
    5657    'type' => "enum", 
    5758    'reqd' => "yes", 
     
    5960    'deft' => "text" }, 
    6061      { 'name' => "process_exp", 
    61     'desc' => "{BasPlug.process_exp}", 
     62    'desc' => "{BasePlugin.process_exp}", 
    6263    'type' => "regexp", 
    6364    'deft' => &get_default_process_exp(), 
    6465    'reqd' => "no" }, 
    6566      { 'name' => "block_exp", 
    66     'desc' => "{BasPlug.block_exp}", 
     67    'desc' => "{BasePlugin.block_exp}", 
    6768    'type' => 'regexp', 
    6869    'deft' => &get_default_block_exp() }, 
    6970      { 'name' => "extract_date", 
    70     'desc' => "{PSPlug.extract_date}", 
     71    'desc' => "{PSPlugin.extract_date}", 
    7172    'type' => "flag" }, 
    7273      { 'name' => "extract_pages", 
    73     'desc' => "{PSPlug.extract_pages}", 
     74    'desc' => "{PSPlugin.extract_pages}", 
    7475    'type' => "flag" }, 
    7576      { 'name' => "extract_title", 
    76     'desc' => "{PSPlug.extract_title}", 
     77    'desc' => "{PSPlugin.extract_title}", 
    7778    'type' => "flag" } ]; 
    7879 
    79 my $options = { 'name'     => "PSPlug", 
    80         'desc'     => "{PSPlug.desc}", 
     80my $options = { 'name'     => "PSPlugin", 
     81        'desc'     => "{PSPlugin.desc}", 
    8182        'abstract' => "no", 
    8283        'inherits' => "yes", 
     
    9495    push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?'); 
    9596     
    96     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    97     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    98      
    99     my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists); 
     97    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     98    push(@{$hashArgOptLists->{"OptList"}},$options); 
     99     
     100    my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 
    100101 
    101102    if ($self->{'info_only'}) { 
     
    104105    } 
    105106 
     107    $self->{'filename_extension'} = "ps"; 
     108    $self->{'file_type'} = "PS"; 
     109 
    106110    my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 
    107111 
    108     if (!defined $secondary_plugin_options->{'TEXTPlug'}) { 
    109     $secondary_plugin_options->{'TEXTPlug'} = []; 
    110     } 
    111  
    112     my $text_options = $secondary_plugin_options->{'TEXTPlug'}; 
     112    if (!defined $secondary_plugin_options->{'TextPlugin'}) { 
     113    $secondary_plugin_options->{'TextPlugin'} = []; 
     114    } 
     115 
     116    my $text_options = $secondary_plugin_options->{'TextPlugin'}; 
    113117 
    114118    if (defined $self->{'convert_to'} && $self->{'convert_to'} =~ /pagedimg.*/i) { 
    115     if (!defined $secondary_plugin_options->{'PagedImgPlug'}){ 
    116         $secondary_plugin_options->{'PagedImgPlug'} = []; 
    117         my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'};  
     119    if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){ 
     120        $secondary_plugin_options->{'PagedImagePlugin'} = []; 
     121        my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};  
    118122        push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 
    119123    } 
     
    148152} 
    149153 
     154# this has been commented out in other plugins. do we need it here? 
    150155sub convert_post_process 
    151156{ 
     
    179184    my $date_found = 0; 
    180185 
    181     print STDERR "PSPlug: extracting PostScript metadata from \"$filename\"\n"  
     186    print STDERR "PSPlugin: extracting PostScript metadata from \"$filename\"\n"  
    182187    if $self->{'verbosity'} > 1; 
    183188 
     
    237242} 
    238243 
    239 # do plugin specific processing of doc_obj for HTML type 
     244# do plugin specific processing of doc_obj  
    240245sub process { 
    241246    my $self = shift (@_); 
    242     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
    243  
    244 #    my $outhandle = $self->{'outhandle'}; 
    245  
    246 #    print $outhandle "PSPlug: passing $file on to $self->{'converted_to'}Plug\n"  
    247 #   if $self->{'verbosity'} > 1; 
    248 #    print STDERR "<Processing n='$file' p='PSPlug'>\n" if ($gli); 
     247    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 
    249248 
    250249    my $filename = &util::filename_cat($base_dir,$file); 
    251250    $self->extract_metadata_from_postscript($filename, $doc_obj); 
    252251 
    253     return $self->process_type("ps",$base_dir,$file,$doc_obj); 
     252    return $self->SUPER::process(@_); 
     253 
    254254} 
    255255 
  • gsdl/trunk/perllib/plugins/PagedImagePlugin.pm

    r15865 r15872  
    11########################################################################### 
    22# 
    3 # PagedImgPlug.pm -- plugin for sets of images and OCR text that 
     3# PagedImagePlugin.pm -- plugin for sets of images and OCR text that 
    44#  make up a document 
    55# A component of the Greenstone digital library software 
     
    2525########################################################################### 
    2626 
    27 # PagedImgPlug 
     27# PagedImagePlugin 
    2828# processes sequences of images, with optional OCR text 
    2929# 
     
    116116# To have it create medium size images for display, use the '-screenview' 
    117117# option. As usual, running  
    118 # 'perl -S pluginfo.pl PagedImgPlug' will list all the options. 
     118# 'perl -S pluginfo.pl PagedImagePlugin' will list all the options. 
    119119 
    120120# If you want the resulting documents to be presented with a table of  
     
    132132# FileName (only for document level metadata). 
    133133 
    134 package PagedImgPlug; 
    135  
    136 use XMLPlug; 
     134package PagedImagePlugin; 
     135 
     136use ReadXMLFile; 
     137use ReadTextFile; 
     138use ImageConverter; 
     139 
    137140use strict; 
    138141no strict 'refs'; # allow filehandles to be variables and vice versa 
    139142 
    140143sub BEGIN { 
    141     @PagedImgPlug::ISA = ('XMLPlug'); 
     144    @PagedImagePlugin::ISA = ('ReadXMLFile', 'ReadTextFile', 'ImageConverter'); 
    142145} 
    143146 
    144147my $type_list = 
    145148    [ { 'name' => "paged", 
    146         'desc' => "{PagedImgPlug.documenttype.paged}" }, 
     149        'desc' => "{PagedImagePlugin.documenttype.paged}" }, 
    147150      { 'name' => "hierarchy", 
    148         'desc' => "{PagedImgPlug.documenttype.hierarchy}" } ]; 
     151        'desc' => "{PagedImagePlugin.documenttype.hierarchy}" } ]; 
    149152 
    150153my $arguments = 
     
    163166    'type' => "string",  
    164167    'deft' => "" }, 
    165       { 'name' => "noscaleup", 
    166     'desc' => "{ImagePlug.noscaleup}", 
    167     'type' => "flag", 
    168     'reqd' => "no" }, 
    169       { 'name' => "thumbnail", 
    170     'desc' => "{PagedImgPlug.thumbnail}", 
    171     'type' => "flag", 
    172     'reqd' => "no" }, 
    173       { 'name' => "thumbnailsize", 
    174     'desc' => "{ImagePlug.thumbnailsize}", 
    175     'type' => "int", 
    176     'deft' => "100", 
    177     'range' => "1,", 
    178     'reqd' => "no" }, 
    179       { 'name' => "thumbnailtype", 
    180     'desc' => "{ImagePlug.thumbnailtype}", 
    181     'type' => "string", 
    182     'deft' => "gif", 
    183     'reqd' => "no" }, 
    184       { 'name' => "screenview", 
    185     'desc' => "{PagedImgPlug.screenview}", 
    186     'type' => "flag", 
    187     'reqd' => "no" }, 
    188       { 'name' => "screenviewsize", 
    189     'desc' => "{PagedImgPlug.screenviewsize}", 
    190     'type' => "int", 
    191         'deft' => "500", 
    192         'range' => "1,", 
    193     'reqd' => "no" }, 
    194       { 'name' => "screenviewtype", 
    195     'desc' => "{PagedImgPlug.screenviewtype}", 
    196     'type' => "string", 
    197     'deft' => "jpg", 
    198     'reqd' => "no" }, 
    199       { 'name' => "converttotype", 
    200     'desc' => "{ImagePlug.converttotype}", 
    201     'type' => "string", 
    202     'deft' => "", 
    203     'reqd' => "no" }, 
    204       { 'name' => "minimumsize", 
    205     'desc' => "{ImagePlug.minimumsize}", 
    206     'type' => "int", 
    207     'deft' => "100", 
    208     'range' => "1,", 
    209     'reqd' => "no" }, 
    210168      { 'name' => "headerpage", 
    211     'desc' => "{PagedImgPlug.headerpage}", 
     169    'desc' => "{PagedImagePlugin.headerpage}", 
    212170    'type' => "flag", 
    213171    'reqd' => "no" }, 
    214172      { 'name' => "documenttype", 
    215     'desc' => "{PagedImgPlug.documenttype}", 
     173    'desc' => "{PagedImagePlugin.documenttype}", 
    216174    'type' => "enum", 
    217175    'list' => $type_list, 
     
    220178 
    221179 
    222 my $options = { 'name'     => "PagedImgPlug", 
    223         'desc'     => "{PagedImgPlug.desc}", 
     180my $options = { 'name'     => "PagedImagePlugin", 
     181        'desc'     => "{PagedImagePlugin.desc}", 
    224182        'abstract' => "no", 
    225183        'inherits' => "yes", 
     
    231189    push(@$pluginlist, $class); 
    232190 
    233     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 
    234     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 
    235      
    236     my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists); 
     191    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
     192    push(@{$hashArgOptLists->{"OptList"}},$options); 
     193     
     194    new ImageConverter($pluginlist, $inputargs, $hashArgOptLists); 
     195    new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists); 
     196    my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists); 
    237197     
    238198    return bless $self, $class; 
     199} 
     200 
     201 
     202sub init { 
     203    my $self = shift (@_); 
     204    my ($verbosity, $outhandle, $failhandle) = @_; 
     205 
     206    $self->SUPER::init(@_); 
     207    $self->ImageConverter::init(); 
    239208} 
    240209 
     
    260229} 
    261230 
    262 # Create the thumbnail and screenview images, and discover the Image's 
    263 # size, width, and height using the convert utility. 
     231sub rotate_image { 
     232    my $self = shift (@_); 
     233    my ($filename_full_path) = @_; 
     234     
     235    my ($this_filetype) = $filename_full_path =~ /\.([^\.]*)$/; 
     236    my $result = $self->convert($filename_full_path, $this_filetype, "-rotate 180", "ROTATE"); 
     237    my ($new_filename) = ($result =~ /=>(.*\.$this_filetype)/); 
     238    if (-e "$new_filename") { 
     239    return $new_filename; 
     240    } 
     241    # something's gone wrong 
     242    return $filename_full_path; 
     243 
     244} 
     245 
    264246sub process_image { 
    265     my $self = shift (@_); 
    266     my $filename = shift (@_); # filename with full path 
    267     my $srcfile = shift (@_); # filename without path 
    268     my $doc_obj = shift (@_); 
    269     my $section = shift (@_); #the current section 
    270     my $rotate = shift (@_); # whether to rotate the image or not 
    271     $rotate = 0 unless defined $rotate; 
    272      
    273     # check that the image file exists!! 
    274     if (!-f $filename) { 
    275     print "PagedImgPlug: ERROR: File $filename does not exist, skipping\n"; 
    276     return 0; 
    277     } 
    278  
    279     my $top=0; 
    280     if ($section eq $doc_obj->get_top_section()) { 
    281     $top=1; 
    282     } 
    283     my $verbosity = $self->{'verbosity'}; 
    284     my $outhandle = $self->{'outhandle'}; 
    285  
    286     # check the filename is okay 
    287     return 0 if ($srcfile eq "" || $filename eq ""); 
    288      
    289