Ignore:
Timestamp:
2008-06-05T09:29:32+12:00 (16 years ago)
Author:
kjdon
Message:

plugin overhaul: plugins renamed to xxPlugin, and in some cases the names are made more sensible. They now use the new base plugins. Hopefully we have better code reuse. Some of the plugins still need work done as I didn't want to spend another month doing this before committing it. Also, I haven't really tested anything yet...

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/plugins/CONTENTdmPlugin.pm

    r15865 r15872  
    11###########################################################################
    22#
    3 # CONTENTdmPlug.pm -- reasonably with-it pdf plugin
     3# CONTENTdmPlugin.pm -- reasonably with-it pdf plugin
    44# A component of the Greenstone digital library software
    55# from the New Zealand Digital Library Project at the
     
    2323#
    2424###########################################################################
    25 package CONTENTdmPlug;
    26 
    27 use ConvertToPlug;
     25package CONTENTdmPlugin;
     26
     27use ConvertBinaryFile;
     28use ReadXMLFile;
     29
    2830use unicode;
    2931use ghtml;
     
    3537use XMLParser;
    3638
     39# inherit ReadXMLFile for the apply_xslt method
    3740sub BEGIN {
    38     @CONTENTdmPlug::ISA = ('ConvertToPlug');
     41    @CONTENTdmPlugin::ISA = ('ConvertBinaryFile', 'ReadXMLFile');
    3942}
    4043
     
    4245my $convert_to_list =
    4346    [ { 'name' => "auto",
    44     'desc' => "{ConvertToPlug.convert_to.auto}" },
     47    'desc' => "{ConvertBinaryFile.convert_to.auto}" },
    4548      { 'name' => "html",
    46     'desc' => "{ConvertToPlug.convert_to.html}" },
     49    'desc' => "{ConvertBinaryFile.convert_to.html}" },
    4750      { 'name' => "text",
    48     'desc' => "{ConvertToPlug.convert_to.text}" },
     51    'desc' => "{ConvertBinaryFile.convert_to.text}" },
    4952      { 'name' => "pagedimg",
    50     'desc' => "{ConvertToPlug.convert_to.pagedimg}"},
     53    'desc' => "{ConvertBinaryFile.convert_to.pagedimg}"},
    5154      ];
    5255
     
    5659      [
    5760       { 'name' => "convert_to",
    58     'desc' => "{ConvertToPlug.convert_to}",
     61    'desc' => "{ConvertBinaryFile.convert_to}",
    5962    'type' => "enum",
    6063    'reqd' => "yes",
     
    6265    'deft' => "html" }, 
    6366      { 'name' => "xslt",
    64     'desc' => "{XMLPlug.xslt}",
     67    'desc' => "{ReadXMLFile.xslt}",
    6568    'type' => "string",
    6669    'deft' => "",
    6770    'reqd' => "no" },
    6871       { 'name' => "process_exp",
    69     'desc' => "{BasPlug.process_exp}",
     72    'desc' => "{BasePlugin.process_exp}",
    7073    'type' => "regexp",
    7174    'deft' => &get_default_process_exp(),
    7275    'reqd' => "no" },
    7376      { 'name' => "block_exp",
    74     'desc' => "{BasPlug.block_exp}",
     77    'desc' => "{BasePlugin.block_exp}",
    7578    'type' => "regexp",
    7679    'deft' => &get_default_block_exp() }
    7780];
    7881
    79 my $options = { 'name'     => "CONTENTdmPlug",
    80         'desc'     => "{CONTENTdmPlug.desc}",
     82my $options = { 'name'     => "CONTENTdmPlugin",
     83        'desc'     => "{CONTENTdmPlugin.desc}",
    8184        'abstract' => "no",
    8285        'inherits' => "yes",
    83         # CONTENTdmPlug is one of the few ConvertToPlug subclasses whose source doc can't be replaced by a GS-generated html
     86        # CONTENTdmPlugin is one of the few ConvertBinaryFile subclasses whose source doc can't be replaced by a GS-generated html
    8487        'srcreplaceable' => "no",
    8588        'args'     => $arguments };
    86 
    87 our ($self);
    8889
    8990sub new {
     
    9596    push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
    9697
    97     if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    98     if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
     98    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
     99    push(@{$hashArgOptLists->{"OptList"}},$options);
    99100
    100101    my @arg_array = @$inputargs;
    101     $self = (defined $hashArgOptLists)? new ConvertToPlug($pluginlist,$inputargs,$hashArgOptLists): new ConvertToPlug($pluginlist,$inputargs);
     102    my $self = new ConvertBinaryFile($pluginlist,$inputargs,$hashArgOptLists);
    102103   
    103104    if ($self->{'info_only'}) {
     
    107108
    108109    my $parser = new XML::Parser('Style' => 'Stream',
     110                 'Pkg' => 'CONTENTdmPlugin',
     111                 'PluginObj' => $self,
    109112                 'Handlers' => {'Char' => \&Char,
    110113                        'XMLDecl' => \&XMLDecl,
     
    119122    $self->{'metadata_value'} = undef;
    120123
    121     $self->{'convert_to'} = "PagedImg";
     124    $self->{'convert_to'} = "PagedImage";
    122125    my $secondary_plugin_options = $self->{'secondary_plugin_options'};
    123126
    124     if (!defined $secondary_plugin_options->{'PagedImgPlug'}){
    125     $secondary_plugin_options->{'PagedImgPlug'} = [];
    126     my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'};
    127     push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    128     push(@$pagedimg_options, "-thumbnail", "-screenview");
    129     }
     127    if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){
     128    $secondary_plugin_options->{'PagedImagePlugin'} = [];
     129    }
     130    my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'};
     131    push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
     132    push(@$pagedimg_options, "-thumbnail", "-screenview");
     133
    130134
    131135    $self = bless $self, $class;
     
    140144}
    141145
    142 # so we don't inherit HTMLPlug's block exp...
    143146sub get_default_block_exp {
    144147    return q^(?i)\.(jpg|jpeg|gif)$^;
    145 }
    146 
    147 
    148 
    149 
    150 # A smarter (?) option would be to add XMLPlug into inheritence above
    151 # thereby avoiding a fair amount of code duplication
    152 
    153 sub apply_xslt
    154 {
    155     my $self = shift @_;
    156     my ($xslt,$filename) = @_;
    157    
    158     my $outhandle = $self->{'outhandle'};
    159 
    160     my $xslt_filename = $xslt;
    161 
    162     if (! -e $xslt_filename) {
    163     # Look in main site directory
    164     my $gsdlhome = $ENV{'GSDLHOME'};
    165     $xslt_filename = &util::filename_cat($gsdlhome,$xslt);
    166     }
    167 
    168     if (! -e $xslt_filename) {
    169     # Look in collection directory
    170     my $coldir = $ENV{'GSDLCOLLECTDIR'};
    171     $xslt_filename = &util::filename_cat($coldir,$xslt);
    172     }
    173 
    174     if (! -e $xslt_filename) {
    175     print $outhandle "Warning: Unable to find XSLT $xslt\n";
    176     if (open(XMLIN,"<$filename")) {
    177 
    178         my $untransformed_xml = "";
    179         while (defined (my $line = <XMLIN>)) {
    180 
    181         $untransformed_xml .= $line;
    182         }
    183         close(XMLIN);
    184        
    185         return $untransformed_xml;
    186     }
    187     else {
    188         print $outhandle "Error: Unable to open file $filename\n";
    189         print $outhandle "       $!\n";
    190         return "";
    191     }
    192    
    193     }
    194 
    195     my $bin_java = &util::filename_cat($ENV{'GSDLHOME'},"bin","java");
    196     my $jar_filename = &util::filename_cat($bin_java,"xalan.jar");
    197     my $xslt_base_cmd = "java -jar $jar_filename";
    198     my $xslt_cmd = "$xslt_base_cmd -IN \"$filename\" -XSL \"$xslt_filename\"";
    199 
    200     my $transformed_xml = "";
    201 
    202     if (open(XSLT_IN,"$xslt_cmd |")) {
    203     while (defined (my $line = <XSLT_IN>)) {
    204 
    205         $transformed_xml .= $line;
    206     }
    207     close(XSLT_IN);
    208     }
    209     else {
    210     print $outhandle "Error: Unable to run command $xslt_cmd\n";
    211     print $outhandle "       $!\n";
    212     }
    213 
    214     return $transformed_xml;
    215 
    216148}
    217149
     
    523455
    524456
    525 # Override ConvertToPlug tmp_area_convert_file() to provide solution specific
     457# Override ConvertBinaryFile tmp_area_convert_file() to provide solution specific
    526458# to CONTENTdm
    527459#
     
    612544    $self->{'converted_to'} = "HTML";
    613545    } elsif ($output_type =~ /te?xt/i) {
    614     $self->{'converted_to'} = "TEXT";
     546    $self->{'converted_to'} = "Text";
    615547    } elsif ($output_type =~ /item/i){
    616     $self->{'converted_to'} = "PagedImg";
     548    $self->{'converted_to'} = "PagedImage";
    617549    }
    618550
     
    624556
    625557
    626 # Override ConvertToPlug read
     558# Override ConvertBinaryFile read
    627559# Needed so multiple .item files generate are sent down secondary plugin
    628560
     
    640572    my ($block_status,$filename) = $self->read_block(@_);
    641573    return $block_status if ((!defined $block_status) || ($block_status==0));
    642     $file = $self->read_tidy_file($file);
    643    
    644     # read() deviates at this point from ConvertToPlug
     574    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
     575       
     576    # read() deviates at this point from ConvertBinaryFile
    645577    # Need to work with list of filename returned
    646578
     
    693625   
    694626    my ($filemeta) = $file =~ /([^\\\/]+)$/;
    695     $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));
     627    $self->set_Source_metadata($doc_obj, $filemeta);
    696628    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
    697629    $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename));
     
    713645
    714646    # add an OID
    715     $doc_obj->set_OID();
     647    $self->add_OID($doc_obj);
    716648    # process the document
    717649    $processor->process($doc_obj);
     
    723655}
    724656
    725 
    726 
    727657sub process {
    728     my $self = shift (@_);
    729     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
     658
     659}
     660# do we need this? sec pluginn process would have already been called as part of read_into_doc_obj??
     661sub process_old {
     662    my $self = shift (@_);
     663    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    730664
    731665   
     
    741675}
    742676
    743 
    744 
    745 
    746 sub StartDocument {$self->xml_start_document(@_);}
    747 sub XMLDecl {$self->xml_xmldecl(@_);}
    748 sub Entity {$self->xml_entity(@_);}
    749 sub Doctype {$self->xml_doctype(@_);}
    750 sub StartTag {$self->xml_start_tag(@_);}
    751 sub EndTag {$self->xml_end_tag(@_);}
    752 sub Text {$self->xml_text(@_);}
    753 sub PI {$self->xml_pi(@_);}
    754 sub EndDocument {$self->xml_end_document(@_);}
    755 sub Default {$self->xml_default(@_);}
    756 
    757 # This Char function overrides the one in XML::Parser::Stream to overcome a
    758 # problem where $expat->{Text} is treated as the return value, slowing
    759 # things down significantly in some cases.
    760 sub Char {
    761     use bytes;  # Necessary to prevent encoding issues with XML::Parser 2.31+
    762     $_[0]->{'Text'} .= $_[1];
    763     return undef;
    764 }
    765677
    766678# Called at the beginning of the XML document.
     
    772684}
    773685
    774 # Called for XML declarations
    775 sub xml_xmldecl {
    776     my $self = shift(@_);
    777     my ($expat, $version, $encoding, $standalone) = @_;
    778 }
    779 
    780 # Called for XML entities
    781 sub xml_entity {
    782   my $self = shift(@_);
    783   my ($expat, $name, $val, $sysid, $pubid, $ndata) = @_;
    784 }
    785686
    786687# Called for DOCTYPE declarations - use die to bail out if this doctype
     
    793694
    794695    my $outhandle = $self->{'outhandle'};
    795     print $outhandle "CONTENTdmPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
     696    print $outhandle "CONTENTdmPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
    796697
    797698}
     
    873774}
    874775
    875 # Called for processing instructions. The $_ variable will contain a copy
    876 # of the pi.
    877 sub xml_pi {
    878     my $self = shift(@_);
    879     my ($expat, $target, $data) = @_;
    880 }
    881 
    882776# Called at the end of the XML document.
    883777sub xml_end_document {
     
    887781}
    888782
    889 # Called for any characters not handled by the above functions.
    890 sub xml_default {
    891     my $self = shift(@_);
    892     my ($expat, $text) = @_;
    893 }
    894 
    895783
    8967841;
Note: See TracChangeset for help on using the changeset viewer.