Changeset 15872 for gsdl/trunk/perllib/plugins/CONTENTdmPlugin.pm
- Timestamp:
- 2008-06-05T09:29:32+12:00 (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/CONTENTdmPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # CONTENTdmPlug .pm -- reasonably with-it pdf plugin3 # CONTENTdmPlugin.pm -- reasonably with-it pdf plugin 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 23 23 # 24 24 ########################################################################### 25 package CONTENTdmPlug; 26 27 use ConvertToPlug; 25 package CONTENTdmPlugin; 26 27 use ConvertBinaryFile; 28 use ReadXMLFile; 29 28 30 use unicode; 29 31 use ghtml; … … 35 37 use XMLParser; 36 38 39 # inherit ReadXMLFile for the apply_xslt method 37 40 sub BEGIN { 38 @CONTENTdmPlug ::ISA = ('ConvertToPlug');41 @CONTENTdmPlugin::ISA = ('ConvertBinaryFile', 'ReadXMLFile'); 39 42 } 40 43 … … 42 45 my $convert_to_list = 43 46 [ { 'name' => "auto", 44 'desc' => "{Convert ToPlug.convert_to.auto}" },47 'desc' => "{ConvertBinaryFile.convert_to.auto}" }, 45 48 { 'name' => "html", 46 'desc' => "{Convert ToPlug.convert_to.html}" },49 'desc' => "{ConvertBinaryFile.convert_to.html}" }, 47 50 { 'name' => "text", 48 'desc' => "{Convert ToPlug.convert_to.text}" },51 'desc' => "{ConvertBinaryFile.convert_to.text}" }, 49 52 { 'name' => "pagedimg", 50 'desc' => "{Convert ToPlug.convert_to.pagedimg}"},53 'desc' => "{ConvertBinaryFile.convert_to.pagedimg}"}, 51 54 ]; 52 55 … … 56 59 [ 57 60 { 'name' => "convert_to", 58 'desc' => "{Convert ToPlug.convert_to}",61 'desc' => "{ConvertBinaryFile.convert_to}", 59 62 'type' => "enum", 60 63 'reqd' => "yes", … … 62 65 'deft' => "html" }, 63 66 { 'name' => "xslt", 64 'desc' => "{ XMLPlug.xslt}",67 'desc' => "{ReadXMLFile.xslt}", 65 68 'type' => "string", 66 69 'deft' => "", 67 70 'reqd' => "no" }, 68 71 { 'name' => "process_exp", 69 'desc' => "{Bas Plug.process_exp}",72 'desc' => "{BasePlugin.process_exp}", 70 73 'type' => "regexp", 71 74 'deft' => &get_default_process_exp(), 72 75 'reqd' => "no" }, 73 76 { 'name' => "block_exp", 74 'desc' => "{Bas Plug.block_exp}",77 'desc' => "{BasePlugin.block_exp}", 75 78 'type' => "regexp", 76 79 'deft' => &get_default_block_exp() } 77 80 ]; 78 81 79 my $options = { 'name' => "CONTENTdmPlug ",80 'desc' => "{CONTENTdmPlug .desc}",82 my $options = { 'name' => "CONTENTdmPlugin", 83 'desc' => "{CONTENTdmPlugin.desc}", 81 84 'abstract' => "no", 82 85 'inherits' => "yes", 83 # CONTENTdmPlug is one of the few ConvertToPlugsubclasses whose source doc can't be replaced by a GS-generated html86 # CONTENTdmPlugin is one of the few ConvertBinaryFile subclasses whose source doc can't be replaced by a GS-generated html 84 87 'srcreplaceable' => "no", 85 88 'args' => $arguments }; 86 87 our ($self);88 89 89 90 sub new { … … 95 96 push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?'); 96 97 97 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}98 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};98 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 99 push(@{$hashArgOptLists->{"OptList"}},$options); 99 100 100 101 my @arg_array = @$inputargs; 101 $self = (defined $hashArgOptLists)? new ConvertToPlug($pluginlist,$inputargs,$hashArgOptLists): new ConvertToPlug($pluginlist,$inputargs);102 my $self = new ConvertBinaryFile($pluginlist,$inputargs,$hashArgOptLists); 102 103 103 104 if ($self->{'info_only'}) { … … 107 108 108 109 my $parser = new XML::Parser('Style' => 'Stream', 110 'Pkg' => 'CONTENTdmPlugin', 111 'PluginObj' => $self, 109 112 'Handlers' => {'Char' => \&Char, 110 113 'XMLDecl' => \&XMLDecl, … … 119 122 $self->{'metadata_value'} = undef; 120 123 121 $self->{'convert_to'} = "PagedIm g";124 $self->{'convert_to'} = "PagedImage"; 122 125 my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 123 126 124 if (!defined $secondary_plugin_options->{'PagedImgPlug'}){ 125 $secondary_plugin_options->{'PagedImgPlug'} = []; 126 my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'}; 127 push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 128 push(@$pagedimg_options, "-thumbnail", "-screenview"); 129 } 127 if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){ 128 $secondary_plugin_options->{'PagedImagePlugin'} = []; 129 } 130 my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'}; 131 push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 132 push(@$pagedimg_options, "-thumbnail", "-screenview"); 133 130 134 131 135 $self = bless $self, $class; … … 140 144 } 141 145 142 # so we don't inherit HTMLPlug's block exp...143 146 sub get_default_block_exp { 144 147 return q^(?i)\.(jpg|jpeg|gif)$^; 145 }146 147 148 149 150 # A smarter (?) option would be to add XMLPlug into inheritence above151 # thereby avoiding a fair amount of code duplication152 153 sub apply_xslt154 {155 my $self = shift @_;156 my ($xslt,$filename) = @_;157 158 my $outhandle = $self->{'outhandle'};159 160 my $xslt_filename = $xslt;161 162 if (! -e $xslt_filename) {163 # Look in main site directory164 my $gsdlhome = $ENV{'GSDLHOME'};165 $xslt_filename = &util::filename_cat($gsdlhome,$xslt);166 }167 168 if (! -e $xslt_filename) {169 # Look in collection directory170 my $coldir = $ENV{'GSDLCOLLECTDIR'};171 $xslt_filename = &util::filename_cat($coldir,$xslt);172 }173 174 if (! -e $xslt_filename) {175 print $outhandle "Warning: Unable to find XSLT $xslt\n";176 if (open(XMLIN,"<$filename")) {177 178 my $untransformed_xml = "";179 while (defined (my $line = <XMLIN>)) {180 181 $untransformed_xml .= $line;182 }183 close(XMLIN);184 185 return $untransformed_xml;186 }187 else {188 print $outhandle "Error: Unable to open file $filename\n";189 print $outhandle " $!\n";190 return "";191 }192 193 }194 195 my $bin_java = &util::filename_cat($ENV{'GSDLHOME'},"bin","java");196 my $jar_filename = &util::filename_cat($bin_java,"xalan.jar");197 my $xslt_base_cmd = "java -jar $jar_filename";198 my $xslt_cmd = "$xslt_base_cmd -IN \"$filename\" -XSL \"$xslt_filename\"";199 200 my $transformed_xml = "";201 202 if (open(XSLT_IN,"$xslt_cmd |")) {203 while (defined (my $line = <XSLT_IN>)) {204 205 $transformed_xml .= $line;206 }207 close(XSLT_IN);208 }209 else {210 print $outhandle "Error: Unable to run command $xslt_cmd\n";211 print $outhandle " $!\n";212 }213 214 return $transformed_xml;215 216 148 } 217 149 … … 523 455 524 456 525 # Override Convert ToPlugtmp_area_convert_file() to provide solution specific457 # Override ConvertBinaryFile tmp_area_convert_file() to provide solution specific 526 458 # to CONTENTdm 527 459 # … … 612 544 $self->{'converted_to'} = "HTML"; 613 545 } elsif ($output_type =~ /te?xt/i) { 614 $self->{'converted_to'} = "T EXT";546 $self->{'converted_to'} = "Text"; 615 547 } elsif ($output_type =~ /item/i){ 616 $self->{'converted_to'} = "PagedIm g";548 $self->{'converted_to'} = "PagedImage"; 617 549 } 618 550 … … 624 556 625 557 626 # Override Convert ToPlugread558 # Override ConvertBinaryFile read 627 559 # Needed so multiple .item files generate are sent down secondary plugin 628 560 … … 640 572 my ($block_status,$filename) = $self->read_block(@_); 641 573 return $block_status if ((!defined $block_status) || ($block_status==0)); 642 $file = $self->read_tidy_file($file);643 644 # read() deviates at this point from Convert ToPlug574 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 575 576 # read() deviates at this point from ConvertBinaryFile 645 577 # Need to work with list of filename returned 646 578 … … 693 625 694 626 my ($filemeta) = $file =~ /([^\\\/]+)$/; 695 $ doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));627 $self->set_Source_metadata($doc_obj, $filemeta); 696 628 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 697 629 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename)); … … 713 645 714 646 # add an OID 715 $ doc_obj->set_OID();647 $self->add_OID($doc_obj); 716 648 # process the document 717 649 $processor->process($doc_obj); … … 723 655 } 724 656 725 726 727 657 sub process { 728 my $self = shift (@_); 729 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 658 659 } 660 # do we need this? sec pluginn process would have already been called as part of read_into_doc_obj?? 661 sub process_old { 662 my $self = shift (@_); 663 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 730 664 731 665 … … 741 675 } 742 676 743 744 745 746 sub StartDocument {$self->xml_start_document(@_);}747 sub XMLDecl {$self->xml_xmldecl(@_);}748 sub Entity {$self->xml_entity(@_);}749 sub Doctype {$self->xml_doctype(@_);}750 sub StartTag {$self->xml_start_tag(@_);}751 sub EndTag {$self->xml_end_tag(@_);}752 sub Text {$self->xml_text(@_);}753 sub PI {$self->xml_pi(@_);}754 sub EndDocument {$self->xml_end_document(@_);}755 sub Default {$self->xml_default(@_);}756 757 # This Char function overrides the one in XML::Parser::Stream to overcome a758 # problem where $expat->{Text} is treated as the return value, slowing759 # things down significantly in some cases.760 sub Char {761 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+762 $_[0]->{'Text'} .= $_[1];763 return undef;764 }765 677 766 678 # Called at the beginning of the XML document. … … 772 684 } 773 685 774 # Called for XML declarations775 sub xml_xmldecl {776 my $self = shift(@_);777 my ($expat, $version, $encoding, $standalone) = @_;778 }779 780 # Called for XML entities781 sub xml_entity {782 my $self = shift(@_);783 my ($expat, $name, $val, $sysid, $pubid, $ndata) = @_;784 }785 686 786 687 # Called for DOCTYPE declarations - use die to bail out if this doctype … … 793 694 794 695 my $outhandle = $self->{'outhandle'}; 795 print $outhandle "CONTENTdmPlug : processing $self->{'file'}\n" if $self->{'verbosity'} > 1;696 print $outhandle "CONTENTdmPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 796 697 797 698 } … … 873 774 } 874 775 875 # Called for processing instructions. The $_ variable will contain a copy876 # of the pi.877 sub xml_pi {878 my $self = shift(@_);879 my ($expat, $target, $data) = @_;880 }881 882 776 # Called at the end of the XML document. 883 777 sub xml_end_document { … … 887 781 } 888 782 889 # Called for any characters not handled by the above functions.890 sub xml_default {891 my $self = shift(@_);892 my ($expat, $text) = @_;893 }894 895 783 896 784 1;
Note:
See TracChangeset
for help on using the changeset viewer.