Changeset 15872
- Timestamp:
- 2008-06-05T09:29:32+12:00 (16 years ago)
- Location:
- gsdl/trunk/perllib/plugins
- Files:
-
- 49 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/BibTexPlugin.pm
r15864 r15872 1 1 ########################################################################### 2 2 # 3 # BibTexPlug .pm - a plugin for bibliography records in BibTex format3 # BibTexPlugin.pm - a plugin for bibliography records in BibTex format 4 4 # 5 5 # A component of the Greenstone digital library software … … 27 27 28 28 29 # BibTexPlug reads bibliography files in BibTex format.29 # BibTexPlugin reads bibliography files in BibTex format. 30 30 # 31 31 # by Gordon W. Paynter ([email protected]), November 2000 32 32 # Based on ReferPlug. See ReferPlug for geneology. 33 33 # 34 # BibTexPlug creates a document object for every reference a the file.35 # It is a subclass of Split Plug, so if there are multiple records, all34 # BibTexPlugin creates a document object for every reference a the file. 35 # It is a subclass of SplitTextFile, so if there are multiple records, all 36 36 # are read. 37 37 # … … 42 42 43 43 44 package BibTexPlug ;45 46 use Split Plug;44 package BibTexPlugin; 45 46 use SplitTextFile; 47 47 use strict; 48 48 no strict 'refs'; # allow filehandles to be variables and viceversa 49 49 50 # BibTexPlug is a sub-class of BasPlug.50 # BibTexPlugin is a sub-class of SplitTextFile. 51 51 sub BEGIN { 52 @BibTexPlug ::ISA = ('SplitPlug');52 @BibTexPlugin::ISA = ('SplitTextFile'); 53 53 } 54 54 55 55 my $arguments = 56 56 [ { 'name' => "process_exp", 57 'desc' => "{Bas Plug.process_exp}",57 'desc' => "{BasePlugin.process_exp}", 58 58 'type' => "regexp", 59 59 'reqd' => "no", 60 60 'deft' => &get_default_process_exp() }, 61 61 { 'name' => "split_exp", 62 'desc' => "{Split Plug.split_exp}",62 'desc' => "{SplitTextFile.split_exp}", 63 63 'type' => "regexp", 64 64 'deft' => &get_default_split_exp(), … … 66 66 ]; 67 67 68 my $options = { 'name' => "BibTexPlug ",69 'desc' => "{BibTexPlug .desc}",68 my $options = { 'name' => "BibTexPlugin", 69 'desc' => "{BibTexPlugin.desc}", 70 70 'abstract' => "no", 71 71 'inherits' => "yes", … … 82 82 return q^\n+(?=@)^; 83 83 } 84 84 85 sub new { 85 86 my ($class) = shift (@_); … … 87 88 push(@$pluginlist, $class); 88 89 89 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}90 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};91 92 my $self = new Split Plug($pluginlist, $inputargs, $hashArgOptLists);90 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 91 push(@{$hashArgOptLists->{"OptList"}},$options); 92 93 my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists); 93 94 94 95 return bless $self, $class; … … 115 116 116 117 # Report that we're processing the file 117 print STDERR "<Processing n='$file' p='BibTexPlug '>\n" if ($gli);118 print $outhandle "BibTexPlug : processing $file\n"118 print STDERR "<Processing n='$file' p='BibTexPlugin'>\n" if ($gli); 119 print $outhandle "BibTexPlugin: processing $file\n" 119 120 if ($self->{'verbosity'}) > 1; 120 121 … … 312 313 $vonlast=shift @parts; 313 314 if (scalar(@parts) > 0) { 314 print $outhandle "BibTexPlug : couldn't parse name $a\n";315 print $outhandle "BibTexPlugin: couldn't parse name $a\n"; 315 316 # but we continue anyway... 316 317 } … … 331 332 # some non-English names do start with lowercase 332 333 # eg "Marie desJardins". Also we can get typos... 333 print $outhandle "BibTexPlug : couldn't parse surname $vonlast\n";334 print $outhandle "BibTexPlugin: couldn't parse surname $vonlast\n"; 334 335 $von=""; 335 336 if ($vonlast =~ /^[a-z]+$/) { … … 724 725 my $replacement=$utf8_chars{$tex}; 725 726 if (!defined($replacement)) { 726 print STDERR "BibTexPlug : Warning: unknown latex accent \"$tex\" in \"$text\"\n";727 print STDERR "BibTexPlugin: Warning: unknown latex accent \"$tex\" in \"$text\"\n"; 727 728 $replacement=$char; 728 729 } … … 737 738 my $replacement=$special_utf8_chars{$tex}; 738 739 if (!defined($replacement)) { 739 print STDERR "BibTexPlug : Warning: unknown latex accent \"$tex\" in \"$text\"\n";740 print STDERR "BibTexPlugin: Warning: unknown latex accent \"$tex\" in \"$text\"\n"; 740 741 $replacement=$tex; 741 742 } … … 749 750 my $replacement=$special_utf8_chars{$tex}; 750 751 if (!defined($replacement)) { 751 print STDERR "BibTexPlug : Warning: unknown latex accent \"$tex\" in \"$text\"\n";752 print STDERR "BibTexPlugin: Warning: unknown latex accent \"$tex\" in \"$text\"\n"; 752 753 $replacement=$char; 753 754 } -
gsdl/trunk/perllib/plugins/BookPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # BookPlug .pm (formally called HBSPlug) -- plugin for processing simple3 # BookPlugin.pm (formally called HBSPlug) -- plugin for processing simple 4 4 # html (or text) books 5 5 # … … 40 40 # taken as the cover image (jpg files are blocked by this plugin) 41 41 42 # BookPlug is a simplification (and extension) of the HBPlug used43 # by the Humanity Library collections. BookPlug is faster as it expects42 # BookPlugin is a simplification (and extension) of the HBPlug used 43 # by the Humanity Library collections. BookPlugin is faster as it expects 44 44 # the input files to be cleaner (The input to the HDL collections 45 45 # contains lots of excess html tags around <<TOC>> tags, uses <<I>> … … 49 49 # use this plugin instead of HBPlug. 50 50 51 # 12/05/02 Added usage datastructure - John Thompson 52 53 package BookPlug; 54 55 use BasPlug; 51 package BookPlugin; 52 53 use AutoExtractMetadata; 56 54 use util; 57 55 use strict; … … 59 57 60 58 sub BEGIN { 61 @BookPlug ::ISA = ('BasPlug');59 @BookPlugin::ISA = ('AutoExtractMetadata'); 62 60 } 63 61 64 62 my $arguments = 65 63 [ { 'name' => "process_exp", 66 'desc' => "{Bas Plug.process_exp}",64 'desc' => "{BasePlugin.process_exp}", 67 65 'type' => "regexp", 68 66 'reqd' => "no", 69 67 'deft' => &get_default_process_exp() }, 70 68 { 'name' => "block_exp", 71 'desc' => "{Bas Plug.block_exp}",69 'desc' => "{BasePlugin.block_exp}", 72 70 'type' => "regexp", 73 71 'reqd' => "no", 74 72 'deft' => &get_default_block_exp() } ]; 75 73 76 my $options = { 'name' => "BookPlug ",77 'desc' => "{BookPlug .desc}",74 my $options = { 'name' => "BookPlugin", 75 'desc' => "{BookPlugin.desc}", 78 76 'abstract' => "no", 79 77 'inherits' => "yes", … … 85 83 push(@$pluginlist, $class); 86 84 87 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}88 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};89 90 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);85 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 86 push(@{$hashArgOptLists->{"OptList"}},$options); 87 88 my $self = new AutoExtractMetadata($pluginlist, $inputargs, $hashArgOptLists); 91 89 92 90 return bless $self, $class; … … 111 109 my $outhandle = $self->{'outhandle'}; 112 110 113 print STDERR "<Processing n='$file' p='BookPlug '>\n" if ($gli);114 print $outhandle "BookPlug : processing $file\n"111 print STDERR "<Processing n='$file' p='BookPlugin'>\n" if ($gli); 112 print $outhandle "BookPlugin: processing $file\n" 115 113 if $self->{'verbosity'} > 1; 116 114 … … 211 209 if ($imagetype eq "jpg") {$imagetype = "jpeg";} 212 210 if ($imagetype !~ /^(jpeg|gif|png)$/) { 213 print $outhandle "BookPlug : Warning - unknown image type ($imagetype)\n";211 print $outhandle "BookPlugin: Warning - unknown image type ($imagetype)\n"; 214 212 } 215 213 my ($imagefile) = $link =~ /([^\/]*)$/; … … 223 221 $foundimage = 1; 224 222 } else { 225 $error = "BookPlug : Warning - couldn't find image file $imagefile in either $filename or";223 $error = "BookPlugin: Warning - couldn't find image file $imagefile in either $filename or"; 226 224 } 227 225 } … … 235 233 print $outhandle "$error $filename\n"; 236 234 } else { 237 print $outhandle "BookPlug : Warning - couldn't find image file $imagefile in $filename\n";235 print $outhandle "BookPlugin: Warning - couldn't find image file $imagefile in $filename\n"; 238 236 } 239 237 } -
gsdl/trunk/perllib/plugins/CONTENTdmPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # CONTENTdmPlug .pm -- reasonably with-it pdf plugin3 # CONTENTdmPlugin.pm -- reasonably with-it pdf plugin 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 23 23 # 24 24 ########################################################################### 25 package CONTENTdmPlug; 26 27 use ConvertToPlug; 25 package CONTENTdmPlugin; 26 27 use ConvertBinaryFile; 28 use ReadXMLFile; 29 28 30 use unicode; 29 31 use ghtml; … … 35 37 use XMLParser; 36 38 39 # inherit ReadXMLFile for the apply_xslt method 37 40 sub BEGIN { 38 @CONTENTdmPlug ::ISA = ('ConvertToPlug');41 @CONTENTdmPlugin::ISA = ('ConvertBinaryFile', 'ReadXMLFile'); 39 42 } 40 43 … … 42 45 my $convert_to_list = 43 46 [ { 'name' => "auto", 44 'desc' => "{Convert ToPlug.convert_to.auto}" },47 'desc' => "{ConvertBinaryFile.convert_to.auto}" }, 45 48 { 'name' => "html", 46 'desc' => "{Convert ToPlug.convert_to.html}" },49 'desc' => "{ConvertBinaryFile.convert_to.html}" }, 47 50 { 'name' => "text", 48 'desc' => "{Convert ToPlug.convert_to.text}" },51 'desc' => "{ConvertBinaryFile.convert_to.text}" }, 49 52 { 'name' => "pagedimg", 50 'desc' => "{Convert ToPlug.convert_to.pagedimg}"},53 'desc' => "{ConvertBinaryFile.convert_to.pagedimg}"}, 51 54 ]; 52 55 … … 56 59 [ 57 60 { 'name' => "convert_to", 58 'desc' => "{Convert ToPlug.convert_to}",61 'desc' => "{ConvertBinaryFile.convert_to}", 59 62 'type' => "enum", 60 63 'reqd' => "yes", … … 62 65 'deft' => "html" }, 63 66 { 'name' => "xslt", 64 'desc' => "{ XMLPlug.xslt}",67 'desc' => "{ReadXMLFile.xslt}", 65 68 'type' => "string", 66 69 'deft' => "", 67 70 'reqd' => "no" }, 68 71 { 'name' => "process_exp", 69 'desc' => "{Bas Plug.process_exp}",72 'desc' => "{BasePlugin.process_exp}", 70 73 'type' => "regexp", 71 74 'deft' => &get_default_process_exp(), 72 75 'reqd' => "no" }, 73 76 { 'name' => "block_exp", 74 'desc' => "{Bas Plug.block_exp}",77 'desc' => "{BasePlugin.block_exp}", 75 78 'type' => "regexp", 76 79 'deft' => &get_default_block_exp() } 77 80 ]; 78 81 79 my $options = { 'name' => "CONTENTdmPlug ",80 'desc' => "{CONTENTdmPlug .desc}",82 my $options = { 'name' => "CONTENTdmPlugin", 83 'desc' => "{CONTENTdmPlugin.desc}", 81 84 'abstract' => "no", 82 85 'inherits' => "yes", 83 # CONTENTdmPlug is one of the few ConvertToPlugsubclasses whose source doc can't be replaced by a GS-generated html86 # CONTENTdmPlugin is one of the few ConvertBinaryFile subclasses whose source doc can't be replaced by a GS-generated html 84 87 'srcreplaceable' => "no", 85 88 'args' => $arguments }; 86 87 our ($self);88 89 89 90 sub new { … … 95 96 push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?'); 96 97 97 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}98 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};98 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 99 push(@{$hashArgOptLists->{"OptList"}},$options); 99 100 100 101 my @arg_array = @$inputargs; 101 $self = (defined $hashArgOptLists)? new ConvertToPlug($pluginlist,$inputargs,$hashArgOptLists): new ConvertToPlug($pluginlist,$inputargs);102 my $self = new ConvertBinaryFile($pluginlist,$inputargs,$hashArgOptLists); 102 103 103 104 if ($self->{'info_only'}) { … … 107 108 108 109 my $parser = new XML::Parser('Style' => 'Stream', 110 'Pkg' => 'CONTENTdmPlugin', 111 'PluginObj' => $self, 109 112 'Handlers' => {'Char' => \&Char, 110 113 'XMLDecl' => \&XMLDecl, … … 119 122 $self->{'metadata_value'} = undef; 120 123 121 $self->{'convert_to'} = "PagedIm g";124 $self->{'convert_to'} = "PagedImage"; 122 125 my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 123 126 124 if (!defined $secondary_plugin_options->{'PagedImgPlug'}){ 125 $secondary_plugin_options->{'PagedImgPlug'} = []; 126 my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'}; 127 push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 128 push(@$pagedimg_options, "-thumbnail", "-screenview"); 129 } 127 if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){ 128 $secondary_plugin_options->{'PagedImagePlugin'} = []; 129 } 130 my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'}; 131 push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 132 push(@$pagedimg_options, "-thumbnail", "-screenview"); 133 130 134 131 135 $self = bless $self, $class; … … 140 144 } 141 145 142 # so we don't inherit HTMLPlug's block exp...143 146 sub get_default_block_exp { 144 147 return q^(?i)\.(jpg|jpeg|gif)$^; 145 }146 147 148 149 150 # A smarter (?) option would be to add XMLPlug into inheritence above151 # thereby avoiding a fair amount of code duplication152 153 sub apply_xslt154 {155 my $self = shift @_;156 my ($xslt,$filename) = @_;157 158 my $outhandle = $self->{'outhandle'};159 160 my $xslt_filename = $xslt;161 162 if (! -e $xslt_filename) {163 # Look in main site directory164 my $gsdlhome = $ENV{'GSDLHOME'};165 $xslt_filename = &util::filename_cat($gsdlhome,$xslt);166 }167 168 if (! -e $xslt_filename) {169 # Look in collection directory170 my $coldir = $ENV{'GSDLCOLLECTDIR'};171 $xslt_filename = &util::filename_cat($coldir,$xslt);172 }173 174 if (! -e $xslt_filename) {175 print $outhandle "Warning: Unable to find XSLT $xslt\n";176 if (open(XMLIN,"<$filename")) {177 178 my $untransformed_xml = "";179 while (defined (my $line = <XMLIN>)) {180 181 $untransformed_xml .= $line;182 }183 close(XMLIN);184 185 return $untransformed_xml;186 }187 else {188 print $outhandle "Error: Unable to open file $filename\n";189 print $outhandle " $!\n";190 return "";191 }192 193 }194 195 my $bin_java = &util::filename_cat($ENV{'GSDLHOME'},"bin","java");196 my $jar_filename = &util::filename_cat($bin_java,"xalan.jar");197 my $xslt_base_cmd = "java -jar $jar_filename";198 my $xslt_cmd = "$xslt_base_cmd -IN \"$filename\" -XSL \"$xslt_filename\"";199 200 my $transformed_xml = "";201 202 if (open(XSLT_IN,"$xslt_cmd |")) {203 while (defined (my $line = <XSLT_IN>)) {204 205 $transformed_xml .= $line;206 }207 close(XSLT_IN);208 }209 else {210 print $outhandle "Error: Unable to run command $xslt_cmd\n";211 print $outhandle " $!\n";212 }213 214 return $transformed_xml;215 216 148 } 217 149 … … 523 455 524 456 525 # Override Convert ToPlugtmp_area_convert_file() to provide solution specific457 # Override ConvertBinaryFile tmp_area_convert_file() to provide solution specific 526 458 # to CONTENTdm 527 459 # … … 612 544 $self->{'converted_to'} = "HTML"; 613 545 } elsif ($output_type =~ /te?xt/i) { 614 $self->{'converted_to'} = "T EXT";546 $self->{'converted_to'} = "Text"; 615 547 } elsif ($output_type =~ /item/i){ 616 $self->{'converted_to'} = "PagedIm g";548 $self->{'converted_to'} = "PagedImage"; 617 549 } 618 550 … … 624 556 625 557 626 # Override Convert ToPlugread558 # Override ConvertBinaryFile read 627 559 # Needed so multiple .item files generate are sent down secondary plugin 628 560 … … 640 572 my ($block_status,$filename) = $self->read_block(@_); 641 573 return $block_status if ((!defined $block_status) || ($block_status==0)); 642 $file = $self->read_tidy_file($file);643 644 # read() deviates at this point from Convert ToPlug574 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 575 576 # read() deviates at this point from ConvertBinaryFile 645 577 # Need to work with list of filename returned 646 578 … … 693 625 694 626 my ($filemeta) = $file =~ /([^\\\/]+)$/; 695 $ doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));627 $self->set_Source_metadata($doc_obj, $filemeta); 696 628 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 697 629 $doc_obj->set_utf8_metadata_element($doc_obj->get_top_section(), "FileSize", (-s $filename)); … … 713 645 714 646 # add an OID 715 $ doc_obj->set_OID();647 $self->add_OID($doc_obj); 716 648 # process the document 717 649 $processor->process($doc_obj); … … 723 655 } 724 656 725 726 727 657 sub process { 728 my $self = shift (@_); 729 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 658 659 } 660 # do we need this? sec pluginn process would have already been called as part of read_into_doc_obj?? 661 sub process_old { 662 my $self = shift (@_); 663 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 730 664 731 665 … … 741 675 } 742 676 743 744 745 746 sub StartDocument {$self->xml_start_document(@_);}747 sub XMLDecl {$self->xml_xmldecl(@_);}748 sub Entity {$self->xml_entity(@_);}749 sub Doctype {$self->xml_doctype(@_);}750 sub StartTag {$self->xml_start_tag(@_);}751 sub EndTag {$self->xml_end_tag(@_);}752 sub Text {$self->xml_text(@_);}753 sub PI {$self->xml_pi(@_);}754 sub EndDocument {$self->xml_end_document(@_);}755 sub Default {$self->xml_default(@_);}756 757 # This Char function overrides the one in XML::Parser::Stream to overcome a758 # problem where $expat->{Text} is treated as the return value, slowing759 # things down significantly in some cases.760 sub Char {761 use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+762 $_[0]->{'Text'} .= $_[1];763 return undef;764 }765 677 766 678 # Called at the beginning of the XML document. … … 772 684 } 773 685 774 # Called for XML declarations775 sub xml_xmldecl {776 my $self = shift(@_);777 my ($expat, $version, $encoding, $standalone) = @_;778 }779 780 # Called for XML entities781 sub xml_entity {782 my $self = shift(@_);783 my ($expat, $name, $val, $sysid, $pubid, $ndata) = @_;784 }785 686 786 687 # Called for DOCTYPE declarations - use die to bail out if this doctype … … 793 694 794 695 my $outhandle = $self->{'outhandle'}; 795 print $outhandle "CONTENTdmPlug : processing $self->{'file'}\n" if $self->{'verbosity'} > 1;696 print $outhandle "CONTENTdmPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 796 697 797 698 } … … 873 774 } 874 775 875 # Called for processing instructions. The $_ variable will contain a copy876 # of the pi.877 sub xml_pi {878 my $self = shift(@_);879 my ($expat, $target, $data) = @_;880 }881 882 776 # Called at the end of the XML document. 883 777 sub xml_end_document { … … 887 781 } 888 782 889 # Called for any characters not handled by the above functions.890 sub xml_default {891 my $self = shift(@_);892 my ($expat, $text) = @_;893 }894 895 783 896 784 1; -
gsdl/trunk/perllib/plugins/CSVPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # CSVPlug .pm -- A plugin for files in comma-separated value format3 # CSVPlugin.pm -- A plugin for files in comma-separated value format 4 4 # 5 5 # A component of the Greenstone digital library software … … 25 25 ########################################################################### 26 26 27 package CSVPlug ;27 package CSVPlugin; 28 28 29 29 30 use Split Plug;30 use SplitTextFile; 31 31 use strict; 32 32 no strict 'refs'; # allow filehandles to be variables and viceversa 33 33 34 34 35 # CSVPlug is a sub-class of SplitPlug.35 # CSVPlugin is a sub-class of SplitTextFile. 36 36 sub BEGIN { 37 @CSVPlug ::ISA = ('SplitPlug');37 @CSVPlugin::ISA = ('SplitTextFile'); 38 38 } 39 39 … … 41 41 my $arguments = 42 42 [ { 'name' => "process_exp", 43 'desc' => "{Bas Plug.process_exp}",43 'desc' => "{BasePlugin.process_exp}", 44 44 'type' => "regexp", 45 45 'reqd' => "no", 46 46 'deft' => &get_default_process_exp() }, 47 47 { 'name' => "split_exp", 48 'desc' => "{Split Plug.split_exp}",48 'desc' => "{SplitTextFile.split_exp}", 49 49 'type' => "regexp", 50 50 'reqd' => "no", … … 54 54 55 55 56 my $options = { 'name' => "CSVPlug ",57 'desc' => "{CSVPlug .desc}",56 my $options = { 'name' => "CSVPlugin", 57 'desc' => "{CSVPlugin.desc}", 58 58 'abstract' => "no", 59 59 'inherits' => "yes", … … 80 80 push(@$pluginlist, $class); 81 81 82 if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});}83 if (defined $options) { push(@{$hashArgOptLists->{"OptList"}}, $options)};82 push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}); 83 push(@{$hashArgOptLists->{"OptList"}}, $options); 84 84 85 my $self = new Split Plug($pluginlist, $inputargs, $hashArgOptLists);85 my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists); 86 86 87 87 return bless $self, $class; … … 98 98 open(FILE, $filename); 99 99 my $reader = new multiread(); 100 $reader->set_handle('CSVPlug ::FILE');100 $reader->set_handle('CSVPlugin::FILE'); 101 101 $reader->set_encoding($encoding); 102 102 $reader->read_file($textref); … … 144 144 145 145 # Report that we're processing the file 146 print STDERR "\n<Processing n='$file' p='CSVPlug '>\n" if ($gli);147 print $outhandle "CSVPlug : processing $file\n" if ($self->{'verbosity'}) > 1;146 print STDERR "\n<Processing n='$file' p='CSVPlugin'>\n" if ($gli); 147 print $outhandle "CSVPlugin: processing $file\n" if ($self->{'verbosity'}) > 1; 148 148 149 149 # Add the raw line as the document text -
gsdl/trunk/perllib/plugins/ConvertToRogPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # ConvertToRogPlug .pm -- plugin that inherits from RogPlug3 # ConvertToRogPlugin.pm -- plugin that inherits from RogPlugin 4 4 # 5 5 # A component of the Greenstone digital library software … … 26 26 27 27 28 package ConvertToRogPlug; 29 30 use BasPlug; 31 use RogPlug; 28 package ConvertToRogPlugin; 29 30 use RogPlugin; 32 31 use strict; 33 32 no strict 'refs'; # allow filehandles to be variables and viceversa 34 33 35 34 sub BEGIN { 36 @ConvertToRogPlug ::ISA = ('RogPlug');35 @ConvertToRogPlugin::ISA = ('RogPlugin'); 37 36 } 38 37 39 38 my $arguments = [ 40 39 ]; 41 my $options = { 'name' => "ConvertToRogPlug ",42 'desc' => "{ConvertToRogPlug .desc}",40 my $options = { 'name' => "ConvertToRogPlugin", 41 'desc' => "{ConvertToRogPlugin.desc}", 43 42 'abstract' => "yes", 44 43 'inherits' => "yes" }; … … 49 48 push(@$pluginlist, $class); 50 49 51 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}52 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};53 54 my $self = new RogPlug ($pluginlist, $inputargs, $hashArgOptLists);50 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 51 push(@{$hashArgOptLists->{"OptList"}},$options); 52 53 my $self = new RogPlugin($pluginlist, $inputargs, $hashArgOptLists); 55 54 56 55 $self->{'convert_to'} = "Rog"; … … 171 170 172 171 173 # Exact copy of read_rog_record from RogPlug 172 # Exact copy of read_rog_record from RogPlugin 174 173 # Needed for FILE in right scope 175 174 … … 271 270 } 272 271 273 # Override RogPlug function so rog files are stored as sections (not docs)272 # Override RogPlugin function so rog files are stored as sections (not docs) 274 273 275 274 sub process_rog_record … … 307 306 308 307 309 # Override Bas Plugread308 # Override BasePlugin read 310 309 # We don't want to get language encoding stuff until after we've converted 311 310 # our file to Rog format … … 347 346 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'}); 348 347 my ($filemeta) = $file =~ /([^\\\/]+)$/; 349 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta)); 348 $self->set_Source_metadata($doc_obj, $filemeta); 349 350 350 if ($self->{'cover_image'}) { 351 351 $self->associate_cover_image($doc_obj, $filename); … … 436 436 my $ret_val = 1; 437 437 438 # $ret_val = &RogPlug ::process($self, $textref, $pluginfo,438 # $ret_val = &RogPlugin::process($self, $textref, $pluginfo, 439 439 # $tmp_dirname, $tmp_tailname, 440 440 # $metadata, $doc_obj); -
gsdl/trunk/perllib/plugins/DBPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # DBPlug .pm -- plugin to import records from a database3 # DBPlugin.pm -- plugin to import records from a database 4 4 # 5 5 # A component of the Greenstone digital library software … … 34 34 # Mar, Apr 2003 35 35 36 package DBPlug ;36 package DBPlugin; 37 37 38 38 use strict; 39 39 no strict 'refs'; # allow variable as a filehandle 40 40 41 use BasPlug;41 use AutoExtractMetadata; 42 42 use unicode; 43 43 44 #use DBI; # database independent stuff45 46 44 sub BEGIN { 47 @DBPlug ::ISA = ('BasPlug');45 @DBPlugin::ISA = ('AutoExtractMetadata'); 48 46 } 49 47 50 48 my $arguments = 51 49 [ { 'name' => "process_exp", 52 'desc' => "{ BasPlug.process_exp}",50 'desc' => "{AutoExtractMetadata.process_exp}", 53 51 'type' => "regexp", 54 52 'deft' => &get_default_process_exp(), 55 53 'reqd' => "no" }]; 56 54 57 my $options = { 'name' => "DBPlug ",58 'desc' => "{DBPlug .desc}",55 my $options = { 'name' => "DBPlugin", 56 'desc' => "{DBPlugin.desc}", 59 57 'abstract' => "no", 60 58 'inherits' => "yes", … … 66 64 push(@$pluginlist, $class); 67 65 68 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}69 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};70 71 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);66 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 67 push(@{$hashArgOptLists->{"OptList"}},$options); 68 69 my $self = new AutoExtractMetadata($pluginlist, $inputargs, $hashArgOptLists); 72 70 73 71 return bless $self, $class; … … 78 76 79 77 return q^(?i)\.dbi$^; 80 }81 # we don't have a per-greenstone document process() function!82 sub process {83 84 78 } 85 79 … … 95 89 my $verbosity = $self->{'verbosity'}; 96 90 97 print $outhandle "DBPlug : processing $file\n"91 print $outhandle "DBPlugin: processing $file\n" 98 92 if $self->{'verbosity'} > 1; 99 93 … … 114 108 my $db=undef; 115 109 116 # get id of pages from "nonempty", get latest version number from "recent", and 117 #then get pagename from "page" and content from "version" !110 # get id of pages from "nonempty", get latest version number from 111 # "recent", and then get pagename from "page" and content from "version" ! 118 112 119 113 my $sql_query_prime = undef ; … … 126 120 # read in config file. 127 121 if (!open (CONF, $filename)) { 128 print $outhandle "DBPlug : can't read $filename: $!\n";122 print $outhandle "DBPlugin: can't read $filename: $!\n"; 129 123 return 0; 130 124 } … … 145 139 $callback =~ /[\`]|\|\-/) { 146 140 # no backticks or functions that start new processes allowed 147 print $outhandle "DBPlug : bad function in callback\n";141 print $outhandle "DBPlugin: bad function in callback\n"; 148 142 return 0; 149 143 } … … 152 146 my $ret = eval "\$callbacks{'$fieldname'} = $callback ; 1"; 153 147 if (!defined($ret)) { 154 print $outhandle "DBPlug : error eval'ing callback: $@\n";148 print $outhandle "DBPlugin: error eval'ing callback: $@\n"; 155 149 exit(1); 156 150 } 157 151 $callback=""; 158 print $outhandle "DBPlug : callback registered for '$fieldname'\n"152 print $outhandle "DBPlugin: callback registered for '$fieldname'\n" 159 153 if $dbplug_debug; 160 154 } elsif ($callback) { … … 176 170 chomp $err; 177 171 $err =~ s/\.$//; # remove a trailing . 178 print $outhandle "DBPlug : error evaluating `$statement'\n";172 print $outhandle "DBPlugin: error evaluating `$statement'\n"; 179 173 print $outhandle " $err (in $filename)\n"; 180 174 return 0; # there was an error reading the config file … … 185 179 $statement = ""; 186 180 } else { 187 print $outhandle "DBPlug : skipping statement `$statement'\n";181 print $outhandle "DBPlugin: skipping statement `$statement'\n"; 188 182 } 189 183 $statement = ""; … … 194 188 195 189 if (!defined($db)) { 196 print $outhandle "DBPlug : error: $filename does not specify a db!\n";190 print $outhandle "DBPlugin: error: $filename does not specify a db!\n"; 197 191 return 0; 198 192 } 199 193 if (!defined($sql_query)) { 200 print $outhandle "DBPlug : error: no SQL query specified!\n";194 print $outhandle "DBPlugin: error: no SQL query specified!\n"; 201 195 return 0; 202 196 } … … 205 199 206 200 if (!defined($dbhandle)) { 207 die "DBPlug : could not connect to database, exiting.\n";201 die "DBPlugin: could not connect to database, exiting.\n"; 208 202 } 209 203 if (defined($dbplug_debug) && $dbplug_debug==1) { 210 print $outhandle "DBPlug (debug): connected ok\n";204 print $outhandle "DBPlugin (debug): connected ok\n"; 211 205 } 212 206 … … 238 232 if (defined($db_to_greenstone_fields{$fieldname})) { 239 233 if (defined($dbplug_debug) && $dbplug_debug==1) { 240 print $outhandle "DBPlug (debug): mapping db field "234 print $outhandle "DBPlugin (debug): mapping db field " 241 235 . "'$fieldname' to " 242 236 . $db_to_greenstone_fields{$fieldname} . "\n"; … … 255 249 while (scalar(@row_array)) { 256 250 if (defined($dbplug_debug) && $dbplug_debug==1) { 257 print $outhandle "DBPlug (debug): retrieved a row from query\n";251 print $outhandle "DBPlugin (debug): retrieved a row from query\n"; 258 252 } 259 253 … … 263 257 my $cursection = $doc_obj->get_top_section(); 264 258 265 # if $language not set in config file, will use Bas Plug's default259 # if $language not set in config file, will use BasePlugin's default 266 260 if (defined($language)) { 267 261 $doc_obj->add_utf8_metadata($cursection, "Language", $language); 268 262 } 269 # if $encoding not set in config file, will use Bas Plug's default263 # if $encoding not set in config file, will use BasePlugin's default 270 264 if (defined($encoding)) { 271 265 # allow some common aliases … … 274 268 $doc_obj->add_utf8_metadata($cursection, "Encoding", $encoding); 275 269 } 276 $ doc_obj->add_utf8_metadata($cursection,277 "Source", &ghtml::dmsafe($db)); 270 $self->set_Source_metadata($doc_obj, $db, $encoding); 271 278 272 if ($self->{'cover_image'}) { 279 273 $self->associate_cover_image($doc_obj, $filename); … … 358 352 # check "$sth->err" if empty array for error 359 353 if ($statement_hand->err) { 360 print $outhandle "DBPlug : received error: \"" .354 print $outhandle "DBPlugin: received error: \"" . 361 355 $statement_hand->errstr . "\"\n"; 362 356 } … … 370 364 371 365 if (defined($dbplug_debug) && $dbplug_debug==1) { 372 print $outhandle "DBPlug : imported $count DB records as documents.\n";366 print $outhandle "DBPlugin: imported $count DB records as documents.\n"; 373 367 } 374 368 $count; -
gsdl/trunk/perllib/plugins/DSpacePlugin.pm
r15865 r15872 2 2 ########################################################################### 3 3 # 4 # DSpacePlug .pm -- plugin for import thecollection from DSpace4 # DSpacePlugin.pm -- plugin for importing a collection from DSpace 5 5 # 6 6 # A component of the Greenstone digital library software … … 8 8 # University of Waikato, New Zealand. 9 9 # 10 # Copyright (C) 1999New Zealand Digital Library Project10 # Copyright (C) 2004 New Zealand Digital Library Project 11 11 # 12 12 # This program is free software; you can redistribute it and/or modify … … 26 26 ########################################################################### 27 27 28 # DSpace Plug - 10/2004 29 # 30 # 28 31 29 # This plugin takes "contents" and dublin_core.xml file, which contain 32 30 # Metadata and lists of associated files for a particular document … … 47 45 # 48 46 49 package DSpacePlug ;50 51 use Bas Plug;47 package DSpacePlugin; 48 49 use BasePlugin; 52 50 use plugin; 53 #use ghtml;54 51 use XMLParser; 55 52 use strict; … … 57 54 58 55 sub BEGIN { 59 @DSpacePlug ::ISA = ('BasPlug');56 @DSpacePlugin::ISA = ('BasePlugin'); 60 57 } 61 58 62 59 my $arguments = 63 60 [ { 'name' => "process_exp", 64 'desc' => "{Bas Plug.process_exp}",61 'desc' => "{BasePlugin.process_exp}", 65 62 'type' => "string", 66 63 'deft' => &get_default_process_exp(), 67 64 'reqd' => "no" }, 68 65 { 'name' => "only_first_doc", 69 'desc' => "{DSpacePlug .only_first_doc}",66 'desc' => "{DSpacePlugin.only_first_doc}", 70 67 'type' => "flag", 71 68 'reqd' => "no" }, 72 69 { 'name' => "first_inorder_ext", 73 'desc' => "{DSpacePlug .first_inorder_ext}",70 'desc' => "{DSpacePlugin.first_inorder_ext}", 74 71 'type' => "string", 75 72 'reqd' => "no" }, 76 73 { 'name' => "first_inorder_mime", 77 'desc' => "{DSpacePlug .first_inorder_mime}",74 'desc' => "{DSpacePlugin.first_inorder_mime}", 78 75 'type' => "flag", 79 76 'reqd' => "no" }, 80 77 { 'name' => "block_exp", 81 'desc' => "{Bas Plug.block_exp}",78 'desc' => "{BasePlugin.block_exp}", 82 79 'type' => "regexp", 83 80 'deft' => &get_default_block_exp(), … … 85 82 86 83 87 my $options = { 'name' => "DSpacePlug ",88 'desc' => "{DSpacePlug .desc}",84 my $options = { 'name' => "DSpacePlugin", 85 'desc' => "{DSpacePlugin.desc}", 89 86 'inherits' => "yes", 90 87 'abstract' => "no", … … 104 101 push(@$pluginlist, $class); 105 102 106 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 107 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 108 109 $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 110 103 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 104 push(@{$hashArgOptLists->{"OptList"}},$options); 105 106 $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 107 108 if ($self->{'info_only'}) { 109 # don't worry about creating the XML parser as all we want is the 110 # list of plugin options 111 return bless $self, $class; 112 } 113 111 114 #create XML::Parser object for parsing dublin_core.xml files 112 115 my $parser = new XML::Parser('Style' => 'Stream', … … 252 255 } 253 256 254 print $outhandle "DSpacePlug : extracting metadata from $file\n"257 print $outhandle "DSpacePlugin: extracting metadata from $file\n" 255 258 if $self->{'verbosity'} > 1; 256 259 … … 262 265 263 266 if ($@) { 264 die "DSpacePlug : ERROR $filename is not a well formed dublin_core.xml file ($@)\n";267 die "DSpacePlugin: ERROR $filename is not a well formed dublin_core.xml file ($@)\n"; 265 268 } 266 269 … … 287 290 288 291 # Temporarily store associate file info in metadata table 289 # This will be removed in 'extra_metadata' in Bas Plugand used292 # This will be removed in 'extra_metadata' in BasePlugin and used 290 293 # to perform the actual file association (once the doc obj has 291 294 # been formed … … 313 316 314 317 315 # The DSpacePlug read() function. This function does all the right things 316 # to make general options work for a given plugin. It calls the process() 317 # function which does all the work specific to a plugin (like the old 318 # read functions used to do). Most plugins should define their own 319 # process() function and let this read() function keep control. 320 # 321 # DSpace overrides read() because there is no need to read the actual 322 # text of the file in, because the contents of the file is not text... 323 # 324 # Return number of files processed, undef if can't process 325 # Note that $base_dir might be "" and that $file might 326 # include directories 327 318 # The DSpacePlugin read() function. We are not actually reading any documents 319 # here, just blocking ones that have been processed by metadata read. 320 # 321 # Returns 0 for a file its blocking, undef for any other 328 322 sub read { 329 323 my $self = shift (@_); … … 340 334 return 0 if (defined $self->{'extra_blocks'}->{$filename}); 341 335 return undef; 342 }343 344 # do plugin specific processing of doc_obj345 sub process {346 my $self = shift (@_);347 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;348 my $outhandle = $self->{'outhandle'};349 350 return 1;351 336 } 352 337 -
gsdl/trunk/perllib/plugins/EmailPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # E MAILPlug.pm - a plugin for parsing email files3 # EmailPlugin.pm - a plugin for parsing email files 4 4 # 5 5 # A component of the Greenstone digital library software … … 27 27 28 28 29 # E MAILPlug29 # EmailPlugin 30 30 # 31 31 # by Gordon Paynter ([email protected]) … … 63 63 64 64 # 12/05/02 Added usage datastructure - John Thompson 65 package E MAILPlug;65 package EmailPlugin; 66 66 67 67 use strict; … … 69 69 70 70 71 use Split Plug;71 use SplitTextFile; 72 72 use unicode; # gs conv functions 73 73 use gsprintf 'gsprintf'; # translations … … 77 77 78 78 sub BEGIN { 79 @E MAILPlug::ISA = ('SplitPlug');79 @EmailPlugin::ISA = ('SplitTextFile'); 80 80 } 81 81 … … 83 83 my $arguments = 84 84 [ { 'name' => "process_exp", 85 'desc' => "{Bas Plug.process_exp}",85 'desc' => "{BasePlugin.process_exp}", 86 86 'type' => "regexp", 87 87 'reqd' => "no", 88 88 'deft' => &get_default_process_exp() }, 89 89 { 'name' => "no_attachments", 90 'desc' => "{E MAILPlug.no_attachments}",90 'desc' => "{EmailPlugin.no_attachments}", 91 91 'type' => "flag", 92 92 'reqd' => "no" }, 93 93 { 'name' => "headers", 94 'desc' => "{E MAILPlug.headers}",94 'desc' => "{EmailPlugin.headers}", 95 95 'type' => "flag", 96 96 'reqd' => "no" }, 97 97 { 'name' => "split_exp", 98 'desc' => "{E MAILPlug.split_exp}",98 'desc' => "{EmailPlugin.split_exp}", 99 99 'type' => "regexp", 100 100 'reqd' => "no", … … 102 102 ]; 103 103 104 my $options = { 'name' => "E MAILPlug",105 'desc' => "{E MAILPlug.desc}",104 my $options = { 'name' => "EmailPlugin", 105 'desc' => "{EmailPlugin.desc}", 106 106 'abstract' => "no", 107 107 'inherits' => "yes", 108 108 'args' => $arguments }; 109 109 110 # Create a new E MAILPlugobject with which to parse a file.111 # Accomplished by creating a new Bas Plugand using bless to112 # turn it into an E MAILPlug.110 # Create a new EmailPlugin object with which to parse a file. 111 # Accomplished by creating a new BasePlugin and using bless to 112 # turn it into an EmailPlugin. 113 113 114 114 sub new { … … 117 117 push(@$pluginlist, $class); 118 118 119 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}120 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};121 122 my $self = new Split Plug($pluginlist, $inputargs, $hashArgOptLists);119 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 120 push(@{$hashArgOptLists->{"OptList"}},$options); 121 122 my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists); 123 123 124 124 $self->{'assoc_filenames'} = {}; # to save attach names so we don't clobber … … 166 166 167 167 168 print STDERR "<Processing n='$file' p='E MAILPlug'>\n" if ($gli);169 170 gsprintf($outhandle, "E MAILPlug: {common.processing} $file\n")168 print STDERR "<Processing n='$file' p='EmailPlugin'>\n" if ($gli); 169 170 gsprintf($outhandle, "EmailPlugin: {common.processing} $file\n") 171 171 if $self->{'verbosity'} > 1; 172 172 … … 524 524 } 525 525 } else { 526 print $outhandle "E MAILPlug: (warning) couldn't parse MIME boundary\n";526 print $outhandle "EmailPlugin: (warning) couldn't parse MIME boundary\n"; 527 527 } 528 528 # parts start with "--$boundary" … … 540 540 # make sure it is only -- and whitespace 541 541 if ($last !~ /^\-\-\s*$/ms) { 542 print $outhandle "E MAILPlug: (warning) last part of MIME message isn't empty\n";542 print $outhandle "EmailPlugin: (warning) last part of MIME message isn't empty\n"; 543 543 } 544 544 foreach my $message_part (@message_parts) { … … 579 579 # or it was an empty message... 580 580 # do nothing... 581 gsprintf($outhandle, "{Bas Plug.empty_file} - empty body?\n");581 gsprintf($outhandle, "{BasePlugin.empty_file} - empty body?\n"); 582 582 } else { 583 583 $text = $part_text; … … 814 814 } 815 815 open (SAVE, ">$tmpdir/$save_filename") || 816 warn "E MAILPlug: Can't save attachment as $tmpdir/$save_filename: $!";816 warn "EmailPlugin: Can't save attachment as $tmpdir/$save_filename: $!"; 817 817 my $part_text = $message_part; 818 818 $part_text =~ s/(.*?)\r?\n\r?\n//s; # remove header … … 834 834 # &util::rm("$tmpdir/$save_filename"); 835 835 my $outhandle=$self->{'outhandle'}; 836 print $outhandle "E MAILPlug: saving attachment \"$filename\"\n"; #836 print $outhandle "EmailPlugin: saving attachment \"$filename\"\n"; # 837 837 838 838 # be nice if "download" was a translatable macro :( … … 905 905 # rfc2045 also allows binary, which we ignore (for now). 906 906 my $outhandle=$self->{'outhandle'}; 907 print $outhandle "E MAILPlug: unknown transfer encoding: $encoding\n";907 print $outhandle "EmailPlugin: unknown transfer encoding: $encoding\n"; 908 908 return ""; 909 909 } … … 1067 1067 if ($badbytesfound==1) { 1068 1068 # claims to be utf8, but it isn't! 1069 print $outhandle "E MAILPlug: Headers claim utf-8 but bad bytes "1069 print $outhandle "EmailPlugin: Headers claim utf-8 but bad bytes " 1070 1070 . "detected and removed.\n"; 1071 1071 … … 1092 1092 # 1252 has characters between 0x80 and 0x9f, 8859-1 doesn't 1093 1093 if ($$textref =~ m/[\x80-\x9f]/) { 1094 print $outhandle "E MAILPlug: Headers claim ISO charset but MS ";1094 print $outhandle "EmailPlugin: Headers claim ISO charset but MS "; 1095 1095 print $outhandle "codepage 1252 detected.\n"; 1096 1096 $charset = "windows_1252"; … … 1106 1106 # characters out here if this causes problems... 1107 1107 my $outhandle=$self->{'outhandle'}; 1108 print $outhandle "E MAILPlug: falling back to iso-8859-1\n";1108 print $outhandle "EmailPlugin: falling back to iso-8859-1\n"; 1109 1109 $$textref=&unicode::unicode2utf8(&unicode::convert2unicode("iso_8859_1",$textref)); 1110 1110 -
gsdl/trunk/perllib/plugins/ExcelPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # ExcelPlug .pm -- plugin for importing Microsoft Excel files.3 # ExcelPlugin.pm -- plugin for importing Microsoft Excel files. 4 4 # (currently only versions 95 and 97) 5 5 # … … 26 26 ########################################################################### 27 27 28 package ExcelPlug ;28 package ExcelPlugin; 29 29 30 use Convert ToPlug;30 use ConvertBinaryFile; 31 31 use strict; 32 32 no strict 'refs'; # allow filehandles to be variables and viceversa 33 33 34 34 sub BEGIN { 35 @ExcelPlug ::ISA = ('ConvertToPlug');35 @ExcelPlugin::ISA = ('ConvertBinaryFile'); 36 36 } 37 37 38 38 my $arguments = 39 39 [ { 'name' => "process_exp", 40 'desc' => "{Bas Plug.process_exp}",40 'desc' => "{BasePlugin.process_exp}", 41 41 'type' => "regexp", 42 42 'reqd' => "no", … … 44 44 ]; 45 45 46 my $options = { 'name' => "ExcelPlug ",47 'desc' => "{ExcelPlug .desc}",46 my $options = { 'name' => "ExcelPlugin", 47 'desc' => "{ExcelPlugin.desc}", 48 48 'abstract' => "no", 49 49 'inherits' => "yes", … … 59 59 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 60 60 61 my $self = new Convert ToPlug($pluginlist, $inputargs, $hashArgOptLists);61 my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 62 62 63 63 if ($self->{'info_only'}) { … … 66 66 } 67 67 68 $self->{'filename_extension'} = "xls"; 69 $self->{'file_type'} = "Excel"; 70 68 71 my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 69 if (!defined $secondary_plugin_options->{'HTMLPlug '}) {70 $secondary_plugin_options->{'HTMLPlug '} = [];72 if (!defined $secondary_plugin_options->{'HTMLPlugin'}) { 73 $secondary_plugin_options->{'HTMLPlugin'} = []; 71 74 } 72 my $html_options = $secondary_plugin_options->{'HTMLPlug '};75 my $html_options = $secondary_plugin_options->{'HTMLPlugin'}; 73 76 74 #$self->{'input_encoding'} = "utf8";75 #$self->{'extract_language'} = 1;76 77 push(@$html_options, "-input_encoding", "utf8"); 77 78 push(@$html_options,"-extract_language") if $self->{'extract_language'}; … … 82 83 } 83 84 84 sub convert_post_process 85 sub convert_post_process_old 85 86 { 86 87 my $self = shift (@_); … … 107 108 } 108 109 109 sub process {110 my $self = shift (@_);111 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;112 113 return $self->process_type("xls",$base_dir,$file,$doc_obj);114 }115 110 116 111 1; -
gsdl/trunk/perllib/plugins/FOXPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # FOXPlug .pm3 # FOXPlugin.pm 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 29 29 # the appropriate fields in the file. 30 30 31 # 12/05/02 Added usage datastructure - John Thompson 32 33 package FOXPlug; 34 35 use BasPlug; 31 package FOXPlugin; 32 33 use BasePlugin; 36 34 use util; 37 35 use doc; 38 36 use unicode; 39 use cnseg;40 # use gb;41 37 42 38 use strict; … … 45 41 46 42 sub BEGIN { 47 @FOXPlug ::ISA = ('BasPlug');43 @FOXPlugin::ISA = ('BasePlugin'); 48 44 } 49 45 50 46 my $arguments = 51 47 [ { 'name' => "process_exp", 52 'desc' => "{Bas Plug.process_exp}",48 'desc' => "{BasePlugin.process_exp}", 53 49 'type' => "regexp", 54 50 'reqd' => "no", 55 51 'deft' => &get_default_process_exp() }, 56 52 { 'name' => "block_exp", 57 'desc' => "{Bas Plug.block_exp}",53 'desc' => "{BasePlugin.block_exp}", 58 54 'type' => "regexp", 59 55 'reqd' => "no", 60 56 'deft' => &get_default_block_exp() } ]; 61 57 62 my $options = { 'name' => "FOXPlug ",63 'desc' => "{FOXPlug .desc}",58 my $options = { 'name' => "FOXPlugin", 59 'desc' => "{FOXPlugin.desc}", 64 60 'abstract' => "no", 65 61 'inherits' => "yes", … … 71 67 push(@$pluginlist, $class); 72 68 73 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}74 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};75 76 my $self = new Bas Plug($pluginlist, $inputargs, $hashArgOptLists);69 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 70 push(@{$hashArgOptLists->{"OptList"}},$options); 71 72 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 77 73 78 74 return bless $self, $class; … … 103 99 return $block_status if ((!defined $block_status) || ($block_status==0)); 104 100 105 print STDERR "<Processing n='$file' p='FOXPlug '>\n" if ($gli);106 print STDERR "FOXPlug : processing $file\n" if $self->{'verbosity'} > 1;101 print STDERR "<Processing n='$file' p='FOXPlugin'>\n" if ($gli); 102 print STDERR "FOXPlugin: processing $file\n" if $self->{'verbosity'} > 1; 107 103 108 104 my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+\.dbf$/i; … … 113 109 print STDERR "<ProcessingError n='$file' r='Could not read $fullname'>\n"; 114 110 } 115 print STDERR "FOXPlug ::read - couldn't read $fullname\n";111 print STDERR "FOXPlugin::read - couldn't read $fullname\n"; 116 112 return -1; # error in processing 117 113 } … … 125 121 print STDERR "<ProcessingError n='$file' r='EOF while reading database header'>\n"; 126 122 } 127 print STDERR "FOXPlug ::read - eof while reading database header\n";123 print STDERR "FOXPlugin::read - eof while reading database header\n"; 128 124 close (FOXBASEIN); 129 125 return -1; … … 145 141 print STDERR "<ProcessingError n='$file' r='Does not seem to be a Foxbase file'>\n"; 146 142 } 147 print STDERR "FOXPlug :read - $fullname doesn't seem to be a Foxbase file\n";143 print STDERR "FOXPlugin:read - $fullname doesn't seem to be a Foxbase file\n"; 148 144 return -1; 149 145 } … … 177 173 print STDERR "<ProcessingError n='$file' r='Could not read $dbtfullname'>\n"; 178 174 } 179 print STDERR "FOXPlug ::read - couldn't read $dbtfullname\n";175 print STDERR "FOXPlugin::read - couldn't read $dbtfullname\n"; 180 176 close (FOXBASEIN); 181 177 return -1; -
gsdl/trunk/perllib/plugins/FavouritesPlugin.pm
r15865 r15872 28 28 # especially SRCPlug by John McPherson Nov 2000 29 29 30 package FavouritesPlug ;30 package FavouritesPlugin; 31 31 32 use BasPlug;32 use ReadTextFile; 33 33 use strict; 34 34 no strict 'refs'; # allow filehandles to be variables and viceversa 35 35 36 36 sub BEGIN { 37 @FavouritesPlug ::ISA = ('BasPlug');37 @FavouritesPlugin::ISA = ('ReadTextFile'); 38 38 } 39 39 40 40 my $arguments = 41 41 [ { 'name' => "process_exp", 42 'desc' => "{ BasPlug.process_exp}",42 'desc' => "{ReadTextFile.process_exp}", 43 43 'type' => "regexp", 44 44 'deft' => &get_default_process_exp(), 45 45 'reqd' => "no" } ]; 46 46 47 my $options = { 'name' => "FavouritesPlug ",48 'desc' => " FavouritesPlug imports Internet Explorer style Favourites. Favourites are often found in the \"C:\\Documents and Settings\\[your username]\\Favorites\" folder on your computer, but can also be made by dragging a bookmark or location from your browser (any) to the desktop.",47 my $options = { 'name' => "FavouritesPlugin", 48 'desc' => "{FavouritesPlugin.desc}", 49 49 'abstract' => "no", 50 50 'inherits' => "yes", … … 57 57 push(@$pluginlist, $class); 58 58 59 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}60 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};59 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 60 push(@{$hashArgOptLists->{"OptList"}},$options); 61 61 62 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);62 my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists); 63 63 64 64 return bless $self, $class; … … 80 80 81 81 my $section = $doc_obj->get_top_section(); 82 print STDERR "<Processing n='$file' p='FavouritesPlug '>\n" if ($gli);83 print $outhandle "FavouritesPlug : processing $file\n" if $self->{'verbosity'} > 1;82 print STDERR "<Processing n='$file' p='FavouritesPlugin'>\n" if ($gli); 83 print $outhandle "FavouritesPlugin: processing $file\n" if $self->{'verbosity'} > 1; 84 84 85 85 # don't want mg to turn escape chars into actual values -
gsdl/trunk/perllib/plugins/GAPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # GAPlug .pm3 # GAPlugin.pm 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 29 29 # to their DTD. 30 30 31 package GAPlug ;32 33 use XMLPlug;31 package GAPlugin; 32 33 use ReadXMLFile; 34 34 35 35 use strict; … … 37 37 38 38 sub BEGIN { 39 @GAPlug ::ISA = ('XMLPlug');39 @GAPlugin::ISA = ('ReadXMLFile'); 40 40 } 41 41 … … 49 49 my $arguments = 50 50 [ { 'name' => "process_exp", 51 'desc' => "{Bas Plug.process_exp}",51 'desc' => "{BasePlugin.process_exp}", 52 52 'type' => "regexp", 53 53 'deft' => &get_default_process_exp(), 54 54 'reqd' => "no" } ]; 55 55 56 my $options = { 'name' => "GAPlug ",57 'desc' => "{GAPlug .desc}",56 my $options = { 'name' => "GAPlugin", 57 'desc' => "{GAPlugin.desc}", 58 58 'abstract' => "no", 59 59 'inherits' => "yes", … … 65 65 push(@$pluginlist, $class); 66 66 67 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}68 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};69 70 my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);67 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 68 push(@{$hashArgOptLists->{"OptList"}},$options); 69 70 my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists); 71 71 72 72 $self->{'section'} = ""; … … 106 106 107 107 my $outhandle = $self->{'outhandle'}; 108 print $outhandle "GAPlug : processing $self->{'file'}\n" if $self->{'verbosity'} > 1;109 print STDERR "<Processing n='$self->{'file'}' p='GAPlug '>\n" if $self->{'gli'};108 print $outhandle "GAPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 109 print STDERR "<Processing n='$self->{'file'}' p='GAPlugin'>\n" if $self->{'gli'}; 110 110 111 111 } -
gsdl/trunk/perllib/plugins/GISExtractor.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # GIS BasPlug.pm --base class to enhance plugins with GIS capabilities3 # GISExtractor.pm -- extension base class to enhance plugins with GIS capabilities 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 24 24 ########################################################################### 25 25 26 package GISBasPlug; 26 package GISExtractor; 27 28 use PrintInfo; 27 29 28 30 use util; 29 use locale;30 31 31 32 use gsprintf 'gsprintf'; … … 33 34 no strict 'refs'; # allow filehandles to be variables and viceversa 34 35 no strict 'subs'; 36 35 37 #field categories in DataBase files 36 38 #$LAT = 3; … … 42 44 43 45 BEGIN { 44 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'}; 45 } 46 47 48 use BasPlug; # uses BasPlug, but is not inherited 49 50 51 my $options = { 'name' => "GISBasPlug", 52 'desc' => "{GISBasPlug.desc}", 46 @GISExtractor::ISA = ('PrintInfo'); 47 } 48 49 50 my $arguments = 51 [ { 'name' => "extract_placenames", 52 'desc' => "{GISExtractor.extract_placenames}", 53 'type' => "flag", 54 'reqd' => "no" }, 55 { 'name' => "gazetteer", 56 'desc' => "{GISExtractor.gazetteer}", 57 'type' => "string", 58 'reqd' => "no" }, 59 { 'name' => "place_list", 60 'desc' => "{GISExtractor.place_list}", 61 'type' => "flag", 62 'reqd' => "no" } ]; 63 64 65 my $options = { 'name' => "GISExtractor", 66 'desc' => "{GISExtractor.desc}", 53 67 'abstract' => "yes", 54 'inherits' => "no" }; 68 'inherits' => "yes", 69 'args' => $arguments }; 55 70 56 71 57 72 sub new { 58 my $class = shift (@_); 59 my $plugin_name = shift (@_); 60 61 my $self = {}; 62 $self->{'plugin_type'} = "GISBasPlug"; 63 64 $self->{'option_list'} = [ $options ]; 73 my ($class) = shift (@_); 74 my ($pluginlist,$inputargs,$hashArgOptLists) = @_; 75 push(@$pluginlist, $class); 76 77 # can we indicate that these are not available if the map data is not there?? 78 #if (has_mapdata()) { 79 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 80 push(@{$hashArgOptLists->{"OptList"}},$options); 81 #} 82 my $self = new PrintInfo($pluginlist, $inputargs, $hashArgOptLists); 83 84 if ($self->{'extract_placenames'}) { 85 86 my $outhandle = $self->{'outhandle'}; 87 88 my $places_ref 89 = $self->loadGISDatabase($outhandle,$self->{'gazetteer'}); 90 91 if (!defined $places_ref) { 92 print $outhandle "Warning: Error loading mapdata gazetteer \"$self->{'gazetteer'}\"\n"; 93 print $outhandle " No placename extraction will take place.\n"; 94 $self->{'extract_placenames'} = undef; 95 } 96 else { 97 $self->{'places'} = $places_ref; 98 } 99 } 65 100 66 101 return bless $self, $class; 67 } 68 69 sub init { 70 } 71 72 sub print_xml_usage 102 103 } 104 105 106 sub extract_gis_metadata 73 107 { 74 BasPlug::print_xml_usage(@_); 75 } 76 77 sub print_xml 78 { 79 BasPlug::print_xml(@_); 80 } 81 82 sub print_txt_usage 83 { 84 BasPlug::print_txt_usage(@_); 85 } 86 87 sub determine_description_offset 88 { 89 BasPlug::determine_description_offset(@_); 90 } 91 sub print_plugin_usage 92 { 93 my $plugindesc = $options->{'desc'}; 94 95 if (defined($plugindesc)) { 96 gsprintf(STDERR, "$plugindesc\n\n"); 97 } 98 99 } 100 101 sub set_incremental 102 { 103 BasPlug::set_incremental(@_); 108 my $self = shift (@_); 109 my ($doc_obj) = @_; 110 111 if ($self->{'extract_placenames'}) { 112 my $thissection = $doc_obj->get_top_section(); 113 while (defined $thissection) { 114 my $text = $doc_obj->get_text($thissection); 115 $self->extract_placenames (\$text, $doc_obj, $thissection) if $text =~ /./; 116 $thissection = $doc_obj->get_next_section ($thissection); 117 } 118 } 119 104 120 } 105 121 … … 255 271 $doc_obj->associate_file($tempfile, "places.txt", "text/plain"); 256 272 $self->{'places_filename'} = $tempfile; 273 257 274 my %countries = (); 258 275 … … 283 300 284 301 #this line removes apostrophes from placenames (they break the javascript function) 285 $$textref =~ s/(javascript:popUp.*?)(\w)'(\w)/$1$2$3/g; 302 $$textref =~ s/(javascript:popUp.*?)(\w)'(\w)/$1$2$3/g; #' (to get emacs colours back) 286 303 287 304 #for displaying map of document, count num of places from each country … … 314 331 if ($self->{'verbosity'} > 2); 315 332 } 333 334 sub clean_up_temp_files { 335 my $self = shift(@_); 336 337 if(defined($self->{'places_filename'}) && -e $self->{'places_filename'}){ 338 &util::rm($self->{'places_filename'}); 339 } 340 $self->{'places_filename'} = undef; 341 342 } -
gsdl/trunk/perllib/plugins/GMLPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # GMLPlug .pm --3 # GMLPlugin.pm -- 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 29 29 # 12/05/02 Added usage datastructure - John Thompson 30 30 31 package GMLPlug ;32 33 use Bas Plug;31 package GMLPlugin; 32 33 use BasePlugin; 34 34 use util; 35 35 use doc; … … 39 39 40 40 sub BEGIN { 41 @GMLPlug ::ISA = ('BasPlug');41 @GMLPlugin::ISA = ('BasePlugin'); 42 42 } 43 43 44 44 my $arguments = 45 45 [ { 'name' => "process_exp", 46 'desc' => "{Bas Plug.process_exp}",46 'desc' => "{BasePlugin.process_exp}", 47 47 'type' => "regexp", 48 48 'deft' => &get_default_process_exp() } 49 49 ]; 50 50 51 my $options = { 'name' => "GMLPlug ",52 'desc' => "{GMLPlug .desc}",51 my $options = { 'name' => "GMLPlugin", 52 'desc' => "{GMLPlugin.desc}", 53 53 'abstract' => "no", 54 54 'inherits' => "yes", … … 60 60 push(@$pluginlist, $class); 61 61 62 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}63 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};64 65 my $self = new Bas Plug($pluginlist, $inputargs, $hashArgOptLists);62 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 63 push(@{$hashArgOptLists->{"OptList"}},$options); 64 65 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 66 66 67 67 return bless $self, $class; … … 88 88 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 89 89 90 print STDERR "<Processing n='$file' p='GMLPlug '>\n" if ($gli);91 print $outhandle "GMLPlug : processing $file\n";90 print STDERR "<Processing n='$file' p='GMLPlugin'>\n" if ($gli); 91 print $outhandle "GMLPlugin: processing $file\n"; 92 92 93 93 my $parent_dir = $file; … … 99 99 print STDERR "<ProcessingError n='$file' r='Could not read $filename'>\n"; 100 100 } 101 print $outhandle "GMLPlug ::read - couldn't read $filename\n";101 print $outhandle "GMLPlugin::read - couldn't read $filename\n"; 102 102 return -1; 103 103 } … … 130 130 if ($gml =~ /^\s*([^>]*)>(.*)$/so) { 131 131 $tags = $1 if defined $1; 132 $text = &GMLPlug ::_unescape_text($2);132 $text = &GMLPlugin::_unescape_text($2); 133 133 134 134 } else { 135 print $outhandle "GMLPlug ::read - error in file $filename\n";135 print $outhandle "GMLPlugin::read - error in file $filename\n"; 136 136 print $outhandle "text: \"$gml\"\n"; 137 137 last; … … 158 158 # could be stored as either attributes or .... 159 159 while ((defined $tags) && ($tags =~ s/^\s*(\S+)=\"([^\"]*)\"//o)) { 160 $doc_obj->add_utf8_metadata($section, $1, &GMLPlug ::_unescape_text($2))160 $doc_obj->add_utf8_metadata($section, $1, &GMLPlugin::_unescape_text($2)) 161 161 if (defined $1 and defined $2); 162 162 … … 183 183 $tagname =~ s/^&\#47;/\//; 184 184 185 $doc_obj->add_utf8_metadata($section, $tagname, &GMLPlug ::_unescape_text($tagvalue));185 $doc_obj->add_utf8_metadata($section, $tagname, &GMLPlugin::_unescape_text($tagvalue)); 186 186 } 187 187 } -
gsdl/trunk/perllib/plugins/HBPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # HBPlug .pm --3 # HBPlugin.pm -- 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 38 38 # Humanity Library collections 39 39 40 package HBPlug ;40 package HBPlugin; 41 41 42 42 use ghtml; 43 use Bas Plug;43 use BasePlugin; 44 44 use unicode; 45 45 use util; … … 50 50 51 51 sub BEGIN { 52 @HBPlug::ISA = ('BasPlug'); 53 } 54 52 @HBPlugin::ISA = ('BasePlugin'); 53 } 54 my $encoding_list = 55 [ { 'name' => "ascii", 56 'desc' => "{ReadTextFile.input_encoding.ascii}" }, 57 { 'name' => "iso_8859_1", 58 'desc' => "Latin1 (western languages)" } ]; 59 55 60 my $arguments = 56 61 [ { 'name' => "process_exp", 57 'desc' => "{Bas Plug.process_exp}",62 'desc' => "{BasePlugin.process_exp}", 58 63 'type' => "regexp", 59 64 'reqd' => "no", 60 'deft' => &get_default_process_exp() } 65 'deft' => &get_default_process_exp() }, 66 { 'name' => "input_encoding", 67 'desc' => "{ReadTextFile.input_encoding}", 68 'type' => "enum", 69 'deft' => "iso_8859_1", 70 'list' => $encoding_list, 71 'reqd' => "no" } 61 72 ]; 62 73 63 my $options = { 'name' => "HBPlug ",64 'desc' => "{HBPlug .desc}",74 my $options = { 'name' => "HBPlugin", 75 'desc' => "{HBPlugin.desc}", 65 76 'abstract' => "no", 66 77 'inherits' => "yes", … … 72 83 push(@$pluginlist, $class); 73 84 74 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}75 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};76 77 my $self = new Bas Plug($pluginlist, $inputargs, $hashArgOptLists);85 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 86 push(@{$hashArgOptLists->{"OptList"}},$options); 87 88 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 78 89 79 90 return bless $self, $class; 80 91 } 81 92 82 sub init {83 my $self = shift (@_);84 my ($verbosity, $outhandle) = @_;85 86 $self->BasPlug::init($verbosity, $outhandle);87 $self->{'input_encoding'} = "iso_8859_1";88 89 # this plugin only handles ascii encodings90 if ($self->{'input_encoding'} !~ /^(iso_8859_1|ascii)$/) {91 die "ERROR: HBPlug can handle only iso_8859_1 or ascii encodings.\n" .92 $self->{'input_encoding'} . " is not an acceptable input_encoding value\n";93 }94 }95 96 93 # this is included only to prevent warnings being printed out 97 # from Bas Plug::init. The process_exp is not used by this plugin94 # from BasePlugin::init. The process_exp is not used by this plugin 98 95 sub get_default_process_exp { 99 96 my $self = shift (@_); … … 148 145 if ($line =~ /<font [^>]*?face\s*=\s*\"?(\w+)\"?/i) { 149 146 my $font = $1; 150 print $outhandle "HBPlug ::HB_gettext - warning removed font $font\n"147 print $outhandle "HBPlugin::HB_gettext - warning removed font $font\n" 151 148 if ($font !~ /^arial$/i); 152 149 } … … 217 214 } 218 215 219 # if input_encoding is ascii we can call add_utf8_metadata220 # directly but if it's iso_8859_1 (the default) we need to call221 # add_metadata so that the ascii2utf8 conversion is done first222 # this should speed things up a little if processing an ascii only223 # document with input_encoding set to ascii224 sub HB_add_metadata {225 my $self = shift (@_);226 my ($doc_obj, $cursection, $field, $value) = @_;227 228 # All text should now be in utf-8229 # if ($self->{'input_encoding'} eq "ascii") {230 $doc_obj->add_utf8_metadata ($cursection, $field, $value);231 # } else {232 # $doc_obj->add_metadata ($cursection, $field, $value);233 # }234 }235 236 216 # return number of files processed, undef if can't process 237 217 # Note that $base_dir might be "" and that $file might … … 251 231 return undef unless -e $htmlfile; 252 232 253 print STDERR "<Processing n='$file' p='HBPlug '>\n" if ($gli);254 print $outhandle "HBPlug : processing $file\n";233 print STDERR "<Processing n='$file' p='HBPlugin'>\n" if ($gli); 234 print $outhandle "HBPlugin: processing $file\n"; 255 235 256 236 # read in the file and do basic html cleaning (removing header etc) … … 276 256 # $metadata->{$field} may be an array reference 277 257 if (ref ($metadata->{$field}) eq "ARRAY") { 278 map { 279 $ self->HB_add_metadata ($doc_obj, $cursection, $field, $_);258 map { 259 $doc_obj->add_utf8_metadata($cursection, $field, $_); 280 260 } @{$metadata->{$field}}; 281 261 } else { 282 $ self->HB_add_metadata ($doc_obj,$cursection, $field, $metadata->{$field});262 $doc_obj->add_utf8_metadata($cursection, $field, $metadata->{$field}); 283 263 } 284 264 } … … 321 301 322 302 # add the metadata to this section 323 $ self->HB_add_metadata ($doc_obj,$cursection, "Title", $title);303 $doc_obj->add_utf8_metadata($cursection, "Title", $title); 324 304 325 305 # clean up the section html … … 332 312 333 313 # add the text for this section 334 # All read text should now be in utf-8335 # if ($self->{'input_encoding'} eq "ascii") {336 314 $doc_obj->add_utf8_text ($cursection, $sectiontext); 337 # } else {338 # $doc_obj->add_text ($cursection, $sectiontext);339 # }340 315 } else { 341 316 print $outhandle "WARNING - leftover text\n" , $self->shorten($html), -
gsdl/trunk/perllib/plugins/HTMLPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # HTMLPlug .pm -- basic html plugin3 # HTMLPlugin.pm -- basic html plugin 4 4 # 5 5 # A component of the Greenstone digital library software … … 34 34 # 35 35 36 package HTMLPlug; 37 38 use BasPlug; 36 package HTMLPlugin; 37 38 use ReadTextFile; 39 use HBPlugin; 39 40 use ghtml; 40 41 use unicode; … … 46 47 47 48 sub BEGIN { 48 @HTMLPlug ::ISA = ('BasPlug');49 @HTMLPlugin::ISA = ('ReadTextFile', 'HBPlugin'); 49 50 } 50 51 … … 54 55 my $arguments = 55 56 [ { 'name' => "process_exp", 56 'desc' => "{Bas Plug.process_exp}",57 'desc' => "{BasePlugin.process_exp}", 57 58 'type' => "regexp", 58 59 'deft' => &get_default_process_exp() }, 59 60 { 'name' => "block_exp", 60 'desc' => "{Bas Plug.block_exp}",61 'desc' => "{BasePlugin.block_exp}", 61 62 'type' => 'regexp', 62 63 'deft' => &get_default_block_exp() }, 63 64 { 'name' => "nolinks", 64 'desc' => "{HTMLPlug .nolinks}",65 'desc' => "{HTMLPlugin.nolinks}", 65 66 'type' => "flag" }, 66 67 { 'name' => "keep_head", 67 'desc' => "{HTMLPlug .keep_head}",68 'desc' => "{HTMLPlugin.keep_head}", 68 69 'type' => "flag" }, 69 70 { 'name' => "no_metadata", 70 'desc' => "{HTMLPlug .no_metadata}",71 'desc' => "{HTMLPlugin.no_metadata}", 71 72 'type' => "flag" }, 72 73 { 'name' => "metadata_fields", 73 'desc' => "{HTMLPlug .metadata_fields}",74 'desc' => "{HTMLPlugin.metadata_fields}", 74 75 'type' => "string", 75 76 'deft' => "Title" }, 76 77 { 'name' => "hunt_creator_metadata", 77 'desc' => "{HTMLPlug .hunt_creator_metadata}",78 'desc' => "{HTMLPlugin.hunt_creator_metadata}", 78 79 'type' => "flag" }, 79 80 { 'name' => "file_is_url", 80 'desc' => "{HTMLPlug .file_is_url}",81 'desc' => "{HTMLPlugin.file_is_url}", 81 82 'type' => "flag" }, 82 83 { 'name' => "assoc_files", 83 'desc' => "{HTMLPlug .assoc_files}",84 'desc' => "{HTMLPlugin.assoc_files}", 84 85 'type' => "regexp", 85 86 'deft' => &get_default_block_exp() }, 86 87 { 'name' => "rename_assoc_files", 87 'desc' => "{HTMLPlug .rename_assoc_files}",88 'desc' => "{HTMLPlugin.rename_assoc_files}", 88 89 'type' => "flag" }, 89 90 { 'name' => "title_sub", 90 'desc' => "{HTMLPlug .title_sub}",91 'desc' => "{HTMLPlugin.title_sub}", 91 92 'type' => "string", 92 93 'deft' => "" }, 93 94 { 'name' => "description_tags", 94 'desc' => "{HTMLPlug .description_tags}",95 'desc' => "{HTMLPlugin.description_tags}", 95 96 'type' => "flag" }, 96 97 # retain this for backward compatibility (w3mir option was replaced by 97 98 # file_is_url) 98 99 { 'name' => "w3mir", 99 # 'desc' => "{HTMLPlug .w3mir}",100 # 'desc' => "{HTMLPlugin.w3mir}", 100 101 'type' => "flag", 101 102 'hiddengli' => "yes"}, 102 103 { 'name' => "no_strip_metadata_html", 103 'desc' => "{HTMLPlug .no_strip_metadata_html}",104 'desc' => "{HTMLPlugin.no_strip_metadata_html}", 104 105 'type' => "string", 105 106 'deft' => "", 106 107 'reqd' => "no"}, 107 108 { 'name' => "sectionalise_using_h_tags", 108 'desc' => "{HTMLPlug .sectionalise_using_h_tags}",109 'desc' => "{HTMLPlugin.sectionalise_using_h_tags}", 109 110 'type' => "flag" }, 110 111 { 'name' => "use_realistic_book", 111 'desc' => "{HTMLPlug .tidy_html}",112 'desc' => "{HTMLPlugin.tidy_html}", 112 113 'type' => "flag"}, 113 { 'name' => "is_old_HDL_tags", 114 'desc' => "{HTMLPlug.old_style_HDL}", 115 'type' => "flag"}, 116 { 'name' => "no_image_links", # in future think about removing this option, 117 'desc' => "{HTMLPlug.no_image_links}", # since it has become the default behaviour 118 'type' => "flag"}, 114 { 'name' => "old_style_HDL", 115 'desc' => "{HTMLPlugin.old_style_HDL}", 116 'type' => "flag"} 119 117 ]; 120 118 121 my $options = { 'name' => "HTMLPlug ",122 'desc' => "{HTMLPlug .desc}",119 my $options = { 'name' => "HTMLPlugin", 120 'desc' => "{HTMLPlugin.desc}", 123 121 'abstract' => "no", 124 122 'inherits' => "yes", … … 506 504 if (($self->{'tidy_html'}) || ($self->{'old_style_HDL'})) 507 505 { 508 # because the document has to be sectionalized set the description tags 509 $self->{'description_tags'} = 1; 510 511 # set the file to be tidied 512 $input_filename = &util::filename_cat($base_dir,$file) if $base_dir =~ /\w/; 513 514 # get the tidied file 515 #my $tidy_filename = $self->tmp_tidy_file($input_filename); 516 my $tidy_filename = $self->convert_tidy_or_oldHDL_file($input_filename); 517 518 # derive tmp filename from input filename 519 my ($tailname, $dirname, $suffix) = &File::Basename::fileparse($tidy_filename, "\\.[^\\.]+\$"); 506 # because the document has to be sectionalized set the description tags 507 $self->{'description_tags'} = 1; 520 508 521 # set the new input file and base_dir to be from the tidied file 522 $file = "$tailname$suffix"; 523 $base_dir = $dirname; 509 # set the file to be tidied 510 $input_filename = &util::filename_cat($base_dir,$file) if $base_dir =~ /\w/; 511 512 # get the tidied file 513 #my $tidy_filename = $self->tmp_tidy_file($input_filename); 514 my $tidy_filename = $self->convert_tidy_or_oldHDL_file($input_filename); 515 516 # derive tmp filename from input filename 517 my ($tailname, $dirname, $suffix) = &File::Basename::fileparse($tidy_filename, "\\.[^\\.]+\$"); 518 519 # set the new input file and base_dir to be from the tidied file 520 $file = "$tailname$suffix"; 521 $base_dir = $dirname; 524 522 } 525 523 526 524 # call the parent read_into_doc_obj 527 my ($process_status,$doc_obj) = &BasPlug::read_into_doc_obj($self,$pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli);525 my ($process_status,$doc_obj) = $self->SUPER::read_into_doc_obj($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli); 528 526 529 527 return ($process_status,$doc_obj); … … 535 533 push(@$pluginlist, $class); 536 534 537 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}538 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};535 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 536 push(@{$hashArgOptLists->{"OptList"}},$options); 539 537 540 538 541 my $self = (defined $hashArgOptLists)? new BasPlug($pluginlist,$inputargs,$hashArgOptLists): new BasPlug($pluginlist,$inputargs);539 my $self = new ReadTextFile($pluginlist,$inputargs,$hashArgOptLists); 542 540 543 541 if ($self->{'w3mir'}) { … … 618 616 my $outhandle = $self->{'outhandle'}; 619 617 620 print STDERR "<Processing n='$file' p='HTMLPlug '>\n" if ($gli);621 622 print $outhandle "HTMLPlug : processing $file\n"618 print STDERR "<Processing n='$file' p='HTMLPlugin'>\n" if ($gli); 619 620 print $outhandle "HTMLPlugin: processing $file\n" 623 621 if $self->{'verbosity'} > 1; 624 622 … … 669 667 # URL metadata (even invalid ones) are used to support internal 670 668 # links, so even if 'file_is_url' is off, still need to store info 671 672 $file = &BasPlug::filename_to_metadata($self, $file); # ensures filename is in UTF8 character encoding673 my $web_url = "http://$ file";674 $doc_obj->add_utf8_metadata($cursection, "URL", $web_url); # will eventually ensure it is utf8 anyway669 670 my $utf8_file = $self->filename_to_utf8_metadata($file); 671 my $web_url = "http://$utf8_file"; 672 $doc_obj->add_utf8_metadata($cursection, "URL", $web_url); 675 673 676 674 if ($self->{'file_is_url'}) { … … 752 750 } 753 751 if ($cursection ne "") { 754 print $outhandle "HTMLPlug : WARNING: $file contains unmatched <Section></Section> tags\n";752 print $outhandle "HTMLPlugin: WARNING: $file contains unmatched <Section></Section> tags\n"; 755 753 } 756 754 … … 760 758 if (!$found_something) { 761 759 if ($self->{'verbosity'} > 2) { 762 print $outhandle "HTMLPlug : WARNING: $file appears to contain no Section tags so\n";760 print $outhandle "HTMLPlugin: WARNING: $file appears to contain no Section tags so\n"; 763 761 print $outhandle " will be processed as a single section document\n"; 764 762 } … … 775 773 776 774 } else { 777 print $outhandle "HTMLPlug : WARNING: $file contains the following text outside\n";775 print $outhandle "HTMLPlugin: WARNING: $file contains the following text outside\n"; 778 776 print $outhandle " of the final closing </Section> tag. This text will\n"; 779 777 print $outhandle " be ignored."; … … 795 793 # been processed already but we should print the warning 796 794 # as above and extract metadata 797 print $outhandle "HTMLPlug : WARNING: $file appears to contain no Section tags and\n";795 print $outhandle "HTMLPlugin: WARNING: $file appears to contain no Section tags and\n"; 798 796 print $outhandle " is blank or empty. Metadata will be assigned if present.\n"; 799 797 } … … 892 890 # trap images 893 891 894 # Previously, by default, HTMLPlug would embed <img> tags inside anchor tags892 # Previously, by default, HTMLPlugin would embed <img> tags inside anchor tags 895 893 # i.e. <a href="image><img src="image"></a> in order to overcome a problem that 896 894 # turned regular text succeeding images into links. That is, by embedding <imgs> … … 907 905 908 906 # If at any time, there is a need for having images embedded in <a> anchor tags, 909 # then it might be better to turn that into an HTMLPlug option rather than make907 # then it might be better to turn that into an HTMLPlugin option rather than make 910 908 # it the default behaviour. Also, eventually, no_image_links needs to become 911 # a deprecated option for HTMLPlug as it has now become the default behaviour.909 # a deprecated option for HTMLPlugin as it has now become the default behaviour. 912 910 913 911 #if(!$self->{'no_image_links'}){ 914 912 $$textref =~ s/(<(?:img|embed|table|tr|td)[^>]*?(?:src|background)\s*=\s*)([\"][^\"]+[\"]|[\'][^\']+[\']|[^\s\/>]+)([^>]*>)/ 915 913 $self->replace_images ($1, $2, $3, $base_dir, $file, $doc_obj, $cursection)/isge; 916 914 #} 917 915 … … 936 934 $back="\"$back"; 937 935 } 936 938 937 $link =~ s/\n/ /g; 939 938 … … 1074 1073 1075 1074 my ($before_hash, $hash_part) = $link =~ /^([^\#]*)(\#?.*)$/; 1076 1075 1077 1076 $hash_part = "" if !defined $hash_part; 1078 1077 if (!defined $before_hash || $before_hash !~ /[\w\.\/]/) { 1079 1078 my $outhandle = $self->{'outhandle'}; 1080 print $outhandle "HTMLPlug : ERROR - badly formatted tag ignored ($link)\n"1079 print $outhandle "HTMLPlugin: ERROR - badly formatted tag ignored ($link)\n" 1081 1080 if $self->{'verbosity'}; 1082 1081 return ($link, "", 0); … … 1257 1256 1258 1257 if (!defined $tag) { 1259 print $outhandle "HTMLPlug : can't find NAME in \"$metatag\"\n";1258 print $outhandle "HTMLPlugin: can't find NAME in \"$metatag\"\n"; 1260 1259 next; 1261 1260 } … … 1274 1273 } 1275 1274 if (!defined $value) { 1276 print $outhandle "HTMLPlug : can't find VALUE in \"$metatag\"\n";1275 print $outhandle "HTMLPlugin: can't find VALUE in \"$metatag\"\n"; 1277 1276 next; 1278 1277 } … … 1425 1424 1426 1425 1427 # Extend the BasPlugread_file so that strings like é are1426 # Extend read_file so that strings like é are 1428 1427 # converted to UTF8 internally. 1429 1428 # … … 1432 1431 1433 1432 sub read_file { 1434 my ($self, $filename, $encoding, $language, $textref) = @_; 1435 1436 &BasPlug::read_file($self, $filename, $encoding, $language, $textref); 1433 my $self = shift(@_); 1434 my ($filename, $encoding, $language, $textref) = @_; 1435 1436 $self->SUPER::read_file($filename, $encoding, $language, $textref); 1437 1437 1438 1438 # Convert entities to their UTF8 equivalents -
gsdl/trunk/perllib/plugins/ISISPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # ISISPlug .pm -- A plugin for CDS/ISIS databases3 # ISISPlugin.pm -- A plugin for CDS/ISIS databases 4 4 # 5 5 # A component of the Greenstone digital library software … … 25 25 ########################################################################### 26 26 27 package ISISPlug ;27 package ISISPlugin; 28 28 29 29 30 30 use multiread; 31 use Split Plug;31 use SplitTextFile; 32 32 33 33 use strict; 34 34 no strict 'refs'; # allow filehandles to be variables and viceversa 35 35 36 # ISISPlug is a sub-class of SplitPlug.36 # ISISPlugin is a sub-class of SplitTextFile. 37 37 sub BEGIN { 38 @ISISPlug ::ISA = ('SplitPlug');38 @ISISPlugin::ISA = ('SplitTextFile'); 39 39 } 40 40 … … 42 42 my $arguments = 43 43 [ { 'name' => "process_exp", 44 'desc' => "{Bas Plug.process_exp}",44 'desc' => "{BasePlugin.process_exp}", 45 45 'type' => "regexp", 46 46 'reqd' => "no", 47 47 'deft' => &get_default_process_exp() }, 48 48 { 'name' => "block_exp", 49 'desc' => "{Bas Plug.block_exp}",49 'desc' => "{BasePlugin.block_exp}", 50 50 'type' => "regexp", 51 51 'reqd' => "no", … … 53 53 'hiddengli' => "yes" }, 54 54 { 'name' => "split_exp", 55 'desc' => "{Split Plug.split_exp}",55 'desc' => "{SplitTextFile.split_exp}", 56 56 'type' => "regexp", 57 57 'reqd' => "no", … … 61 61 # The interesting options 62 62 { 'name' => "entry_separator", 63 'desc' => "{ISISPlug .entry_separator}",63 'desc' => "{ISISPlugin.entry_separator}", 64 64 'type' => "string", 65 65 'reqd' => "no", 66 66 'deft' => "<br>" }, 67 67 { 'name' => "subfield_separator", 68 'desc' => "{ISISPlug .subfield_separator}",68 'desc' => "{ISISPlugin.subfield_separator}", 69 69 'type' => "string", 70 70 'reqd' => "no", … … 72 72 ]; 73 73 74 my $options = { 'name' => "ISISPlug ",75 'desc' => "{ISISPlug .desc}",74 my $options = { 'name' => "ISISPlugin", 75 'desc' => "{ISISPlugin.desc}", 76 76 'abstract' => "no", 77 77 'inherits' => "yes", … … 104 104 push(@$pluginlist, $class); 105 105 106 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}107 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};108 109 my $self = new Split Plug($pluginlist, $inputargs, $hashArgOptLists);106 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 107 push(@{$hashArgOptLists->{"OptList"}},$options); 108 109 my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists); 110 110 111 111 if ($self->{'info_only'}) { … … 157 157 158 158 my $reader = new multiread(); 159 $reader->set_handle('ISISPlug ::FILE');159 $reader->set_handle('ISISPlugin::FILE'); 160 160 $reader->set_encoding($encoding); 161 161 $reader->read_file($textref); … … 186 186 187 187 # Report that we're processing the file 188 print STDERR "\n<Processing n='$file' p='ISISPlug '>\n" if ($gli);188 print STDERR "\n<Processing n='$file' p='ISISPlugin'>\n" if ($gli); 189 189 print $outhandle "IsisPlug: processing $file\n" if ($self->{'verbosity'}) > 1; 190 190 … … 348 348 my $fdtfiletext = ""; 349 349 my $reader = new multiread(); 350 $reader->set_handle('ISISPlug ::FDT_FILE');350 $reader->set_handle('ISISPlugin::FDT_FILE'); 351 351 $reader->set_encoding($encoding); 352 352 $reader->read_file($fdtfiletext); -
gsdl/trunk/perllib/plugins/ImagePlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # ImagePlug .pm -- simple text plugin3 # ImagePlugin.pm -- simple text plugin 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 24 24 ########################################################################### 25 25 26 package ImagePlug ;26 package ImagePlugin; 27 27 28 use BasPlug; 28 use BasePlugin; 29 use ImageConverter; 29 30 30 31 use strict; … … 32 33 33 34 sub BEGIN { 34 @ImagePlug ::ISA = ('BasPlug');35 @ImagePlugin::ISA = ('BasePlugin', 'ImageConverter'); 35 36 } 36 37 37 38 my $arguments = 38 39 [ { 'name' => "process_exp", 39 'desc' => "{Bas Plug.process_exp}",40 'desc' => "{BasePlugin.process_exp}", 40 41 'type' => "regexp", 41 42 'deft' => &get_default_process_exp(), 42 43 'reqd' => "no" }, 43 { 'name' => "cache_generated_images", 44 'desc' => "{ImagePlug.cache_generated_image}", 45 'type' => "flag", 46 'reqd' => "no" }, 47 { 'name' => "noscaleup", 48 'desc' => "{ImagePlug.noscaleup}", 49 'type' => "flag", 50 'reqd' => "no" }, 51 { 'name' => "nothumbnail", 52 'desc' => "{ImagePlug.generatethumbnail}", 53 'type' => "flag", 54 'reqd' => "no" }, 55 { 'name' => "thumbnailsize", 56 'desc' => "{ImagePlug.thumbnailsize}", 57 'type' => "int", 58 'deft' => "100", 59 'range' => "1,", 60 'reqd' => "no" }, 61 { 'name' => "thumbnailtype", 62 'desc' => "{ImagePlug.thumbnailtype}", 63 'type' => "string", 64 'deft' => "gif", 65 'reqd' => "no" }, 66 { 'name' => "noscreenview", 67 'desc' => "{ImagePlug.generatescreenview}", 68 'type' => "flag", 69 'reqd' => "no" }, 70 { 'name' => "screenviewsize", 71 'desc' => "{ImagePlug.screenviewsize}", 72 'type' => "int", 73 'deft' => "0", 74 'range' => "1,", 75 'reqd' => "no" }, 76 { 'name' => "screenviewtype", 77 'desc' => "{ImagePlug.screenviewtype}", 78 'type' => "string", 79 'deft' => "jpg", 80 'reqd' => "no" }, 81 { 'name' => "converttotype", 82 'desc' => "{ImagePlug.converttotype}", 83 'type' => "string", 84 'deft' => "", 85 'reqd' => "no" }, 86 { 'name' => "minimumsize", 87 'desc' => "{ImagePlug.minimumsize}", 88 'type' => "int", 89 'deft' => "100", 90 'range' => "1,", 91 'reqd' => "no" } ]; 44 ]; 92 45 93 my $options = { 'name' => "ImagePlug ",94 'desc' => "{ImagePlug .desc}",46 my $options = { 'name' => "ImagePlugin", 47 'desc' => "{ImagePlugin.desc}", 95 48 'abstract' => "no", 96 49 'inherits' => "yes", … … 104 57 push(@$pluginlist, $class); 105 58 106 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}107 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};59 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 60 push(@{$hashArgOptLists->{"OptList"}},$options); 108 61 109 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);110 $self->{'tmp_file_paths'} = ();62 new ImageConverter($pluginlist, $inputargs, $hashArgOptLists); 63 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 111 64 112 # Check that ImageMagick is installed and available on the path (except for Windows 95/98) 113 if (!($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT())) { 114 my $result = `identify 2>&1`; 115 if ($? == -1 || $? == 256) { # Linux and Windows return different values for "program not found" 116 $self->{'imagemagick_not_installed'} = 1; 117 } 118 } 65 return bless $self, $class; 66 } 119 67 120 121 return bless $self, $class; 68 sub init { 69 my $self = shift (@_); 70 my ($verbosity, $outhandle, $failhandle) = @_; 71 72 $self->SUPER::init(@_); 73 $self->ImageConverter::init(); 122 74 } 123 75 … … 136 88 return; 137 89 } 138 # Create the thumbnail and screenview images, and discover the Image's139 # size, width, and height using the convert utility.140 141 sub generate_images142 {143 my $self = shift (@_);144 my $filename = shift (@_); # filename with full path145 my $file = shift (@_); # filename without path146 my $doc_obj = shift (@_);147 my $section = $doc_obj->get_top_section();148 149 my $verbosity = $self->{'verbosity'};150 my $outhandle = $self->{'outhandle'};151 152 # check the filename is okay153 return 0 if ($file eq "" || $filename eq "");154 155 # Code now extended to quote filenames in 'convert' commnads156 # Allows spaces in filenames, but note needs spaces to be escaped in URL as well157 # if ($filename =~ m/ /) {158 # print $outhandle "ImagePlug: \"$filename\" contains a space. choking.\n";159 # return undef;160 # }161 162 my $minimumsize = $self->{'minimumsize'};163 if (defined $minimumsize && (-s $filename < $minimumsize)) {164 print $outhandle "ImagePlug: \"$filename\" too small, skipping\n"165 if ($verbosity > 1);166 }167 168 169 # Convert the image to a new type (if required).170 my $converttotype = $self->{'converttotype'};171 my $originalfilename = ""; # only set if we do a conversion172 my $type = "unknown";173 174 if ($converttotype ne "" && $filename !~ m/$converttotype$/) {175 $originalfilename = $filename;176 177 my $result = $self->convert($originalfilename, $converttotype, "", "");178 ($filename) = ($result =~ /=>(.*\.$converttotype)/);179 180 $type = $converttotype;181 $file =~ s/\..*$/\.$type/;182 }183 184 185 # Add the image metadata186 my $url = $file;187 188 ##not know why it is required at the first place, it seems all works fine without it, so I comment it out189 ##$url =~ s/ /%20/g;190 191 my $utf8_filename_meta = $self->filename_to_metadata($url);192 $doc_obj->add_utf8_metadata ($section, "Image", $utf8_filename_meta);193 194 # Also want to set filename as 'Source' metadata to be195 # consistent with other plugins196 $doc_obj->add_utf8_metadata ($section, "Source", $utf8_filename_meta);197 198 my ($image_type, $image_width, $image_height, $image_size)199 = &identify($filename, $outhandle, $verbosity);200 201 if ($image_type ne " ") {202 $type = $image_type;203 }204 205 $doc_obj->add_metadata ($section, "FileFormat", $type);206 $doc_obj->add_metadata ($section, "FileSize", $image_size);207 208 $doc_obj->add_metadata ($section, "ImageType", $image_type);209 $doc_obj->add_metadata ($section, "ImageWidth", $image_width);210 $doc_obj->add_metadata ($section, "ImageHeight", $image_height);211 $doc_obj->add_metadata ($section, "ImageSize", $image_size);212 $doc_obj->add_metadata ($section, "NoText", "1");213 214 $doc_obj->add_metadata ($section, "srclink",215 "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\">");216 $doc_obj->add_metadata ($section, "/srclink", "</a>");217 218 $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\" width=100>");219 220 221 # Add the image as an associated file222 $doc_obj->associate_file($filename,$file,"image/$type",$section);223 224 225 if (!$self->{'nothumbnail'}) {226 227 # Make the thumbnail image228 my $thumbnailsize = $self->{'thumbnailsize'} || 100;229 my $thumbnailtype = $self->{'thumbnailtype'} || 'gif';230 231 # Generate the thumbnail with convert232 my $result = $self->convert($filename, $thumbnailtype, "-geometry $thumbnailsize" . "x$thumbnailsize", "THUMB");233 my ($thumbnailfile) = ($result =~ /=>(.*\.$thumbnailtype)/);234 235 # Add the thumbnail as an associated file ...236 if (-e "$thumbnailfile") {237 $doc_obj->associate_file("$thumbnailfile", "thumbnail.$thumbnailtype",238 "image/$thumbnailtype",$section);239 $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype);240 $doc_obj->add_metadata ($section, "Thumb", "thumbnail.$thumbnailtype");241 242 $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>");243 }244 245 # Extract Thumnail metadata from convert output246 if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {247 $doc_obj->add_metadata ($section, "ThumbWidth", $1);248 $doc_obj->add_metadata ($section, "ThumbHeight", $2);249 }250 251 }252 253 254 # Make a screen-sized version of the picture if requested255 if (!$self->{'noscreenview'}) {256 257 # To do: if the actual image smaller than the screenview size,258 # we should use the original !259 260 my $screenviewsize = $self->{'screenviewsize'};261 my $screenviewtype = $self->{'screenviewtype'} || 'jpeg';262 263 # make the screenview image264 my $result = $self->convert($filename, $screenviewtype, "-geometry $screenviewsize" . "x$screenviewsize", "SCREEN");265 my ($screenviewfilename) = ($result =~ /=>(.*\.$screenviewtype)/);266 267 # get screenview dimensions, size and type268 if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) {269 $doc_obj->add_metadata ($section, "ScreenWidth", $1);270 $doc_obj->add_metadata ($section, "ScreenHeight", $2);271 }272 else {273 $doc_obj->add_metadata ($section, "ScreenWidth", $image_width);274 $doc_obj->add_metadata ($section, "ScreenHeight", $image_height);275 }276 277 #add the screenview as an associated file ...278 if (-e "$screenviewfilename") {279 $doc_obj->associate_file("$screenviewfilename", "screenview.$screenviewtype",280 "image/$screenviewtype",$section);281 $doc_obj->add_metadata ($section, "ScreenType", $screenviewtype);282 $doc_obj->add_metadata ($section, "Screen", "screenview.$screenviewtype");283 284 $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>");285 } else {286 print $outhandle "ImagePlug: couldn't find \"$screenviewfilename\"\n";287 }288 }289 290 return $type;291 292 293 }294 295 296 297 # Discover the characteristics of an image file with the ImageMagick298 # "identify" command.299 300 sub identify {301 my ($image, $outhandle, $verbosity) = @_;302 303 # Use the ImageMagick "identify" command to get the file specs304 my $command = "identify \"$image\" 2>&1";305 print $outhandle "$command\n" if ($verbosity > 2);306 my $result = '';307 $result = `$command`;308 print $outhandle "$result\n" if ($verbosity > 3);309 310 # Read the type, width, and height311 my $type = 'unknown';312 my $width = 'unknown';313 my $height = 'unknown';314 315 my $image_safe = quotemeta $image;316 if ($result =~ /^$image_safe (\w+) (\d+)x(\d+)/) {317 $type = $1;318 $width = $2;319 $height = $3;320 }321 322 # Read the size323 my $size = "unknown";324 if ($result =~ m/^.* ([0-9]+)b/) {325 $size = $1;326 }327 elsif ($result =~ m/^.* ([0-9]+)(\.([0-9]+))?kb?/) {328 $size = 1024 * $1;329 if (defined($2)) {330 $size = $size + (1024 * $2);331 # Truncate size (it isn't going to be very accurate anyway)332 $size = int($size);333 }334 }335 elsif ($result =~ m/^.* (([0-9]+)(\.([0-9]+))?e\+([0-9]+))(kb|b)?/) {336 # Deals with file sizes on Linux of type "3.4e+02kb" where e+02 is 1*10^2.337 # 3.4e+02 therefore evaluates to 3.4 x 1 x 10^2 = 340kb.338 # Programming languages including Perl know how that 3.4e+02 is a number,339 # so we don't need to do any calculations.340 $size = $1*1; # turn the string into a number by multiplying it by 1341 #if we did $size = $1; $size would be merely the string "3.4e+02"342 $size = int($size); # truncate size343 }344 print $outhandle "file: $image:\t $type, $width, $height, $size\n"345 if ($verbosity > 2);346 347 # Return the specs348 return ($type, $width, $height, $size);349 }350 351 352 sub convert353 {354 my $self = shift(@_);355 my $source_file_path = shift(@_);356 my $target_file_type = shift(@_);357 my $convert_options = shift(@_) || "";358 my $convert_type = shift(@_) || "";359 360 my $outhandle = $self->{'outhandle'};361 my $verbosity = $self->{'verbosity'};362 363 # Determine the full name and path of the output file364 my $target_file_path = &util::get_tmp_filename() . "." . $target_file_type;365 push(@{$self->{'tmp_file_paths'}}, $target_file_path);366 367 # Generate and run the convert command368 my $convert_command = "convert -interlace plane -verbose $convert_options \"$source_file_path\" \"$target_file_path\"";369 print $outhandle "$convert_type $convert_command\n" if ($verbosity > 2);370 my $result = `$convert_command 2>&1`;371 print $outhandle "$convert_type RESULT = $result\n" if ($verbosity > 2);372 373 return $result;374 }375 376 377 # The ImagePlug read() function.378 # ImagePlug overrides read() because there is no need to read the actual379 # text of the file in, because the contents of the file is not text...380 #381 # Return number of files processed, undef if can't process382 # Note that $base_dir might be "" and that $file might383 # include directories384 385 sub read {386 my $self = shift (@_);387 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;388 389 my $outhandle = $self->{'outhandle'};390 391 #check process and block exps, smart block, etc392 my ($block_status,$filename) = $self->read_block(@_);393 return $block_status if ((!defined $block_status) || ($block_status==0));394 395 print STDERR "<Processing n='$file' p='ImagePlug'>\n" if ($gli);396 print $outhandle "ImagePlug processing $file\n"397 if $self->{'verbosity'} > 1;398 399 # None of this works very well on Windows 95/98...400 if ($ENV{'GSDLOS'} eq "windows" && !Win32::IsWinNT()) {401 if ($gli) {402 print STDERR "<ProcessingError n='$file' r='Windows 95/98 not supported'>\n";403 }404 print $outhandle "ImagePlug: Windows 95/98 not supported\n";405 return -1;406 }407 408 # None of this is going to work very well without ImageMagick...409 if ($self->{'imagemagick_not_installed'}) {410 if ($gli) {411 print STDERR "<ProcessingError n='$file' r='ImageMagick not installed'>\n";412 }413 print $outhandle "ImagePlug: ImageMagick not installed\n";414 return -1;415 }416 417 #if there's a leading directory name, eat it...418 $file =~ s/^.*[\/\\]//;419 420 # create a new document421 my $doc_obj = new doc ($filename, "indexed_doc");422 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});423 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");424 425 #run convert to get the thumbnail and extract size and type info426 my $result = generate_images($self, $filename, $file, $doc_obj);427 428 if (!defined $result)429 {430 if ($gli) {431 print STDERR "<ProcessingError n='$file'>\n";432 }433 print $outhandle "ImagePlug: couldn't process \"$filename\"\n";434 return -1; # error during processing435 }436 437 438 #create an empty text string so we don't break downstream plugins439 my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);440 441 # include any metadata passed in from previous plugins442 # note that this metadata is associated with the top level section443 my $section = $doc_obj->get_top_section();444 $self->extra_metadata ($doc_obj, $section, $metadata);445 446 # do plugin specific processing of doc_obj447 unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {448 print STDERR "<ProcessingError n='$file'>\n" if ($gli);449 return -1;450 }451 452 # do any automatic metadata extraction453 $self->auto_extract_metadata ($doc_obj);454 455 # if we haven't found any Title so far, assign one456 # this was shifted to here from inside read()457 $self->title_fallback($doc_obj,$section,$file);458 # add an OID459 $doc_obj->set_OID();460 $doc_obj->add_utf8_text($section, $text);461 462 # process the document463 $processor->process($doc_obj);464 465 # clean up temporary files - we do this here instead of in466 # generate_images becuase associated files aren't actually copied467 # until after process has been run.468 foreach my $tmp_file_path (@{$self->{'tmp_file_paths'}})469 {470 if (-e $tmp_file_path)471 {472 &util::rm($tmp_file_path);473 }474 }475 476 $self->{'num_processed'}++;477 478 return 1;479 }480 90 481 91 # do plugin specific processing of doc_obj 482 92 sub process { 483 93 my $self = shift (@_); 484 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; 94 # options?? 95 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 96 485 97 my $outhandle = $self->{'outhandle'}; 98 my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 99 if ($self->check_image_magick()) { 100 $self->generate_images($filename_full_path, $filename_no_path, $doc_obj, $doc_obj->get_top_section()); # should we check the return value? 101 } else { 102 # do some basic stuff 103 # associate the image, fileformat, mimetype, srclink, srcicon 104 # do this if image magick not installed. but also if generate hasn't worked?? what about images too small? 105 } 106 #we have no text - adds dummy text and NoText metadata 107 $self->add_dummy_text($doc_obj, $doc_obj->get_top_section()); 108 109 return 1; 110 111 } 112 113 sub clean_up_after_doc_obj_processing { 114 my $self = shift(@_); 486 115 487 return 1;116 $self->ImageConverter::clean_up_temporary_files(); 488 117 } 489 118 -
gsdl/trunk/perllib/plugins/IndexPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # IndexPlug .pm --3 # IndexPlugin.pm -- 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 50 50 # named 'Subject'. 51 51 52 # 12/05/02 Added usage datastructure - John Thompson 53 54 package IndexPlug; 52 package IndexPlugin; 55 53 56 54 use plugin; 57 use Bas Plug;55 use BasePlugin; 58 56 use doc; 59 57 use util; … … 64 62 65 63 sub BEGIN { 66 @IndexPlug ::ISA = ('BasPlug');64 @IndexPlugin::ISA = ('BasePlugin'); 67 65 } 68 66 69 my $arguments = [70 ];67 #my $arguments = [ 68 # ]; 71 69 72 my $options = { 'name' => "IndexPlug ",73 'desc' => "{IndexPlug .desc}",70 my $options = { 'name' => "IndexPlugin", 71 'desc' => "{IndexPlugin.desc}", 74 72 'abstract' => "no", 75 73 'inherits' => "yes" }; … … 80 78 push(@$pluginlist, $class); 81 79 82 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}83 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};80 #push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 81 push(@{$hashArgOptLists->{"OptList"}},$options); 84 82 85 my $self = new Bas Plug($pluginlist, $inputargs, $hashArgOptLists);83 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 86 84 87 85 return bless $self, $class; … … 110 108 111 109 # found an index.txt file 112 print STDERR "<Processing n='$file' p='IndexPlug '>\n" if ($gli);113 print $outhandle "IndexPlug : processing $indexfile\n";110 print STDERR "<Processing n='$file' p='IndexPlugin'>\n" if ($gli); 111 print $outhandle "IndexPlugin: processing $indexfile\n"; 114 112 115 113 # read in the index.txt -
gsdl/trunk/perllib/plugins/LOMPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # LOMPlug .pm -- plugin for import the collection from LOM3 # LOMPlugin.pm -- plugin for import the collection from LOM 4 4 # 5 5 # A component of the Greenstone digital library software … … 27 27 ### Note this plugin currently can't download source documents from outside if you are behind a firewall. 28 28 29 package LOMPlug ;30 31 use BasPlug;29 package LOMPlugin; 30 31 use ReadTextFile; 32 32 use MetadataPass; 33 33 use XMLParser; … … 35 35 36 36 sub BEGIN { 37 @ISA = (' BasPlug', 'MetadataPass');37 @ISA = ('ReadTextFile', 'MetadataPass'); 38 38 } 39 39 … … 44 44 my $arguments = 45 45 [ { 'name' => "process_exp", 46 'desc' => "{ BasPlug.process_exp}",46 'desc' => "{ReadTextFile.process_exp}", 47 47 'type' => "string", 48 48 'deft' => &get_default_process_exp(), 49 49 'reqd' => "no" }, 50 50 { 'name' => "root_tag", 51 'desc' => "{LOMPlug .root_tag}",51 'desc' => "{LOMPlugin.root_tag}", 52 52 'type' => "regexp", 53 53 'deft' => q/^(?i)lom$/, 54 54 'reqd' => "no" }, 55 55 { 'name' => "check_timestamp", 56 'desc' => "{LOMPlug .check_timestamp}",56 'desc' => "{LOMPlugin.check_timestamp}", 57 57 'type' => "flag" }, 58 58 { 'name' => "download_srcdocs", 59 'desc' => "{LOMPlug .download_srcdocs}",59 'desc' => "{LOMPlugin.download_srcdocs}", 60 60 'type' => "regexp", 61 61 'deft' => "", 62 62 'reqd' => "no" }]; 63 63 64 my $options = { 'name' => "LOMPlug ",65 'desc' => "{LOMPlug .desc}",64 my $options = { 'name' => "LOMPlugin", 65 'desc' => "{LOMPlugin.desc}", 66 66 'inherits' => "yes", 67 67 'args' => $arguments }; … … 75 75 push(@$pluginlist, $class); 76 76 77 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}78 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};77 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 78 push(@{$hashArgOptLists->{"OptList"}},$options); 79 79 80 $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 80 $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists); 81 82 if ($self->{'info_only'}) { 83 # don't worry about creating the XML parser as all we want is the 84 # list of plugin options 85 return bless $self, $class; 86 } 81 87 82 88 #create XML::Parser object for parsing dublin_core.xml files … … 120 126 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 121 127 122 print $outhandle "LOMPlug : extracting metadata from $file\n"128 print $outhandle "LOMPlugin: extracting metadata from $file\n" 123 129 if $self->{'verbosity'} > 1; 124 130 … … 131 137 132 138 if ($@) { 133 print $outhandle "LOMPlug : skipping $filename as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1);139 print $outhandle "LOMPlugin: skipping $filename as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1); 134 140 print $outhandle "\n Perl Error:\n $@\n" if ($self->{'verbosity'}>2); 135 141 return 0; … … 262 268 my $outhandle = $self->{'outhandle'}; 263 269 264 print STDERR "<Processing n='$file' p='LOMPlug '>\n" if ($gli);270 print STDERR "<Processing n='$file' p='LOMPlugin'>\n" if ($gli); 265 271 266 272 print $outhandle "LOMPLug: processing $file\n"; -
gsdl/trunk/perllib/plugins/LaTeXPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # LaTeXPlug .pm3 # LaTeXPlugin.pm 4 4 # 5 5 # A component of the Greenstone digital library software … … 26 26 # parse/remove tex \if ... macros 27 27 28 package LaTeXPlug ;28 package LaTeXPlugin; 29 29 30 30 # System complains about $arguments if the strict is set … … 33 33 34 34 # greenstone packages 35 use BasPlug;35 use ReadTextFile; 36 36 use unicode; 37 37 use util; … … 39 39 my $arguments = 40 40 [ { 'name' => "process_exp", 41 'desc' => "{ BasPlug.process_exp}",41 'desc' => "{ReadTextFile.process_exp}", 42 42 'type' => "regexp", 43 43 'reqd' => "no", 44 44 'deft' => &get_default_process_exp() } ]; 45 45 46 my $options = { 'name' => 'LaTeXPlug ',47 'desc' => '{LaTeXPlug .desc}',46 my $options = { 'name' => 'LaTeXPlugin', 47 'desc' => '{LaTeXPlugin.desc}', 48 48 'abstract' => 'no', 49 49 'inherits' => 'yes', … … 51 51 52 52 sub BEGIN { 53 @LaTeXPlug::ISA = ('BasPlug'); 54 } 55 56 sub print_usage { 57 print STDERR "\n usage: plugin LaTeXPlug [options]\n\n"; 53 @LaTeXPlugin::ISA = ('ReadTextFile'); 58 54 } 59 55 … … 63 59 push(@$pluginlist, $class); 64 60 65 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}66 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};67 68 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);61 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 62 push(@{$hashArgOptLists->{"OptList"}},$options); 63 64 my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists); 69 65 70 66 $self->{'aux_files'} = {}; … … 99 95 my $outhandle = $self->{'outhandle'}; 100 96 if ($gli) { 101 print STDERR "<Processing n='$file' p='LaTeXPlug '>\n";97 print STDERR "<Processing n='$file' p='LaTeXPlugin'>\n"; 102 98 } elsif ($self->{'verbosity'} > 1) { 103 print $outhandle "LaTeXPlug : processing $file\n"99 print $outhandle "LaTeXPlugin: processing $file\n" 104 100 } 105 101 my $cursection = $doc_obj->get_top_section(); -
gsdl/trunk/perllib/plugins/MARCPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # MARCPlug .pm -- basic MARC plugin3 # MARCPlugin.pm -- basic MARC plugin 4 4 # 5 5 # A component of the Greenstone digital library software … … 25 25 ########################################################################### 26 26 27 package MARCPlug ;28 29 use Split Plug;27 package MARCPlugin; 28 29 use SplitTextFile; 30 30 31 31 use unicode; … … 36 36 37 37 sub BEGIN { 38 @MARCPlug ::ISA = ('SplitPlug');38 @MARCPlugin::ISA = ('SplitTextFile'); 39 39 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan"); 40 40 } … … 42 42 my $arguments = 43 43 [ { 'name' => "metadata_mapping", 44 'desc' => "{MARCPlug .metadata_mapping}",44 'desc' => "{MARCPlugin.metadata_mapping}", 45 45 'type' => "string", 46 46 'deft' => "marctodc.txt", … … 53 53 'reqd' => "no" }, 54 54 { 'name' => "process_exp", 55 'desc' => "{Bas Plug.process_exp}",55 'desc' => "{BasePlugin.process_exp}", 56 56 'type' => "regexp", 57 57 'reqd' => "no", 58 58 'deft' => &get_default_process_exp() }, 59 59 { 'name' => "split_exp", 60 'desc' => "{Split Plug.split_exp}",60 'desc' => "{SplitTextFile.split_exp}", 61 61 'type' => "regexp", 62 62 'reqd' => "no", … … 64 64 ]; 65 65 66 my $options = { 'name' => "MARCPlug ",67 'desc' => "{MARCPlug .desc}",66 my $options = { 'name' => "MARCPlugin", 67 'desc' => "{MARCPlugin.desc}", 68 68 'abstract' => "no", 69 69 'inherits' => "yes", … … 81 81 push(@$pluginlist, $class); 82 82 83 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}84 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};85 86 my $self = new Split Plug($pluginlist, $inputargs, $hashArgOptLists);83 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 84 push(@{$hashArgOptLists->{"OptList"}},$options); 85 86 my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists); 87 87 88 88 # 'metadata_mapping' was used in two ways in the plugin: as a plugin … … 119 119 { 120 120 121 my $msg = "MARCPlug ERROR: Can't locate mapping file \"" .121 my $msg = "MARCPlugin ERROR: Can't locate mapping file \"" . 122 122 $self->{'metadata_mapping_file'} . "\".\n" . 123 123 " No marc files can be processed.\n"; … … 245 245 push(@marc_entries,$marc); 246 246 $$textref .= $marc->as_formatted(); 247 $$textref .= "\n\n"; # for Split Plug- see default_split_exp above...247 $$textref .= "\n\n"; # for SplitTextFile - see default_split_exp above... 248 248 } 249 249 … … 254 254 255 255 # do plugin specific processing of doc_obj 256 # This gets done for each record found by Split Plugin marc files.256 # This gets done for each record found by SplitTextFile in marc files. 257 257 sub process { 258 258 my $self = shift (@_); … … 264 264 if (! defined($self->{'metadata_mapping'})) 265 265 { 266 print $outhandle "MARCPlug : no metadata file! Can't process $file\n";266 print $outhandle "MARCPlugin: no metadata file! Can't process $file\n"; 267 267 return undef; 268 268 } 269 269 270 print STDERR "<Processing n='$file' p='MARCPlug '>\n" if ($gli);271 print $outhandle "MARCPlug : processing $file\n"270 print STDERR "<Processing n='$file' p='MARCPlugin'>\n" if ($gli); 271 print $outhandle "MARCPlugin: processing $file\n" 272 272 if $self->{'verbosity'} > 1; 273 273 -
gsdl/trunk/perllib/plugins/MARCXMLPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # MARCXMLPlug .pm3 # MARCXMLPlugin.pm 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 28 28 # well-formedness). 29 29 30 package MARCXMLPlug ;31 32 use XMLPlug;30 package MARCXMLPlugin; 31 32 use ReadXMLFile; 33 33 34 34 use strict; … … 36 36 37 37 sub BEGIN { 38 @MARCXMLPlug ::ISA = ('XMLPlug');38 @MARCXMLPlugin::ISA = ('ReadXMLFile'); 39 39 } 40 40 41 41 my $arguments = [{'name' => "metadata_mapping_file", 42 'desc' => "{MARCXMLPlug .metadata_mapping_file}",42 'desc' => "{MARCXMLPlugin.metadata_mapping_file}", 43 43 'type' => "string", 44 44 'deft' => "marctodc.txt", 45 45 'reqd' => "no" }]; 46 46 47 my $options = { 'name' => "MARCXMLPlug ",48 'desc' => "{MARCXMLPlug .desc}",47 my $options = { 'name' => "MARCXMLPlugin", 48 'desc' => "{MARCXMLPlugin.desc}", 49 49 'abstract' => "no", 50 50 'inherits' => "yes", … … 57 57 push(@$pluginlist, $class); 58 58 59 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}60 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};61 62 my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);59 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 60 push(@{$hashArgOptLists->{"OptList"}},$options); 61 62 my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists); 63 63 64 64 $self->{'content'} = ""; … … 222 222 if (scalar(@$mm_files)==0) 223 223 { 224 my $msg = "MARCXMLPlug ERROR: Can't locate mapping file \"" .224 my $msg = "MARCXMLPlugin ERROR: Can't locate mapping file \"" . 225 225 $self->{'metadata_mapping_file'} . "\".\n " . 226 226 " No marc files can be processed.\n"; … … 269 269 $self->{'indent'} = 0; 270 270 my $outhandle = $self->{'outhandle'}; 271 print $outhandle "MARCXMLPlug : processing $self->{'file'}\n" if $self->{'verbosity'} > 1;272 print STDERR "<Processing n='$self->{'file'}' p='MARCXMLPlug '>\n" if $self->{'gli'};271 print $outhandle "MARCXMLPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 272 print STDERR "<Processing n='$self->{'file'}' p='MARCXMLPlugin'>\n" if $self->{'gli'}; 273 273 274 274 } … … 310 310 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Encoding", $encoding); 311 311 my ($filemeta) = $file =~ /([^\\\/]+)$/; 312 $ doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta));312 $self->set_Source_metadata($doc_obj, $filemeta, $encoding); 313 313 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "SourceSegment", "$self->{'record_count'}"); 314 314 if ($self->{'cover_image'}) { … … 319 319 320 320 my $outhandle = $self->{'outhandle'}; 321 print $outhandle "Record $self->{'record_count'} - MARCXMLPlug : processing $self->{'file'}\n" if $self->{'verbosity'} > 1;321 print $outhandle "Record $self->{'record_count'} - MARCXMLPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 322 322 323 323 $self->{'record_count'}++; -
gsdl/trunk/perllib/plugins/METSPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # METSPlug .pm3 # METSPlugin.pm 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 30 30 31 31 32 package METSPlug ;32 package METSPlugin; 33 33 34 34 use ghtml; … … 37 37 no strict 'refs'; # allow filehandles to be variables and viceversa 38 38 39 use XMLPlug;39 use ReadXMLFile; 40 40 use XML::XPath; 41 41 use XML::XPath::XMLParser; 42 42 43 43 sub BEGIN { 44 @METSPlug ::ISA = ('XMLPlug');44 @METSPlugin::ISA = ('ReadXMLFile'); 45 45 } 46 46 47 47 my $arguments = [ 48 48 ]; 49 my $options = { 'name' => "METSPlug ",50 'desc' => "{METSPlug .desc}",49 my $options = { 'name' => "METSPlugin", 50 'desc' => "{METSPlugin.desc}", 51 51 'abstract' => "no", 52 52 'inherits' => "yes" }; … … 64 64 push(@$pluginlist, $class); 65 65 66 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 67 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 68 69 my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists); 66 # have no args - do we still want this? 67 #push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 68 push(@{$hashArgOptLists->{"OptList"}},$options); 69 70 my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists); 70 71 71 72 $self->{'section'} = ""; … … 117 118 } 118 119 my $outhandle = $self->{'outhandle'}; 119 print $outhandle "METSPlug : processing $self->{'file'}\n" if $self->{'verbosity'} > 1;120 print STDERR "<Processing n='$self->{'file'}' p='METSPlug '>\n" if ($self->{'gli'});120 print $outhandle "METSPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 121 print STDERR "<Processing n='$self->{'file'}' p='METSPlugin'>\n" if ($self->{'gli'}); 121 122 122 123 } -
gsdl/trunk/perllib/plugins/MP3Plugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # MP3Plug .pm -- Plugin for MP3 files (MPEG audio layer 3).3 # MP3Plugin.pm -- Plugin for MP3 files (MPEG audio layer 3). 4 4 # 5 5 # A component of the Greenstone digital library software from the New … … 26 26 27 27 28 package MP3Plug ;29 30 use UnknownPlug;28 package MP3Plugin; 29 30 use BasePlugin; 31 31 32 32 use strict; 33 33 no strict 'refs'; # allow filehandles to be variables and viceversa 34 no strict 'subs'; 34 35 35 36 use MP3::Info; … … 38 39 39 40 sub BEGIN { 40 @MP3Plug ::ISA = ('UnknownPlug');41 @MP3Plugin::ISA = ('BasePlugin'); 41 42 } 42 43 43 44 my $arguments = 44 45 [ { 'name' => "process_exp", 45 'desc' => "{Bas Plug.process_exp}",46 'desc' => "{BasePlugin.process_exp}", 46 47 'type' => "regexp", 47 48 'deft' => &get_default_process_exp(), 48 49 'reqd' => "no" }, 49 50 { 'name' => "assoc_images", 50 'desc' => "{MP3Plug .assoc_images}",51 'desc' => "{MP3Plugin.assoc_images}", 51 52 'type' => "flag", 52 53 'deft' => "", 53 54 'reqd' => "no" }, 54 55 { 'name' => "applet_metadata", 55 'desc' => "{MP3Plug .applet_metadata}",56 'desc' => "{MP3Plugin.applet_metadata}", 56 57 'type' => "flag", 57 58 'deft' => "" }, 58 59 { 'name' => "metadata_fields", 59 'desc' => "{MP3Plug .metadata_fields}",60 'desc' => "{MP3Plugin.metadata_fields}", 60 61 'type' => "string", 61 62 'deft' => "Title,Artist,Genre" } ]; 62 63 63 my $options = { 'name' => "MP3Plug ",64 'desc' => "{MP3Plug .desc}",64 my $options = { 'name' => "MP3Plugin", 65 'desc' => "{MP3Plugin.desc}", 65 66 'abstract' => "no", 66 67 'inherits' => "yes", … … 72 73 push(@$pluginlist, $class); 73 74 74 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}75 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};76 77 my $self = new UnknownPlug($pluginlist, $inputargs, $hashArgOptLists);75 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 76 push(@{$hashArgOptLists->{"OptList"}},$options); 77 78 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 78 79 79 80 return bless $self, $class; … … 82 83 sub get_default_process_exp { 83 84 return q^(?i)\.mp3$^; 85 } 86 87 sub process { 88 my $self = shift (@_); 89 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 90 91 my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 92 # do something about OIDtype so no hashing 93 94 # old code was in effect the following. 95 if ($doc_obj->{'OIDtype'} =~ /^hash$/) { 96 $doc_obj->set_OIDtype ("incremental"); 97 } 98 99 100 # associate the file with the document 101 if ($self->associate_mp3_file($filename_full_path, $filename_no_path, $doc_obj) != 1) 102 { 103 print "MP3Plugin: couldn't process \"$filename_full_path\"\n"; 104 return 0; 105 } 106 107 #whats this crap? 108 my $text = &gsprintf::lookup_string("{BasePlugin.dummy_text}",1); 109 if ($self->{'assoc_images'}) { 110 $text .= "[img1]<br>"; 111 $text .= "[img2]<br>"; 112 } 113 $doc_obj->add_utf8_text($doc_obj->get_top_section(), $text); 114 84 115 } 85 116 … … 155 186 156 187 $doc_obj->associate_file($filename, $dst_file, $mime_type, $section); 157 $doc_obj->add_metadata ($section, "Source", $file);158 188 $doc_obj->add_metadata ($section, $assoc_field, $assoc_name); 159 189 $doc_obj->add_metadata ($section, "srcurl", $assoc_url); … … 295 325 296 326 297 298 # The MP3Plug read() function is based on UnknownPlug read(). This 299 # function does all the right things to make general options work for 300 # a given plugin. 301 302 my $mp3_doc_count = 0; ## is this used anywhere now !!??? 303 304 sub read { 305 my $self = shift (@_); 306 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 307 308 my $outhandle = $self->{'outhandle'}; 309 310 #check for associate_ext, blocking etc 311 my ($block_status,$filename) = $self->read_block(@_); 312 return $block_status if ((!defined $block_status) || ($block_status==0)); 313 314 print STDERR "<Processing n='$file' p='MP3Plug'>\n" if ($gli); 315 print $outhandle "MP3Plug processing \"$filename\"\n" 316 if $self->{'verbosity'} > 1; 317 318 #if there's a leading directory name, eat it... 319 $file =~ s/^.*[\/\\]//; 320 321 # create a new document 322 my $doc_obj = new doc ($filename, "indexed_doc"); 323 $mp3_doc_count++; 324 325 ## $doc_obj->set_OIDtype ($processor->{'OIDtype'}); 326 if ($processor->{'OIDtype'} =~ /^(assigned|dirname)$/) { 327 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'}); 328 } 329 else { 330 $doc_obj->set_OIDtype ("incremental"); # this is done to avoid hashing content of file 331 } 332 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}"); 333 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "FileSize", (-s $filename)); 334 335 # associate the file with the document 336 if (associate_mp3_file($self, $filename, $file, $doc_obj) != 1) 337 { 338 print "MP3Plug: couldn't process \"$filename\"\n"; 339 return 0; 340 } 341 342 #create an empty text string so we don't break downstream plugins 343 my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1); 344 if ($self->{'assoc_images'}) { 345 $text .= "[img1]<br>"; 346 $text .= "[img2]<br>"; 347 } 348 # include any metadata passed in from previous plugins 349 my $section = $doc_obj->get_top_section(); 350 $self->extra_metadata ($doc_obj, $section, $metadata); 351 352 $self->title_fallback($doc_obj,$section,$file); 353 354 # do plugin specific processing of doc_obj 355 return undef unless defined ($self->process (\$text, $pluginfo, $base_dir, 356 $file, $metadata, $doc_obj)); 357 358 # do any automatic metadata extraction 359 $self->auto_extract_metadata ($doc_obj); 360 361 # add an OID 362 $doc_obj->set_OID(); 363 $doc_obj->add_utf8_text($section, $text); 364 365 # process the document 366 $processor->process($doc_obj); 367 368 $self->{'num_processed'} ++; 369 return 1; 370 } 371 372 327 # we want to use mp3:Title if its there, otherwise we'll use BasePlugin method 373 328 sub title_fallback 374 329 { … … 382 337 } 383 338 else { 384 &BasPlug::title_fallback($self,$doc_obj, $section, $file);339 $self->BasePlugin::title_fallback($doc_obj, $section, $file); 385 340 } 386 341 } -
gsdl/trunk/perllib/plugins/MediaWikiPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # MediaWikiPlug .pm -- html plugin with extra facilities for wiki page3 # MediaWikiPlugin.pm -- html plugin with extra facilities for wiki page 4 4 # 5 5 # A component of the Greenstone digital library software … … 31 31 # collection's Home page. 32 32 33 package MediaWikiPlug ;34 35 use HTMLPlug ;36 # use ImagePlug ;33 package MediaWikiPlugin; 34 35 use HTMLPlugin; 36 # use ImagePlugin; 37 37 # use File::Copy; 38 38 use unicode; … … 43 43 44 44 sub BEGIN { 45 @MediaWikiPlug ::ISA = ('HTMLPlug');45 @MediaWikiPlugin::ISA = ('HTMLPlugin'); 46 46 } 47 47 … … 50 50 # show the table of contents on collection's home page 51 51 { 'name' => "show_toc", 52 'desc' => "{MediaWikiPlug .show_toc}",52 'desc' => "{MediaWikiPlugin.show_toc}", 53 53 'type' => "flag", 54 54 'reqd' => "no"}, 55 55 # set to delete the table of contents section on each MediaWiki page 56 56 { 'name' => "delete_toc", 57 'desc' => "{MediaWikiPlug .delete_toc}",57 'desc' => "{MediaWikiPlugin.delete_toc}", 58 58 'type' => "flag", 59 59 'reqd' => "no"}, 60 60 # regexp to match the table of contents 61 61 { 'name' => "toc_exp", 62 'desc' => "{MediaWikiPlug .toc_exp}",62 'desc' => "{MediaWikiPlugin.toc_exp}", 63 63 'type' => "regexp", 64 64 'reqd' => "no", … … 66 66 # set to delete the navigation section 67 67 { 'name' => "delete_nav", 68 'desc' => "{MediaWikiPlug .delete_nav}",68 'desc' => "{MediaWikiPlugin.delete_nav}", 69 69 'type' => "flag", 70 70 'reqd' => "no", … … 72 72 # regexp to match the navigation section 73 73 { 'name' => "nav_div_exp", 74 'desc' => "{MediaWikiPlug .nav_div_exp}",74 'desc' => "{MediaWikiPlugin.nav_div_exp}", 75 75 'type' => "regexp", 76 76 'reqd' => "no", … … 78 78 # set to delete the searchbox section 79 79 { 'name' => "delete_searchbox", 80 'desc' => "{MediaWikiPlug .delete_searchbox}",80 'desc' => "{MediaWikiPlugin.delete_searchbox}", 81 81 'type' => "flag", 82 82 'reqd' => "no", … … 84 84 # regexp to match the searchbox section 85 85 { 'name' => "searchbox_div_exp", 86 'desc' => "{MediaWikiPlug .searchbox_div_exp}",86 'desc' => "{MediaWikiPlugin.searchbox_div_exp}", 87 87 'type' => "regexp", 88 88 'reqd' => "no", 89 89 'deft' => "<div([^>]*)id=(\\\"|')p-search(\\\"|')(.|\\n)*?<\/div>"}, 90 90 # regexp to match title suffix 91 # can't use the title_sub option in HTMLPlug instead91 # can't use the title_sub option in HTMLPlugin instead 92 92 # because title_sub always matches from the begining 93 93 { 'name' => "remove_title_suffix_exp", 94 'desc' => "{MediaWikiPlug .remove_title_suffix_exp}",94 'desc' => "{MediaWikiPlugin.remove_title_suffix_exp}", 95 95 'type' => "regexp", 96 96 'reqd' => "no", … … 98 98 ]; 99 99 100 my $options = { 'name' => "MediaWikiPlug ",101 'desc' => "{MediaWikiPlug .desc}",100 my $options = { 'name' => "MediaWikiPlugin", 101 'desc' => "{MediaWikiPlugin.desc}", 102 102 'abstract' => "no", 103 103 'inherits' => "yes", … … 109 109 push(@$pluginlist, $class); 110 110 111 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}112 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};113 114 my $self = new HTMLPlug ($pluginlist, $inputargs, $hashArgOptLists);111 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 112 push(@{$hashArgOptLists->{"OptList"}},$options); 113 114 my $self = new HTMLPlugin($pluginlist, $inputargs, $hashArgOptLists); 115 115 return bless $self, $class; 116 116 } … … 123 123 my $outhandle = $self->{'outhandle'}; 124 124 125 print $outhandle "MediaWikiPlug : processing $file\n" if $self->{'verbosity'} > 1;125 print $outhandle "MediaWikiPlugin: processing $file\n" if $self->{'verbosity'} > 1; 126 126 127 127 my @head_and_body = split(/<body/i,$$textref); … … 205 205 # linux: /research/lh92/greenstone/greenstone2.73/collect/wiki/import 206 206 # $file use different delimiters : forward slash for linux; backward slash for windows 207 # print "\nfile : $file\n\n"; # windows: greenstone.sourceforge.net\wiki\index.php\Access_Processing_using_DBPlug .html207 # print "\nfile : $file\n\n"; # windows: greenstone.sourceforge.net\wiki\index.php\Access_Processing_using_DBPlugin.html 208 208 # linux: greenstone.sourceforge.net/wiki/index.php/Using_GreenstoneWiki.html 209 209 … … 618 618 $value = $1; 619 619 if (!defined $value || !defined $tag){ 620 #print $outhandle " StructuredHTMLPlug: can't find VALUE in \"$tag\"\n";620 #print $outhandle "MediaWikiPlugin: can't find VALUE in \"$tag\"\n"; 621 621 next; 622 622 } else { -
gsdl/trunk/perllib/plugins/MetadataCSVPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # MetadataCSVPlug .pm -- A plugin for metadata in comma-separated value format3 # MetadataCSVPlugin.pm -- A plugin for metadata in comma-separated value format 4 4 # 5 5 # A component of the Greenstone digital library software … … 25 25 ########################################################################### 26 26 27 package MetadataCSVPlug ;27 package MetadataCSVPlugin; 28 28 29 29 30 use Bas Plug;30 use BasePlugin; 31 31 use strict; 32 32 33 33 34 34 sub BEGIN { 35 @MetadataCSVPlug ::ISA = ('BasPlug');35 @MetadataCSVPlugin::ISA = ('BasePlugin'); 36 36 } 37 37 … … 39 39 my $arguments = 40 40 [ { 'name' => "block_exp", 41 'desc' => "{Bas Plug.block_exp}",41 'desc' => "{BasePlugin.block_exp}", 42 42 'type' => "regexp", 43 43 'reqd' => "no", … … 45 45 46 46 47 my $options = { 'name' => "MetadataCSVPlug ",48 'desc' => "{MetadataCSVPlug .desc}",47 my $options = { 'name' => "MetadataCSVPlugin", 48 'desc' => "{MetadataCSVPlugin.desc}", 49 49 'abstract' => "no", 50 50 'inherits' => "yes", … … 58 58 push(@$pluginlist, $class); 59 59 60 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}61 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};60 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 61 push(@{$hashArgOptLists->{"OptList"}},$options); 62 62 63 my $self = new Bas Plug($pluginlist, $inputargs, $hashArgOptLists);63 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 64 64 65 65 return bless $self, $class; … … 74 74 75 75 76 # We don't want any other plugins to see .csv files76 # Used by BasePlugin read to block this file 77 77 sub get_default_block_exp 78 78 { … … 91 91 return undef; 92 92 } 93 print STDERR "\n<Processing n='$file' p='MetadataCSVPlug '>\n" if ($gli);94 print STDERR "MetadataCSVPlug : processing $file\n" if ($self->{'verbosity'}) > 1;93 print STDERR "\n<Processing n='$file' p='MetadataCSVPlugin'>\n" if ($gli); 94 print STDERR "MetadataCSVPlugin: processing $file\n" if ($self->{'verbosity'}) > 1; 95 95 96 96 # Read the CSV file to get the metadata … … 98 98 open(CSV_FILE, "$filename"); 99 99 my $csv_file_reader = new multiread(); 100 $csv_file_reader->set_handle('MetadataCSVPlug ::CSV_FILE');100 $csv_file_reader->set_handle('MetadataCSVPlugin::CSV_FILE'); 101 101 $csv_file_reader->read_file(\$csv_file_content); 102 102 close(CSV_FILE); … … 118 118 119 119 if (!$found_filename_field) { 120 print STDERR "MetadataCSVPlug Error: No Filename field in CSV file: $filename\n";120 print STDERR "MetadataCSVPlugin Error: No Filename field in CSV file: $filename\n"; 121 121 return -1; # error 122 122 } … … 153 153 # The line must be formatted incorrectly 154 154 else { 155 print STDERR "MetadataCSVPlug Error: Badly formatted CSV line: $csv_line.\n";155 print STDERR "MetadataCSVPlugin Error: Badly formatted CSV line: $csv_line.\n"; 156 156 last; 157 157 } … … 163 163 my $csv_line_filename_array = $csv_line_metadata{"Filename"}; 164 164 if (!defined $csv_line_filename_array) { 165 print STDERR "MetadataCSVPlug Error: No Filename metadata in CSV line: $orig_csv_line\n";165 print STDERR "MetadataCSVPlugin Error: No Filename metadata in CSV line: $orig_csv_line\n"; 166 166 next; 167 167 } -
gsdl/trunk/perllib/plugins/MetadataPass.pm
r12970 r15872 29 29 no strict 'refs'; # allow filehandles to be variables and viceversa 30 30 31 use BasPlug; # uses BasPlug, but is not inherited31 use PrintInfo; # uses PrintInfo, but is not inherited 32 32 33 33 … … 55 55 sub print_xml_usage 56 56 { 57 BasPlug::print_xml_usage(@_);57 PrintInfo::print_xml_usage(@_); 58 58 } 59 59 60 60 sub print_xml 61 61 { 62 BasPlug::print_xml(@_);62 PrintInfo::print_xml(@_); 63 63 } 64 64 65 65 sub set_incremental 66 66 { 67 BasPlug::set_incremental(@_);67 PrintInfo::set_incremental(@_); 68 68 } 69 69 -
gsdl/trunk/perllib/plugins/MetadataXMLPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # MetadataXMLPlug .pm --3 # MetadataXMLPlugin.pm -- 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 24 24 ########################################################################### 25 25 26 # MetadataXMLPlug process metadata.xml files in a collection26 # MetadataXMLPlugin process metadata.xml files in a collection 27 27 28 28 # Here's an example of a metadata file that uses three FileSet structures … … 85 85 # metadata is explictly overridden later in the import. 86 86 87 package MetadataXMLPlug ;87 package MetadataXMLPlugin; 88 88 89 89 use strict; 90 90 no strict 'refs'; 91 use Bas Plug;91 use BasePlugin; 92 92 use util; 93 93 use metadatautil; 94 94 95 95 sub BEGIN { 96 @MetadataXMLPlug ::ISA = ('BasPlug');96 @MetadataXMLPlugin::ISA = ('BasePlugin'); 97 97 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan"); 98 98 } … … 102 102 my $arguments = [ 103 103 { 'name' => "block_exp", 104 'desc' => "{Bas Plug.block_exp}",104 'desc' => "{BasePlugin.block_exp}", 105 105 'type' => "regexp", 106 106 'reqd' => "no", … … 108 108 ]; 109 109 110 my $options = { 'name' => "MetadataXMLPlug ",111 'desc' => "{MetadataXMLPlug .desc}",110 my $options = { 'name' => "MetadataXMLPlugin", 111 'desc' => "{MetadataXMLPlugin.desc}", 112 112 'abstract' => "no", 113 113 'inherits' => "yes", … … 121 121 push(@$pluginlist, $class); 122 122 123 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}124 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};125 126 $self = new Bas Plug($pluginlist, $inputargs, $hashArgOptLists);123 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 124 push(@{$hashArgOptLists->{"OptList"}},$options); 125 126 $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 127 127 128 128 if ($self->{'info_only'}) { … … 180 180 } 181 181 182 print STDERR "\n<Processing n='$file' p='MetadataXMLPlug '>\n" if ($gli);183 print STDERR "MetadataXMLPlug : processing $file\n" if ($self->{'verbosity'})> 1;182 print STDERR "\n<Processing n='$file' p='MetadataXMLPlugin'>\n" if ($gli); 183 print STDERR "MetadataXMLPlugin: processing $file\n" if ($self->{'verbosity'})> 1; 184 184 185 185 $self->{'metadataref'} = $extrametadata; -
gsdl/trunk/perllib/plugins/NulPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # N ULPlug.pm -- Plugin for dummy (.nul) files3 # NulPlugin.pm -- Plugin for dummy (.nul) files 4 4 # 5 5 # A component of the Greenstone digital library software from the New … … 26 26 ########################################################################### 27 27 28 # N ULPlug- a plugin for dummy files28 # NulPlugin - a plugin for dummy files 29 29 30 30 # This is a simple Plugin for importing dummy files, along with … … 35 35 # databases 36 36 37 package N ULPlug;37 package NulPlugin; 38 38 39 use Bas Plug;39 use BasePlugin; 40 40 41 41 use strict; … … 43 43 44 44 sub BEGIN { 45 @N ULPlug::ISA = ('BasPlug');45 @NulPlugin::ISA = ('BasePlugin'); 46 46 } 47 47 48 48 my $arguments = 49 49 [ { 'name' => "process_exp", 50 'desc' => "{Bas Plug.process_exp}",50 'desc' => "{BasePlugin.process_exp}", 51 51 'type' => "regexp", 52 52 'reqd' => "no", 53 53 'deft' => &get_default_process_exp() }, 54 54 { 'name' => "assoc_field", 55 'desc' => "{N ULPlug.assoc_field}",55 'desc' => "{NulPlugin.assoc_field}", 56 56 'type' => "string", 57 'deft' => " ",57 'deft' => "null_file", 58 58 'reqd' => "no" }, 59 59 { 'name' => "add_metadata_as_text", 60 'desc' => "{N ULPlug.add_metadata_as_text}",60 'desc' => "{NulPlugin.add_metadata_as_text}", 61 61 'type' => "flag" }, 62 62 { 'name' => "remove_namespace_for_text", 63 'desc' => "{N ULPlug.remove_namespace_for_text}",63 'desc' => "{NulPlugin.remove_namespace_for_text}", 64 64 'type' => "flag" } 65 65 ]; 66 66 67 my $options = { 'name' => "N ULPlug",68 'desc' => "{N ULPlug.desc}",67 my $options = { 'name' => "NulPlugin", 68 'desc' => "{NulPlugin.desc}", 69 69 'abstract' => "no", 70 70 'inherits' => "yes", … … 77 77 push(@$pluginlist, $class); 78 78 79 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}80 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};79 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 80 push(@{$hashArgOptLists->{"OptList"}},$options); 81 81 82 my $self = new Bas Plug($pluginlist, $inputargs, $hashArgOptLists);82 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 83 83 84 84 return bless $self, $class; … … 89 89 } 90 90 91 # The NULPlug read() function. This function does all the right92 # things to make general options work for a given plugin. NULPlug 93 # overrides read() because there is no need to read the actual text of 94 # the file in, because the contents of the file is not text... 95 # 96 # 97 # Return number of files processed, undef if can't process98 # 99 # Note that $base_dir might be "" and that $file might include directories 91 # NulPlugin specific processing of doc_obj. 92 sub process { 93 my $self = shift (@_); 94 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 95 96 my $topsection = $doc_obj->get_top_section(); 97 98 my $assoc_field = $self->{'assoc_field'}; # || "null_file"; TODO, check this 99 $doc_obj->add_metadata ($topsection, $assoc_field, $file); 100 100 101 sub read {102 my $self = shift (@_);103 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;104 105 my $outhandle = $self->{'outhandle'};106 107 #check for associate_ext, blocking etc108 my ($block_status,$filename) = $self->read_block(@_);109 return $block_status if ((!defined $block_status) || ($block_status==0));110 111 print STDERR "<Processing n='$file' p='NULPlug'>\n" if ($gli);112 print $outhandle "NULPlug processing \"$filename\"\n"113 if $self->{'verbosity'} > 1;114 115 #if there's a leading directory name, eat it...116 $file =~ s/^.*[\/\\]//;117 118 # create a new document119 my $doc_obj = new doc ($filename, "indexed_doc");120 my $top_section = $doc_obj->get_top_section();121 122 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});123 #$doc_obj->set_OIDtype ("incremental");124 $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}");125 $doc_obj->add_metadata($top_section, "Source", $file); # set the filename as Source metadata to be consistent with other plugins126 127 $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename));128 129 # the metadata NoText is used to suppress the dummy text 'This document has no text.'130 $doc_obj->add_metadata ($top_section, "NoText", "1");131 132 my $assoc_field = $self->{'assoc_field'} || "null_file";133 $doc_obj->add_metadata ($top_section, $assoc_field, $file);134 135 if ($self->{'cover_image'}) {136 $self->associate_cover_image($doc_obj, $filename);137 }138 139 # include any metadata passed in from previous plugins140 my $section = $doc_obj->get_top_section();141 $self->extra_metadata ($doc_obj, $section, $metadata);142 143 101 # format the metadata passed in (presumably from metadata.xml) 144 102 my $text = ""; 145 103 if ($self->{'add_metadata_as_text'}) { 146 104 $text = &metadatautil::format_metadata_as_table($metadata, $self->{'remove_namespace_for_text'}); 105 $doc_obj->add_utf8_text($topsection, $text); 147 106 } else { 148 #create an empty text string so we don't break downstream plugins 149 $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1); 107 $self->add_dummy_text($doc_obj, $topsection); 150 108 } 151 $self->title_fallback($doc_obj,$section,$file);152 153 # do plugin specific processing of doc_obj154 unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {155 print STDERR "<ProcessingError n='$file'>\n" if ($gli);156 return -1;157 }158 159 # do any automatic metadata extraction160 $self->auto_extract_metadata ($doc_obj);161 162 # add an OID163 $doc_obj->set_OID();164 $doc_obj->add_utf8_text($section, $text);165 166 # process the document167 $processor->process($doc_obj);168 169 $self->{'num_processed'} ++;170 return 1;171 }172 173 174 # NULPlug processing of doc_obj. In practice we don't need to do175 # anything here because the read function takes care of everything.176 177 sub process {178 my $self = shift (@_);179 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;180 my $outhandle = $self->{'outhandle'};181 109 182 110 return 1; -
gsdl/trunk/perllib/plugins/OAIPlugin.pm
r15865 r15872 25 25 ########################################################################### 26 26 27 package OAIPlug; 28 29 use BasPlug; 27 package OAIPlugin; 28 30 29 use unicode; 31 30 use util; … … 34 33 no strict 'refs'; # allow filehandles to be variables and viceversa 35 34 36 use XMLPlug;35 use ReadXMLFile; 37 36 38 37 sub BEGIN { 39 @OAIPlug ::ISA = ('XMLPlug');38 @OAIPlugin::ISA = ('ReadXMLFile'); 40 39 } 41 40 … … 49 48 ]; 50 49 51 my $options = { 'name' => "OAIPlug ",52 'desc' => "{OAIPlug .desc}",50 my $options = { 'name' => "OAIPlugin", 51 'desc' => "{OAIPlugin.desc}", 53 52 'abstract' => "no", 54 53 'inherits' => "yes", … … 61 60 push(@$pluginlist, $class); 62 61 63 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}64 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};65 66 my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);62 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 63 push(@{$hashArgOptLists->{"OptList"}},$options); 64 65 my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists); 67 66 68 67 return bless $self, $class; … … 98 97 99 98 my $outhandle = $self->{'outhandle'}; 100 print $outhandle "OAIPlug : processing $self->{'file'}\n" if $self->{'verbosity'} > 1;101 print STDERR "<Processing n='$self->{'file'}' p='OAIPlug '>\n" if $self->{'gli'};99 print $outhandle "OAIPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1; 100 print STDERR "<Processing n='$self->{'file'}' p='OAIPlugin'>\n" if $self->{'gli'}; 102 101 103 102 } … … 199 198 if ($srcdoc_exists) 200 199 { 201 print $outhandle "OAIPlug : passing metadata on to $url_array->[0]\n"200 print $outhandle "OAIPlugin: passing metadata on to $url_array->[0]\n" 202 201 if ($self->{'verbosity'}>1); 203 202 … … 265 264 my $outhandle = $self->{'outhandle'}; 266 265 267 print STDERR "<Processing n='$file' p='OAIPlug '>\n" if ($gli);268 print $outhandle "OAIPlug : processing $file\n"266 print STDERR "<Processing n='$file' p='OAIPlugin'>\n" if ($gli); 267 print $outhandle "OAIPlugin: processing $file\n" 269 268 if $self->{'verbosity'} > 1; 270 269 … … 411 410 412 411 if ($top_level_prefix !~ /dc$/) { 413 print $outhandle "Warning: OAIPlug currently only designed for Dublin Core (or variant) metadata\n";412 print $outhandle "Warning: OAIPlugin currently only designed for Dublin Core (or variant) metadata\n"; 414 413 print $outhandle " This recorded metadata section '$top_level_prefix' does not appear to match.\n"; 415 414 print $outhandle " Metadata assumed to be in form: <prefix:tag>value</prefix:tag> and will be converted\n"; -
gsdl/trunk/perllib/plugins/OggVorbisPlugin.pm
r15865 r15872 27 27 ########################################################################### 28 28 29 package OggVorbisPlug ;29 package OggVorbisPlugin; 30 30 31 31 32 use UnknownPlug;32 use BasePlugin; 33 33 use Ogg::Vorbis::Header::PurePerl; 34 34 35 35 use strict; 36 36 no strict 'refs'; # allow filehandles to be variables and viceversa 37 no strict 'subs'; 37 38 38 39 sub BEGIN { 39 @OggVorbisPlug ::ISA = ('UnknownPlug');40 @OggVorbisPlugin::ISA = ('BasePlugin'); 40 41 } 41 42 … … 43 44 my $arguments = 44 45 [ { 'name' => "process_exp", 45 'desc' => "{Bas Plug.process_exp}",46 'desc' => "{BasePlugin.process_exp}", 46 47 'type' => "string", 47 48 'deft' => &get_default_process_exp(), 48 49 'reqd' => "no" }, 49 50 { 'name' => "add_technical_metadata", 50 'desc' => "{OggVorbisPlug .add_technical_metadata}",51 'desc' => "{OggVorbisPlugin.add_technical_metadata}", 51 52 'type' => "flag", 52 53 'deft' => "" } ]; 53 54 54 my $options = { 'name' => "OggVorbisPlug ",55 'desc' => "{OggVorbisPlug .desc}",55 my $options = { 'name' => "OggVorbisPlugin", 56 'desc' => "{OggVorbisPlugin.desc}", 56 57 'inherits' => "yes", 57 58 'abstract' => "no", … … 72 73 push(@$pluginlist, $class); 73 74 74 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}75 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};75 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 76 push(@{$hashArgOptLists->{"OptList"}},$options); 76 77 77 my $self = new UnknownPlug($pluginlist, $inputargs, $hashArgOptLists);78 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 78 79 79 80 return bless $self, $class; 80 81 } 81 82 82 83 sub read 83 sub process 84 84 { 85 85 my $self = shift (@_); 86 my ($pluginfo, $base_dir, $file, $metadata, $ processor, $maxdocs, $total_count, $gli) = @_;86 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 87 87 88 my $outhandle = $self->{'outhandle'}; 88 my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 89 # do something about OIDtype so no hashing 90 91 # old code was in effect the following. 92 if ($doc_obj->{'OIDtype'} =~ /^hash$/) { 93 $doc_obj->set_OIDtype ("incremental"); 94 } 89 95 90 #check process and block exps, smart block, etc 91 my ($block_status,$filename) = $self->read_block(@_); 92 return $block_status if ((!defined $block_status) || ($block_status==0)); 93 94 # Report that we're processing the file 95 print STDERR "<Processing n='$file' p='OggVorbisPlug'>\n" if ($gli); 96 print $outhandle "OggVorbisPlug: processing $file\n" 97 if ($self->{'verbosity'}) > 1; 98 99 # file is just the name of the file (need to get rid off any leading directory names) 100 $file =~ s/^.*[\/\\]//; 101 102 # create a new index document 103 my $doc_obj = new doc ($filename, "indexed_doc"); 104 if ($processor->{'OIDtype'} =~ /^(assigned|dirname)$/) { 105 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'}); 106 } 107 else { 108 $doc_obj->set_OIDtype ("incremental"); # this is done to avoid hashing content of file 109 } 110 my $section = $doc_obj->get_top_section(); 111 112 # replace spaces in filename with %20 in url for metadata entry 113 my $url = $file; 114 ##$url =~ s/ /%20/g; 115 116 # Source (filename) to be consistent with other plugins 117 $doc_obj->add_metadata ($section, "Source", $url); 118 96 my $top_section = $doc_obj->get_top_section(); 119 97 # Extract metadata 120 my $ogg = Ogg::Vorbis::Header::PurePerl->new($filename );98 my $ogg = Ogg::Vorbis::Header::PurePerl->new($filename_full_path); 121 99 122 100 # Comments added to the file … … 128 106 { 129 107 if (defined $value && $value ne "") { 130 $doc_obj->add_metadata($ section, $keytc, $value);108 $doc_obj->add_metadata($top_section, $keytc, $value); 131 109 } 132 110 } … … 141 119 my $value = $ogg->info->{$key}; 142 120 if (defined $value && $value ne "") { 143 $doc_obj->add_metadata($ section, $keytc, $value);121 $doc_obj->add_metadata($top_section, $keytc, $value); 144 122 } 145 123 } 146 124 } 147 125 148 # srclink 149 $doc_obj->add_metadata ($section, "FileFormat", "OggVorbis"); 150 $doc_obj->add_metadata ($section, "srclink", "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Source]\">"); 151 $doc_obj->add_metadata ($section, "/srclink", "</a>"); 126 $doc_obj->add_metadata ($top_section, "FileFormat", "OggVorbis"); 127 $doc_obj->add_metadata ($top_section, "srclink", "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Source]\">"); 128 $doc_obj->add_metadata ($top_section, "/srclink", "</a>"); 152 129 # srcicon (need to include "iogg.gif" in the greenstone images directory 153 $doc_obj->add_metadata ($ section, "srcicon", "<img src=\"_httpprefix_/images/iogg.gif\" title=\"Download\" border=0>");130 $doc_obj->add_metadata ($top_section, "srcicon", "<img src=\"_httpprefix_/images/iogg.gif\" title=\"Download\" border=0>"); 154 131 155 132 # add NoText metadata which can be used to suppress the dummy text 156 $doc_obj->add_metadata ($ section, "NoText", "1");133 $doc_obj->add_metadata ($top_section, "NoText", "1"); 157 134 158 135 # Add the actual file as an associated file 159 $doc_obj->associate_file($filename , $file, "VORBIS", $section);136 $doc_obj->associate_file($filename_full_path, $filename_no_path, "VORBIS", $top_section); 160 137 161 # Create an empty text string so we don't break downstream plugins162 my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);163 164 # include any metadata passed in from previous plugins165 $self->extra_metadata ($doc_obj, $section, $metadata);166 167 # do plugin specific processing of doc_obj168 return undef unless defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj));169 170 # do any automatic metadata extraction171 $self->auto_extract_metadata($doc_obj);172 173 # add an OID174 $doc_obj->set_OID();175 $doc_obj->add_utf8_text($section, $text);176 177 # process the document178 $processor->process($doc_obj);179 180 $self->{'num_processed'}++;181 return 1;182 138 } 183 139 -
gsdl/trunk/perllib/plugins/OpenDocumentPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # OpenDocumentPlug .pm -- The Open Document plugin3 # OpenDocumentPlugin.pm -- The Open Document plugin 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 33 33 #This basically extracts any text out of the document, but not much else. 34 34 35 package OpenDocumentPlug; 35 # this inherits ReadXMLFile, and therefore offers -xslt option, but does 36 # nothing with it. 37 38 package OpenDocumentPlugin; 36 39 37 40 use strict; 38 41 no strict 'refs'; # allow filehandles to be variables and viceversa 39 42 40 use XMLPlug;43 use ReadXMLFile; 41 44 use XML::XPath; 42 45 use XML::XPath::XMLParser; … … 46 49 47 50 sub BEGIN { 48 @OpenDocumentPlug::ISA = ('XMLPlug'); 49 } 50 51 52 #our @filesAssoc = (); 51 @OpenDocumentPlugin::ISA = ('ReadXMLFile'); 52 } 53 53 54 our @filesProcess = ( "content.xml" , "meta.xml" ); 54 #XML plug has this so we need it too55 our ($self);56 55 57 56 my $arguments = [ … … 62 61 ]; 63 62 64 my $options = { 'name' => "OpenDocumentPlug ",65 'desc' => "{OpenDocumentPlug .desc}",63 my $options = { 'name' => "OpenDocumentPlugin", 64 'desc' => "{OpenDocumentPlugin.desc}", 66 65 'abstract' => "no", 67 66 'inherits' => "yes", … … 75 74 push(@$pluginlist, $class); 76 75 77 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}78 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};79 80 my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);76 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 77 push(@{$hashArgOptLists->{"OptList"}},$options); 78 79 my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists); 81 80 82 81 $self->{'section'} = ""; … … 160 159 161 160 sub read { 162 # $self must be global to work with XML callback routines. 163 $self = shift (@_); 161 my $self = shift (@_); 164 162 165 163 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; … … 188 186 189 187 $self->unzip ("\"$file_only\""); 190 foreach my $xmlFile (@OpenDocumentPlug ::filesProcess) {188 foreach my $xmlFile (@OpenDocumentPlugin::filesProcess) { 191 189 if (-e $xmlFile) { 192 $self-> parse_file($xmlFile);190 $self->{'parser'}->parsefile($xmlFile); 193 191 } 194 192 } … … 203 201 204 202 # parsefile may either croak somewhere in XML::Parser (e.g. because 205 # the document is not well formed) or die somewhere in XMLPlugor a203 # the document is not well formed) or die somewhere in ReadXMLFile or a 206 204 # derived plugin (e.g. because we're attempting to process a 207 205 # document whose DOCTYPE is not meant for this plugin). For the … … 255 253 $doc_obj->add_utf8_metadata ("", "srcicon", "<img border=\"0\" align=\"absmiddle\" src=\"_httpprefix_/collect/[collection]/index/assoc/[archivedir]/thumbnail.png\" alt=\"View the Open document\" title=\"View the Open document\">"); 256 254 $doc_obj->add_utf8_metadata ("", "/srclink", "</a>"); 257 $ doc_obj->add_utf8_metadata ("", "Source", &ghtml::dmsafe($file_only));258 $doc_obj->set_utf8_metadata_element("", "FileSize", (-s $filename));255 $self->set_Source_metadata($doc_obj, $file_only); 256 $doc_obj->set_utf8_metadata_element("", "FileSize", (-s $filename)); 259 257 260 258 # include any metadata passed in from previous plugins … … 268 266 269 267 # add an OID 270 $ doc_obj->set_OID();268 $self->add_OID($doc_obj); 271 269 272 270 $doc_obj->add_utf8_metadata("", "Plugin", "$self->{'plugin_type'}"); -
gsdl/trunk/perllib/plugins/PDFPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # PDFPlug .pm -- reasonably with-it pdf plugin3 # PDFPlugin.pm -- reasonably with-it pdf plugin 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 23 23 # 24 24 ########################################################################### 25 package PDFPlug; 26 27 use ConvertToPlug; 25 package PDFPlugin; 26 27 use ConvertBinaryFile; 28 use ReadTextFile; 28 29 use unicode; 29 30 use strict; … … 31 32 32 33 sub BEGIN { 33 @PDFPlug ::ISA = ('ConvertToPlug');34 @PDFPlugin::ISA = ('ConvertBinaryFile', 'ReadTextFile'); 34 35 } 35 36 36 37 my $convert_to_list = 37 38 [ { 'name' => "auto", 38 'desc' => "{Convert ToPlug.convert_to.auto}" },39 'desc' => "{ConvertBinaryFile.convert_to.auto}" }, 39 40 { 'name' => "html", 40 'desc' => "{Convert ToPlug.convert_to.html}" },41 'desc' => "{ConvertBinaryFile.convert_to.html}" }, 41 42 { 'name' => "text", 42 'desc' => "{Convert ToPlug.convert_to.text}" },43 'desc' => "{ConvertBinaryFile.convert_to.text}" }, 43 44 { 'name' => "pagedimg_jpg", 44 'desc' => "{Convert ToPlug.convert_to.pagedimg_jpg}"},45 'desc' => "{ConvertBinaryFile.convert_to.pagedimg_jpg}"}, 45 46 { 'name' => "pagedimg_gif", 46 'desc' => "{Convert ToPlug.convert_to.pagedimg_gif}"},47 'desc' => "{ConvertBinaryFile.convert_to.pagedimg_gif}"}, 47 48 { 'name' => "pagedimg_png", 48 'desc' => "{Convert ToPlug.convert_to.pagedimg_png}"},49 'desc' => "{ConvertBinaryFile.convert_to.pagedimg_png}"}, 49 50 ]; 50 51 … … 53 54 [ 54 55 { 'name' => "convert_to", 55 'desc' => "{Convert ToPlug.convert_to}",56 'desc' => "{ConvertBinaryFile.convert_to}", 56 57 'type' => "enum", 57 58 'reqd' => "yes", … … 59 60 'deft' => "html" }, 60 61 { 'name' => "process_exp", 61 'desc' => "{Bas Plug.process_exp}",62 'desc' => "{BasePlugin.process_exp}", 62 63 'type' => "regexp", 63 64 'deft' => &get_default_process_exp(), 64 65 'reqd' => "no" }, 65 66 { 'name' => "block_exp", 66 'desc' => "{Bas Plug.block_exp}",67 'desc' => "{BasePlugin.block_exp}", 67 68 'type' => "regexp", 68 69 'deft' => &get_default_block_exp() }, 69 70 { 'name' => "metadata_fields", 70 'desc' => "{HTMLPlug .metadata_fields}",71 'desc' => "{HTMLPlugin.metadata_fields}", 71 72 'type' => "string", 72 73 'deft' => "" }, 73 74 { 'name' => "noimages", 74 'desc' => "{PDFPlug .noimages}",75 'desc' => "{PDFPlugin.noimages}", 75 76 'type' => "flag" }, 76 77 { 'name' => "allowimagesonly", 77 'desc' => "{PDFPlug .allowimagesonly}",78 'desc' => "{PDFPlugin.allowimagesonly}", 78 79 'type' => "flag" }, 79 80 { 'name' => "complex", 80 'desc' => "{PDFPlug .complex}",81 'desc' => "{PDFPlugin.complex}", 81 82 'type' => "flag" }, 82 83 { 'name' => "nohidden", 83 'desc' => "{PDFPlug .nohidden}",84 'desc' => "{PDFPlugin.nohidden}", 84 85 'type' => "flag" }, 85 86 { 'name' => "zoom", 86 'desc' => "{PDFPlug .zoom}",87 'desc' => "{PDFPlugin.zoom}", 87 88 'deft' => "2", 88 89 'range' => "1,3", # actually the range is 0.5-3 89 90 'type' => "int" }, 90 91 { 'name' => "use_sections", 91 'desc' => "{PDFPlug .use_sections}",92 'desc' => "{PDFPlugin.use_sections}", 92 93 'type' => "flag" }, 93 94 { 'name' => "description_tags", 94 'desc' => "{HTMLPlug .description_tags}",95 'desc' => "{HTMLPlugin.description_tags}", 95 96 'type' => "flag" } 96 97 ]; 97 98 98 my $options = { 'name' => "PDFPlug ",99 'desc' => "{PDFPlug .desc}",99 my $options = { 'name' => "PDFPlugin", 100 'desc' => "{PDFPlugin.desc}", 100 101 'abstract' => "no", 101 102 'inherits' => "yes", … … 111 112 push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?'); 112 113 113 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}114 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};114 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 115 push(@{$hashArgOptLists->{"OptList"}},$options); 115 116 116 117 my @arg_array = @$inputargs; 117 my $self = new Convert ToPlug($pluginlist, $inputargs, $hashArgOptLists);118 my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 118 119 119 120 if ($self->{'info_only'}) { … … 122 123 } 123 124 124 # these are passed through to gsConvert.pl by ConvertToPlug.pm 125 $self->{'filename_extension'} = "pdf"; 126 $self->{'file_type'} = "PDF"; 127 128 # these are passed through to gsConvert.pl by ConvertBinaryFile.pm 125 129 my $zoom = $self->{"zoom"}; 126 130 $self->{'convert_options'} = "-pdf_zoom $zoom"; … … 132 136 my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 133 137 134 if (!defined $secondary_plugin_options->{'HTMLPlug '}) {135 $secondary_plugin_options->{'HTMLPlug '} = [];136 } 137 if (!defined $secondary_plugin_options->{'T EXTPlug'}) {138 $secondary_plugin_options->{'T EXTPlug'} = [];138 if (!defined $secondary_plugin_options->{'HTMLPlugin'}) { 139 $secondary_plugin_options->{'HTMLPlugin'} = []; 140 } 141 if (!defined $secondary_plugin_options->{'TextPlugin'}) { 142 $secondary_plugin_options->{'TextPlugin'} = []; 139 143 } 140 144 if (defined $self->{'convert_to'} && $self->{'convert_to'} =~ /pagedimg.*/i) { 141 if (!defined $secondary_plugin_options->{'PagedIm gPlug'}){142 $secondary_plugin_options->{'PagedIm gPlug'} = [];143 my $pagedimg_options = $secondary_plugin_options->{'PagedIm gPlug'};145 if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){ 146 $secondary_plugin_options->{'PagedImagePlugin'} = []; 147 my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'}; 144 148 push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 145 149 } 146 150 } 147 my $html_options = $secondary_plugin_options->{'HTMLPlug '};148 my $text_options = $secondary_plugin_options->{'T EXTPlug'};149 my $pagedimg_options = $secondary_plugin_options->{'PagedIm gPlug'};151 my $html_options = $secondary_plugin_options->{'HTMLPlugin'}; 152 my $text_options = $secondary_plugin_options->{'TextPlugin'}; 153 my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'}; 150 154 151 155 if ($self->{'input_encoding'} eq "auto") { … … 227 231 && $self->{'converted_to'} eq "HTML") { 228 232 229 print $outhandle "PDFPlug : Calculating sections...\n";233 print $outhandle "PDFPlugin: Calculating sections...\n"; 230 234 231 235 # we have "<a name=1></a>" etc for each page … … 236 240 237 241 if (scalar (@sections) == 1) { #only one section - no split! 238 print $outhandle "PDFPlug : warning - no sections found\n";242 print $outhandle "PDFPlugin: warning - no sections found\n"; 239 243 } else { 240 244 $top_section .= shift @sections; # keep HTML header etc as top_section … … 274 278 $title = " "; # get rid of the undefined warning in next line 275 279 } 276 my $newsection = "<!-- from PDFPlug -->\n<!-- <Section>\n";280 my $newsection = "<!-- from PDFPlugin -->\n<!-- <Section>\n"; 277 281 $newsection .= "<Metadata name=\"Title\">" . $title 278 282 . "</Metadata>\n--><p>\n"; … … 296 300 sub process { 297 301 my $self = shift (@_); 298 my ($ textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;302 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 299 303 300 304 my $result = $self->process_type("pdf",$base_dir,$file,$doc_obj); -
gsdl/trunk/perllib/plugins/PPTPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # PPTPlug .pm -- plugin for importing Microsoft PowerPoint files.3 # PPTPlugin.pm -- plugin for importing Microsoft PowerPoint files. 4 4 # (currently only versions 95 and 97) 5 5 # … … 26 26 ########################################################################### 27 27 28 package PPTPlug ;28 package PPTPlugin; 29 29 30 use ConvertToPlug; 30 use ConvertBinaryFile; 31 use ReadTextFile; # for read_file in convert_post_process. do we need it? 32 31 33 use strict; 32 34 no strict 'refs'; # allow filehandles to be variables and viceversa 33 35 34 36 sub BEGIN { 35 @PPTPlug ::ISA = ('ConvertToPlug');37 @PPTPlugin::ISA = ('ConvertBinaryFile', 'ReadTextFile'); 36 38 } 37 39 38 40 my $convert_to_list = 39 41 [ { 'name' => "auto", 40 'desc' => "{Convert ToPlug.convert_to.auto}" },42 'desc' => "{ConvertBinaryFile.convert_to.auto}" }, 41 43 { 'name' => "html", 42 'desc' => "{Convert ToPlug.convert_to.html}" },44 'desc' => "{ConvertBinaryFile.convert_to.html}" }, 43 45 { 'name' => "text", 44 'desc' => "{Convert ToPlug.convert_to.text}" },46 'desc' => "{ConvertBinaryFile.convert_to.text}" }, 45 47 { 'name' => "pagedimg_jpg", 46 'desc' => "{Convert ToPlug.convert_to.pagedimg_jpg}" },48 'desc' => "{ConvertBinaryFile.convert_to.pagedimg_jpg}" }, 47 49 { 'name' => "pagedimg_gif", 48 'desc' => "{Convert ToPlug.convert_to.pagedimg_gif}" },50 'desc' => "{ConvertBinaryFile.convert_to.pagedimg_gif}" }, 49 51 { 'name' => "pagedimg_png", 50 'desc' => "{Convert ToPlug.convert_to.pagedimg_png}" }52 'desc' => "{ConvertBinaryFile.convert_to.pagedimg_png}" } 51 53 ]; 52 54 53 55 my $arguments = 54 56 [ { 'name' => "process_exp", 55 'desc' => "{Bas Plug.process_exp}",57 'desc' => "{BasePlugin.process_exp}", 56 58 'type' => "regexp", 57 59 'reqd' => "no", … … 59 61 ]; 60 62 61 my $options = { 'name' => "PPTPlug ",62 'desc' => "{PPTPlug .desc}",63 my $options = { 'name' => "PPTPlugin", 64 'desc' => "{PPTPlugin.desc}", 63 65 'abstract' => "no", 64 66 'inherits' => "yes", … … 73 75 if ($ENV{'GSDLOS'} =~ m/^windows$/i) { 74 76 my $ws_arg =[{ 'name' => "convert_to", 75 'desc' => "{Convert ToPlug.convert_to}",77 'desc' => "{ConvertBinaryFile.convert_to}", 76 78 'type' => "enum", 77 79 'reqd' => "yes", … … 79 81 'deft' => "html" }, 80 82 { 'name' => "windows_scripting", 81 'desc' => "{PPTPlug .windows_scripting}",83 'desc' => "{PPTPlugin.windows_scripting}", 82 84 'type' => "flag", 83 85 'reqd' => "no" } … … 86 88 } 87 89 88 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}89 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};90 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 91 push(@{$hashArgOptLists->{"OptList"}},$options); 90 92 91 93 92 my @arg_array = @$inputargs; 93 my $self = new ConvertToPlug($pluginlist, $inputargs, $hashArgOptLists); 94 my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 94 95 95 96 if ($self->{'info_only'}) { … … 98 99 } 99 100 101 $self->{'filename_extension'} = "ppt"; 102 $self->{'file_type'} = "PPT"; 103 100 104 # ppthtml outputs utf-8 already. 101 #these are passed through to gsConvert.pl by Convert ToPlug.pm105 #these are passed through to gsConvert.pl by ConvertBinaryFile.pm 102 106 $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'}; 103 107 my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 104 108 105 109 if ($self->{'windows_scripting'} && ($self->{'convert_to'} eq "PagedImg")) { 106 $secondary_plugin_options->{'PagedIm gPlug'} = [];110 $secondary_plugin_options->{'PagedImagePlugin'} = []; 107 111 } else { 108 $secondary_plugin_options->{'HTMLPlug '} = [];112 $secondary_plugin_options->{'HTMLPlugin'} = []; 109 113 } 110 my $html_options = $secondary_plugin_options->{'HTMLPlug '};111 my $pageimg_options = $secondary_plugin_options->{'PagedIm gPlug'};114 my $html_options = $secondary_plugin_options->{'HTMLPlugin'}; 115 my $pageimg_options = $secondary_plugin_options->{'PagedImagePlugin'}; 112 116 113 117 if ($self->{'input_encoding'} eq "auto") { 114 118 $self->{'input_encoding'} = "utf8"; 115 if (defined $secondary_plugin_options->{'HTMLPlug '}){119 if (defined $secondary_plugin_options->{'HTMLPlugin'}){ 116 120 push(@$html_options,"-input_encoding", "utf8"); 117 121 push(@$html_options,"-extract_language") if $self->{'extract_language'}; 118 122 119 # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)123 # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj) 120 124 # to extract these metadata fields from the HEAD META fields 121 125 push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>"); 122 126 } 123 if (defined $secondary_plugin_options->{'PagedIm gPlug'}){127 if (defined $secondary_plugin_options->{'PagedImagePlugin'}){ 124 128 push(@$pageimg_options,"-input_encoding", "utf8"); 125 129 push(@$pageimg_options,"-extract_language") if $self->{'extract_language'}; … … 138 142 } 139 143 140 sub get_file_type { 141 my $self = shift (@_); 142 my $file_type = "PPT"; 143 return $file_type; 144 } 145 144 # do we need this? above states that ppthtml produces utf8 text... 146 145 sub convert_post_process 147 146 { … … 161 160 } 162 161 163 sub process {164 my $self = shift (@_);165 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;166 167 return $self->process_type("ppt",$base_dir,$file,$doc_obj);168 }169 162 170 163 1; -
gsdl/trunk/perllib/plugins/PSPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # PSPlug .pm -- this might look VERY similar to the PDF plugin...3 # PSPlugin.pm -- this might look VERY similar to the PDF plugin... 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 26 26 # 12/05/02 Added usage datastructure - John Thompson 27 27 28 package PSPlug; 29 30 use ConvertToPlug; 28 package PSPlugin; 29 30 use ConvertBinaryFile; 31 use ReadTextFile; # for read_file in convert_post_process. do we need it? 31 32 use sorttools; 32 33 … … 35 36 36 37 sub BEGIN { 37 @PSPlug ::ISA = ('ConvertToPlug');38 @PSPlugin::ISA = ('ConvertBinaryFile', 'ReadTextFile'); 38 39 } 39 40 40 41 my $convert_to_list = 41 42 [ { 'name' => "auto", 42 'desc' => "{Convert ToPlug.convert_to.auto}" },43 'desc' => "{ConvertBinaryFile.convert_to.auto}" }, 43 44 { 'name' => "text", 44 'desc' => "{Convert ToPlug.convert_to.text}" },45 'desc' => "{ConvertBinaryFile.convert_to.text}" }, 45 46 { 'name' => "pagedimg_jpg", 46 'desc' => "{Convert ToPlug.convert_to.pagedimg_jpg}" },47 'desc' => "{ConvertBinaryFile.convert_to.pagedimg_jpg}" }, 47 48 { 'name' => "pagedimg_gif", 48 'desc' => "{Convert ToPlug.convert_to.pagedimg_gif}" },49 'desc' => "{ConvertBinaryFile.convert_to.pagedimg_gif}" }, 49 50 { 'name' => "pagedimg_png", 50 'desc' => "{Convert ToPlug.convert_to.pagedimg_png}" }51 'desc' => "{ConvertBinaryFile.convert_to.pagedimg_png}" } 51 52 ]; 52 53 53 54 my $arguments = 54 55 [ { 'name' => "convert_to", 55 'desc' => "{Convert ToPlug.convert_to}",56 'desc' => "{ConvertBinaryFile.convert_to}", 56 57 'type' => "enum", 57 58 'reqd' => "yes", … … 59 60 'deft' => "text" }, 60 61 { 'name' => "process_exp", 61 'desc' => "{Bas Plug.process_exp}",62 'desc' => "{BasePlugin.process_exp}", 62 63 'type' => "regexp", 63 64 'deft' => &get_default_process_exp(), 64 65 'reqd' => "no" }, 65 66 { 'name' => "block_exp", 66 'desc' => "{Bas Plug.block_exp}",67 'desc' => "{BasePlugin.block_exp}", 67 68 'type' => 'regexp', 68 69 'deft' => &get_default_block_exp() }, 69 70 { 'name' => "extract_date", 70 'desc' => "{PSPlug .extract_date}",71 'desc' => "{PSPlugin.extract_date}", 71 72 'type' => "flag" }, 72 73 { 'name' => "extract_pages", 73 'desc' => "{PSPlug .extract_pages}",74 'desc' => "{PSPlugin.extract_pages}", 74 75 'type' => "flag" }, 75 76 { 'name' => "extract_title", 76 'desc' => "{PSPlug .extract_title}",77 'desc' => "{PSPlugin.extract_title}", 77 78 'type' => "flag" } ]; 78 79 79 my $options = { 'name' => "PSPlug ",80 'desc' => "{PSPlug .desc}",80 my $options = { 'name' => "PSPlugin", 81 'desc' => "{PSPlugin.desc}", 81 82 'abstract' => "no", 82 83 'inherits' => "yes", … … 94 95 push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?'); 95 96 96 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}97 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};98 99 my $self = new Convert ToPlug($pluginlist, $inputargs, $hashArgOptLists);97 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 98 push(@{$hashArgOptLists->{"OptList"}},$options); 99 100 my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 100 101 101 102 if ($self->{'info_only'}) { … … 104 105 } 105 106 107 $self->{'filename_extension'} = "ps"; 108 $self->{'file_type'} = "PS"; 109 106 110 my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 107 111 108 if (!defined $secondary_plugin_options->{'T EXTPlug'}) {109 $secondary_plugin_options->{'T EXTPlug'} = [];110 } 111 112 my $text_options = $secondary_plugin_options->{'T EXTPlug'};112 if (!defined $secondary_plugin_options->{'TextPlugin'}) { 113 $secondary_plugin_options->{'TextPlugin'} = []; 114 } 115 116 my $text_options = $secondary_plugin_options->{'TextPlugin'}; 113 117 114 118 if (defined $self->{'convert_to'} && $self->{'convert_to'} =~ /pagedimg.*/i) { 115 if (!defined $secondary_plugin_options->{'PagedIm gPlug'}){116 $secondary_plugin_options->{'PagedIm gPlug'} = [];117 my $pagedimg_options = $secondary_plugin_options->{'PagedIm gPlug'};119 if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){ 120 $secondary_plugin_options->{'PagedImagePlugin'} = []; 121 my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'}; 118 122 push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 119 123 } … … 148 152 } 149 153 154 # this has been commented out in other plugins. do we need it here? 150 155 sub convert_post_process 151 156 { … … 179 184 my $date_found = 0; 180 185 181 print STDERR "PSPlug : extracting PostScript metadata from \"$filename\"\n"186 print STDERR "PSPlugin: extracting PostScript metadata from \"$filename\"\n" 182 187 if $self->{'verbosity'} > 1; 183 188 … … 237 242 } 238 243 239 # do plugin specific processing of doc_obj for HTML type244 # do plugin specific processing of doc_obj 240 245 sub process { 241 246 my $self = shift (@_); 242 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 243 244 # my $outhandle = $self->{'outhandle'}; 245 246 # print $outhandle "PSPlug: passing $file on to $self->{'converted_to'}Plug\n" 247 # if $self->{'verbosity'} > 1; 248 # print STDERR "<Processing n='$file' p='PSPlug'>\n" if ($gli); 247 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 249 248 250 249 my $filename = &util::filename_cat($base_dir,$file); 251 250 $self->extract_metadata_from_postscript($filename, $doc_obj); 252 251 253 return $self->process_type("ps",$base_dir,$file,$doc_obj); 252 return $self->SUPER::process(@_); 253 254 254 } 255 255 -
gsdl/trunk/perllib/plugins/PagedImagePlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # PagedIm gPlug.pm -- plugin for sets of images and OCR text that3 # PagedImagePlugin.pm -- plugin for sets of images and OCR text that 4 4 # make up a document 5 5 # A component of the Greenstone digital library software … … 25 25 ########################################################################### 26 26 27 # PagedIm gPlug27 # PagedImagePlugin 28 28 # processes sequences of images, with optional OCR text 29 29 # … … 116 116 # To have it create medium size images for display, use the '-screenview' 117 117 # option. As usual, running 118 # 'perl -S pluginfo.pl PagedIm gPlug' will list all the options.118 # 'perl -S pluginfo.pl PagedImagePlugin' will list all the options. 119 119 120 120 # If you want the resulting documents to be presented with a table of … … 132 132 # FileName (only for document level metadata). 133 133 134 package PagedImgPlug; 135 136 use XMLPlug; 134 package PagedImagePlugin; 135 136 use ReadXMLFile; 137 use ReadTextFile; 138 use ImageConverter; 139 137 140 use strict; 138 141 no strict 'refs'; # allow filehandles to be variables and viceversa 139 142 140 143 sub BEGIN { 141 @PagedIm gPlug::ISA = ('XMLPlug');144 @PagedImagePlugin::ISA = ('ReadXMLFile', 'ReadTextFile', 'ImageConverter'); 142 145 } 143 146 144 147 my $type_list = 145 148 [ { 'name' => "paged", 146 'desc' => "{PagedIm gPlug.documenttype.paged}" },149 'desc' => "{PagedImagePlugin.documenttype.paged}" }, 147 150 { 'name' => "hierarchy", 148 'desc' => "{PagedIm gPlug.documenttype.hierarchy}" } ];151 'desc' => "{PagedImagePlugin.documenttype.hierarchy}" } ]; 149 152 150 153 my $arguments = … … 163 166 'type' => "string", 164 167 'deft' => "" }, 165 { 'name' => "noscaleup",166 'desc' => "{ImagePlug.noscaleup}",167 'type' => "flag",168 'reqd' => "no" },169 { 'name' => "thumbnail",170 'desc' => "{PagedImgPlug.thumbnail}",171 'type' => "flag",172 'reqd' => "no" },173 { 'name' => "thumbnailsize",174 'desc' => "{ImagePlug.thumbnailsize}",175 'type' => "int",176 'deft' => "100",177 'range' => "1,",178 'reqd' => "no" },179 { 'name' => "thumbnailtype",180 'desc' => "{ImagePlug.thumbnailtype}",181 'type' => "string",182 'deft' => "gif",183 'reqd' => "no" },184 { 'name' => "screenview",185 'desc' => "{PagedImgPlug.screenview}",186 'type' => "flag",187 'reqd' => "no" },188 { 'name' => "screenviewsize",189 'desc' => "{PagedImgPlug.screenviewsize}",190 'type' => "int",191 'deft' => "500",192 'range' => "1,",193 'reqd' => "no" },194 { 'name' => "screenviewtype",195 'desc' => "{PagedImgPlug.screenviewtype}",196 'type' => "string",197 'deft' => "jpg",198 'reqd' => "no" },199 { 'name' => "converttotype",200 'desc' => "{ImagePlug.converttotype}",201 'type' => "string",202 'deft' => "",203 'reqd' => "no" },204 { 'name' => "minimumsize",205 'desc' => "{ImagePlug.minimumsize}",206 'type' => "int",207 'deft' => "100",208 'range' => "1,",209 'reqd' => "no" },210 168 { 'name' => "headerpage", 211 'desc' => "{PagedIm gPlug.headerpage}",169 'desc' => "{PagedImagePlugin.headerpage}", 212 170 'type' => "flag", 213 171 'reqd' => "no" }, 214 172 { 'name' => "documenttype", 215 'desc' => "{PagedIm gPlug.documenttype}",173 'desc' => "{PagedImagePlugin.documenttype}", 216 174 'type' => "enum", 217 175 'list' => $type_list, … … 220 178 221 179 222 my $options = { 'name' => "PagedIm gPlug",223 'desc' => "{PagedIm gPlug.desc}",180 my $options = { 'name' => "PagedImagePlugin", 181 'desc' => "{PagedImagePlugin.desc}", 224 182 'abstract' => "no", 225 183 'inherits' => "yes", … … 231 189 push(@$pluginlist, $class); 232 190 233 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 234 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 235 236 my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists); 191 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 192 push(@{$hashArgOptLists->{"OptList"}},$options); 193 194 new ImageConverter($pluginlist, $inputargs, $hashArgOptLists); 195 new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists); 196 my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists); 237 197 238 198 return bless $self, $class; 199 } 200 201 202 sub init { 203 my $self = shift (@_); 204 my ($verbosity, $outhandle, $failhandle) = @_; 205 206 $self->SUPER::init(@_); 207 $self->ImageConverter::init(); 239 208 } 240 209 … … 260 229 } 261 230 262 # Create the thumbnail and screenview images, and discover the Image's 263 # size, width, and height using the convert utility. 231 sub rotate_image { 232 my $self = shift (@_); 233 my ($filename_full_path) = @_; 234 235 my ($this_filetype) = $filename_full_path =~ /\.([^\.]*)$/; 236 my $result = $self->convert($filename_full_path, $this_filetype, "-rotate 180", "ROTATE"); 237 my ($new_filename) = ($result =~ /=>(.*\.$this_filetype)/); 238 if (-e "$new_filename") { 239 return $new_filename; 240 } 241 # somethings gone wrong 242 return $filename_full_path; 243 244 } 245 264 246 sub process_image { 265 my $self = shift (@_); 266 my $filename = shift (@_); # filename with full path 267 my $srcfile = shift (@_); # filename without path 268 my $doc_obj = shift (@_); 269 my $section = shift (@_); #the current section 270 my $rotate = shift (@_); # whether to rotate the image or not 271 $rotate = 0 unless defined $rotate; 272 273 # check that the image file exists!! 274 if (!-f $filename) { 275 print "PagedImgPlug: ERROR: File $filename does not exist, skipping\n"; 276 return 0; 277 } 278 279 my $top=0; 280 if ($section eq $doc_obj->get_top_section()) { 281 $top=1; 282 } 283 my $verbosity = $self->{'verbosity'}; 284 my $outhandle = $self->{'outhandle'}; 285 286 # check the filename is okay 287 return 0 if ($srcfile eq "" || $filename eq ""); 288 289 my $minimumsize = $self->{'minimumsize'}; 290 if (defined $minimumsize && (-s $filename < $minimumsize)) { 291 print $outhandle "PagedImgPlug: \"$filename\" too small, skipping\n" 292 if ($verbosity > 1); 293 } 294 295 # Convert the image to a new type (if required), and rotate if required. 296 my $converttotype = $self->{'converttotype'}; 297 my $originalfilename = ""; # only set if we do a conversion 298 my $type = "unknown"; 299 my $converted = 0; 300 my $rotated=0; 301 302 if ($converttotype ne "" && $filename !~ /$converttotype$/) { 303 $converted=1; 304 $originalfilename = $filename; 305 my $filehead = &util::get_tmp_filename(); 306 $filename = $filehead . ".$converttotype"; 307 my $n = 1; 308 while (-e $filename) { 309 $filename = "$filehead$n\.$converttotype"; 310 $n++; 311 } 312 $self->{'tmp_filename1'} = $filename; 313 314 my $rotate_option = ""; 315 if ($rotate eq "r") { 316 $rotate_option = "-rotate 180 "; 317 } 318 319 my $command = "convert -verbose \"$originalfilename\" $rotate_option \"$filename\""; 320 print $outhandle "CONVERT: $command\n" if ($verbosity > 2); 321 my $result = ''; 322 $result = `$command`; 323 print $outhandle "CONVERT RESULT = $result\n" if ($verbosity > 2); 324 325 $type = $converttotype; 326 } elsif ($rotate eq "r") { 327 $rotated=1; 328 $originalfilename = $filename; 329 $filename = &util::get_tmp_filename(); 330 331 my $command = "convert \"$originalfilename\" -rotate 180 \"$filename\""; 332 print $outhandle "ROTATE: $command\n" if ($verbosity > 2); 333 my $result = ''; 334 $result = `$command`; 335 print $outhandle "ROTATE RESULT = $result\n" if ($verbosity > 2); 336 337 } 338 339 340 # Add the image metadata 341 my $file; # the new file name 342 my $id = $srcfile; 343 $id =~ s/\.([^\.]*)$//; # the new file name without an extension 344 if ($converted) { 345 # we have converted the image 346 # add on the new extension 347 $file .= "$id.$converttotype"; 348 } else { 349 $file = $srcfile; 350 } 351 352 my $url =$file; # the new file name prepared for a url 353 my $srcurl = $srcfile; 354 ##$url =~ s/ /%20/g; 355 ##$srcurl =~ s/ /%20/g; 356 357 $doc_obj->add_metadata ($section, "Image", $url); 358 359 # Also want to set filename as 'Source' metadata to be 360 # consistent with other plugins 361 $doc_obj->add_metadata ($section, "Source", $srcurl); 362 363 my ($image_type, $image_width, $image_height, $image_size) 364 = &identify($filename, $outhandle, $verbosity); 365 366 $doc_obj->add_metadata ($section, "ImageType", $image_type); 367 $doc_obj->add_metadata ($section, "ImageWidth", $image_width); 368 $doc_obj->add_metadata ($section, "ImageHeight", $image_height); 369 $doc_obj->add_metadata ($section, "ImageSize", $image_size); 370 $doc_obj->add_metadata ($section, "FileFormat", "PagedImg"); 371 # add NoText metadata which can be used to suppress the dummy text 372 $doc_obj->add_metadata ($section, "NoText", "1"); 373 374 if ($type eq "unknown" && $image_type) { 375 $type = $image_type; 376 } 377 378 if ($top) { 379 $doc_obj->add_metadata ($section, "srclink", 380 "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\">"); 381 $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Image]\">"); 382 383 } else { 384 $doc_obj->add_metadata ($section, "srclink", 385 "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[parent(Top):assocfilepath]/[Image]\">"); 386 $doc_obj->add_metadata ($section, "srcicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[parent(Top):assocfilepath]/[Image]\">"); 387 388 } 389 $doc_obj->add_metadata ($section, "/srclink", "</a>"); 390 391 392 # Add the image as an associated file 393 $doc_obj->associate_file($filename,$file,"image/$type",$section); 394 print $outhandle "associating file $filename as name $file\n" if ($verbosity > 2); 395 396 if ($self->{'thumbnail'}) { 397 # Make the thumbnail image 398 my $thumbnailsize = $self->{'thumbnailsize'} || 100; 399 my $thumbnailtype = $self->{'thumbnailtype'} || 'gif'; 400 401 my $filehead = &util::get_tmp_filename(); 402 my $thumbnailfile = $filehead . ".$thumbnailtype"; 403 my $n=1; 404 while (-e $thumbnailfile) { 405 $thumbnailfile = $filehead . $n . ".$thumbnailtype"; 406 $n++; 407 } 408 409 $self->{'tmp_filename2'} = $thumbnailfile; 410 411 # Generate the thumbnail with convert 412 my $command = "convert -verbose -geometry $thumbnailsize" 413 . "x$thumbnailsize \"$filename\" \"$thumbnailfile\""; 414 print $outhandle "THUMBNAIL: $command\n" if ($verbosity > 2); 415 my $result = ''; 416 $result = `$command 2>&1` ; 417 print $outhandle "THUMB RESULT: $result\n" if ($verbosity > 2); 418 419 # Add the thumbnail as an associated file ... 420 if (-e "$thumbnailfile") { 421 $doc_obj->associate_file("$thumbnailfile", $id."thumb.$thumbnailtype", "image/$thumbnailtype",$section); 422 $doc_obj->add_metadata ($section, "ThumbType", $thumbnailtype); 423 $doc_obj->add_metadata ($section, "Thumb", $id."thumb.$thumbnailtype"); 424 if ($top) { 425 $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>"); 426 } else { 427 $doc_obj->add_metadata ($section, "thumbicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[parent(Top):assocfilepath]/[Thumb]\" width=[ThumbWidth] height=[ThumbHeight]>"); 428 } 429 } 430 431 # Extract Thumnail metadata from convert output 432 if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) { 433 $doc_obj->add_metadata ($section, "ThumbWidth", $1); 434 $doc_obj->add_metadata ($section, "ThumbHeight", $2); 435 } 436 } 437 # Make a screen-sized version of the picture if requested 438 if ($self->{'screenview'}) { 439 440 # To do: if the actual image is smaller than the screenview size, 441 # we should use the original ! 442 443 my $screenviewsize = $self->{'screenviewsize'} || 500; 444 my $screenviewtype = $self->{'screenviewtype'} || 'jpeg'; 445 my $filehead = &util::get_tmp_filename(); 446 my $screenviewfilename = $filehead . ".$screenviewtype"; 447 my $n=1; 448 while (-e $screenviewfilename) { 449 $screenviewfilename = "$filehead$n\.$screenviewtype"; 450 $n++; 451 } 452 $self->{'tmp_filename3'} = $screenviewfilename; 453 454 # make the screenview image 455 my $command = "convert -verbose -geometry $screenviewsize" 456 . "x$screenviewsize \"$filename\" \"$screenviewfilename\""; 457 print $outhandle "SCREENVIEW: $command\n" if ($verbosity > 2); 458 my $result = ""; 459 $result = `$command 2>&1` ; 460 print $outhandle "SCREENVIEW RESULT: $result\n" if ($verbosity > 3); 461 462 # get screenview dimensions, size and type 463 if ($result =~ m/[0-9]+x[0-9]+=>([0-9]+)x([0-9]+)/) { 464 $doc_obj->add_metadata ($section, "ScreenWidth", $1); 465 $doc_obj->add_metadata ($section, "ScreenHeight", $2); 466 }elsif ($result =~ m/([0-9]+)x([0-9]+)/) { 467 #if the image hasn't changed size, the previous regex doesn't match 468 $doc_obj->add_metadata ($section, "ScreenWidth", $1); 469 $doc_obj->add_metadata ($section, "ScreenHeight", $2); 470 } 471 472 #add the screenview as an associated file ... 473 if (-e "$screenviewfilename") { 474 $doc_obj->associate_file("$screenviewfilename", $id."sv.$screenviewtype", 475 "image/$screenviewtype",$section); 476 print $outhandle "associating screen file $screenviewfilename as name $id sv.$screenviewtype\n" if ($verbosity > 2); 477 478 $doc_obj->add_metadata ($section, "ScreenType", $screenviewtype); 479 $doc_obj->add_metadata ($section, "Screen", $id."sv.$screenviewtype"); 480 481 if ($top) { 482 $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>"); 483 } else { 484 $doc_obj->add_metadata ($section, "screenicon", "<img src=\"_httpprefix_/collect/[collection]/index/assoc/[parent(Top):assocfilepath]/[Screen]\" width=[ScreenWidth] height=[ScreenHeight]>"); 485 486 } 487 } else { 488 print $outhandle "PagedImgPlug: couldn't find \"$screenviewfilename\"\n"; 489 } 490 } 491 492 return $type; 493 494 495 } 496 497 498 499 # Discover the characteristics of an image file with the ImageMagick 500 # "identify" command. 501 502 sub identify { 503 my ($image, $outhandle, $verbosity) = @_; 504 505 # Use the ImageMagick "identify" command to get the file specs 506 my $command = "identify \"$image\" 2>&1"; 507 print $outhandle "$command\n" if ($verbosity > 2); 508 my $result = ''; 509 $result = `$command`; 510 print $outhandle "$result\n" if ($verbosity > 3); 511 512 # Read the type, width, and height 513 my $type = 'unknown'; 514 my $width = 'unknown'; 515 my $height = 'unknown'; 516 517 my $image_safe = quotemeta $image; 518 if ($result =~ /^$image_safe (\w+) (\d+)x(\d+)/) { 519 $type = $1; 520 $width = $2; 521 $height = $3; 522 } 523 524 # Read the size 525 my $size = "unknown"; 526 if ($result =~ m/^.* ([0-9]+)b/) { 527 $size = $1; 528 } elsif ($result =~ m/^.* ([0-9]+)kb/) { 529 $size = 1024 * $1; 530 } 531 532 print $outhandle "file: $image:\t $type, $width, $height, $size\n" 533 if ($verbosity > 3); 534 535 # Return the specs 536 return ($type, $width, $height, $size); 537 } 538 539 540 # The PagedImgPlug read() function. This function does all the right things 541 # to make general options work for a given plugin. It calls the process() 542 # function which does all the work specific to a plugin (like the old 543 # read functions used to do). Most plugins should define their own 544 # process() function and let this read() function keep control. 545 # 546 # PagedImgPlug overrides read() because there is no need to read the actual 547 # text of the file in, because the contents of the file is not text... 548 # 549 # Return number of files processed, undef if can't process 550 # Note that $base_dir might be "" and that $file might 551 # include directories 552 553 sub read_into_doc_obj { 554 my $self = shift (@_); 555 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 556 my $outhandle = $self->{'outhandle'}; 557 558 #check process and block exps, smart block, etc 559 my ($block_status,$filename) = $self->read_block(@_); 560 return $block_status if ((!defined $block_status) || ($block_status==0)); 561 562 print $outhandle "PagedImgPlug processing \"$filename\"\n" 563 if $self->{'verbosity'} > 1; 564 print STDERR "<Processing n='$file' p='PagedImgPlug'>\n" if ($gli); 565 566 # here we need to decide if we have an old text .item file, or a new xml 567 # .item file - for now the test is if the first non-empty line is 568 # <PagedDocument> then its xml 569 my $xml_version = 0; 570 open (ITEMFILE, $filename) || die "couldn't open $filename\n"; 571 572 my $backup_filename = "backup.item"; 573 open (BACKUP,">$backup_filename")|| die "couldn't write to $backup_filename\n"; 574 my $line = ""; 575 my $num = 0; 576 $line = <ITEMFILE>; 577 while ($line !~ /\w/) { 578 $line = <ITEMFILE>; 579 } 580 chomp $line; 581 if ($line =~ /<PagedDocument/) { 582 $xml_version = 1; 583 } 584 close ITEMFILE; 585 open (ITEMFILE, $filename) || die "couldn't open $filename\n"; 586 $line = <ITEMFILE>; 587 $line =~ s/^\xEF\xBB\xBF//; # strip BOM 588 $line =~ s/\x0B+//ig; 589 $line =~ s/&/&/g; 590 print BACKUP ($line); 591 #Tidy up the item file some metadata title contains \vt-vertical tab 592 while ($line = <ITEMFILE>) { 593 $line =~ s/\x0B+//ig; 594 $line =~ s/&/&/g; 595 print BACKUP ($line); 596 } 597 close ITEMFILE; 598 close BACKUP; 599 &File::Copy::copy ($backup_filename, $filename); 600 &util::rm($backup_filename); 601 602 my $doc_obj; 603 if ($xml_version) { 604 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 605 $self->{'file'} = $file; 606 $self->{'filename'} = $filename; 607 $self->{'processor'} = $processor; 608 $self->{'metadata'} = $metadata; 609 247 my $self = shift(@_); 248 my ($filename_full_path, $filename_no_path, $doc_obj, $section, $rotate) = @_; 249 # do rotation 250 if ($rotate eq "r") { 251 # check the filenames 252 return 0 if ($filename_no_path eq "" || !-f $filename_full_path); 253 254 # we get a new temporary file which is rotated 255 $filename_full_path = $self->rotate_image($filename_full_path); 256 } 257 258 # do generate images 259 my $result = $self->generate_images($filename_full_path, $filename_no_path, $doc_obj, $section); 260 #overwrite one set in ImageConverter 261 $doc_obj->set_metadata_element ($section, "FileFormat", "PagedImage"); 262 return $result; 263 } 264 265 sub old_read_stuff_for_xml_version { 266 my ($self, $filename, $file, $gli); 267 268 # this bit same as ReadXMLFile read 269 # $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up 270 # $self->{'file'} = $file; 271 # $self->{'filename'} = $filename_full_path; 272 # $self->{'processor'} = $processor; 273 # $self->{'metadata'} = $metadata; 274 # 610 275 eval { 611 276 $@ = ""; … … 616 281 617 282 # feed transformed file (now in memory as string) into XML parser 618 #$self->{'parser'}->parse($transformed_xml);619 $self->parse_string($transformed_xml);283 $self->{'parser'}->parse($transformed_xml); 284 ###$self->parse_string($transformed_xml); 620 285 } 621 286 else { 622 #$self->{'parser'}->parsefile($filename);623 $self->parse_file($filename);287 $self->{'parser'}->parsefile($filename); 288 #$self->parse_file($filename); 624 289 } 625 290 }; … … 650 315 return -1; # error during processing 651 316 } 317 318 } 319 320 321 # The PagedImagePlugin read() function. This function does all the right things 322 # to make general options work for a given plugin. It calls the process() 323 # function which does all the work specific to a plugin (like the old 324 # read functions used to do). Most plugins should define their own 325 # process() function and let this read() function keep control. 326 # 327 # PagedImagePlugin overrides read() because there is no need to read the actual 328 # text of the file in, because the contents of the file is not text... 329 # 330 # Return number of files processed, undef if can't process 331 # Note that $base_dir might be "" and that $file might 332 # include directories 333 334 sub read_into_doc_obj { 335 my $self = shift (@_); 336 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; 337 my $outhandle = $self->{'outhandle'}; 338 339 my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 340 341 print $outhandle "PagedImagePlugin processing \"$filename_full_path\"\n" 342 if $self->{'verbosity'} > 1; 343 print STDERR "<Processing n='$file' p='PagedImagePlugin'>\n" if ($gli); 344 345 346 # here we need to decide if we have an old text .item file, or a new xml 347 # .item file 348 my $xml_version = $self->is_xml_item_file($filename_full_path); 349 350 $self->tidy_item_file($filename_full_path); 351 352 my $doc_obj; 353 if ($xml_version) { 354 # careful checking needed here!! are we using local xml handlers or super ones 355 $self->ReadXMLFile::read($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli); 652 356 $doc_obj = $self->{'doc_obj'}; 653 357 } else { 654 358 my ($dir); 655 ($dir, $file) = $filename =~ /^(.*?)([^\/\\]*)$/;359 ($dir, $file) = $filename_full_path =~ /^(.*?)([^\/\\]*)$/; 656 360 657 361 #process the .item file 658 $doc_obj = $self->process_item($filename , $dir, $file, $processor);362 $doc_obj = $self->process_item($filename_full_path, $dir, $file, $processor); 659 363 660 364 } 661 662 if ($self->{'cover_image'}) { 663 $self->associate_cover_image($doc_obj, $filename); 664 } 365 366 my $section = $doc_obj->get_top_section(); 367 368 $doc_obj->add_utf8_metadata($section, "Plugin", "$self->{'plugin_type'}"); 369 $doc_obj->add_metadata($section, "FileFormat", "PagedImage"); 665 370 666 371 # include any metadata passed in from previous plugins 667 372 # note that this metadata is associated with the top level section 668 my $section = $doc_obj->get_top_section();373 $self->add_associated_files($doc_obj, $filename_full_path); 669 374 $self->extra_metadata ($doc_obj, $section, $metadata); 670 #my $text="";671 # do plugin specific processing of doc_obj672 #unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {673 #print STDERR "<ProcessingError n='$file'>\n" if ($gli);674 #return -1;675 #}676 # do any automatic metadata extraction677 375 $self->auto_extract_metadata ($doc_obj); 678 376 679 $self->{'num_processed'}++; 377 # if we haven't found any Title so far, assign one 378 $self->title_fallback($doc_obj,$section,$filename_no_path); 379 380 $self->add_OID($doc_obj); 680 381 return (1,$doc_obj); 681 382 } 682 383 384 # for now, the test is if the first non-empty line is <PagedDocument>, then its xml 385 sub is_xml_item_file { 386 my $self = shift(@_); 387 my ($filename) = @_; 388 389 my $xml_version = 0; 390 open (ITEMFILE, $filename) || die "couldn't open $filename\n"; 391 392 my $line = ""; 393 my $num = 0; 394 $line = <ITEMFILE>; 395 while ($line !~ /\w/) { 396 $line = <ITEMFILE>; 397 } 398 chomp $line; 399 if ($line =~ /<PagedDocument/) { 400 $xml_version = 1; 401 } 402 close ITEMFILE; 403 return $xml_version; 404 } 405 406 sub tidy_item_file { 407 my $self = shift(@_); 408 my ($filename) = @_; 409 410 open (ITEMFILE, $filename) || die "couldn't open $filename\n"; 411 my $backup_filename = "backup.item"; 412 open (BACKUP,">$backup_filename")|| die "couldn't write to $backup_filename\n"; 413 my $line = ""; 414 $line = <ITEMFILE>; 415 $line =~ s/^\xEF\xBB\xBF//; # strip BOM 416 $line =~ s/\x0B+//ig; 417 $line =~ s/&/&/g; 418 print BACKUP ($line); 419 #Tidy up the item file some metadata title contains \vt-vertical tab 420 while ($line = <ITEMFILE>) { 421 $line =~ s/\x0B+//ig; 422 $line =~ s/&/&/g; 423 print BACKUP ($line); 424 } 425 close ITEMFILE; 426 close BACKUP; 427 &File::Copy::copy ($backup_filename, $filename); 428 &util::rm($backup_filename); 429 430 } 431 # de we need this? old read was the same as BasePlug read, not the same as ReadXMLfile read 683 432 sub read 684 433 { 685 434 my $self = shift (@_); 686 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_; my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_); 687 688 if ((defined $process_status) && ($process_status == 1)) { 689 # process the document 690 $processor->process($doc_obj); 691 692 #if(defined($self->{'places_filename'})){ 693 # &util::rm($self->{'places_filename'}); 694 # $self->{'places_filename'} = undef; 695 #} 696 #$self->{'num_processed'} ++; 697 undef $doc_obj; 698 } 699 700 # clean up temporary files - we do this here instead of in 701 # process_image becuase associated files aren't actually copied 702 # until after process has been run. 703 if (defined $self->{'tmp_filename1'} && 704 -e $self->{'tmp_filename1'}) { 705 &util::rm($self->{'tmp_filename1'}) 706 } 707 if (defined $self->{'tmp_filename2'} && 708 -e $self->{'tmp_filename2'}) { 709 &util::rm($self->{'tmp_filename2'}) 710 } 711 if (defined $self->{'tmp_filename3'} && 712 -e $self->{'tmp_filename3'}) { 713 &util::rm($self->{'tmp_filename3'}) 714 } 715 # if process_status == 1, then the file has been processed. 716 return $process_status; 435 $self->BasePlugin::read(@_); 717 436 } 718 437 … … 741 460 if (defined($txtfile)&& $txtfile ne "") { 742 461 $self->process_text ($self->{'base_dir'}.$txtfile, $txtfile, $doc_obj, $self->{'current_section'}); 743 $doc_obj->set_metadata_element($self->{'current_section'},"NoText","0"); 744 } else { 745 # otherwise add in some dummy text 746 #create an empty text string so we don't break downstream plugins 747 my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1); 748 $doc_obj->add_utf8_text($self->{'current_section'}, $text); 749 $doc_obj->add_metadata($self->{'current_section'},"NoText","1"); 462 } else { 463 $self->add_dummy_text($doc_obj, $self->{'current_section'}); 750 464 } 751 465 } elsif ($element eq "Metadata") { … … 794 508 # create a new document 795 509 $self->{'doc_obj'} = new doc ($self->{'filename'}, "indexed_doc"); 796 my $doc_obj = $self->{'doc_obj'}; 797 $doc_obj->set_OIDtype ($self->{'processor'}->{'OIDtype'}); 510 # TODO is file filenmae_no_path?? 511 $self->set_initial_doc_fields($self->{'doc_obj'}, $self->{'file'}, $self->{'processor'}); 512 798 513 my ($dir, $file) = $self->{'filename'} =~ /^(.*?)([^\/\\]*)$/; 799 514 $self->{'base_dir'} = $dir; 800 515 $self->{'num_pages'} = 0; 801 my $topsection = $doc_obj->get_top_section();802 if ($self->{'documenttype'} eq 'paged') {803 # set the gsdlthistype metadata to Paged - this ensures this document will804 # be treated as a Paged doc, even if Titles are not numeric805 806 $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Paged");807 } else {808 $doc_obj->set_utf8_metadata_element ($topsection, "gsdlthistype", "Hierarchy");809 }810 811 $doc_obj->add_metadata ($topsection, "Source", $file);812 if ($self->{'headerpage'}) {813 $doc_obj->add_text($topsection, &gsprintf::lookup_string("{BasPlug.dummy_text}"));814 }815 516 816 517 } … … 819 520 my $self = shift(@_); 820 521 my $doc_obj = $self->{'doc_obj'}; 821 822 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");823 $doc_obj->add_metadata($doc_obj->get_top_section(), "FileFormat", "PagedImg");824 522 825 523 # add numpages metadata 826 524 $doc_obj->set_utf8_metadata_element ($doc_obj->get_top_section(), 'NumPages', $self->{'num_pages'}); 827 525 828 # add an OID 829 $doc_obj->set_OID(); 830 831 } 832 833 sub process_item { 834 my $self = shift (@_); 835 my ($filename, $dir, $file, $processor) = @_; 836 837 my $doc_obj = new doc ($filename, "indexed_doc"); 526 527 } 528 529 530 sub set_initial_doc_fields { 531 my $self = shift(@_); 532 my ($doc_obj, $filename_no_path, $processor) = @_; 533 838 534 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'}); 839 535 my $topsection = $doc_obj->get_top_section(); 840 $doc_obj->add_utf8_metadata($topsection, "Plugin", "$self->{'plugin_type'}");841 $doc_obj->add_metadata($topsection, "FileFormat", "PagedImg");842 536 843 537 if ($self->{'documenttype'} eq 'paged') { … … 849 543 } 850 544 851 $doc_obj->add_metadata ($topsection, "Source", $file); 852 853 open (ITEMFILE, $filename) || die "couldn't open $filename\n"; 545 $self->set_Source_metadata($doc_obj, $filename_no_path); 546 547 # if we want a header page, we need to add some text into the top section, otherwise this section will become invisible 548 if ($self->{'headerpage'}) { 549 $doc_obj->add_text($topsection, &gsprintf::lookup_string("{BasePlugin.dummy_text}")); 550 } 551 552 553 } 554 555 556 sub process_item { 557 my $self = shift (@_); 558 my ($filename_full_path, $dir, $filename_no_path, $processor) = @_; 559 560 my $doc_obj = new doc ($filename_full_path, "indexed_doc"); 561 $self->set_initial_doc_fields($doc_obj, $filename_no_path, $processor); 562 my $topsection = $doc_obj->get_top_section(); 563 open (ITEMFILE, $filename_full_path) || die "couldn't open $filename_full_path\n"; 854 564 my $line = ""; 855 565 my $num = 0; … … 879 589 if (!defined $result1) 880 590 { 881 print "PagedIm gPlug: couldn't process image \"$dir.$imgname\" for item \"$filename\"\n";591 print "PagedImagePlugin: couldn't process image \"$dir.$imgname\" for item \"$filename_full_path\"\n"; 882 592 } 883 593 } … … 887 597 888 598 if (!defined $result2) { 889 print "PagedImgPlug: couldn't process text file \"$dir.$txtname\" for item \"$filename\"\n"; 890 } 891 else{ 892 $doc_obj->set_metadata_element($cursection, "NoText", "0"); 599 print "PagedImagePlugin: couldn't process text file \"$dir.$txtname\" for item \"$filename_full_path\"\n"; 600 $self->add_dummy_text($doc_obj, $cursection); 893 601 } 894 602 } else { 895 603 # otherwise add in some dummy text 896 $doc_obj->add_text($cursection, &gsprintf::lookup_string("{BasPlug.dummy_text}")); 897 # add NoText metadata which can be used to suppress the dummy text 898 } 604 $self->add_dummy_text($doc_obj, $cursection); 605 } 899 606 } 900 607 } … … 902 609 close ITEMFILE; 903 610 904 # if we want a header page, we need to add some text into the top section, otherwise this section will become invisible905 if ($self->{'headerpage'}) {906 $doc_obj->add_text($topsection, &gsprintf::lookup_string("{BasPlug.dummy_text}"));907 }908 $file =~ s/\.item//i;909 $doc_obj->set_OID ();910 611 # add numpages metadata 911 612 $doc_obj->set_utf8_metadata_element ($topsection, 'NumPages', "$num"); … … 915 616 sub process_text { 916 617 my $self = shift (@_); 917 my ($f ullpath, $file, $doc_obj, $cursection) = @_;618 my ($filename_full_path, $file, $doc_obj, $cursection) = @_; 918 619 919 620 # check that the text file exists!! 920 if (!-f $f ullpath) {921 print "PagedIm gPlug: ERROR: File $fullpath does not exist, skipping\n";621 if (!-f $filename_full_path) { 622 print "PagedImagePlugin: ERROR: File $filename_full_path does not exist, skipping\n"; 922 623 return 0; 923 624 } 924 625 925 626 # Do encoding stuff 926 my ($language, $encoding) = $self->textcat_get_language_encoding ($f ullpath);627 my ($language, $encoding) = $self->textcat_get_language_encoding ($filename_full_path); 927 628 928 629 my $text=""; 929 & BasPlug::read_file($self, $fullpath, $encoding, $language, \$text);630 &ReadTextFile::read_file($self, $filename_full_path, $encoding, $language, \$text); 930 631 if (!length ($text)) { 931 632 # It's a bit unusual but not out of the question to have no text, so just give a warning 932 print "PagedIm gPlug: WARNING: $fullpath contains no text\n";633 print "PagedImagePlugin: WARNING: $filename_full_path contains no text\n"; 933 634 } 934 635 … … 961 662 962 663 # do plugin specific processing of doc_obj 963 sub process {664 sub process_old { 964 665 my $self = shift (@_); 965 666 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; … … 969 670 } 970 671 672 sub clean_up_after_doc_obj_processing { 673 my $self = shift(@_); 674 675 $self->ImageConverter::clean_up_temporary_files(); 676 } 677 971 678 1; -
gsdl/trunk/perllib/plugins/ProCitePlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # ProCitePlug .pm -- A plugin for (exported) ProCite databases3 # ProCitePlugin.pm -- A plugin for (exported) ProCite databases 4 4 # 5 5 # A component of the Greenstone digital library software … … 25 25 ########################################################################### 26 26 27 package ProCitePlug ;27 package ProCitePlugin; 28 28 29 29 30 30 use multiread; 31 use Split Plug;31 use SplitTextFile; 32 32 33 33 use strict; 34 34 no strict 'refs'; # allow filehandles to be variables and viceversa 35 35 36 # ProCitePlug is a sub-class of SplitPlug36 # ProCitePlugin is a sub-class of SplitTextFile 37 37 sub BEGIN { 38 @ProCitePlug ::ISA = ('SplitPlug');38 @ProCitePlugin::ISA = ('SplitTextFile'); 39 39 } 40 40 … … 42 42 my $arguments = 43 43 [ { 'name' => "process_exp", 44 'desc' => "{Bas Plug.process_exp}",44 'desc' => "{BasePlugin.process_exp}", 45 45 'type' => "regexp", 46 46 'reqd' => "no", 47 47 'deft' => &get_default_process_exp() }, 48 48 { 'name' => "split_exp", 49 'desc' => "{Split Plug.split_exp}",49 'desc' => "{SplitTextFile.split_exp}", 50 50 'type' => "regexp", 51 51 'deft' => &get_default_split_exp(), … … 53 53 ]; 54 54 55 my $options = { 'name' => "ProCitePlug ",56 'desc' => "{ProCitePlug .desc}",55 my $options = { 'name' => "ProCitePlugin", 56 'desc' => "{ProCitePlugin.desc}", 57 57 'abstract' => "no", 58 58 'inherits' => "yes", … … 81 81 push(@$pluginlist, $class); 82 82 83 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}84 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};85 86 my $self = new Split Plug($pluginlist, $inputargs, $hashArgOptLists);83 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 84 push(@{$hashArgOptLists->{"OptList"}},$options); 85 86 my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists); 87 87 88 88 return bless $self, $class; … … 124 124 open(PROCITE_FILE, "<$filename"); 125 125 my $reader = new multiread(); 126 $reader->set_handle ('ProCitePlug ::PROCITE_FILE');126 $reader->set_handle ('ProCitePlugin::PROCITE_FILE'); 127 127 $reader->set_encoding ($encoding); 128 128 $reader->read_file ($textref); … … 161 161 my $cursection = $doc_obj->get_top_section(); 162 162 # Report that we're processing the file 163 print STDERR "<Processing n='$file' p='ProCitePlug '>\n" if ($gli);164 print $outhandle "ProCitePlug : processing $file\n"163 print STDERR "<Processing n='$file' p='ProCitePlugin'>\n" if ($gli); 164 print $outhandle "ProCitePlugin: processing $file\n" 165 165 if ($self->{'verbosity'}) > 1; 166 166 -
gsdl/trunk/perllib/plugins/RTFPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # RTFPlug .pm -- plugin for importing Rich Text Format files.3 # RTFPlugin.pm -- plugin for importing Rich Text Format files. 4 4 # 5 5 # A component of the Greenstone digital library software … … 25 25 ########################################################################### 26 26 27 # 12/05/02 Added usage datastructure - John Thompson 27 package RTFPlugin; 28 28 29 package RTFPlug; 30 31 use ConvertToPlug; 29 use ConvertBinaryFile; 32 30 use strict; 33 31 no strict 'refs'; # allow filehandles to be variables and viceversa 34 32 35 33 sub BEGIN { 36 @RTFPlug ::ISA = ('ConvertToPlug');34 @RTFPlugin::ISA = ('ConvertBinaryFile'); 37 35 } 38 36 39 37 my $arguments = 40 38 [ { 'name' => "process_exp", 41 'desc' => "{Bas Plug.process_exp}",39 'desc' => "{BasePlugin.process_exp}", 42 40 'type' => "regexp", 43 41 'deft' => &get_default_process_exp(), 44 42 'reqd' => "no" }, 45 43 { 'name' => "description_tags", 46 'desc' => "{HTMLPlug .description_tags}",44 'desc' => "{HTMLPlugin.description_tags}", 47 45 'type' => "flag" } 48 46 ]; 49 47 50 my $options = { 'name' => "RTFPlug ",51 'desc' => "{RTFPlug .desc}",48 my $options = { 'name' => "RTFPlugin", 49 'desc' => "{RTFPlugin.desc}", 52 50 'abstract' => "no", 53 51 'inherits' => "yes", … … 60 58 push(@$pluginlist, $class); 61 59 62 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}63 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};60 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 61 push(@{$hashArgOptLists->{"OptList"}},$options); 64 62 65 my $self = new Convert ToPlug($pluginlist, $inputargs, $hashArgOptLists);63 my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 66 64 67 65 if ($self->{'info_only'}) { … … 70 68 } 71 69 70 $self->{'filename_extension'} = "rtf"; 71 $self->{'file_type'} = "RTF"; 72 72 73 my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 73 if (!defined $secondary_plugin_options->{'T EXTPlug'}) {74 $secondary_plugin_options->{'T EXTPlug'} = [];74 if (!defined $secondary_plugin_options->{'TextPlugin'}) { 75 $secondary_plugin_options->{'TextPlugin'} = []; 75 76 } 76 if (!defined $secondary_plugin_options->{'HTMLPlug '}) {77 $secondary_plugin_options->{'HTMLPlug '} = [];77 if (!defined $secondary_plugin_options->{'HTMLPlugin'}) { 78 $secondary_plugin_options->{'HTMLPlugin'} = []; 78 79 } 79 my $text_options = $secondary_plugin_options->{'T EXTPlug'};80 my $html_options = $secondary_plugin_options->{'HTMLPlug '};80 my $text_options = $secondary_plugin_options->{'TextPlugin'}; 81 my $html_options = $secondary_plugin_options->{'HTMLPlugin'}; 81 82 82 83 #$self->{'input_encoding'} = "utf8"; … … 98 99 return q^(?i)\.rtf$^; 99 100 } 100 101 sub process {102 my $self = shift (@_);103 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;104 105 return $self->process_type("rtf",$base_dir,$file,$doc_obj);106 }107 101 108 102 1; -
gsdl/trunk/perllib/plugins/RealMediaPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # RealMediaPlug .pm -- Extract metadata from Real Media files3 # RealMediaPlugin.pm -- Extract metadata from Real Media files 4 4 # 5 5 # Original code by Xin Gao … … 27 27 ########################################################################### 28 28 29 package RealMediaPlug ;29 package RealMediaPlugin; 30 30 31 31 32 use UnknownPlug;32 use BasePlugin; 33 33 use rm::Header::PurePerl; 34 34 35 35 use strict; 36 36 no strict 'refs'; # make an exception so we can use variables as filehandles 37 37 no strict 'subs'; 38 38 39 39 sub BEGIN { 40 @RealMediaPlug ::ISA = ('UnknownPlug');40 @RealMediaPlugin::ISA = ('BasePlugin'); 41 41 } 42 42 … … 44 44 my $arguments = 45 45 [ { 'name' => "process_exp", 46 'desc' => "{Bas Plug.process_exp}",46 'desc' => "{BasePlugin.process_exp}", 47 47 'type' => "regexp", 48 48 'deft' => &get_default_process_exp(), 49 49 'reqd' => "no" } ]; 50 50 51 my $options = { 'name' => "RealMediaPlug ",52 'desc' => "{RealMediaPlug .desc}",51 my $options = { 'name' => "RealMediaPlugin", 52 'desc' => "{RealMediaPlugin.desc}", 53 53 'abstract' => "no", 54 54 'inherits' => "yes", … … 69 69 push(@$pluginlist, $class); 70 70 71 if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}); }72 if (defined $options) { push(@{$hashArgOptLists->{"OptList"}}, $options); }71 push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}); 72 push(@{$hashArgOptLists->{"OptList"}}, $options); 73 73 74 my $self = new UnknownPlug($pluginlist, $inputargs, $hashArgOptLists);74 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 75 75 76 76 return bless $self, $class; 77 77 } 78 78 79 80 # do plugin specific processing of doc_obj 81 sub read 79 sub process 82 80 { 83 81 my $self = shift (@_); 84 my ($pluginfo, $base_dir, $file, $metadata, $ processor, $maxdocs, $total_count, $gli) = @_;82 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 85 83 86 my $outhandle = $self->{'outhandle'}; 84 my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 85 my $top_section = $doc_obj->get_top_section(); 86 # prevent hashing: old code was in effect the following. 87 if ($doc_obj->{'OIDtype'} =~ /^hash$/) { 88 $doc_obj->set_OIDtype ("incremental"); 89 } 87 90 88 #check process and block exps, smart block, etc89 my ($block_status,$filename) = $self->read_block(@_);90 return $block_status if ((!defined $block_status) || ($block_status==0));91 92 # Report that we're processing the file93 print STDERR "<Processing n='$file' p='RealMediaPlug'>\n" if ($gli);94 print $outhandle "RealMediaPlug: processing $file\n"95 if ($self->{'verbosity'}) > 1;96 97 # create a new index document98 my $doc_obj = new doc ($filename, "indexed_doc");99 if ($processor->{'OIDtype'} =~ /^(assigned|dirname)$/) {100 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});101 }102 else {103 $doc_obj->set_OIDtype ("incremental"); # this is done to avoid hashing content of file104 }105 my $top_section = $doc_obj->get_top_section();106 107 #if there's a leading directory name, eat it...108 $file =~ s/^.*[\/\\]//;109 110 my $url = $file;111 112 # Source (filename) to be consistent with other plugins113 $doc_obj->add_metadata($top_section, "Source", $url);114 115 116 91 my $text = ""; 117 my $real_media = rm::Header::PurePerl->new($filename );92 my $real_media = rm::Header::PurePerl->new($filename_full_path); 118 93 foreach my $key (keys %{$real_media->info}) 119 94 { … … 124 99 125 100 $doc_obj->add_utf8_text($top_section, "<pre>\n$text\n</pre>"); 101 $doc_obj->add_metadata($top_section, "FileFormat", "RealMedia"); 126 102 127 # srclink128 $doc_obj->add_metadata($top_section, "FileFormat", "RealMedia");129 103 $doc_obj->add_metadata($top_section, "srclink", "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[Source]\">"); 130 104 $doc_obj->add_metadata($top_section, "/srclink", "</a>"); … … 133 107 134 108 # Add the actual file as an associated file 135 $doc_obj->associate_file($filename , $file, "RealMedia", $top_section);109 $doc_obj->associate_file($filename_full_path, $filename_no_path, "RealMedia", $top_section); 136 110 137 # include any metadata passed in from previous plugins138 my $section = $doc_obj->get_top_section();139 $self->extra_metadata ($doc_obj, $section, $metadata);140 141 # do plugin specific processing of doc_obj142 return undef unless defined ($self->process (\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj));143 144 # do any automatic metadata extraction145 $self->auto_extract_metadata($doc_obj);146 147 # have we found a Title?? is the Title empty??148 if(!defined $doc_obj->get_metadata_element($section, "Title") or $doc_obj->get_metadata_element($section, "Title") eq ""){149 my $file_derived_title = &BasPlug::filename_based_title($self, $file);150 if(!defined $doc_obj->get_metadata_element($section, "Title")) {151 $doc_obj->add_metadata ($section, "Title", $file_derived_title);152 }153 else {154 $doc_obj->set_metadata_element ($section, "Title", $file_derived_title);155 }156 }157 158 # add an OID159 $doc_obj->set_OID();160 161 # process the document162 $processor->process($doc_obj);163 164 $self->{'num_processed'}++;165 return 1;166 111 } 167 112 -
gsdl/trunk/perllib/plugins/ReferPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # ReferPlug .pm - a plugin for bibliography records in Refer format3 # ReferPlugin.pm - a plugin for bibliography records in Refer format 4 4 # 5 5 # A component of the Greenstone digital library software … … 26 26 ########################################################################### 27 27 28 # ReferPlug reads bibliography files in Refer format.28 # ReferPlugin reads bibliography files in Refer format. 29 29 # 30 30 # by Gordon W. Paynter ([email protected]), November 2000 … … 36 36 # 37 37 # 38 # ReferPlug creates a document object for every reference in the file.39 # It is a subclass of Split Plug, so if there are multiple records, all38 # ReferPlugin creates a document object for every reference in the file. 39 # It is a subclass of SplitTextFile, so if there are multiple records, all 40 40 # are read. 41 41 # … … 61 61 # 62 62 63 # 12/05/02 Added usage datastructure - John Thompson 64 65 package ReferPlug; 66 67 use SplitPlug; 63 package ReferPlugin; 64 65 use SplitTextFile; 68 66 use strict; 69 67 no strict 'refs'; # allow filehandles to be variables and viceversa 70 68 71 # ReferPlug is a sub-class of BasPlug.69 # ReferPlugin is a sub-class of BasePlugin. 72 70 sub BEGIN { 73 @ReferPlug ::ISA = ('SplitPlug');71 @ReferPlugin::ISA = ('SplitTextFile'); 74 72 } 75 73 76 74 my $arguments = 77 75 [ { 'name' => "process_exp", 78 'desc' => "{Bas Plug.process_exp}",76 'desc' => "{BasePlugin.process_exp}", 79 77 'type' => "regexp", 80 78 'deft' => &get_default_process_exp(), 81 79 'reqd' => "no" }, 82 80 { 'name' => "split_exp", 83 'desc' => "{Split Plug.split_exp}",81 'desc' => "{SplitTextFile.split_exp}", 84 82 'type' => "regexp", 85 83 'reqd' => "no", … … 87 85 ]; 88 86 89 my $options = { 'name' => "ReferPlug ",90 'desc' => "{ReferPlug .desc}",87 my $options = { 'name' => "ReferPlugin", 88 'desc' => "{ReferPlugin.desc}", 91 89 'abstract' => "no", 92 90 'inherits' => "yes", … … 109 107 push(@$pluginlist, $class); 110 108 111 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}112 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};113 114 my $self = new Split Plug($pluginlist, $inputargs, $hashArgOptLists);109 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 110 push(@{$hashArgOptLists->{"OptList"}},$options); 111 112 my $self = new SplitTextFile($pluginlist, $inputargs, $hashArgOptLists); 115 113 116 114 return bless $self, $class; … … 130 128 my $cursection = $doc_obj->get_top_section(); 131 129 # Report that we're processing the file 132 print STDERR "<Processing n='$file' p='ReferPlug '>\n" if ($gli);133 print $outhandle "ReferPlug : processing $file\n"130 print STDERR "<Processing n='$file' p='ReferPlugin'>\n" if ($gli); 131 print $outhandle "ReferPlugin: processing $file\n" 134 132 if ($self->{'verbosity'}) > 1; 135 133 -
gsdl/trunk/perllib/plugins/RogPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # RogPlug .pm -- simple text plugin3 # RogPlugin.pm -- simple text plugin 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 26 26 # creates simple single-level document from .rog or .mdb files 27 27 28 package RogPlug ;29 30 use Bas Plug;28 package RogPlugin; 29 30 use BasePlugin; 31 31 use sorttools; 32 32 use doc; … … 36 36 37 37 sub BEGIN { 38 @RogPlug ::ISA = ('BasPlug');38 @RogPlugin::ISA = ('BasePlugin'); 39 39 } 40 40 41 41 my $arguments = 42 42 [ { 'name' => "process_exp", 43 'desc' => "{Bas Plug.process_exp}",43 'desc' => "{BasePlugin.process_exp}", 44 44 'type' => "regexp", 45 45 'reqd' => "no", … … 47 47 ]; 48 48 49 my $options = { 'name' => "RogPlug ",50 'desc' => "{RogPlug .desc}",49 my $options = { 'name' => "RogPlugin", 50 'desc' => "{RogPlugin.desc}", 51 51 'abstract' => "no", 52 52 'inherits' => "yes", … … 58 58 push(@$pluginlist, $class); 59 59 60 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}61 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};62 63 my $self = new Bas Plug($pluginlist, $inputargs, $hashArgOptLists);60 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 61 push(@{$hashArgOptLists->{"OptList"}},$options); 62 63 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 64 64 65 65 return bless $self, $class; … … 230 230 my $gz = (defined $3) ? 1: 0; 231 231 232 print STDERR "<Processing n='$file' p='RogPlug'>\n" if ($gli);233 print STDERR "RogPlug : processing $filename\n" if $processor->{'verbosity'};232 print STDERR "<Processing n='$file' p='RogPlugin'>\n" if ($gli); 233 print STDERR "RogPlugin: processing $filename\n" if $processor->{'verbosity'}; 234 234 235 235 if ($gz) { 236 236 open (FILE, "zcat $filename |") 237 || die "RogPlug ::read - zcat can't open $filename\n";237 || die "RogPlugin::read - zcat can't open $filename\n"; 238 238 } else { 239 239 open (FILE, $filename) 240 || die "RogPlug ::read - can't open $filename\n";240 || die "RogPlugin::read - can't open $filename\n"; 241 241 } 242 242 -
gsdl/trunk/perllib/plugins/SourceCodePlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # S RCPlug.pm -- source code plugin3 # SourceCodePlugin.pm -- source code plugin 4 4 # 5 5 # A component of the Greenstone digital library software … … 37 37 # 12/05/02 Added usage datastructure - John Thompson 38 38 39 package S RCPlug;40 41 use BasPlug;39 package SourceCodePlugin; 40 41 use ReadTextFile; 42 42 43 43 use strict; … … 45 45 46 46 sub BEGIN { 47 @S RCPlug::ISA = ('BasPlug');47 @SourceCodePlugin::ISA = ('ReadTextFile'); 48 48 } 49 49 50 50 my $arguments = 51 51 [ { 'name' => "process_exp", 52 'desc' => "{ BasPlug.process_exp}",52 'desc' => "{ReadTextFile.process_exp}", 53 53 'type' => "regexp", 54 54 'deft' => &get_default_process_exp(), 55 55 'reqd' => "no" } , 56 56 { 'name' => "block_exp", 57 'desc' => "{ BasPlug.block_exp}",57 'desc' => "{ReadTextFile.block_exp}", 58 58 'type' => "regexp", 59 59 'deft' => &get_default_block_exp(), 60 60 'reqd' => "no" }, 61 61 { 'name' => "remove_prefix", 62 'desc' => "{S RCPlug.remove_prefix}",62 'desc' => "{SourceCodePlugin.remove_prefix}", 63 63 'type' => "regexp", 64 64 'deft' => "^.*[/\\]", 65 65 'reqd' => "no" } ]; 66 66 67 my $options = { 'name' => "S RCPlug",68 'desc' => "{S RCPlug.desc}",67 my $options = { 'name' => "SourceCodePlugin", 68 'desc' => "{SourceCodePlugin.desc}", 69 69 'abstract' => "no", 70 70 'inherits' => "yes", … … 77 77 push(@$pluginlist, $class); 78 78 79 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}80 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};81 82 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);79 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 80 push(@{$hashArgOptLists->{"OptList"}},$options); 81 82 my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists); 83 83 84 84 return bless $self, $class; … … 94 94 my $self = shift (@_); 95 95 96 # return q^(?i)\.te?xt$^;97 96 return q^(Makefile.*|README.*|(?i)\.(c|cc|cpp|C|h|hpp|pl|pm|sh))$^; 98 97 } … … 106 105 my $outhandle = $self->{'outhandle'}; 107 106 108 print STDERR "<Processing n='$file' p='SRCPlug'>\n" if ($gli);109 print $outhandle "S RCPlug: processing $file\n"107 print STDERR "<Processing n='$file' p='SourceCodePlugin'>\n" if ($gli); 108 print $outhandle "SourceCodePlugin: processing $file\n" 110 109 if $self->{'verbosity'} > 1; 111 110 -
gsdl/trunk/perllib/plugins/StructuredHTMLPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # StructuredHTMLPlug .pm -- html plugin with extra facilities for teasing out3 # StructuredHTMLPlugin.pm -- html plugin with extra facilities for teasing out 4 4 # hierarchical structure (such as h1, h2, h3, or user-defined tags) in an 5 5 # HTML document … … 32 32 # format:e.g. level1 (Abstract_title|ChapterTitle|Referencing Heading) level2(SectionHeading)... 33 33 34 package StructuredHTMLPlug ;35 36 use HTMLPlug ;37 use Image Plug;38 39 #use strict; # every perl program should have this!40 #no strict 'refs'; # make an exception so we can use variables as filehandles34 package StructuredHTMLPlugin; 35 36 use HTMLPlugin; 37 use ImageConverter; # want the identify method 38 39 use strict; # every perl program should have this! 40 no strict 'refs'; # make an exception so we can use variables as filehandles 41 41 42 42 sub BEGIN { 43 @StructuredHTMLPlug ::ISA = ('HTMLPlug');43 @StructuredHTMLPlugin::ISA = ('HTMLPlugin'); 44 44 } 45 45 … … 47 47 [ 48 48 { 'name' => "level1_header", 49 'desc' => "{StructuredHTMLPlug .level1_header}",49 'desc' => "{StructuredHTMLPlugin.level1_header}", 50 50 'type' => "regexp", 51 51 'reqd' => "no", 52 52 'deft' => "" }, 53 53 { 'name' => "level2_header", 54 'desc' => "{StructuredHTMLPlug .level2_header}",54 'desc' => "{StructuredHTMLPlugin.level2_header}", 55 55 'type' => "regexp", 56 56 'reqd' => "no", 57 57 'deft' => "" }, 58 58 { 'name' => "level3_header", 59 'desc' => "{StructuredHTMLPlug .level3_header}",59 'desc' => "{StructuredHTMLPlugin.level3_header}", 60 60 'type' => "regexp", 61 61 'reqd' => "no", 62 62 'deft' => "" }, 63 63 { 'name' => "title_header", 64 'desc' => "{StructuredHTMLPlug .title_header}",64 'desc' => "{StructuredHTMLPlugin.title_header}", 65 65 'type' => "regexp", 66 66 'reqd' => "no", 67 67 'deft' => "" }, 68 68 { 'name' => "delete_toc", 69 'desc' => "{StructuredHTMLPlug .delete_toc}",69 'desc' => "{StructuredHTMLPlugin.delete_toc}", 70 70 'type' => "flag", 71 71 'reqd' => "no"}, 72 72 { 'name' => "toc_header", 73 'desc' => "{StructuredHTMLPlug .toc_header}",73 'desc' => "{StructuredHTMLPlugin.toc_header}", 74 74 'type' => "regexp", 75 75 'reqd' => "no", … … 77 77 ]; 78 78 79 my $options = { 'name' => "StructuredHTMLPlug ",80 'desc' => "{StructuredHTMLPlug .desc}",79 my $options = { 'name' => "StructuredHTMLPlugin", 80 'desc' => "{StructuredHTMLPlugin.desc}", 81 81 'abstract' => "no", 82 82 'inherits' => "yes", … … 88 88 push(@$pluginlist, $class); 89 89 90 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}91 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};92 93 my $self = new HTMLPlug ($pluginlist, $inputargs, $hashArgOptLists);90 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 91 push(@{$hashArgOptLists->{"OptList"}},$options); 92 93 my $self = new HTMLPlugin($pluginlist, $inputargs, $hashArgOptLists); 94 94 95 95 return bless $self, $class; … … 102 102 my $outhandle = $self->{'outhandle'}; 103 103 104 print $outhandle "StructuredHTMLPlug : processing $file\n"104 print $outhandle "StructuredHTMLPlugin: processing $file\n" 105 105 if $self->{'verbosity'} > 1; 106 106 … … 161 161 $body_text =~ s/(<p[^>]*><o:p> <\/o:p><\/p>)//isg; 162 162 163 $section_text .= "<!--\n<Section>\n-->\n"; 163 # what was the following line for. effectively unused. do we need it?? 164 #$section_text .= "<!--\n<Section>\n-->\n"; 164 165 #my $top_section_tag = "<!--\n<Section>\n-->\n"; 165 166 #$body_text =~ s/(<div.*)/$top_section_text$doctitle$1/i; … … 308 309 309 310 my ($image_type, $actual_width, $actual_height, $image_size) 310 = &Image Plug::identify($img_filename, $outhandle, $verbosity);311 = &ImageConverter::identify($img_filename, $outhandle, $verbosity); 311 312 312 313 #print STDERR "**** $actual_width x $actual_height"; … … 318 319 # derive new image name based on current image 319 320 my ($tailname, $dirname, $suffix) 320 = &File::Basename::fileparse($i nput_filename, "\\.[^\\.]+\$");321 = &File::Basename::fileparse($img_filename, "\\.[^\\.]+\$"); 321 322 322 323 my $resized_filename … … 326 327 327 328 # Generate smaller image with convert 328 my $newsize = "$img_width x$image_height";329 my $newsize = "$img_width"."x$img_height"; 329 330 my $command = "convert -interlace plane -verbose " 330 ."-geometry $newsize \" img_$filename\" \"$resized_filename\"";331 ."-geometry $newsize \"$img_filename\" \"$resized_filename\""; 331 332 #print $outhandle "ImageResize: $command\n" if ($verbosity > 2); 332 333 #my $result = ''; … … 389 390 $value = $1; 390 391 if (!defined $value || !defined $tag){ 391 #print $outhandle "StructuredHTMLPlug : can't find VALUE in \"$tag\"\n";392 #print $outhandle "StructuredHTMLPlugin: can't find VALUE in \"$tag\"\n"; 392 393 next; 393 394 } else { -
gsdl/trunk/perllib/plugins/TextPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # T EXTPlug.pm -- simple text plugin3 # TextPlugin.pm -- simple text plugin 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 29 29 # 12/05/02 Added usage datastructure - John Thompson 30 30 31 package T EXTPlug;32 33 use BasPlug;31 package TextPlugin; 32 33 use ReadTextFile; 34 34 35 35 use strict; 36 36 no strict 'refs'; # allow filehandles to be variables and viceversa 37 no strict 'subs'; 37 38 38 39 sub BEGIN { 39 @T EXTPlug::ISA = ('BasPlug');40 @TextPlugin::ISA = ('ReadTextFile'); 40 41 } 41 42 42 43 my $arguments = 43 44 [ { 'name' => "process_exp", 44 'desc' => "{Bas Plug.process_exp}",45 'desc' => "{BasePlugin.process_exp}", 45 46 'type' => "regexp", 46 47 'deft' => &get_default_process_exp(), 47 48 'reqd' => "no" } , 48 49 { 'name' => "title_sub", 49 'desc' => "{T EXTPlug.title_sub}",50 'desc' => "{TextPlugin.title_sub}", 50 51 'type' => "regexp", 51 52 'deft' => "", 52 53 'reqd' => "no" } ]; 53 54 54 my $options = { 'name' => "T EXTPlug",55 'desc' => "{T EXTPlug.desc}",55 my $options = { 'name' => "TextPlugin", 56 'desc' => "{TextPlugin.desc}", 56 57 'abstract' => "no", 57 58 'inherits' => "yes", … … 65 66 push(@$pluginlist, $class); 66 67 67 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}68 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};69 70 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);68 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 69 push(@{$hashArgOptLists->{"OptList"}},$options); 70 71 my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists); 71 72 72 73 return bless $self, $class; … … 85 86 my $outhandle = $self->{'outhandle'}; 86 87 87 print STDERR "<Processing n='$file' p='T EXTPlug'>\n" if ($gli);88 print $outhandle "T EXTPlug: processing $file\n"88 print STDERR "<Processing n='$file' p='TextPlugin'>\n" if ($gli); 89 print $outhandle "TextPlugin xx: processing $file\n" 89 90 if $self->{'verbosity'} > 1; 90 91 … … 109 110 } 110 111 # Add FileFormat metadata 111 $doc_obj->add_metadata($cursection, "FileFormat", "T EXT");112 $doc_obj->add_metadata($cursection, "FileFormat", "Text"); 112 113 113 114 # insert preformat tags and add text to document object … … 136 137 # replace_srcdoc_with_html.pl requires all subroutines that support src_replaceable 137 138 # to contain a method called tmp_area_convert_file - this is indeed the case with all 138 # Perl modules that are subclasses of ConvertToPlug.pm, but as we want T EXTPlugto also139 # be srcreplaceable and because T EXTPlugdoes not inherit from ConvertToPlug.pm, we have139 # Perl modules that are subclasses of ConvertToPlug.pm, but as we want TextPlugin to also 140 # be srcreplaceable and because TextPlugin does not inherit from ConvertToPlug.pm, we have 140 141 # a similar subroutine with the same name here. 141 142 sub tmp_area_convert_file { … … 192 193 # Recreate the original file for writing the updated contents 193 194 unless(open(TEXT, "<$tmp_filename")) { # open it as a new file for writing 194 print STDERR "T EXTPlug.pm: Unable to open and read from $tmp_filename for converting to html...ERROR: $!\n";195 print STDERR "TextPlugin.pm: Unable to open and read from $tmp_filename for converting to html...ERROR: $!\n"; 195 196 return ""; # no file name 196 197 } … … 212 213 # try creating this new file writing and try opening it for writing, else exit with error value 213 214 unless(open(HTML, ">$output_filename")) { # open the new html file for writing 214 print STDERR "T EXTPlug.pm: Unable to create $output_filename for writing $tailname$suffix txt converted to html...ERROR: $!\n";215 print STDERR "TextPlugin.pm: Unable to create $output_filename for writing $tailname$suffix txt converted to html...ERROR: $!\n"; 215 216 return ""; # no filename 216 217 } -
gsdl/trunk/perllib/plugins/UnknownPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # UnknownPlug .pm -- Plugin for files you know about but Greenstone doesn't3 # UnknownPlugin.pm -- Plugin for files you know about but Greenstone doesn't 4 4 # 5 5 # A component of the Greenstone digital library software from the New … … 26 26 ########################################################################### 27 27 28 # UnknownPlug - a plugin for unknown files28 # UnknownPlugin - a plugin for unknown files 29 29 30 30 # This is a simple Plugin for importing files in formats that … … 38 38 # movies, I add this line to the collection configuration file: 39 39 40 # plugin UnknownPlug -process_exp "*.MOV" -assoc_field "movie"40 # plugin UnknownPlugin -process_exp "*.MOV" -assoc_field "movie" 41 41 42 42 # A document is created for each movie, with the associated movie … … 49 49 # You can also add extra metadata, such as the Title, Subject, and 50 50 # Duration, with metadata.xml files and RecPlug. (If you want to use 51 # UnknownPlug with more than one type of file, you will have to add51 # UnknownPlugin with more than one type of file, you will have to add 52 52 # some sort of distinguishing metadata in this way.) 53 53 54 54 55 55 56 package UnknownPlug ;56 package UnknownPlugin; 57 57 58 use Bas Plug;58 use BasePlugin; 59 59 60 60 use strict; … … 62 62 63 63 sub BEGIN { 64 @UnknownPlug ::ISA = ('BasPlug');64 @UnknownPlugin::ISA = ('BasePlugin'); 65 65 } 66 66 67 67 my $arguments = 68 68 [ { 'name' => "assoc_field", 69 'desc' => "{UnknownPlug .assoc_field}",69 'desc' => "{UnknownPlugin.assoc_field}", 70 70 'type' => "string", 71 71 'deft' => "", 72 72 'reqd' => "no" }, 73 73 { 'name' => "file_format", 74 'desc' => "{UnknownPlug .file_format}",74 'desc' => "{UnknownPlugin.file_format}", 75 75 'type' => "string", 76 76 'deft' => "", 77 77 'reqd' => "no" }, 78 78 { 'name' => "mime_type", 79 'desc' => "{UnknownPlug .mime_type}",79 'desc' => "{UnknownPlugin.mime_type}", 80 80 'type' => "string", 81 81 'deft' => "", 82 82 'reqd' => "no" }, 83 83 { 'name' => "srcicon", 84 'desc' => "{UnknownPlug .srcicon}",84 'desc' => "{UnknownPlugin.srcicon}", 85 85 'type' => "string", 86 86 'deft' => "iconunknown", 87 87 'reqd' => "no" }, 88 88 { 'name' => "process_extension", 89 'desc' => "{UnknownPlug .process_extension}",89 'desc' => "{UnknownPlugin.process_extension}", 90 90 'type' => "string", 91 91 'deft' => "", 92 92 'reqd' => "no" } ]; 93 93 94 my $options = { 'name' => "UnknownPlug ",95 'desc' => "{UnknownPlug .desc}",94 my $options = { 'name' => "UnknownPlugin", 95 'desc' => "{UnknownPlugin.desc}", 96 96 'abstract' => "no", 97 97 'inherits' => "yes", … … 107 107 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 108 108 109 my $self = new Bas Plug($pluginlist, $inputargs, $hashArgOptLists);109 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists); 110 110 111 111 # "-process_extension" is a simpler alternative to -process_exp for non-regexp people … … 117 117 } 118 118 119 sub get_default_process_exp {120 return '';121 }122 119 120 sub process { 121 my $self = shift (@_); 122 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 123 123 124 # Associate the unknown file with the new document 124 my ($filename_full_path, $filename_no_path) = $self->get_full_filenames($base_dir, $file); 125 my $outhandle = $self->{'outhandle'}; 126 my $verbosity = $self->{'verbosity'}; 125 127 126 sub associate_unknown_file { 127 my $self = shift (@_); 128 my $filename = shift (@_); # filename with full path 129 my $file = shift (@_); # filename without path 130 my $doc_obj = shift (@_); 131 132 my $verbosity = $self->{'verbosity'}; 133 my $outhandle = $self->{'outhandle'}; 134 135 # check the filename is okay 136 return 0 if ($file eq "" || $filename eq ""); 137 138 139 my $url = $file; 140 ##$url =~ s/ /%20/g; 128 # check the filename is okay - do we need this?? 129 if ($filename_full_path eq "" || $filename_no_path eq "") { 130 print $outhandle "UnknownPlugin: couldn't process \"$filename_no_path\"\n"; 131 return undef; 132 } 141 133 142 134 # Add the file as an associated file ... … … 146 138 my $assoc_field = $self->{'assoc_field'} || "unknown_file"; 147 139 148 $doc_obj->associate_file($filename , $file, $mime_type, $section);140 $doc_obj->associate_file($filename_full_path, $filename_no_path, $mime_type, $section); 149 141 $doc_obj->add_metadata ($section, "FileFormat", $file_format); 150 142 $doc_obj->add_metadata ($section, "MimeType", $mime_type); 151 $doc_obj->add_metadata ($section, $assoc_field, $file );143 $doc_obj->add_metadata ($section, $assoc_field, $filename_full_path); 152 144 153 145 $doc_obj->add_metadata ($section, "srclink", 154 146 "<a href=\"_httpprefix_/collect/[collection]/index/assoc/[assocfilepath]/[$assoc_field]\">"); 155 #$doc_obj->add_metadata ($section, "srcicon", "_iconunknown_");156 147 $doc_obj->add_metadata ($section, "srcicon", "_".$self->{'srcicon'}."_"); 157 148 $doc_obj->add_metadata ($section, "/srclink", "</a>"); 158 149 159 # add NoText metadata which can be used to suppress the dummy text160 $ doc_obj->add_metadata ($section, "NoText", "1");150 # we have no text - add dummy text and NoText metadata 151 $self->add_dummy_text($doc_obj, $section); 161 152 162 return 1;163 }164 165 166 167 # The UnknownPlug read() function. This function does all the right168 # things to make general options work for a given plugin. UnknownPlug169 # overrides read() because there is no need to read the actual text of170 # the file in, because the contents of the file is not text...171 #172 #173 # Return number of files processed, undef if can't process174 #175 # Note that $base_dir might be "" and that $file might include directories176 177 sub read {178 my $self = shift (@_);179 my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;180 181 my $outhandle = $self->{'outhandle'};182 183 # Make sure we're processing the correct file184 my ($block_status,$filename) = $self->read_block(@_);185 return $block_status if ((!defined $block_status) || ($block_status==0));186 187 print STDERR "<Processing n='$file' p='UnknownPlug'>\n" if ($gli);188 print $outhandle "UnknownPlug processing \"$filename\"\n"189 if $self->{'verbosity'} > 1;190 191 #if there's a leading directory name, eat it...192 $file =~ s/^.*[\/\\]//;193 194 # create a new document195 my $doc_obj = new doc ($filename, "indexed_doc");196 my $top_section = $doc_obj->get_top_section();197 198 $doc_obj->set_OIDtype ($processor->{'OIDtype'}, $processor->{'OIDmetadata'});199 $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}");200 $doc_obj->add_metadata($top_section, "Source", $file); # set the filename as Source metadata to be consistent with other plugins201 $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename));202 203 # URL metadata (even invalid ones) are used to support internal204 # links, so even if 'file_is_url' is off, still need to store info205 206 my $web_url = "http://$file";207 $doc_obj->add_metadata($top_section, "URL", $web_url);208 209 210 # associate the file with the document211 if (associate_unknown_file($self, $filename, $file, $doc_obj) != 1)212 {213 if ($gli) {214 print STDERR "<ProcessingError n='$file'>\n";215 }216 print $outhandle "UnknownPlug: couldn't process \"$filename\"\n";217 return -1; # error during processing218 }219 220 #create an empty text string so we don't break downstream plugins221 my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}",1);222 223 # include any metadata passed in from previous plugins224 my $section = $doc_obj->get_top_section();225 $self->extra_metadata ($doc_obj, $section, $metadata);226 227 $self->title_fallback($doc_obj,$section,$file);228 229 # do plugin specific processing of doc_obj230 unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {231 print STDERR "<ProcessingError n='$file'>\n" if ($gli);232 return -1;233 }234 235 # do any automatic metadata extraction236 $self->auto_extract_metadata ($doc_obj);237 238 # add an OID239 $doc_obj->set_OID();240 $doc_obj->add_utf8_text($section, $text);241 242 # process the document243 $processor->process($doc_obj);244 245 $self->{'num_processed'} ++;246 return 1;247 }248 249 250 # UnknownPlug processing of doc_obj. In practice we don't need to do251 # anything here because the read function takes care of everything.252 253 sub process {254 my $self = shift (@_);255 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;256 my $outhandle = $self->{'outhandle'};257 258 153 return 1; 259 154 } -
gsdl/trunk/perllib/plugins/W3ImagePlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # W3Im gPlug.pm -- Context-based image indexing plugin for HTML documents3 # W3ImagePlugin.pm -- Context-based image indexing plugin for HTML documents 4 4 # 5 5 # A component of the Greenstone digital library software … … 39 39 # collection builds at the import stage. 40 40 # 41 # W3Im gPlugis a subclass of HTMLPlug (i.e. it will index pages also42 # if required). It can be used in place of HTMLPlug to index both41 # W3ImagePlugin is a subclass of HTMLPlug (i.e. it will index pages also 42 # if required). It can be used in place of HTMLPlugin to index both 43 43 # pages and their images. 44 44 # … … 55 55 # ImageMagick can be downloaded from the website above. 56 56 # Make sure the system path includes the ImageMagick binaries 57 # before using W3Im gPlug.57 # before using W3ImagePlugin. 58 58 # 59 59 # NOTE: NT/2000/XP contain a filesystem utility 'convert.exe' … … 97 97 # ... 98 98 # 99 # plugin W3Im gPlug-index_pages -aggressiveness 699 # plugin W3ImagePlugin -index_pages -aggressiveness 6 100 100 # 101 101 # ... … … 110 110 # 111 111 112 package W3Im gPlug;113 114 use HTMLPlug ;112 package W3ImagePlugin; 113 114 use HTMLPlugin; 115 115 use ghtml; 116 116 use unicode; … … 120 120 121 121 sub BEGIN { 122 @W3Im gPlug::ISA = qw( HTMLPlug);122 @W3ImagePlugin::ISA = qw( HTMLPlugin ); 123 123 } 124 124 125 125 my $aggressiveness_list = 126 126 [ { 'name' => "1", 127 'desc' => "{W3Im gPlug.aggressiveness.1}" },127 'desc' => "{W3ImagePlugin.aggressiveness.1}" }, 128 128 { 'name' => "2", 129 'desc' => "{W3Im gPlug.aggressiveness.2}" },129 'desc' => "{W3ImagePlugin.aggressiveness.2}" }, 130 130 { 'name' => "3", 131 'desc' => "{W3Im gPlug.aggressiveness.3}" },131 'desc' => "{W3ImagePlugin.aggressiveness.3}" }, 132 132 { 'name' => "4", 133 'desc' => "{W3Im gPlug.aggressiveness.4}" },133 'desc' => "{W3ImagePlugin.aggressiveness.4}" }, 134 134 { 'name' => "5", 135 'desc' => "{W3Im gPlug.aggressiveness.5}" },135 'desc' => "{W3ImagePlugin.aggressiveness.5}" }, 136 136 { 'name' => "6", 137 'desc' => "{W3Im gPlug.aggressiveness.6}" },137 'desc' => "{W3ImagePlugin.aggressiveness.6}" }, 138 138 { 'name' => "7", 139 'desc' => "{W3Im gPlug.aggressiveness.7}" },139 'desc' => "{W3ImagePlugin.aggressiveness.7}" }, 140 140 { 'name' => "8", 141 'desc' => "{W3Im gPlug.aggressiveness.8}" },141 'desc' => "{W3ImagePlugin.aggressiveness.8}" }, 142 142 { 'name' => "9", 143 'desc' => "{W3Im gPlug.aggressiveness.9}" } ];143 'desc' => "{W3ImagePlugin.aggressiveness.9}" } ]; 144 144 145 145 my $arguments = 146 146 [ { 'name' => "aggressiveness", 147 'desc' => "{W3Im gPlug.aggressiveness}",147 'desc' => "{W3ImagePlugin.aggressiveness}", 148 148 'type' => "int", 149 149 'list' => $aggressiveness_list, … … 151 151 'reqd' => "no" }, 152 152 { 'name' => "index_pages", 153 'desc' => "{W3Im gPlug.index_pages}",153 'desc' => "{W3ImagePlugin.index_pages}", 154 154 'type' => "flag", 155 155 'reqd' => "no" }, 156 156 { 'name' => "no_cache_images", 157 'desc' => "{W3Im gPlug.no_cache_images}",157 'desc' => "{W3ImagePlugin.no_cache_images}", 158 158 'type' => "flag", 159 159 'reqd' => "no" }, 160 160 { 'name' => "min_size", 161 'desc' => "{W3Im gPlug.min_size}",161 'desc' => "{W3ImagePlugin.min_size}", 162 162 'type' => "int", 163 163 'deft' => "2000", 164 164 'reqd' => "no" }, 165 165 { 'name' => "min_width", 166 'desc' => "{W3Im gPlug.min_width}",166 'desc' => "{W3ImagePlugin.min_width}", 167 167 'type' => "int", 168 168 'deft' => "50", 169 169 'reqd' => "no" }, 170 170 { 'name' => "min_height", 171 'desc' => "{W3Im gPlug.min_height}",171 'desc' => "{W3ImagePlugin.min_height}", 172 172 'type' => "int", 173 173 'deft' => "50", 174 174 'reqd' => "no" }, 175 175 { 'name' => "thumb_size", 176 'desc' => "{W3Im gPlug.thumb_size}",176 'desc' => "{W3ImagePlugin.thumb_size}", 177 177 'type' => "int", 178 178 'deft' => "100", 179 179 'reqd' => "no" }, 180 180 { 'name' => "convert_params", 181 'desc' => "{W3Im gPlug.convert_params}",181 'desc' => "{W3ImagePlugin.convert_params}", 182 182 'type' => "string", 183 183 'deft' => "", 184 184 'reqd' => "no" }, 185 185 { 'name' => "min_near_text", 186 'desc' => "{W3Im gPlug.min_near_text}",186 'desc' => "{W3ImagePlugin.min_near_text}", 187 187 'type' => "int", 188 188 'deft' => "10", 189 189 'reqd' => "no" }, 190 190 { 'name' => "max_near_text", 191 'desc' => "{W3Im gPlug.max_near_text}",191 'desc' => "{W3ImagePlugin.max_near_text}", 192 192 'type' => "int", 193 193 'deft' => "400", 194 194 'reqd' => "no" }, 195 195 { 'name' => "smallpage_threshold", 196 'desc' => "{W3Im gPlug.smallpage_threshold}",196 'desc' => "{W3ImagePlugin.smallpage_threshold}", 197 197 'type' => "int", 198 198 'deft' => "2048", 199 199 'reqd' => "no" }, 200 200 { 'name' => "textrefs_threshold", 201 'desc' => "{W3Im gPlug.textrefs_threshold}",201 'desc' => "{W3ImagePlugin.textrefs_threshold}", 202 202 'type' => "int", 203 203 'deft' => "2", 204 204 'reqd' => "no" }, 205 205 { 'name' => "caption_length", 206 'desc' => "{W3Im gPlug.caption_length}",206 'desc' => "{W3ImagePlugin.caption_length}", 207 207 'type' => "int", 208 208 'deft' => "80", 209 209 'reqd' => "no" }, 210 210 { 'name' => "neartext_length", 211 'desc' => "{W3Im gPlug.neartext_length}",211 'desc' => "{W3ImagePlugin.neartext_length}", 212 212 'type' => "int", 213 213 'deft' => "300", 214 214 'reqd' => "no" }, 215 215 { 'name' => "document_text", 216 'desc' => "{W3Im gPlug.document_text}",216 'desc' => "{W3ImagePlugin.document_text}", 217 217 'type' => "flag", 218 218 'reqd' => "no" } ]; 219 219 220 my $options = { 'name' => "W3Im gPlug",221 'desc' => "{W3Im gPlug.desc}",220 my $options = { 'name' => "W3ImagePlugin", 221 'desc' => "{W3ImagePlugin.desc}", 222 222 'abstract' => "no", 223 223 'inherits' => "yes", … … 229 229 push(@$pluginlist, $class); 230 230 231 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}232 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};233 234 my $self = new HTMLPlug ($pluginlist, $inputargs, $hashArgOptLists);231 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 232 push(@{$hashArgOptLists->{"OptList"}},$options); 233 234 my $self = new HTMLPlugin($pluginlist, $inputargs, $hashArgOptLists); 235 235 236 236 # init class variables … … 247 247 } 248 248 249 # if indexing pages, let HTMLPlug do it's stuff249 # if indexing pages, let HTMLPlugin do it's stuff 250 250 # image extraction done through read() 251 251 sub process { 252 my ($self, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 252 my $self = shift(@_); 253 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 253 254 $self->{'imglist'} = (); 254 255 if ( $self->{'index_pages'} ) { … … 280 281 281 282 # get complex configuration options from configuration files 282 # -- $GSDLCOLLECTION/etc/W3Im gPlug.cfg (tag sets for aggr 2+)283 # -- $GSDLCOLLECTION/etc/W3ImagePlugin.cfg (tag sets for aggr 2+) 283 284 # -- $GSDLHOME/etc/packages/phind/stopword/en/brown.sw (stopwords for aggr 5+) 284 285 285 # If there's no W3Im gPlug.cfg file we'll use the following default values286 # If there's no W3ImagePlugin.cfg file we'll use the following default values 286 287 my $defaultcfg = ' 287 288 <delimitertagset> … … 322 323 my ($filepath); 323 324 324 print {$self->{'outhandle'}} "W3Im gPlug: Initialising\n"325 print {$self->{'outhandle'}} "W3ImagePlugin: Initialising\n" 325 326 if $self->{'verbosity'} > 1; 326 # etc/W3Im gPlug.cfg (XML)327 # etc/W3ImagePlugin.cfg (XML) 327 328 # tag sets for captions and neartext 328 329 if ( $self->{'aggressiveness'} > 1 && $self->{'aggressiveness'} != 9 ) { … … 331 332 my ($cfg, @tagsets, $tagset, $type, @delims); 332 333 333 $filepath = "$collpath/etc/W3Im gPlug.cfg";334 $filepath = "$collpath/etc/W3ImagePlugin.cfg"; 334 335 if ( open CFG, "<$filepath" ) { 335 336 while (<CFG>) { $cfg .= $_ } … … 353 354 # output a warning if there seem to be no delimiters 354 355 if ( scalar(@{$self->{'cdelims'}} == 0)) { 355 print {$self->{'outhandle'}} "W3Im gPlug: Warning: no caption delimiters found in $filepath\n";356 print {$self->{'outhandle'}} "W3ImagePlugin: Warning: no caption delimiters found in $filepath\n"; 356 357 } 357 358 if ( scalar(@{$self->{'delims'}} == 0)) { 358 print {$self->{'outhandle'}} "W3Im gPlug: Warning: no neartext delimiters found in $filepath\n";359 print {$self->{'outhandle'}} "W3ImagePlugin: Warning: no neartext delimiters found in $filepath\n"; 359 360 } 360 361 } … … 372 373 close STOPWORDS; 373 374 } else { 374 print {$self->{'outhandle'}} "W3Im gPlug: Warning: couldn't open stopwords file at $filepath ($!)\n";375 print {$self->{'outhandle'}} "W3ImagePlugin: Warning: couldn't open stopwords file at $filepath ($!)\n"; 375 376 } 376 377 … … 379 380 if ( $self->{'neartext_length'} > $self->{'max_near_text'} ) { 380 381 $self->{'max_near_text'} = $self->{'neartext_length'} * 1.33; 381 print {$self->{'outhandle'}} "W3Im gPlug: Warning: adjusted max_text to $self->{'max_near_text'}\n";382 print {$self->{'outhandle'}} "W3ImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n"; 382 383 } 383 384 if ( $self->{'caption_length'} > $self->{'max_near_text'} ) { 384 385 $self->{'max_near_text'} = $self->{'caption_length'} * 1.33; 385 print {$self->{'outhandle'}} "W3Im gPlug: Warning: adjusted max_text to $self->{'max_near_text'}\n";386 print {$self->{'outhandle'}} "W3ImagePlugin: Warning: adjusted max_text to $self->{'max_near_text'}\n"; 386 387 } 387 388 … … 396 397 my ($self, $pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = (@_); 397 398 my ($doc_obj, $section, $filepath, $imgtag, $pos, $context, $numdocs, $tndir, $imgs); 398 # forward normal read (runs HTMLPlug if index_pages T)399 # forward normal read (runs HTMLPlugin if index_pages T) 399 400 my $ok = $self->SUPER::read($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli); 400 401 if ( ! $ok ) { return $ok } # what is this returning?? … … 419 420 ($imgtag) = ($context =~ /(<(?:img|a|body)\s[^>]*$filepath[^>]*>)/is ); 420 421 if (! defined($imgtag)) { $imgtag = $filepath } 421 print $outhandle "W3Im gPlug: extracting $filepath\n"422 print $outhandle "W3ImagePlugin: extracting $filepath\n" 422 423 if ( $self->{'verbosity'} > 1 ); 423 424 $doc_obj = new doc ("", "indexed_doc"); … … 433 434 return $numdocs; 434 435 } else { 435 print $outhandle "W3Im gPlug: No images from $file indexed\n"436 print $outhandle "W3ImagePlugin: No images from $file indexed\n" 436 437 if ( $self->{'verbosity'} > 2 ); 437 438 return 1; … … 472 473 `convert -flatten -filter Hanning $self->{'convert_params'} -geometry "$self->{'thumb_size'}x$self->{'thumb_size'}>" $filepath $thumbfp` unless -e $thumbfp; 473 474 if ( ! (-e $thumbfp) ) { 474 print STDERR "W3Im gPlug: 'convert' failed. Check ImageMagicK binaries are installed and working correctly\n"; return 0;475 print STDERR "W3ImagePlugin: 'convert' failed. Check ImageMagicK binaries are installed and working correctly\n"; return 0; 475 476 } 476 477 … … 853 854 } elsif ( $bestlen[$best1] < $mintext ) { 854 855 # use plain text extraction if tags failed (e.g. usable tag outside context) 855 print {$self->{'outhandle'}} "W3Im gPlug: Fallback to plain-text extraction for $tag\n"856 print {$self->{'outhandle'}} "W3ImagePlugin: Fallback to plain-text extraction for $tag\n" 856 857 if $self->{'verbosity'} > 2; 857 858 $neartext[0] = "<tr><td>RawNeartext</td><td>" . $self->extract_raw_neartext($tag, $textref) . "</td></tr>"; … … 985 986 `identify $abspath -ping -format "%wx%h"` =~ /^(\d*)x(\d*)$/m; 986 987 if (! ($width && $height)) { 987 print STDERR "W3Im gPlug: ($abspath) 'identify' failed. Check ImageMagicK binaries are installed and working correctly\n"; next;988 print STDERR "W3ImagePlugin: ($abspath) 'identify' failed. Check ImageMagicK binaries are installed and working correctly\n"; next; 988 989 } 989 990 $filesize = (-s $abspath); … … 998 999 $imgs->{$filepath}{'filesize'} = $filesize; 999 1000 } else { 1000 print {$self->{'outhandle'}} "W3Im gPlug: skipping $self->{'base_path'}/$relpath: $filesize, $width x $height\n"1001 print {$self->{'outhandle'}} "W3ImagePlugin: skipping $self->{'base_path'}/$relpath: $filesize, $width x $height\n" 1001 1002 if $self->{'verbosity'} > 2; 1002 1003 } … … 1029 1030 } 1030 1031 1031 # HTMLPlug only extracts meta-data if it is specified in plugin options1032 # HTMLPlugin only extracts meta-data if it is specified in plugin options 1032 1033 # hence a special function to do it here 1033 1034 sub get_meta_value { … … 1048 1049 # so we can go straight to the image 1049 1050 # within the cached version of the source page 1050 # (augment's HTMLPlug sub)1051 # (augment's HTMLPlugin sub) 1051 1052 sub replace_images { 1052 1053 my $self = shift (@_); -
gsdl/trunk/perllib/plugins/WordPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # WordPlug .pm -- plugin for importing Microsoft Word documents3 # WordPlugin.pm -- plugin for importing Microsoft Word documents 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 25 25 # 12/05/02 Added usage datastructure - John Thompson 26 26 27 package WordPlug ;28 29 use Convert ToPlug;27 package WordPlugin; 28 29 use ConvertBinaryFile; 30 30 use strict; 31 31 no strict 'refs'; # allow filehandles to be variables and viceversa 32 32 33 33 sub BEGIN { 34 @WordPlug ::ISA = ('ConvertToPlug');34 @WordPlugin::ISA = ('ConvertBinaryFile'); 35 35 } 36 36 37 37 my $arguments = 38 38 [ { 'name' => "process_exp", 39 'desc' => "{Bas Plug.process_exp}",39 'desc' => "{BasePlugin.process_exp}", 40 40 'type' => "regexp", 41 41 'deft' => &get_default_process_exp(), 42 42 'reqd' => "no" }, 43 43 { 'name' => "description_tags", 44 'desc' => "{HTMLPlug .description_tags}",44 'desc' => "{HTMLPlugin.description_tags}", 45 45 'type' => "flag" } 46 46 ]; 47 47 48 my $options = { 'name' => "WordPlug ",49 'desc' => "{WordPlug .desc}",48 my $options = { 'name' => "WordPlugin", 49 'desc' => "{WordPlugin.desc}", 50 50 'abstract' => "no", 51 51 'inherits' => "yes", … … 60 60 if ($ENV{'GSDLOS'} =~ m/^windows$/i) { 61 61 my $ws_arg = [ { 'name' => "windows_scripting", 62 'desc' => "{WordPlug .windows_scripting}",62 'desc' => "{WordPlugin.windows_scripting}", 63 63 'type' => "flag", 64 64 'reqd' => "no" }, … … 67 67 'deft' => "Title" }, 68 68 { 'name' => "level1_header", 69 'desc' => "{StructuredHTMLPlug .level1_header}",69 'desc' => "{StructuredHTMLPlugin.level1_header}", 70 70 'type' => "regexp", 71 71 'reqd' => "no", 72 72 'deft' => "" }, 73 73 { 'name' => "level2_header", 74 'desc' => "{StructuredHTMLPlug .level2_header}",74 'desc' => "{StructuredHTMLPlugin.level2_header}", 75 75 'type' => "regexp", 76 76 'reqd' => "no", 77 77 'deft' => "" }, 78 78 { 'name' => "level3_header", 79 'desc' => "{StructuredHTMLPlug .level3_header}",79 'desc' => "{StructuredHTMLPlugin.level3_header}", 80 80 'type' => "regexp", 81 81 'reqd' => "no", 82 82 'deft' => "" }, 83 83 { 'name' => "title_header", 84 'desc' => "{StructuredHTMLPlug .title_header}",84 'desc' => "{StructuredHTMLPlugin.title_header}", 85 85 'type' => "regexp", 86 86 'reqd' => "no", 87 87 'deft' => "" }, 88 88 { 'name' => "delete_toc", 89 'desc' => "{StructuredHTMLPlug .delete_toc}",89 'desc' => "{StructuredHTMLPlugin.delete_toc}", 90 90 'type' => "flag", 91 91 'reqd' => "no", … … 94 94 'modegli' => "3"}, 95 95 { 'name' => "toc_header", 96 'desc' => "{StructuredHTMLPlug .toc_header}",96 'desc' => "{StructuredHTMLPlugin.toc_header}", 97 97 'type' => "regexp", 98 98 'reqd' => "no", … … 103 103 } 104 104 105 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}106 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};107 108 my $self = new Convert ToPlug($pluginlist, $inputargs, $hashArgOptLists);105 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 106 push(@{$hashArgOptLists->{"OptList"}},$options); 107 108 my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 109 109 110 110 if ($self->{'info_only'}) { … … 113 113 } 114 114 115 #this is passed through to gsConvert.pl by ConvertToPlug.pm 115 $self->{'filename_extension'} = "doc"; 116 $self->{'file_type'} = "Word"; 117 118 #this is passed through to gsConvert.pl by ConvertBinaryFile.pm 116 119 $self->{'convert_options'} = "-windows_scripting" if $self->{'windows_scripting'}; 117 120 … … 123 126 my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 124 127 if (defined $self->{'windows_scripting'}) { 125 if (!defined $secondary_plugin_options->{'StructuredHTMLPlug '}){126 $secondary_plugin_options->{'StructuredHTMLPlug '} = [];127 my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlug '};128 if (!defined $secondary_plugin_options->{'StructuredHTMLPlugin'}){ 129 $secondary_plugin_options->{'StructuredHTMLPlugin'} = []; 130 my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlugin'}; 128 131 129 # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)132 # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj) 130 133 # to extract these metadata fields from the HEAD META fields 131 134 push (@$structhtml_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>"); … … 142 145 } 143 146 } 144 if (!defined $secondary_plugin_options->{'HTMLPlug '}) {145 $secondary_plugin_options->{'HTMLPlug '} = [];146 } 147 if (!defined $secondary_plugin_options->{'T EXTPlug'}) {148 $secondary_plugin_options->{'T EXTPlug'} = [];149 } 150 151 my $html_options = $secondary_plugin_options->{'HTMLPlug '};152 my $text_options = $secondary_plugin_options->{'TextPlug '};153 my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlug '};154 # wvWare will always produce html files encoded as utf-8, so make sure the secondary HTMLPlug knows this147 if (!defined $secondary_plugin_options->{'HTMLPlugin'}) { 148 $secondary_plugin_options->{'HTMLPlugin'} = []; 149 } 150 if (!defined $secondary_plugin_options->{'TextPlugin'}) { 151 $secondary_plugin_options->{'TextPlugin'} = []; 152 } 153 154 my $html_options = $secondary_plugin_options->{'HTMLPlugin'}; 155 my $text_options = $secondary_plugin_options->{'TextPlugin'}; 156 my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlugin'}; 157 # wvWare will always produce html files encoded as utf-8, so make sure the secondary HTMLPlugin knows this 155 158 push(@$html_options,"-input_encoding", "utf8"); 156 159 push(@$html_options,"-extract_language") if $self->{'extract_language'}; 157 160 push(@$html_options, "-description_tags") if $self->{'description_tags'}; 158 161 159 # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)162 # Instruct HTMLPlugin (when eventually accessed through read_into_doc_obj) 160 163 # to extract these metadata fields from the HEAD META fields 161 164 push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>"); … … 181 184 } 182 185 183 sub convert_post_process 186 sub convert_post_process_old 184 187 { 185 188 my $self = shift (@_); … … 199 202 # Write it out again! 200 203 #$self->utf8_write_file (\$text, $conv_filename); 201 }202 203 sub get_file_type {204 my $self = shift (@_);205 my $file_type = "Word";206 return $file_type;207 204 } 208 205 … … 230 227 } 231 228 232 # do plugin specific processing of doc_obj for HTML type233 sub process {234 my $self = shift (@_);235 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;236 237 return $self->process_type("doc", $base_dir, $file, $doc_obj);238 }239 229 240 230 1; -
gsdl/trunk/perllib/plugins/ZIPPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # ZIPPlug .pm --3 # ZIPPlugin.pm -- 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 44 44 45 45 46 package ZIPPlug ;46 package ZIPPlugin; 47 47 48 use BasPlug;48 use AbstractPlugin; 49 49 use plugin; 50 50 use util; … … 55 55 56 56 BEGIN { 57 @ZIPPlug ::ISA = ('BasPlug');57 @ZIPPlugin::ISA = ('AbstractPlugin'); 58 58 } 59 59 60 60 my $arguments = 61 61 [ { 'name' => "process_exp", 62 'desc' => "{Bas Plug.process_exp}",62 'desc' => "{BasePlugin.process_exp}", 63 63 'type' => "string", 64 64 'deft' => &get_default_process_exp(), 65 65 'reqd' => "no" } ]; 66 66 67 my $options = { 'name' => "ZIPPlug ",68 'desc' => "{ZIPPlug .desc}",67 my $options = { 'name' => "ZIPPlugin", 68 'desc' => "{ZIPPlugin.desc}", 69 69 'abstract' => "no", 70 70 'inherits' => "yes", … … 77 77 push(@$pluginlist, $class); 78 78 79 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}80 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};79 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 80 push(@{$hashArgOptLists->{"OptList"}},$options); 81 81 82 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists);82 my $self = new AbstractPlugin($pluginlist, $inputargs, $hashArgOptLists); 83 83 84 84 return bless $self, $class; … … 112 112 &util::mk_all_dir ($tmpdir); 113 113 114 print $outhandle "ZIPPlug : extracting $file_only to $tmpdir\n"114 print $outhandle "ZIPPlugin: extracting $file_only to $tmpdir\n" 115 115 if $self->{'verbosity'} > 1; 116 116
Note:
See TracChangeset
for help on using the changeset viewer.