Changeset 15871 for gsdl/trunk/perllib/plugins/SplitTextFile.pm
- Timestamp:
- 2008-06-05T09:26:56+12:00 (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/SplitTextFile.pm
r15865 r15871 1 1 ########################################################################### 2 2 # 3 # SplitPlug.pm - a plugin for splitting input files into segments that 3 # SplitTextFile.pm - a plugin for splitting input files into segments that 4 4 # will then be individually processed. 5 5 # … … 29 29 30 30 31 # SplitPlug is a plugin for splitting input files into segments that will 31 # SplitTextFile is a plugin for splitting input files into segments that will 32 32 # then be individually processed. 33 33 … … 35 35 # process input files that contain several documents, you should write a 36 36 # plugin with a process function that will handle one of those documents 37 # and have it inherit from SplitPlug. See ReferPlug for an example. 38 39 40 package SplitPlug; 41 42 use BasPlug; 37 # and have it inherit from SplitTextFile. See ReferPlug for an example. 38 39 40 package SplitTextFile; 41 42 use ReadTextFile; 43 43 use gsprintf 'gsprintf'; 44 44 use util; … … 47 47 no strict 'refs'; # allow filehandles to be variables and viceversa 48 48 49 # SplitPlug is a sub-class of BasPlug. 49 # SplitTextFile is a sub-class of BasPlug.
50 50 sub BEGIN { 51 @SplitPlug::ISA = ('BasPlug'); 51 @SplitTextFile::ISA = ('ReadTextFile'); 52 52 } 53 53 … … 55 55 my $arguments = 56 56 [ { 'name' => "split_exp", 57 'desc' => "{SplitPlug.split_exp}", 57 'desc' => "{SplitTextFile.split_exp}", 58 58 'type' => "regexp", 59 59 #'deft' => &get_default_split_exp(), … … 61 61 'reqd' => "no" } ]; 62 62 63 my $options = { 'name' => "SplitPlug", 64 'desc' => "{SplitPlug.desc}", 63 my $options = { 'name' => "SplitTextFile", 64 'desc' => "{SplitTextFile.desc}", 65 65 'abstract' => "yes", 66 66 'inherits' => "yes", … … 73 73 push(@$pluginlist, $class); 74 74 75 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 76 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 77 78 my $self = new BasPlug($pluginlist, $inputargs, $hashArgOptLists); 75 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 76 push(@{$hashArgOptLists->{"OptList"}},$options); 77 78 my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists); 79 79 80 80 $self->{'textcat_store'} = {}; … … 87 87 my ($verbosity, $outhandle, $failhandle) = @_; 88 88 89 $self->BasPlug::init($verbosity, $outhandle, $failhandle); 90 89 $self->ReadTextFile::init($verbosity, $outhandle, $failhandle); 90 91 # why is this is init and not in new??
91 92 if ((!defined $self->{'process_exp'}) || ($self->{'process_exp'} eq "")) { 92 93 … … 119 120 my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_; 120 121 122 # returns 1 if matches process_exp, and has done blocking in the meantime 121 123 my $matched = $self->SUPER::metadata_read($pluginfo, $base_dir, $file, 122 124 $metadata, $extrametakeys, … … 146 148 147 149 if ($text !~ /\w/) { 148 gsprintf($outhandle, "$plugin_name: {BasPlug.file_has_no_text}\n", 150 gsprintf($outhandle, "$plugin_name: {ReadTextFile.file_has_no_text}\n", 149 151 $file) 150 152 if $self->{'verbosity'}; … … 171 173 } 172 174 173 print $outhandle "SplitPlug found " . (scalar @segments) . " documents in $filename\n" 175 print $outhandle "SplitTextFile found " . (scalar @segments) . " documents in $filename\n" 174 176 if $self->{'verbosity'}; 175 177 … … 231 233 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Encoding", $encoding); 232 234 my ($filemeta) = $file =~ /([^\\\/]+)$/; 233 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($filemeta)); 235 $self->set_Source_metadata($doc_obj, $filemeta, $encoding); 234 236 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "SourceSegment", "$segment"); 235 237 if ($self->{'cover_image'}) {
Note:
See TracChangeset
for help on using the changeset viewer.