Changeset 10274
- Timestamp: 2005-07-25T11:45:34+12:00 (19 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/perllib/plugins/PSPlug.pm
r10254 r10274 79 79 my $self = (defined $hashArgOptLists)? new ConvertToPlug($pluginlist,$inputargs,$hashArgOptLists): new ConvertToPlug($pluginlist,$inputargs); 80 80 81 return bless $self, $class; 81 82 my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 83 if (!defined $secondary_plugin_options->{'TEXTPlug'}) { 84 $secondary_plugin_options->{'TEXTPlug'} = []; 85 } 86 87 my $text_options = $secondary_plugin_options->{'TEXTPlug'}; 88 89 # following title_sub removes "Page 1" added by ps2ascii, and a leading 90 # "1", which is often the page number at the top of the page. Bad Luck 91 # if your document title actually starts with "1 " - is there a better way? 92 #$self->{'input_encoding'} = "utf8"; 93 #$self->{'extract_language'} = 1; 94 #push(@$text_options, "-input_encoding", "utf8"); 95 #push(@$text_options,"-extract_language"); 96 push(@$text_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 97 98 $self = bless $self, $class; 99 100 $self->load_secondary_plugins($class,$secondary_plugin_options); 101 102 return $self; 82 103 } 83 104 … … 95 116 } 96 117 118 sub convert_post_process 119 { 120 my $self = shift (@_); 121 my ($conv_filename) = @_; 122 123 my $outhandle=$self->{'outhandle'}; 124 125 my ($language, $encoding) = $self->textcat_get_language_encoding ($conv_filename); 126 127 # print STDERR "*** Plug encoding = $encoding\n"; 128 129 # read in file ($text will be in utf8) 130 my $text = ""; 131 $self->read_file ($conv_filename, $encoding, $language, \$text); 132 133 # turn any high bytes that aren't valid utf-8 into utf-8. 134 unicode::ensure_utf8(\$text); 135 136 #print STDERR "*** Text =$text\n"; 137 # Write it out again! 
138 $self->utf8_write_file (\$text, $conv_filename); 139 } 140 97 141 sub extract_metadata_from_postscript { 98 142 my $self = shift (@_); 99 my $filename = shift (@_); 100 my $doc = shift (@_); 143 144 my ($filename,$doc) = @_; 145 101 146 my $section = $doc->get_top_section(); 102 147 … … 166 211 sub process { 167 212 my $self = shift (@_); 168 my ($trash, $trash2, $path, $file, $trash3, $doc, $gli) = @_; 169 170 my $outhandle = $self->{'outhandle'}; 171 print $outhandle "PSPlug: passing $_[3] on to $self->{'converted_to'}Plug\n" 172 if $self->{'verbosity'} > 1; 173 print STDERR "<Processing n='$_[3]' p='PSPlug'>\n" if ($gli); 174 175 &extract_metadata_from_postscript($self,"$path/$file", $doc); 176 177 return ConvertToPlug::process_type($self,"ps",@_); 213 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 214 215 # my $outhandle = $self->{'outhandle'}; 216 217 # print $outhandle "PSPlug: passing $file on to $self->{'converted_to'}Plug\n" 218 # if $self->{'verbosity'} > 1; 219 # print STDERR "<Processing n='$file' p='PSPlug'>\n" if ($gli); 220 221 my $filename = &util::filename_cat($base_dir,$file); 222 $self->extract_metadata_from_postscript($filename, $doc_obj); 223 224 return $self->process_type("ps",$base_dir,$file,$doc_obj); 178 225 } 179 226
Note: See TracChangeset for help on using the changeset viewer.