Changeset 10353
- Timestamp: 2005-07-29T14:27:19+12:00 (19 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/PDFPlug.pm
r10273 r10353 23 23 # 24 24 ########################################################################### 25 package PDFPlug; 26 27 use ConvertToPlug; 28 use unicode; 25 29 use strict; 26 30 no strict 'refs'; # so we can use a var for filehandles (eg STDERR) 27 28 package PDFPlug;29 30 use ConvertToPlug;31 use unicode;32 31 33 32 sub BEGIN { … … 74 73 push(@$pluginlist, $class); 75 74 75 push(@$inputargs,"-title_sub"); 76 push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?'); 77 78 #foreach my $temp (@$inputargs) 79 #{ 80 #print STDERR "($temp)\n"; 81 #} 82 76 83 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});} 77 84 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)}; 78 79 push(@$inputargs,"-title_sub"); 80 push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?'); 81 85 82 86 my $self = (defined $hashArgOptLists)? new ConvertToPlug($pluginlist,$inputargs,$hashArgOptLists): new ConvertToPlug($pluginlist,$inputargs); 83 84 85 #if ($self->{"use_sections"}) { 86 #$self->{"description_tags"} = 1; 87 #} 88 87 89 88 # these are passed through to gsConvert.pl by ConvertToPlug.pm 90 89 my $zoom = $self->{"zoom"}; … … 102 101 $secondary_plugin_options->{'TEXTPlug'} = []; 103 102 } 104 103 if (defined $self->{'convert_to'} && $self->{'convert_to'} =~ /pagedimg.*/i) { 104 if (!defined $secondary_plugin_options->{'PagedImgPlug'}){ 105 $secondary_plugin_options->{'PagedImgPlug'} = []; 106 my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'}; 107 push(@$pagedimg_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?') 108 109 } 110 } 105 111 my $html_options = $secondary_plugin_options->{'HTMLPlug'}; 106 112 my $text_options = $secondary_plugin_options->{'TEXTPlug'}; 113 #my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'}; 107 114 108 115 if ($self->{'input_encoding'} eq "auto") { … … 110 117 # => restrict primary PDFPlug and secondary HTML plugin to use 111 118 # utf8 and extract language. 
112 113 119 $self->{'input_encoding'} = "utf8"; 114 120 $self->{'extract_language'} = 1; … … 126 132 push(@$html_options,"-description_tags"); 127 133 } 128 129 134 # following title_sub removes "Page 1" added by pdftohtml, and a leading 130 135 # "1", which is often the page number at the top of the page. Bad Luck … … 132 137 push(@$html_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 133 138 push(@$text_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 139 #push(@$pagedimg_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 134 140 135 141 $self = bless $self, $class; 136 137 142 $self->load_secondary_plugins($class,$secondary_plugin_options); 138 139 143 return $self; 140 144 } … … 170 174 # be useful to give an indication of document length in browser through setting 171 175 # num_pages as metadata. 172 173 176 my @pages = ($text =~ /\<[Aa] name=\"?\w+\"?>/ig); 174 177 my $num_pages = scalar(@pages); … … 275 278 276 279 $doc_obj->add_utf8_metadata($cursection, "NumPages", $self->{'num_pages'}); 277 278 280 279 281 if ($self->{'use_sections'} && $self->{'converted_to'} eq "HTML") {
Note: See TracChangeset for help on using the changeset viewer.