Changeset 15872 for gsdl/trunk/perllib/plugins/PDFPlugin.pm
- Timestamp: 2008-06-05T09:29:32+12:00 (16 years ago)
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/perllib/plugins/PDFPlugin.pm
r15865 r15872 1 1 ########################################################################### 2 2 # 3 # PDFPlug .pm -- reasonably with-it pdf plugin3 # PDFPlugin.pm -- reasonably with-it pdf plugin 4 4 # A component of the Greenstone digital library software 5 5 # from the New Zealand Digital Library Project at the … … 23 23 # 24 24 ########################################################################### 25 package PDFPlug; 26 27 use ConvertToPlug; 25 package PDFPlugin; 26 27 use ConvertBinaryFile; 28 use ReadTextFile; 28 29 use unicode; 29 30 use strict; … … 31 32 32 33 sub BEGIN { 33 @PDFPlug ::ISA = ('ConvertToPlug');34 @PDFPlugin::ISA = ('ConvertBinaryFile', 'ReadTextFile'); 34 35 } 35 36 36 37 my $convert_to_list = 37 38 [ { 'name' => "auto", 38 'desc' => "{Convert ToPlug.convert_to.auto}" },39 'desc' => "{ConvertBinaryFile.convert_to.auto}" }, 39 40 { 'name' => "html", 40 'desc' => "{Convert ToPlug.convert_to.html}" },41 'desc' => "{ConvertBinaryFile.convert_to.html}" }, 41 42 { 'name' => "text", 42 'desc' => "{Convert ToPlug.convert_to.text}" },43 'desc' => "{ConvertBinaryFile.convert_to.text}" }, 43 44 { 'name' => "pagedimg_jpg", 44 'desc' => "{Convert ToPlug.convert_to.pagedimg_jpg}"},45 'desc' => "{ConvertBinaryFile.convert_to.pagedimg_jpg}"}, 45 46 { 'name' => "pagedimg_gif", 46 'desc' => "{Convert ToPlug.convert_to.pagedimg_gif}"},47 'desc' => "{ConvertBinaryFile.convert_to.pagedimg_gif}"}, 47 48 { 'name' => "pagedimg_png", 48 'desc' => "{Convert ToPlug.convert_to.pagedimg_png}"},49 'desc' => "{ConvertBinaryFile.convert_to.pagedimg_png}"}, 49 50 ]; 50 51 … … 53 54 [ 54 55 { 'name' => "convert_to", 55 'desc' => "{Convert ToPlug.convert_to}",56 'desc' => "{ConvertBinaryFile.convert_to}", 56 57 'type' => "enum", 57 58 'reqd' => "yes", … … 59 60 'deft' => "html" }, 60 61 { 'name' => "process_exp", 61 'desc' => "{Bas Plug.process_exp}",62 'desc' => "{BasePlugin.process_exp}", 62 63 'type' => "regexp", 63 64 'deft' => &get_default_process_exp(), 64 65 
'reqd' => "no" }, 65 66 { 'name' => "block_exp", 66 'desc' => "{Bas Plug.block_exp}",67 'desc' => "{BasePlugin.block_exp}", 67 68 'type' => "regexp", 68 69 'deft' => &get_default_block_exp() }, 69 70 { 'name' => "metadata_fields", 70 'desc' => "{HTMLPlug .metadata_fields}",71 'desc' => "{HTMLPlugin.metadata_fields}", 71 72 'type' => "string", 72 73 'deft' => "" }, 73 74 { 'name' => "noimages", 74 'desc' => "{PDFPlug .noimages}",75 'desc' => "{PDFPlugin.noimages}", 75 76 'type' => "flag" }, 76 77 { 'name' => "allowimagesonly", 77 'desc' => "{PDFPlug .allowimagesonly}",78 'desc' => "{PDFPlugin.allowimagesonly}", 78 79 'type' => "flag" }, 79 80 { 'name' => "complex", 80 'desc' => "{PDFPlug .complex}",81 'desc' => "{PDFPlugin.complex}", 81 82 'type' => "flag" }, 82 83 { 'name' => "nohidden", 83 'desc' => "{PDFPlug .nohidden}",84 'desc' => "{PDFPlugin.nohidden}", 84 85 'type' => "flag" }, 85 86 { 'name' => "zoom", 86 'desc' => "{PDFPlug .zoom}",87 'desc' => "{PDFPlugin.zoom}", 87 88 'deft' => "2", 88 89 'range' => "1,3", # actually the range is 0.5-3 89 90 'type' => "int" }, 90 91 { 'name' => "use_sections", 91 'desc' => "{PDFPlug .use_sections}",92 'desc' => "{PDFPlugin.use_sections}", 92 93 'type' => "flag" }, 93 94 { 'name' => "description_tags", 94 'desc' => "{HTMLPlug .description_tags}",95 'desc' => "{HTMLPlugin.description_tags}", 95 96 'type' => "flag" } 96 97 ]; 97 98 98 my $options = { 'name' => "PDFPlug ",99 'desc' => "{PDFPlug .desc}",99 my $options = { 'name' => "PDFPlugin", 100 'desc' => "{PDFPlugin.desc}", 100 101 'abstract' => "no", 101 102 'inherits' => "yes", … … 111 112 push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?'); 112 113 113 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}114 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};114 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 115 push(@{$hashArgOptLists->{"OptList"}},$options); 115 116 116 117 my @arg_array = @$inputargs; 117 my $self = new 
Convert ToPlug($pluginlist, $inputargs, $hashArgOptLists);118 my $self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 118 119 119 120 if ($self->{'info_only'}) { … … 122 123 } 123 124 124 # these are passed through to gsConvert.pl by ConvertToPlug.pm 125 $self->{'filename_extension'} = "pdf"; 126 $self->{'file_type'} = "PDF"; 127 128 # these are passed through to gsConvert.pl by ConvertBinaryFile.pm 125 129 my $zoom = $self->{"zoom"}; 126 130 $self->{'convert_options'} = "-pdf_zoom $zoom"; … … 132 136 my $secondary_plugin_options = $self->{'secondary_plugin_options'}; 133 137 134 if (!defined $secondary_plugin_options->{'HTMLPlug '}) {135 $secondary_plugin_options->{'HTMLPlug '} = [];136 } 137 if (!defined $secondary_plugin_options->{'T EXTPlug'}) {138 $secondary_plugin_options->{'T EXTPlug'} = [];138 if (!defined $secondary_plugin_options->{'HTMLPlugin'}) { 139 $secondary_plugin_options->{'HTMLPlugin'} = []; 140 } 141 if (!defined $secondary_plugin_options->{'TextPlugin'}) { 142 $secondary_plugin_options->{'TextPlugin'} = []; 139 143 } 140 144 if (defined $self->{'convert_to'} && $self->{'convert_to'} =~ /pagedimg.*/i) { 141 if (!defined $secondary_plugin_options->{'PagedIm gPlug'}){142 $secondary_plugin_options->{'PagedIm gPlug'} = [];143 my $pagedimg_options = $secondary_plugin_options->{'PagedIm gPlug'};145 if (!defined $secondary_plugin_options->{'PagedImagePlugin'}){ 146 $secondary_plugin_options->{'PagedImagePlugin'} = []; 147 my $pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'}; 144 148 push(@$pagedimg_options, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 145 149 } 146 150 } 147 my $html_options = $secondary_plugin_options->{'HTMLPlug '};148 my $text_options = $secondary_plugin_options->{'T EXTPlug'};149 my $pagedimg_options = $secondary_plugin_options->{'PagedIm gPlug'};151 my $html_options = $secondary_plugin_options->{'HTMLPlugin'}; 152 my $text_options = $secondary_plugin_options->{'TextPlugin'}; 153 my 
$pagedimg_options = $secondary_plugin_options->{'PagedImagePlugin'}; 150 154 151 155 if ($self->{'input_encoding'} eq "auto") { … … 227 231 && $self->{'converted_to'} eq "HTML") { 228 232 229 print $outhandle "PDFPlug : Calculating sections...\n";233 print $outhandle "PDFPlugin: Calculating sections...\n"; 230 234 231 235 # we have "<a name=1></a>" etc for each page … … 236 240 237 241 if (scalar (@sections) == 1) { #only one section - no split! 238 print $outhandle "PDFPlug : warning - no sections found\n";242 print $outhandle "PDFPlugin: warning - no sections found\n"; 239 243 } else { 240 244 $top_section .= shift @sections; # keep HTML header etc as top_section … … 274 278 $title = " "; # get rid of the undefined warning in next line 275 279 } 276 my $newsection = "<!-- from PDFPlug -->\n<!-- <Section>\n";280 my $newsection = "<!-- from PDFPlugin -->\n<!-- <Section>\n"; 277 281 $newsection .= "<Metadata name=\"Title\">" . $title 278 282 . "</Metadata>\n--><p>\n"; … … 296 300 sub process { 297 301 my $self = shift (@_); 298 my ($ textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;302 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 299 303 300 304 my $result = $self->process_type("pdf",$base_dir,$file,$doc_obj);
Note: See TracChangeset for help on using the changeset viewer.