Changeset 32290
- Timestamp:
- 2018-07-19T19:54:32+12:00 (6 years ago)
- Location:
- main/trunk/greenstone2
- Files:
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/bin/script/gsConvert.pl
r32287 r32290 124 124 "type/$type_re/", \$input_type, 125 125 '/errlog/.*/', \$faillogfile, 126 'output/(auto|html|text|pagedimg).*/', \$output_type, # regex includes html_multi and paged_html besides html126 'output/(auto|html|text|pagedimg).*/', \$output_type, # regex includes html_multi and (paged_)pretty_html besides html, as well as pagedimgtxt_<imgext> besides pagedimg_<imgext> 127 127 'timeout/\d+/0',\$timeout, 128 128 'verbose/\d+/0', \$verbose, … … 360 360 elsif ($pdf_tool eq "xpdftools" ) { 361 361 362 # default to prettyhtml output362 # default to paged_pretty_html output 363 363 if (!$output_type) { 364 $output_type = "pretty_html";364 $output_type = "paged_pretty_html"; 365 365 } 366 366 -
main/trunk/greenstone2/perllib/plugins/PDFPlugin.pm
r32289 r32290 165 165 if ($self->{'use_realistic_book'}) { 166 166 if ($self->{'convert_to'} ne "html") { 167 print STDERR "PDFs will be converted to HTML for realistic book functionality\n";167 &gsprintf::gsprintf(STDERR, "PDFv2Plugin: {PDFPlugin.html_for_realistic_book}\n"); 168 168 $self->{'convert_to'} = "html"; 169 169 } -
main/trunk/greenstone2/perllib/plugins/PDFv2Plugin.pm
r32287 r32290 76 76 'reqd' => "yes", 77 77 'list' => $convert_to_list, 78 'deft' => "p retty_html" },78 'deft' => "paged_pretty_html" }, 79 79 { 'name' => "process_exp", 80 80 'desc' => "{BaseImporter.process_exp}", … … 86 86 'type' => "regexp", 87 87 'deft' => &get_default_block_exp() }, 88 { 'name' => "metadata_fields",89 'desc' => "{HTMLPlugin.metadata_fields}",90 'type' => "string",91 'deft' => "Title,Author,Subject,Keywords" },92 { 'name' => "metadata_field_separator",93 'desc' => "{HTMLPlugin.metadata_field_separator}",94 'type' => "string",95 'deft' => "" },88 # { 'name' => "metadata_fields", 89 # 'desc' => "{HTMLPlugin.metadata_fields}", 90 # 'type' => "string", 91 # 'deft' => "Title,Author,Subject,Keywords" }, 92 # { 'name' => "metadata_field_separator", 93 # 'desc' => "{HTMLPlugin.metadata_field_separator}", 94 # 'type' => "string", 95 # 'deft' => "" }, 96 96 { 'name' => "dpi", 97 97 'desc' => "{PDFv2Plugin.dpi}", … … 106 106 { 'name' => "use_realistic_book", 107 107 'desc' => "{PDFPlugin.use_realistic_book}", 108 'type' => "flag" }108 'type' => "flag" } 109 109 ]; 110 110 … … 126 126 my $pdfbox_converter_self = new PDFBoxConverter($pluginlist, $inputargs, $hashArgOptLists); 127 127 my $cbf_self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 128 my $self = BaseImporter::merge_inheritance($pdfbox_converter_self, $cbf_self); 128 my $self = BaseImporter::merge_inheritance($pdfbox_converter_self, $cbf_self); # this param order seems necessary to preserve the default/user-selected value for the convert_to option 129 129 130 130 if ($self->{'info_only'}) { … … 151 151 152 152 if ($self->{'convert_to'} eq "auto") { 153 # choose pretty_html is the best default option when using xpdftools 154 $self->{'convert_to'} = "pretty_html"; 153 # defaulting to paged_pretty_html, as it's the best default option when using xpdftools 154 $self->{'convert_to'} = "paged_pretty_html"; 155 &gsprintf::gsprintf(STDERR, "PDFv2Plugin: 
{PDFv2Plugin.auto_output_default}\n", $self->{'convert_to'}); 155 156 } 156 157 if ($self->{'use_realistic_book'}) { 157 158 if ($self->{'convert_to'} ne "html") { 158 print STDERR "PDFs will be converted to HTML for realistic book functionality\n";159 &gsprintf::gsprintf(STDERR, "PDFv2Plugin: {PDFPlugin.html_for_realistic_book}\n"); 159 160 $self->{'convert_to'} = "html"; 160 161 } … … 424 425 # Copying file open/close code from CommonUtil::utf8_write_file() 425 426 if (!open (OUTFILE, ">:utf8", $output_filename)) { 426 gsprintf(STDERR, "PDFv2Plugin::xpdftohtml_convert_post_process {CommonUtil.could_not_open_for_writing} ($!)\n", $output_filename);427 &gsprintf::gsprintf(STDERR, "PDFv2Plugin::xpdftohtml_convert_post_process {CommonUtil.could_not_open_for_writing} ($!)\n", $output_filename); 427 428 die "\n"; 428 429 } -
main/trunk/greenstone2/perllib/strings.properties
r32287 r32290 845 845 ConvertBinaryFile.convert_to.text:Plain text format. 846 846 847 ConvertBinaryFile.convert_to.paged_text: Text separately extracted for each individual page.847 ConvertBinaryFile.convert_to.paged_text:Sectionalised plain text, where every page's text is its own section. 848 848 849 849 ConvertBinaryFile.convert_to.pagedimg:A series of images. … … 1171 1171 PDFPlugin.complex:Create more complex output. With this option set the output html will look much more like the original PDF file. For this to function properly you Ghostscript installed (for *nix gs should be on your path while for windows you must have gswin32c.exe on your path). 1172 1172 1173 PDFPlugin.convert_to.html: HTML. Text only, no images.1174 1175 PDFPlugin.convert_to.pretty_html: A series of HTML pages, one for each page. Each HTML page contains selectable text positionally overlaid on top of a screenshot of the PDF page background comprising any images, tables and drawings.1176 1177 PDFPlugin.convert_to.paged_pretty_html:Sectionalised variant of pretty_html to allow jumping to individual pages.1173 PDFPlugin.convert_to.html:very basic HTML comprising just the extracted text, no images. 1174 1175 PDFPlugin.convert_to.pretty_html:Each PDF page as HTML containing selectable text positionally overlaid on top of a textless screenshot of the PDF page. 1176 1177 PDFPlugin.convert_to.paged_pretty_html:Sectionalised pretty_html, where each page's html is its own section. 
1178 1178 1179 1179 PDFPlugin.deprecated_plugin:*************IMPORTANT******************\nPDFPlugin is being deprecated.\nConsider upgrading to the recommended PDFv2Plugin, which supports newer versions of PDFs.\nAlternatively, if you wish to retain the old style of conversion and are NOT relying on PDFBox,\nchange to PDFv1Plugin.\nIf you are using PDFBox then upgrade to PDFv2Plugin.\n*****************************************\n … … 1181 1181 PDFPlugin.desc:Plugin that processes PDF documents using the older pdftohtml tool. Does not support newer PDF versions. 1182 1182 1183 PDFPlugin.html_for_realistic_book:PDFs will be converted to HTML for realistic book functionality 1184 1183 1185 PDFPlugin.nohidden:Prevent pdftohtml from attempting to extract hidden text. This is only useful if the -complex option is also set. 1184 1186 1185 1187 PDFPlugin.noimages:Don't attempt to extract images from PDF. 1186 1188 1189 PDFv2Plugin.auto_output_default:Defaulting to output format %s 1190 1187 1191 PDFPlugin.use_realistic_book:Converts the PDF to a well-formed XHTML document to enable users view it in the realistic book format. 1188 1192 … … 1195 1199 PDFv1Plugin.zoom:The factor by which to zoom the PDF for output. Only useful if -complex is set. 1196 1200 1197 PDFv2Plugin.dpi:The resolution in DPI of background images generated for pagedimg(txt) and (paged_)pretty_html output settings.1201 PDFv2Plugin.dpi:The resolution in DPI of background images generated when convert_to is set to any of the pagedimg(txt) and (paged_)pretty_html formats. 1198 1202 1199 1203 PostScriptPlugin.desc:This is a \"poor man's\" ps to text converter. If you are serious, consider using the PRESCRIPT package, which is available for download at http://www.nzdl.org/html/software.html
Note: See TracChangeset for help on using the changeset viewer.