Changeset 32283 for main/trunk


Ignore:
Timestamp:
2018-07-17T22:19:34+12:00 (6 years ago)
Author:
ak19
Message:

More stable behaviour by PDFv2Plugin: 1. When pdfbox_conversion is on but an output option supported by xpdftools is selected, it now uses xpdftools anyway instead of attempting to use pdfbox_conversion. 2. When pdfbox_conversion is not on and an output format that it alone supports (and that is not supported by xpdftools) is selected, a warning message is displayed stating that xpdftools will be used to output to a fallback output format, and that the user should switch on pdfbox_conversion if they want the selected format instead. This message was already present and displayed in a recent commit, but the corresponding behaviour had not yet been implemented at that point. In future, we may solve this differently if it's decided that PDFBoxConverter is not an AutoLoadConverter and will therefore always be available with PDFv2Plugin (but what about GS2, where PDFBox is an optional extension?)

Location:
main/trunk/greenstone2/perllib
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/PDFv2Plugin.pm

    r32280 r32283  
    109109       'deft' => "1",
    110110       'type' => "string" }, # xpdftools' zoom takes fractions
    111      { 'name' => "use_sections",
    112        'desc' => "{PDFPlugin.use_sections}",
    113        'type' => "flag" },
    114      { 'name' => "description_tags",
    115        'desc' => "{HTMLPlugin.description_tags}",
    116        'type' => "flag" },
     111#     { 'name' => "use_sections",
     112#       'desc' => "{PDFPlugin.use_sections}",
     113#       'type' => "flag" },
     114#     { 'name' => "description_tags",
     115#       'desc' => "{HTMLPlugin.description_tags}",
     116#       'type' => "flag" },
    117117      { 'name' => "use_realistic_book",
    118118        'desc' => "{PDFPlugin.use_realistic_book}",
     
    159159    $self->{'convert_options'} .= " -pdf_zoom $zoom";
    160160
    161     # check convert_to
    162    
    163     # Not all available conversion output options are possible with xpdftools, as some are
    164     # only handled by pdfbox. If a format is unavailable with xpdftools, default to pretty_html
    165     if ($self->{'convert_to'} =~ /^html$/) {
    166     &gsprintf::gsprintf(STDERR, "{PDFv2Plugin.conversion_needs_pdfbox}\n", ($self->{'convert_to'}, "pretty_html"));
    167 #   $self->{'convert_to'} = "pretty_html";
    168     }
    169     elsif ($self->{'convert_to'} =~ /^pagedimg/) {
    170     &gsprintf::gsprintf(STDERR, "{PDFv2Plugin.conversion_needs_pdfbox}\n", ($self->{'convert_to'}, "paged_pretty_html"));
    171 #   $self->{'convert_to'} = "paged_pretty_html";
    172     }
    173     elsif ($self->{'convert_to'} =~ /^paged_text$/) {
    174     # TODO
    175     print STDERR "@@@ Conversion to " . $self->{'convert_to'} , " with Xpdf Tools is not yet implemented.\n";
    176     #print STDERR "@@@ Converting to text instead.\n";
    177     #$self->{'convert_to'} = "text";
    178     }
    179    
    180161    # PDFv2Plugin now supports PDF to txt conversion on Windows too:
    181162    # using XPDF Tools (incl pdftotext) on Windows/Linux/Mac
    182     elsif ($self->{'convert_to'} eq "text" && $ENV{'GSDLOS'} =~ /^windows$/i) {
     163    if ($self->{'convert_to'} eq "text" && $ENV{'GSDLOS'} =~ /^windows$/i) {
    183164    &gsprintf::gsprintf(STDERR, "{PDFv2Plugin.win_pdftotext_info}\n");
    184165    }
     
    193174    }
    194175    }
     176
     177    # if pdfbox_conversion is not on, check convert_to to make sure that xpdftools can
     178    # support the selected output format, or fallback on a sensible default
     179    # Not all available conversion output options are possible with xpdftools, as some are
     180    # only handled by pdfbox. If a format is unavailable with xpdftools, default to pretty_html
     181    if (!$self->{"pdfbox_conversion"}) {
     182    my $convert_to = $self->{'convert_to'};
     183    my $fallback_convert_to = $convert_to;
     184    if($convert_to =~ /^html$/) {
     185        $fallback_convert_to = "pretty_html";
     186    }
     187    elsif ($self->{'convert_to'} =~ /^pagedimg/) {
     188        $fallback_convert_to = "paged_pretty_html";
     189    }
     190    elsif ($self->{'convert_to'} =~ /^paged_text$/) {
     191        #   print STDERR "@@@ Conversion to " . $self->{'convert_to'} , " with Xpdf Tools is not yet implemented.\n";
     192        $fallback_convert_to = "text";
     193    }
     194
     195    if($convert_to =~ /^(html|pagedimg|paged_text)/) {
     196        &gsprintf::gsprintf(STDERR, "{PDFv2Plugin.conversion_needs_pdfbox}\n", ($self->{'convert_to'}, $fallback_convert_to));
     197        $self->{'convert_to'} = $fallback_convert_to;
     198    }
     199    }
     200   
    195201    # set convert_to_plugin and convert_to_ext
    196202    $self->set_standard_convert_settings();
     
    310316
    311317    my $self = shift (@_);
     318   
     319    if($self->{'convert_to'} =~ m/pretty_html$/) { # if outputting paged_pretty_html or pretty_html:
     320    # only xpdftools can output pretty_html regardless of whether pdfbox_conversion is switched on
     321    print STDERR "@@@@ PDFBox_conversion is switched on, but pretty_html variants are generated by xpdftools.\n";
     322    return $self->ConvertBinaryFile::tmp_area_convert_file(@_);
     323    }
     324    # else, output format uses pdfbox:
    312325    return $self->AutoLoadConverters::tmp_area_convert_file(@_);
    313326
     
    319332    my ($tmp_dirname, $tmp_inputPDFname, $utf8_tailname, $lc_suffix, $tailname, $suffix) = @_;   
    320333
    321     if($self->{'convert_to'} !~ /pretty_html/) {
     334    if($self->{'convert_to'} !~ m/pretty_html$/) {
    322335    return $self->ConvertBinaryFile::run_conversion_command(@_);
    323336    }
  • main/trunk/greenstone2/perllib/strings.properties

    r32277 r32283  
    11991199PDFv2Plugin.win_pdftotext_info:PDFv2Plugin uses Xpdf Tools to support pdf to text conversion, including on Windows.
    12001200
    1201 PDFv2Plugin.conversion_needs_pdfbox:*** Conversion to %s not supported with Xpdf Tools, defaulting to %s. Turn on pdfbox_conversion if you wish to enable output to selected format.
     1201PDFv2Plugin.conversion_needs_pdfbox:*** Conversion to %s not supported with Xpdf Tools, defaulting to %s.\nTurn on pdfbox_conversion if you wish to enable output to the selected format.
    12021202
    12031203PostScriptPlugin.desc:This is a \"poor man's\" ps to text converter. If you are serious, consider using the PRESCRIPT package, which is available for download at http://www.nzdl.org/html/software.html
Note: See TracChangeset for help on using the changeset viewer.