Changeset 32283 for main/trunk

Show
Ignore:
Timestamp:
17.07.2018 22:19:34 (14 months ago)
Author:
ak19
Message:

More stable behaviour by PDFv2Plugin: 1. When pdfbox_conversion is on, but an output option supported by xpdftools is selected, it now uses xpdftools anyway instead of attempting to use pdfbox_conversion. 2. When pdfbox_conversion is not on and an output format that it alone supports (and that is not supported by xpdftools) is selected, a warning message is displayed stating that xpdftools will be used to output to a fallback output format, and that the user should switch on pdfbox_conversion if they want the selected format instead. This message was present and displayed in a recent commit, but the behaviour had not yet been set up at that point. In future, we may solve this differently if it's decided that PDFBoxConverter is not an AutoLoadConverter and will therefore always be available with PDFv2Plugin (but what about GS2, where PDFBox is an optional extension?).

Location:
main/trunk/greenstone2/perllib
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/PDFv2Plugin.pm

    r32280 r32283  
    109109       'deft' => "1", 
    110110       'type' => "string" }, # xpdftools' zoom takes fractions 
    111      { 'name' => "use_sections", 
    112        'desc' => "{PDFPlugin.use_sections}", 
    113        'type' => "flag" }, 
    114      { 'name' => "description_tags", 
    115        'desc' => "{HTMLPlugin.description_tags}", 
    116        'type' => "flag" }, 
     111#     { 'name' => "use_sections", 
     112#       'desc' => "{PDFPlugin.use_sections}", 
     113#       'type' => "flag" }, 
     114#     { 'name' => "description_tags", 
     115#       'desc' => "{HTMLPlugin.description_tags}", 
     116#       'type' => "flag" }, 
    117117      { 'name' => "use_realistic_book", 
    118118        'desc' => "{PDFPlugin.use_realistic_book}", 
     
    159159    $self->{'convert_options'} .= " -pdf_zoom $zoom"; 
    160160 
    161     # check convert_to 
    162      
    163     # Not all available conversion output options are possible with xpdftools, as some are 
    164     # only handled by pdfbox. If a format is unavailable with xpdftools, default to pretty_html 
    165     if ($self->{'convert_to'} =~ /^html$/) { 
    166     &gsprintf::gsprintf(STDERR, "{PDFv2Plugin.conversion_needs_pdfbox}\n", ($self->{'convert_to'}, "pretty_html")); 
    167 #   $self->{'convert_to'} = "pretty_html"; 
    168     } 
    169     elsif ($self->{'convert_to'} =~ /^pagedimg/) { 
    170     &gsprintf::gsprintf(STDERR, "{PDFv2Plugin.conversion_needs_pdfbox}\n", ($self->{'convert_to'}, "paged_pretty_html")); 
    171 #   $self->{'convert_to'} = "paged_pretty_html"; 
    172     } 
    173     elsif ($self->{'convert_to'} =~ /^paged_text$/) { 
    174     # TODO 
    175     print STDERR "@@@ Conversion to " . $self->{'convert_to'} , " with Xpdf Tools is not yet implemented.\n"; 
    176     #print STDERR "@@@ Converting to text instead.\n"; 
    177     #$self->{'convert_to'} = "text"; 
    178     } 
    179      
    180161    # PDFv2Plugin now supports PDF to txt conversion on Windows too: 
    181162    # using XPDF Tools (incl pdftotext) on Windows/Linux/Mac 
    182     elsif ($self->{'convert_to'} eq "text" && $ENV{'GSDLOS'} =~ /^windows$/i) { 
     163    if ($self->{'convert_to'} eq "text" && $ENV{'GSDLOS'} =~ /^windows$/i) { 
    183164    &gsprintf::gsprintf(STDERR, "{PDFv2Plugin.win_pdftotext_info}\n"); 
    184165    } 
     
    193174    } 
    194175    } 
     176 
     177    # if pdfbox_conversion is not on, check convert_to to make sure that xpdftools can 
     178    # support the selected output format, or fallback on a sensible default 
     179    # Not all available conversion output options are possible with xpdftools, as some are 
     180    # only handled by pdfbox. If a format is unavailable with xpdftools, default to pretty_html 
     181    if (!$self->{"pdfbox_conversion"}) { 
     182    my $convert_to = $self->{'convert_to'}; 
     183    my $fallback_convert_to = $convert_to; 
     184    if($convert_to =~ /^html$/) { 
     185        $fallback_convert_to = "pretty_html"; 
     186    } 
     187    elsif ($self->{'convert_to'} =~ /^pagedimg/) { 
     188        $fallback_convert_to = "paged_pretty_html"; 
     189    } 
     190    elsif ($self->{'convert_to'} =~ /^paged_text$/) { 
     191        #   print STDERR "@@@ Conversion to " . $self->{'convert_to'} , " with Xpdf Tools is not yet implemented.\n"; 
     192        $fallback_convert_to = "text"; 
     193    } 
     194 
     195    if($convert_to =~ /^(html|pagedimg|paged_text)/) { 
     196        &gsprintf::gsprintf(STDERR, "{PDFv2Plugin.conversion_needs_pdfbox}\n", ($self->{'convert_to'}, $fallback_convert_to)); 
     197        $self->{'convert_to'} = $fallback_convert_to; 
     198    } 
     199    } 
     200     
    195201    # set convert_to_plugin and convert_to_ext 
    196202    $self->set_standard_convert_settings(); 
     
    310316 
    311317    my $self = shift (@_); 
     318     
     319    if($self->{'convert_to'} =~ m/pretty_html$/) { # if outputting paged_pretty_html or pretty_html: 
     320    # only xpdftools can output pretty_html regardless of whether pdfbox_conversion is switched on 
     321    print STDERR "@@@@ PDFBox_conversion is switched on, but pretty_html variants are generated by xpdftools.\n"; 
     322    return $self->ConvertBinaryFile::tmp_area_convert_file(@_); 
     323    } 
     324    # else, output format uses pdfbox: 
    312325    return $self->AutoLoadConverters::tmp_area_convert_file(@_); 
    313326 
     
    319332    my ($tmp_dirname, $tmp_inputPDFname, $utf8_tailname, $lc_suffix, $tailname, $suffix) = @_;     
    320333 
    321     if($self->{'convert_to'} !~ /pretty_html/) { 
     334    if($self->{'convert_to'} !~ m/pretty_html$/) { 
    322335    return $self->ConvertBinaryFile::run_conversion_command(@_); 
    323336    } 
  • main/trunk/greenstone2/perllib/strings.properties

    r32277 r32283  
    11991199PDFv2Plugin.win_pdftotext_info:PDFv2Plugin uses Xpdf Tools to support pdf to text conversion, including on Windows. 
    12001200 
    1201 PDFv2Plugin.conversion_needs_pdfbox:*** Conversion to %s not supported with Xpdf Tools, defaulting to %s. Turn on pdfbox_conversion if you wish to enable output to selected format. 
     1201PDFv2Plugin.conversion_needs_pdfbox:*** Conversion to %s not supported with Xpdf Tools, defaulting to %s.\nTurn on pdfbox_conversion if you wish to enable output to the selected format. 
    12021202 
    12031203PostScriptPlugin.desc:This is a \"poor man's\" ps to text converter. If you are serious, consider using the PRESCRIPT package, which is available for download at http://www.nzdl.org/html/software.html