Changeset 32287

Show
Ignore:
Timestamp:
18.07.2018 20:30:14 (12 months ago)
Author:
ak19
Message:

Cleaning up unused strings, some debug statements and recently commented out code.

Location:
main/trunk/greenstone2
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/gsConvert.pl

    r32284 r32287  
    322322    $output_type =~ s/.*\-(.*)/$1/i; 
    323323     
    324     print STDERR "@@@@@@@@ Using $pdf_tool for the conversion\n"; 
     324    #print STDERR "@@@@@@@@ Using $pdf_tool for the conversion\n"; 
    325325     
    326326    # First determine which pdf conversion tool we're using among pdftohtml/pdfbox/xpdftools 
  • main/trunk/greenstone2/perllib/plugins/PDFv2Plugin.pm

    r32286 r32287  
    3535use Mojo::DOM; # for HTML parsing 
    3636 
    37 #use AutoLoadConverters; 
    3837use PDFBoxConverter; 
    3938use ConvertBinaryFile; 
    4039 
    41 #@PDFv2Plugin::ISA = ('ConvertBinaryFile', 'AutoLoadConverters', 'ReadTextFile'); 
    4240@PDFv2Plugin::ISA = ('ConvertBinaryFile', 'PDFBoxConverter', 'ReadTextFile'); 
    4341 
     
    123121    push(@$pluginlist, $class); 
    124122 
    125 #    push(@$inputargs,"-title_sub"); 
    126 #    push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?'); 
    127  
    128123    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); 
    129124    push(@{$hashArgOptLists->{"OptList"}},$options); 
    130125 
    131     #    my $auto_converter_self = new AutoLoadConverters($pluginlist,$inputargs,$hashArgOptLists,["PDFBoxConverter"],1); 
    132126    my $pdfbox_converter_self = new PDFBoxConverter($pluginlist, $inputargs, $hashArgOptLists); 
    133127    my $cbf_self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists); 
    134     #    my $self = BaseImporter::merge_inheritance($auto_converter_self, $cbf_self); 
    135128    my $self = BaseImporter::merge_inheritance($pdfbox_converter_self, $cbf_self); 
    136129     
     
    167160    } 
    168161    } 
    169  
    170     # if pdfbox_conversion is not on, check convert_to to make sure that xpdftools can 
    171     # support the selected output format, or fallback on a sensible default 
    172     # Not all available conversion output options are possible with xpdftools, as some are 
    173     # only handled by pdfbox. If a format is unavailable with xpdftools, default to pretty_html 
    174     # if (!$self->{"pdfbox_conversion"}) { 
    175     #   my $convert_to = $self->{'convert_to'}; 
    176     #   my $fallback_convert_to = $convert_to; 
    177     #   if($convert_to =~ /^html$/) { 
    178     #       $fallback_convert_to = "pretty_html"; 
    179     #   } 
    180     #   elsif ($self->{'convert_to'} =~ /^pagedimg/) { 
    181     #       $fallback_convert_to = "paged_pretty_html"; 
    182     #   } 
    183     #   elsif ($self->{'convert_to'} =~ /^paged_text$/) { 
    184     #       #   print STDERR "@@@ Conversion to " . $self->{'convert_to'} , " with Xpdf Tools is not yet implemented.\n"; 
    185     #       $fallback_convert_to = "text"; 
    186     #   } 
    187  
    188     #   if($convert_to =~ /^(html|pagedimg|paged_text)/) { 
    189     #       &gsprintf::gsprintf(STDERR, "{PDFv2Plugin.conversion_needs_pdfbox}\n", ($self->{'convert_to'}, $fallback_convert_to)); 
    190     #       $self->{'convert_to'} = $fallback_convert_to; 
    191     #   } 
    192     # } 
    193162     
    194163    # set convert_to_plugin and convert_to_ext 
     
    203172    my $specific_options = $secondary_plugin_options->{$secondary_plugin_name}; 
    204173 
    205     # following title_sub removes "Page 1" added by pdftohtml, and a leading 
    206     # "1", which is often the page number at the top of the page. Bad Luck 
    207     # if your document title actually starts with "1 " - is there a better way? 
    208 #    push(@$specific_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 
    209174    my $associate_tail_re = $self->{'associate_tail_re'}; 
    210175    if ((defined $associate_tail_re) && ($associate_tail_re ne "")) { 
     
    267232    # ConvertBinaryFile init 
    268233    $self->SUPER::init(@_); 
    269 #   $self->AutoLoadConverters::init(@_); 
    270234    $self->PDFBoxConverter::init(@_); 
    271235 
     
    275239    my $self = shift (@_); 
    276240 
    277 #   $self->AutoLoadConverters::begin(@_); 
    278241    $self->PDFBoxConverter::begin(@_); 
    279242    $self->SUPER::begin(@_); 
     
    285248 
    286249    $self->PDFBoxConverter::deinit(@_); 
    287 #   $self->AutoLoadConverters::deinit(@_); 
    288250    $self->SUPER::deinit(@_); 
    289251 
     
    318280    } 
    319281     
    320     # for all other output formats, use pdfbox:     
    321     #return $self->AutoLoadConverters::tmp_area_convert_file(@_); 
    322     # Here, we now do what AutoLoadConverters::tmp_area_convert_file(@_) does: 
     282    # for all other output formats, use pdfbox: 
     283     
     284    # Here, we now do directly what AutoLoadConverters::tmp_area_convert_file(@_) 
     285    # does with PDFBoxConverter: 
    323286    my ($result, $result_str, $new_filename) = $self->PDFBoxConverter::convert($input_filename, $output_ext); 
    324287    if (defined $result && $result != 0) { 
     
    382345    $self->xpdftohtml_convert_post_process($conv_filename); 
    383346    } 
    384     else { # use PDFPlugin's usual post processing 
     347    else { # use original PDFPlugin's usual post processing 
    385348    $self->default_convert_post_process($conv_filename); 
    386349    } 
     
    689652     
    690653    my $title = $sections[0]; 
    691     $title =~ s/^\"?\w+\"?>//; # specific for pdftohtml... 
     654    $title =~ s/^\"?\w+\"?>//; # specific for old pdftohtml... 
    692655    $title =~ s/<\/([^>]+)><\1>//g; # (eg) </b><b> - no space 
    693656    $title =~ s/<[^>]*>/ /g; 
     
    696659    $title =~ s/\s+$//; 
    697660    $title =~ s/\s+/ /gs; 
    698     $title =~ s/^$self->{'title_sub'}// if ($self->{'title_sub'}); 
    699     $title =~ s/^\s+//s; # in case title_sub introduced any... 
     661    $title =~ s/^\s+//s; # in case title_sub (of old PDFPlugin's old pdftohtml) introduced any... Generally still useful to remove spaces at the start? 
    700662    $title = substr ($title, 0, 100); 
    701663    $title =~ s/\s\S*$/.../; 
  • main/trunk/greenstone2/perllib/strings.properties

    r32283 r32287  
    11951195PDFv1Plugin.zoom:The factor by which to zoom the PDF for output. Only useful if -complex is set. 
    11961196 
    1197 PDFv2Plugin.zoom:The factor by which to zoom the PDF for (paged_)pretty_html output. Can be fractional. 
    1198  
    1199 PDFv2Plugin.win_pdftotext_info:PDFv2Plugin uses Xpdf Tools to support pdf to text conversion, including on Windows. 
    1200  
    1201 PDFv2Plugin.conversion_needs_pdfbox:*** Conversion to %s not supported with Xpdf Tools, defaulting to %s.\nTurn on pdfbox_conversion if you wish to enable output to the selected format. 
     1197PDFv2Plugin.dpi:The resolution in DPI of background images generated for pagedimg(txt) and (paged_)pretty_html output settings. 
    12021198 
    12031199PostScriptPlugin.desc:This is a \"poor man's\" ps to text converter. If you are serious, consider using the PRESCRIPT package, which is available for download at http://www.nzdl.org/html/software.html