Changeset 32287


Ignore:
Timestamp:
2018-07-18T20:30:14+12:00 (3 years ago)
Author:
ak19
Message:

Cleaning up unused strings, some debug statements and recently commented-out code.

Location:
main/trunk/greenstone2
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/gsConvert.pl

    r32284 r32287  
    322322    $output_type =~ s/.*\-(.*)/$1/i;
    323323   
    324     print STDERR "@@@@@@@@ Using $pdf_tool for the conversion\n";
     324    #print STDERR "@@@@@@@@ Using $pdf_tool for the conversion\n";
    325325   
    326326    # First determine which pdf conversion tool we're using among pdftohtml/pdfbox/xpdftools
  • main/trunk/greenstone2/perllib/plugins/PDFv2Plugin.pm

    r32286 r32287  
    3535use Mojo::DOM; # for HTML parsing
    3636
    37 #use AutoLoadConverters;
    3837use PDFBoxConverter;
    3938use ConvertBinaryFile;
    4039
    41 #@PDFv2Plugin::ISA = ('ConvertBinaryFile', 'AutoLoadConverters', 'ReadTextFile');
    4240@PDFv2Plugin::ISA = ('ConvertBinaryFile', 'PDFBoxConverter', 'ReadTextFile');
    4341
     
    123121    push(@$pluginlist, $class);
    124122
    125 #    push(@$inputargs,"-title_sub");
    126 #    push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
    127 
    128123    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    129124    push(@{$hashArgOptLists->{"OptList"}},$options);
    130125
    131     #    my $auto_converter_self = new AutoLoadConverters($pluginlist,$inputargs,$hashArgOptLists,["PDFBoxConverter"],1);
    132126    my $pdfbox_converter_self = new PDFBoxConverter($pluginlist, $inputargs, $hashArgOptLists);
    133127    my $cbf_self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
    134     #    my $self = BaseImporter::merge_inheritance($auto_converter_self, $cbf_self);
    135128    my $self = BaseImporter::merge_inheritance($pdfbox_converter_self, $cbf_self);
    136129   
     
    167160    }
    168161    }
    169 
    170     # if pdfbox_conversion is not on, check convert_to to make sure that xpdftools can
    171     # support the selected output format, or fallback on a sensible default
    172     # Not all available conversion output options are possible with xpdftools, as some are
    173     # only handled by pdfbox. If a format is unavailable with xpdftools, default to pretty_html
    174     # if (!$self->{"pdfbox_conversion"}) {
    175     #   my $convert_to = $self->{'convert_to'};
    176     #   my $fallback_convert_to = $convert_to;
    177     #   if($convert_to =~ /^html$/) {
    178     #       $fallback_convert_to = "pretty_html";
    179     #   }
    180     #   elsif ($self->{'convert_to'} =~ /^pagedimg/) {
    181     #       $fallback_convert_to = "paged_pretty_html";
    182     #   }
    183     #   elsif ($self->{'convert_to'} =~ /^paged_text$/) {
    184     #       #   print STDERR "@@@ Conversion to " . $self->{'convert_to'} , " with Xpdf Tools is not yet implemented.\n";
    185     #       $fallback_convert_to = "text";
    186     #   }
    187 
    188     #   if($convert_to =~ /^(html|pagedimg|paged_text)/) {
    189     #       &gsprintf::gsprintf(STDERR, "{PDFv2Plugin.conversion_needs_pdfbox}\n", ($self->{'convert_to'}, $fallback_convert_to));
    190     #       $self->{'convert_to'} = $fallback_convert_to;
    191     #   }
    192     # }
    193162   
    194163    # set convert_to_plugin and convert_to_ext
     
    203172    my $specific_options = $secondary_plugin_options->{$secondary_plugin_name};
    204173
    205     # following title_sub removes "Page 1" added by pdftohtml, and a leading
    206     # "1", which is often the page number at the top of the page. Bad Luck
    207     # if your document title actually starts with "1 " - is there a better way?
    208 #    push(@$specific_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    209174    my $associate_tail_re = $self->{'associate_tail_re'};
    210175    if ((defined $associate_tail_re) && ($associate_tail_re ne "")) {
     
    267232    # ConvertBinaryFile init
    268233    $self->SUPER::init(@_);
    269 #   $self->AutoLoadConverters::init(@_);
    270234    $self->PDFBoxConverter::init(@_);
    271235
     
    275239    my $self = shift (@_);
    276240
    277 #   $self->AutoLoadConverters::begin(@_);
    278241    $self->PDFBoxConverter::begin(@_);
    279242    $self->SUPER::begin(@_);
     
    285248
    286249    $self->PDFBoxConverter::deinit(@_);
    287 #   $self->AutoLoadConverters::deinit(@_);
    288250    $self->SUPER::deinit(@_);
    289251
     
    318280    }
    319281   
    320     # for all other output formats, use pdfbox:   
    321     #return $self->AutoLoadConverters::tmp_area_convert_file(@_);
    322     # Here, we now do what AutoLoadConverters::tmp_area_convert_file(@_) does:
     282    # for all other output formats, use pdfbox:
     283   
     284    # Here, we now do directly what AutoLoadConverters::tmp_area_convert_file(@_)
     285    # does with PDFBoxConverter:
    323286    my ($result, $result_str, $new_filename) = $self->PDFBoxConverter::convert($input_filename, $output_ext);
    324287    if (defined $result && $result != 0) {
     
    382345    $self->xpdftohtml_convert_post_process($conv_filename);
    383346    }
    384     else { # use PDFPlugin's usual post processing
     347    else { # use original PDFPlugin's usual post processing
    385348    $self->default_convert_post_process($conv_filename);
    386349    }
     
    689652   
    690653    my $title = $sections[0];
    691     $title =~ s/^\"?\w+\"?>//; # specific for pdftohtml...
     654    $title =~ s/^\"?\w+\"?>//; # specific for old pdftohtml...
    692655    $title =~ s/<\/([^>]+)><\1>//g; # (eg) </b><b> - no space
    693656    $title =~ s/<[^>]*>/ /g;
     
    696659    $title =~ s/\s+$//;
    697660    $title =~ s/\s+/ /gs;
    698     $title =~ s/^$self->{'title_sub'}// if ($self->{'title_sub'});
    699     $title =~ s/^\s+//s; # in case title_sub introduced any...
     661    $title =~ s/^\s+//s; # in case title_sub (of old PDFPlugin's old pdftohtml) introduced any... Generally still useful to remove spaces at the start?
    700662    $title = substr ($title, 0, 100);
    701663    $title =~ s/\s\S*$/.../;
  • main/trunk/greenstone2/perllib/strings.properties

    r32283 r32287  
    11951195PDFv1Plugin.zoom:The factor by which to zoom the PDF for output. Only useful if -complex is set.
    11961196
    1197 PDFv2Plugin.zoom:The factor by which to zoom the PDF for (paged_)pretty_html output. Can be fractional.
    1198 
    1199 PDFv2Plugin.win_pdftotext_info:PDFv2Plugin uses Xpdf Tools to support pdf to text conversion, including on Windows.
    1200 
    1201 PDFv2Plugin.conversion_needs_pdfbox:*** Conversion to %s not supported with Xpdf Tools, defaulting to %s.\nTurn on pdfbox_conversion if you wish to enable output to the selected format.
     1197PDFv2Plugin.dpi:The resolution in DPI of background images generated for pagedimg(txt) and (paged_)pretty_html output settings.
    12021198
    12031199PostScriptPlugin.desc:This is a \"poor man's\" ps to text converter. If you are serious, consider using the PRESCRIPT package, which is available for download at http://www.nzdl.org/html/software.html
Note: See TracChangeset for help on using the changeset viewer.