Ignore:
Timestamp:
2018-07-18T20:30:14+12:00 (6 years ago)
Author:
ak19
Message:

Cleaning up unused strings, some debug statements and recently commented out code.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/PDFv2Plugin.pm

    r32286 r32287  
    35 35 use Mojo::DOM; # for HTML parsing
    36 36
    37 #use AutoLoadConverters;
    38 37 use PDFBoxConverter;
    39 38 use ConvertBinaryFile;
    40 39
    41 #@PDFv2Plugin::ISA = ('ConvertBinaryFile', 'AutoLoadConverters', 'ReadTextFile');
    42 40 @PDFv2Plugin::ISA = ('ConvertBinaryFile', 'PDFBoxConverter', 'ReadTextFile');
    43 41
     
    123 121    push(@$pluginlist, $class);
    124 122
    125 #    push(@$inputargs,"-title_sub");
    126 #    push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
    127 
    128 123    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    129 124    push(@{$hashArgOptLists->{"OptList"}},$options);
    130 125
    131     #    my $auto_converter_self = new AutoLoadConverters($pluginlist,$inputargs,$hashArgOptLists,["PDFBoxConverter"],1);
    132 126    my $pdfbox_converter_self = new PDFBoxConverter($pluginlist, $inputargs, $hashArgOptLists);
    133 127    my $cbf_self = new ConvertBinaryFile($pluginlist, $inputargs, $hashArgOptLists);
    134     #    my $self = BaseImporter::merge_inheritance($auto_converter_self, $cbf_self);
    135 128    my $self = BaseImporter::merge_inheritance($pdfbox_converter_self, $cbf_self);
    136 129   
     
    167 160    }
    168 161    }
    169 
    170     # if pdfbox_conversion is not on, check convert_to to make sure that xpdftools can
    171     # support the selected output format, or fallback on a sensible default
    172     # Not all available conversion output options are possible with xpdftools, as some are
    173     # only handled by pdfbox. If a format is unavailable with xpdftools, default to pretty_html
    174     # if (!$self->{"pdfbox_conversion"}) {
    175     #   my $convert_to = $self->{'convert_to'};
    176     #   my $fallback_convert_to = $convert_to;
    177     #   if($convert_to =~ /^html$/) {
    178     #       $fallback_convert_to = "pretty_html";
    179     #   }
    180     #   elsif ($self->{'convert_to'} =~ /^pagedimg/) {
    181     #       $fallback_convert_to = "paged_pretty_html";
    182     #   }
    183     #   elsif ($self->{'convert_to'} =~ /^paged_text$/) {
    184     #       #   print STDERR "@@@ Conversion to " . $self->{'convert_to'} , " with Xpdf Tools is not yet implemented.\n";
    185     #       $fallback_convert_to = "text";
    186     #   }
    187 
    188     #   if($convert_to =~ /^(html|pagedimg|paged_text)/) {
    189     #       &gsprintf::gsprintf(STDERR, "{PDFv2Plugin.conversion_needs_pdfbox}\n", ($self->{'convert_to'}, $fallback_convert_to));
    190     #       $self->{'convert_to'} = $fallback_convert_to;
    191     #   }
    192     # }
    193 162   
    194 163    # set convert_to_plugin and convert_to_ext
     
    203 172    my $specific_options = $secondary_plugin_options->{$secondary_plugin_name};
    204 173
    205     # following title_sub removes "Page 1" added by pdftohtml, and a leading
    206     # "1", which is often the page number at the top of the page. Bad Luck
    207     # if your document title actually starts with "1 " - is there a better way?
    208 #    push(@$specific_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    209 174    my $associate_tail_re = $self->{'associate_tail_re'};
    210 175    if ((defined $associate_tail_re) && ($associate_tail_re ne "")) {
     
    267 232    # ConvertBinaryFile init
    268 233    $self->SUPER::init(@_);
    269 #   $self->AutoLoadConverters::init(@_);
    270 234    $self->PDFBoxConverter::init(@_);
    271 235
     
    275 239    my $self = shift (@_);
    276 240
    277 #   $self->AutoLoadConverters::begin(@_);
    278 241    $self->PDFBoxConverter::begin(@_);
    279 242    $self->SUPER::begin(@_);
     
    285 248
    286 249    $self->PDFBoxConverter::deinit(@_);
    287 #   $self->AutoLoadConverters::deinit(@_);
    288 250    $self->SUPER::deinit(@_);
    289 251
     
    318 280    }
    319 281   
    320     # for all other output formats, use pdfbox:   
    321     #return $self->AutoLoadConverters::tmp_area_convert_file(@_);
    322     # Here, we now do what AutoLoadConverters::tmp_area_convert_file(@_) does:
     282    # for all other output formats, use pdfbox:
     283   
     284    # Here, we now do directly what AutoLoadConverters::tmp_area_convert_file(@_)
     285    # does with PDFBoxConverter:
    323 286    my ($result, $result_str, $new_filename) = $self->PDFBoxConverter::convert($input_filename, $output_ext);
    324 287    if (defined $result && $result != 0) {
     
    382 345    $self->xpdftohtml_convert_post_process($conv_filename);
    383 346    }
    384     else { # use PDFPlugin's usual post processing
     347    else { # use original PDFPlugin's usual post processing
    385 348    $self->default_convert_post_process($conv_filename);
    386 349    }
     
    689 652   
    690 653    my $title = $sections[0];
    691     $title =~ s/^\"?\w+\"?>//; # specific for pdftohtml...
     654    $title =~ s/^\"?\w+\"?>//; # specific for old pdftohtml...
    692 655    $title =~ s/<\/([^>]+)><\1>//g; # (eg) </b><b> - no space
    693 656    $title =~ s/<[^>]*>/ /g;
     
    696 659    $title =~ s/\s+$//;
    697 660    $title =~ s/\s+/ /gs;
    698     $title =~ s/^$self->{'title_sub'}// if ($self->{'title_sub'});
    699     $title =~ s/^\s+//s; # in case title_sub introduced any...
     661    $title =~ s/^\s+//s; # in case title_sub (of old PDFPlugin's old pdftohtml) introduced any... Generally still useful to remove spaces at the start?
    700 662    $title = substr ($title, 0, 100);
    701 663    $title =~ s/\s\S*$/.../;
Note: See TracChangeset for help on using the changeset viewer.