Ignore:
Timestamp:
2018-07-17T20:40:57+12:00 (6 years ago)
Author:
ak19
Message:

Implementing PDFv2 paged_text (with pdfbox)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/ConvertBinaryFile.pm

    r32277 r32280  
    170170    $self->{'convert_to_plugin'} = "StructuredHTMLPlugin";
    171171    $self->{'convert_to_ext'} = "html";
    172     } elsif ($convert_to =~ /^pagedimg/) {
     172    } elsif ($convert_to =~ /^pagedimg/ || $convert_to eq "paged_text") {
    173173    $self->{'convert_to_plugin'} = "PagedImagePlugin";
    174     my ($convert_to_ext) = $convert_to =~ /pagedimg\_(jpg|gif|png)/i;
    175     $convert_to_ext = 'jpg' unless defined $convert_to_ext;
    176     $self->{'convert_to_ext'} = $convert_to_ext;
     174    if($convert_to eq "paged_text") {
     175        $self->{'convert_to_ext'} = "txt";
     176    } else {
     177        my ($convert_to_ext) = $convert_to =~ /pagedimg(?:txt)?\_(jpg|gif|png)/i; # the ?: prefix makes the group non-capturing, so the optional 'txt' in 'pagedimgtxt' is matched but not captured,
     178        # which leaves the only captured portion as the part we actually want: the img type
     179        $convert_to_ext = 'jpg' unless defined $convert_to_ext;
     180        $self->{'convert_to_ext'} = $convert_to_ext;
     181    }
    177182    }
    178183}
Note: See TracChangeset for help on using the changeset viewer.