Changeset 10353


Ignore:
Timestamp:
2005-07-29T14:27:19+12:00 (19 years ago)
Author:
chi
Message:

Modified to allow PDF documents to be converted to various image types through the convert utility, and then to allow the secondary plugin (secondary_plugin) PagedImgPlug to process the converted document.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/PDFPlug.pm

    r10273 r10353  
    2323#
    2424###########################################################################
     25package PDFPlug;
     26
     27use ConvertToPlug;
     28use unicode;
    2529use strict;
    2630no strict 'refs'; # so we can use a var for filehandles (eg STDERR)
    27 
    28 package PDFPlug;
    29 
    30 use ConvertToPlug;
    31 use unicode;
    3231
    3332sub BEGIN {
     
    7473    push(@$pluginlist, $class);
    7574
     75    push(@$inputargs,"-title_sub");
     76    push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
     77
     78    #foreach my $temp (@$inputargs)
     79    #{
     80    #print STDERR "($temp)\n";
     81    #}
     82
    7683    if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    7784    if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    78 
    79     push(@$inputargs,"-title_sub");
    80     push(@$inputargs,'^(Page\s+\d+)?(\s*1\s+)?');
    81 
     85   
    8286    my $self = (defined $hashArgOptLists)? new ConvertToPlug($pluginlist,$inputargs,$hashArgOptLists): new ConvertToPlug($pluginlist,$inputargs);
    83 
    84 
    85     #if ($self->{"use_sections"}) {
    86     #$self->{"description_tags"} = 1;
    87     #}
    88 
     87   
    8988    # these are passed through to gsConvert.pl by ConvertToPlug.pm
    9089    my $zoom = $self->{"zoom"};
     
    102101    $secondary_plugin_options->{'TEXTPlug'} = [];
    103102    }
    104 
     103    if (defined $self->{'convert_to'} && $self->{'convert_to'} =~ /pagedimg.*/i) {
     104    if (!defined $secondary_plugin_options->{'PagedImgPlug'}){
     105        $secondary_plugin_options->{'PagedImgPlug'} = [];
     106        my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'};
     107        push(@$pagedimg_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?')
     108       
     109    }
     110    }
    105111    my $html_options = $secondary_plugin_options->{'HTMLPlug'};
    106112    my $text_options = $secondary_plugin_options->{'TEXTPlug'};
     113    #my $pagedimg_options = $secondary_plugin_options->{'PagedImgPlug'};
    107114
    108115    if ($self->{'input_encoding'} eq "auto") {
     
    110117    # => restrict primary PDFPlug and secondary HTML plugin to use
    111118    # utf8 and extract language.
    112 
    113119    $self->{'input_encoding'} = "utf8";
    114120    $self->{'extract_language'} = 1;
     
    126132    push(@$html_options,"-description_tags");
    127133    }
    128 
    129134    # following title_sub removes "Page 1" added by pdftohtml, and a leading
    130135    # "1", which is often the page number at the top of the page. Bad Luck
     
    132137    push(@$html_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    133138    push(@$text_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
     139    #push(@$pagedimg_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    134140
    135141    $self = bless $self, $class;
    136 
    137142    $self->load_secondary_plugins($class,$secondary_plugin_options);
    138 
    139143    return $self;
    140144}
     
    170174    # be useful to give an indication of document length in browser through setting
    171175    # num_pages as metadata.
    172 
    173176    my @pages = ($text =~ /\<[Aa] name=\"?\w+\"?>/ig);
    174177    my $num_pages = scalar(@pages);
     
    275278
    276279    $doc_obj->add_utf8_metadata($cursection, "NumPages", $self->{'num_pages'});
    277 
    278280   
    279281    if ($self->{'use_sections'} && $self->{'converted_to'} eq "HTML") {
Note: See TracChangeset for help on using the changeset viewer.