Ignore:
Timestamp:
2003-01-28T16:22:26+13:00 (21 years ago)
Author:
sjboddie
Message:

Added options to PDFPlug to take advantage of the improvements in
version 0.34 of pdftohtml. It now works much better for non-Latin
input documents (producing UTF-8 encoded HTML).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/gsConvert.pl

    r3538 r3720  
    5959
    6060my $use_strings;
     61my $pdf_complex;
     62my $pdf_zoom;
     63my $pdf_ignore_images;
    6164
    6265sub print_usage
     
    7073    print STDERR "\t-output\thtml|text\n";
    7174    print STDERR "\t-timeout\t<max cpu seconds>\t(ulimit on unix systems)\n";
    72     print STDERR "\t-use_strings\t(use strings to extract text if conversion fails)\n";
     75    print STDERR "\t-use_strings\tuse strings to extract text if conversion fails\n";
     76    print STDERR "\t-pdf_complex\tuse complex output when converting PDF to HTML\n";
     77    print STDERR "\t-pdf_ignore_images\tdon't attempt to extract images when\n";
     78    print STDERR "\t\tconverting PDF to HTML\n";
     79    print STDERR "\t-pdf_zoom\tfactor by which to zoom PDF (only useful if\n";
     80    print STDERR "\t\t-pdf_complex is set\n";
    7381    exit(1);
    7482}
     
    8997             'timeout/\d+/0',\$timeout,
    9098             'verbose/\d+/0',   \$verbose,
    91              'use_strings', \$use_strings))
     99             'use_strings', \$use_strings,
     100             'pdf_complex', \$pdf_complex,
     101             'pdf_zoom/\d+/2', \$pdf_zoom
     102             ))
    92103    {
    93104    print_usage();
     
    628639    $cmd = "";
    629640    if ($timeout) {$cmd = "ulimit -t $timeout;";}
    630     $cmd .= "perl -S pdftohtml.pl ";
     641    $cmd .= "perl -S pdftohtml.pl -zoom $pdf_zoom";
     642    $cmd .= " -c" if ($pdf_complex);
     643    $cmd .= " -i" if ($pdf_ignore_images);
    631644    $cmd .= " \"$input_filename\" \"$output_filestem\"";
    632645   
Note: See TracChangeset for help on using the changeset viewer.