Changeset 3350
- Timestamp:
- 2002-08-13T12:28:15+12:00 (22 years ago)
- Location:
- trunk/gsdl
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/gsConvert.pl
r3246 r3350 57 57 my $is_winnt_2000=eval {require Win32; return (Win32::IsWinNT()); return 0;}; 58 58 if (!defined($is_winnt_2000)) {$is_winnt_2000=0;} 59 60 my $use_strings; 59 61 60 62 sub print_usage … … 68 70 print STDERR "\t-output\thtml|text\n"; 69 71 print STDERR "\t-timeout\t<max cpu seconds>\t(ulimit on unix systems)\n"; 72 print STDERR "\t-use_strings\t(use strings to extract text if conversion fails)\n"; 70 73 exit(1); 71 74 } … … 85 88 'output/(html|text)/', \$output_type, 86 89 'timeout/\d+/0',\$timeout, 87 'verbose/\d+/0', \$verbose)) 90 'verbose/\d+/0', \$verbose, 91 'use_strings', \$use_strings)) 88 92 { 89 93 print_usage(); … … 469 473 close FAILLOG if ($write_to_fail_log); 470 474 } 471 print STDERR "Continuing...\n"; 472 475 return 0; # we can try any_to_text 473 476 } … … 954 957 ($input_filename, $output_filestem) = @_; 955 958 959 if (!$use_strings) { 960 return 0; 961 } 962 956 963 open(IN, "<$input_filename") || return 0; 957 964 binmode(IN); -
trunk/gsdl/perllib/plugins/ConvertToPlug.pm
r3248 r3350 61 61 print STDERR " -convert_to (html|text) plugin converts to TEXT or HTML\n"; 62 62 print STDERR " (default html)\n"; 63 print STDERR " -use_strings if set a simple strings function\n"; 64 print STDERR " will be called to extract text\n"; 65 print STDERR " if the conversion utility fails\n"; 63 66 } 64 67 … … 71 74 $plugin_name =~ s/\.pm$//; 72 75 73 my $generate_format; 74 my $kea_arg; 76 my $newargs = {}; 75 77 76 78 if (!parsargv::parse($args, 77 q^extract_keyphrases^, \$kea_arg->{'kea'}, #with extra options 78 q^extract_keyphrase_options/.*/^, \$kea_arg->{'kea_options'}, #no extra options 79 q^convert_to/(html|text)/html^, \$generate_format, 79 q^extract_keyphrases^, \$newargs->{'kea'}, #with extra options 80 q^extract_keyphrase_options/.*/^, \$newargs->{'kea_options'}, #no extra options 81 q^convert_to/(html|text)/html^, \$newargs->{'generate_format'}, 82 q^use_strings^, \$newargs->{'use_strings'}, 80 83 "allow_extra_options")) { 81 84 … … 86 89 } 87 90 88 return ($plugin_name, $generate_format, $kea_arg); 91 return ($plugin_name, $newargs); 89 92 } 90 93 … … 96 99 # of the argument list. 97 100 my @arglist = @_; 98 my ($plugin_name, $generate_format, $kea_arg) = $class->parse_args(\@_); 99 100 if ($class eq "PDFPlug" && $generate_format eq "text" && 101 my ($plugin_name, $args) = $class->parse_args(\@_); 102 103 if ($class eq "PDFPlug" && $args->{'generate_format'} eq "text" && 101 104 $ENV{'GSDLOS'} =~ /^windows$/i) { 102 105 print STDERR "Windows does not support pdf to text. 
PDFs will be converted to HTML instead\n"; 103 $generate_format = "html"; 104 } 105 106 if ($generate_format eq "text") 106 $args->{'generate_format'} = "html"; 107 } 108 109 if ($args->{'generate_format'} eq "text") 107 110 { 108 111 $self = new TEXTPlug ($class, @arglist); … … 120 123 } 121 124 122 #if kea data to be extracted... 123 $self->{'kea'} = 1 if($kea_arg->{'kea'}); 124 $self->{'kea_options'} = 1 if($kea_arg->{'kea_options'}); 125 foreach my $key (keys %$args) { 126 $self->{$key} = $args->{$key}; 127 } 125 128 126 129 return bless $self, $class; … … 173 176 # making sure the converter gives us the appropriate output type 174 177 my $output_type = lc($convert_to); 175 my $cmd = "perl -S gsConvert.pl -verbose $verbosity -errlog \"$errlog\" -output $output_type \"$tmp_filename\""; 178 my $cmd = "perl -S gsConvert.pl -verbose $verbosity "; 179 if ($self->{'use_strings'}) { 180 $cmd .= "-use_strings "; 181 } 182 $cmd .= "-errlog \"$errlog\" -output $output_type \"$tmp_filename\""; 183 $output_type = `$cmd`; 177 184
Note:
See TracChangeset
for help on using the changeset viewer.