Changeset 22642 for main/trunk


Ignore:
Timestamp:
2010-08-17T12:24:42+12:00 (14 years ago)
Author:
kjdon
Message:

Removed all OpenOffice stuff. Haven't tested it properly as I am at home.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/gsConvert.pl

    r22596 r22642  
    6767my $pdf_allow_images_only;
    6868my $windows_scripting;
    69 my $openoffice_scripting;
    7069
    7170sub print_usage
     
    7574    print STDERR "              or text using third-party programs.\n\n";
    7675    print STDERR "  usage: $0 [options] filename\n";
    77     if ($openoffice_scripting) {
    78     print STDERR "  options:\n\t-type\tdoc|dot|docx|odf|pdf|ps|ppt|rtf|xls\t(input file type)\n";
    79     }
    80     else {
    81     print STDERR "  options:\n\t-type\tdoc|dot|pdf|ps|ppt|rtf|xls\t(input file type)\n";
    82     }
     76    print STDERR "  options:\n\t-type\tdoc|dot|pdf|ps|ppt|rtf|xls\t(input file type)\n";
    8377    print STDERR "\t-errlog\t<filename>\t(append err messages)\n";
    8478    print STDERR "\t-output\tauto|html|text|pagedimg_jpg|pagedimg_gif|pagedimg_png\t(output file type)\n";
     
    8680    print STDERR "\t-use_strings\tuse strings to extract text if conversion fails\n";
    8781    print STDERR "\t-windows_scripting\tuse windows VB script (if available) to convert Microsoft Word and PPT documents\n";
    88     print STDERR "\t-openoffice_scripting\tuse OpenOffice (if available) to convert Microsoft Office documents \n";
    8982    print STDERR "\t-pdf_complex\tuse complex output when converting PDF to HTML\n";
    9083    print STDERR "\t-pdf_nohidden\tDon't attempt to extract hidden text from PDF files\n";
     
    10699
    107100   
    108     # scan for -openoffice_scripting as it affects the permissible
    109     # values for -type
    110 
    111     foreach my $a (@ARGV) {
    112     if ($a =~ m/^-openoffice_scripting$/) {
    113         $openoffice_scripting = 1;
    114         last;
    115     }
    116     }
    117 
    118     my $parse_type;
    119     if ($openoffice_scripting) {
    120     $parse_type = 'type/(doc|dot|docx|odf|pdf|ps|ppt|rtf|xls)/';
    121     }
    122     else {
    123     $parse_type = 'type/(doc|dot|pdf|ps|ppt|rtf|xls)/';
    124     }
    125 
    126101    # read command-line arguments
    127102    if (!parsargv::parse(\@ARGV,
    128              $parse_type, \$input_type,
     103             'type/(doc|dot|pdf|ps|ppt|rtf|xls)/', \$input_type,
    129104             '/errlog/.*/', \$faillogfile,
    130105             'output/(auto|html|text|pagedimg).*/', \$output_type,
     
    132107             'verbose/\d+/0', \$verbose,
    133108             'windows_scripting',\$windows_scripting,
    134              'openoffice_scripting',\$openoffice_scripting,
    135109             'use_strings', \$use_strings,
    136110             'pdf_complex', \$pdf_complex,
     
    173147    print STDERR "Error: No filename extension or input type defined\n";
    174148    exit(1);
    175     }
    176     elsif ($openoffice_scripting && (($input_type eq "docx") || ($input_type eq "odf"))) {
    177     print &convertDOC($input_filename, $output_filestem, $output_type);
    178     print "\n";
    179149    }
    180150    elsif ($input_type eq "doc" || $input_type eq "dot") {
     
    230200    my ($input_filename, $output_filestem, $output_type) = @_;
    231201
    232     if (($openoffice_scripting) && ($input_filename =~ m/\.docx?$/i)) {
    233     # Jump right in and process with Open Office
    234         if (openoffice_doc_to_html($input_filename, $output_filestem)) {
    235         return "html";
    236     }
    237     else {
    238         return "fail";
    239     }
    240     }
    241 
    242202    # Many .doc files are not in fact word documents!
    243203    my $realtype = &find_docfile_type($input_filename);
     
    262222        $success = &native_doc_to_html($input_filename, $output_filestem);
    263223    }
    264     elsif ($openoffice_scripting) {
    265         $success = &openoffice_doc_to_html($input_filename, $output_filestem);
    266     }
    267224    else {
    268225        $success = &doc_to_html($input_filename, $output_filestem);   
     
    288245    if ($windows_scripting) {
    289246        $success = &native_doc_to_html($input_filename, $output_filestem);
    290     }
    291     elsif ($openoffice_scripting) {
    292         $success = &openoffice_doc_to_html($input_filename, $output_filestem);
    293247    }
    294248    else {
     
    402356    my $ppt_convert_type = "";
    403357
    404     if ($openoffice_scripting) {
    405     # Jump right in and process with Open Office
    406         if (openoffice_doc_to_html($input_filename, $output_filestem)) {
    407         return "html";
    408     }
    409     else {
    410         return "fail";
    411     }
    412     }
    413    
    414358    #if (!$output_type || $windows_scripting || ($output_type !~ m/html/i) || ($output_type !~ m/text/i)){
    415359    if ($windows_scripting && ($output_type !~ m/html/i) && ($output_type !~ m/text/i)){
     
    475419    my $success = 0;
    476420
    477     if ($openoffice_scripting) {
    478     # Jump right in and process with Open Office
    479         if (openoffice_doc_to_html($input_filename, $output_filestem)) {
    480         return "html";
    481     }
    482     else {
    483         return "fail";
    484     }
    485     }
    486    
    487421    # Attempt conversion to HTML
    488422    if (!$output_type || ($output_type =~ m/html/i)) {
     
    897831}
    898832
    899 # Attempt to convert a word document to html with JODConvert scripting program
    900 sub openoffice_doc_to_html {
    901     my ($input_filename, $output_filestem) = @_;
    902 
    903     if (-e "$output_filestem.html") {
    904     print STDERR "    The conversion file:\n";
    905     print STDERR "      $output_filestem.html\n";
    906     print STDERR "    ... skipping\n";
    907     return 1;
    908     }
    909 
    910     my $oo_script_dir = &util::filename_cat($ENV{'GEXT_OPENOFFICE'}, "bin", "script");
    911     my $oo2html = &util::filename_cat($oo_script_dir,"oo2html");
    912     if (!-e $oo2html) {
    913     print STDERR "Error: Unable to find 'oo2html' in: \n";
    914     print STDERR "       $oo_script_dir\n";
    915     print STDERR "       Is the OpenOffice extension to Greenstone installed?\n";
    916     return 0;
    917     }
    918 
    919     my $cmd = "";
    920     if ($timeout) {$cmd = "ulimit -t $timeout;";}
    921     $cmd .=  "$oo2html \"$input_filename\" \"$output_filestem.html\"";
    922 
    923     # redirecting STDERR
    924     $cmd .= " 2> \"$output_filestem.err\""
    925     if ($ENV {'GSDLOS'} !~ m/^windows$/i || $is_winnt_2000);
    926    
    927     # execute the command
    928     $!=0;
    929     if (system($cmd)!=0)
    930     {
    931     print STDERR "Error executing oo2html converter: $!\n";
    932     print STDERR "Command was: $cmd\n";
    933 
    934     if (-s "$output_filestem.err") {
    935         open (ERRFILE, "<$output_filestem.err");
    936        
    937         my $write_to_fail_log=0;
    938         if ($faillogfile ne "" && defined(open(FAILLOG,">>$faillogfile")))
    939         {$write_to_fail_log=1;}
    940 
    941         my $line;
    942         while ($line=<ERRFILE>) {
    943         if ($line =~ m/\w/) {
    944             print STDERR "$line";
    945             print FAILLOG "$line" if ($write_to_fail_log);
    946         }
    947         if ($line !~ m/startup error/) {next;}
    948         print STDERR " (given an invalid .DOC file?)\n";
    949         print FAILLOG " (given an invalid .DOC file?)\n"
    950         if ($write_to_fail_log);
    951        
    952         } # while ERRFILE
    953         close FAILLOG if ($write_to_fail_log);
    954     }
    955     return 0; # we can try any_to_text
    956     }
    957 
    958     # Was the conversion successful?
    959     if (-s "$output_filestem.html") {
    960     open(TMP, "$output_filestem.html");
    961     my $line = <TMP>;
    962     close(TMP);
    963     if ($line && $line =~ m/html/i) {
    964         &util::rm("$output_filestem.err") if -e "$output_filestem.err";
    965         return 1;
    966     }
    967     }
    968    
    969     # If here, an error of some sort occurred
    970    
    971     &util::rm("$output_filestem.html") if -e "$output_filestem.html";
    972     if (-e "$output_filestem.err") {
    973     if ($faillogfile ne "" && defined(open(FAILLOG,">>$faillogfile"))) {
    974         open (ERRLOG,"$output_filestem.err");
    975         while (<ERRLOG>) {print FAILLOG $_;}
    976         close FAILLOG;
    977         close ERRLOG;
    978     }
    979     &util::rm("$output_filestem.err");
    980     }
    981     return 0;
    982 }
    983 
    984833# Attempt to convert an RTF document to html with rtftohtml
    985834sub rtf_to_html {
Note: See TracChangeset for help on using the changeset viewer.