Changeset 22642

Show
Ignore:
Timestamp:
17.08.2010 12:24:42 (9 years ago)
Author:
kjdon
Message:

Removed all OpenOffice-related code. I haven't tested it properly, as I am at home.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/bin/script/gsConvert.pl

    r22596 r22642  
    6767my $pdf_allow_images_only; 
    6868my $windows_scripting; 
    69 my $openoffice_scripting; 
    7069 
    7170sub print_usage 
     
    7574    print STDERR "              or text using third-party programs.\n\n"; 
    7675    print STDERR "  usage: $0 [options] filename\n"; 
    77     if ($openoffice_scripting) { 
    78     print STDERR "  options:\n\t-type\tdoc|dot|docx|odf|pdf|ps|ppt|rtf|xls\t(input file type)\n"; 
    79     } 
    80     else { 
    81     print STDERR "  options:\n\t-type\tdoc|dot|pdf|ps|ppt|rtf|xls\t(input file type)\n"; 
    82     } 
     76    print STDERR "  options:\n\t-type\tdoc|dot|pdf|ps|ppt|rtf|xls\t(input file type)\n"; 
    8377    print STDERR "\t-errlog\t<filename>\t(append err messages)\n"; 
    8478    print STDERR "\t-output\tauto|html|text|pagedimg_jpg|pagedimg_gif|pagedimg_png\t(output file type)\n"; 
     
    8680    print STDERR "\t-use_strings\tuse strings to extract text if conversion fails\n"; 
    8781    print STDERR "\t-windows_scripting\tuse windows VB script (if available) to convert Microsoft Word and PPT documents\n"; 
    88     print STDERR "\t-openoffice_scripting\tuse OpenOffice (if available) to convert Microsoft Office documents \n"; 
    8982    print STDERR "\t-pdf_complex\tuse complex output when converting PDF to HTML\n"; 
    9083    print STDERR "\t-pdf_nohidden\tDon't attempt to extract hidden text from PDF files\n"; 
     
    10699 
    107100     
    108     # scan for -openoffice_scripting as it affects the permissible 
    109     # values for -type 
    110  
    111     foreach my $a (@ARGV) { 
    112     if ($a =~ m/^-openoffice_scripting$/) { 
    113         $openoffice_scripting = 1; 
    114         last; 
    115     } 
    116     } 
    117  
    118     my $parse_type; 
    119     if ($openoffice_scripting) { 
    120     $parse_type = 'type/(doc|dot|docx|odf|pdf|ps|ppt|rtf|xls)/'; 
    121     } 
    122     else { 
    123     $parse_type = 'type/(doc|dot|pdf|ps|ppt|rtf|xls)/'; 
    124     } 
    125  
    126101    # read command-line arguments 
    127102    if (!parsargv::parse(\@ARGV, 
    128              $parse_type, \$input_type, 
     103             'type/(doc|dot|pdf|ps|ppt|rtf|xls)/', \$input_type, 
    129104             '/errlog/.*/', \$faillogfile, 
    130105             'output/(auto|html|text|pagedimg).*/', \$output_type, 
     
    132107             'verbose/\d+/0', \$verbose, 
    133108             'windows_scripting',\$windows_scripting, 
    134              'openoffice_scripting',\$openoffice_scripting, 
    135109             'use_strings', \$use_strings, 
    136110             'pdf_complex', \$pdf_complex, 
     
    173147    print STDERR "Error: No filename extension or input type defined\n"; 
    174148    exit(1); 
    175     }  
    176     elsif ($openoffice_scripting && (($input_type eq "docx") || ($input_type eq "odf"))) { 
    177     print &convertDOC($input_filename, $output_filestem, $output_type); 
    178     print "\n"; 
    179149    }  
    180150    elsif ($input_type eq "doc" || $input_type eq "dot") { 
     
    230200    my ($input_filename, $output_filestem, $output_type) = @_; 
    231201 
    232     if (($openoffice_scripting) && ($input_filename =~ m/\.docx?$/i)) { 
    233     # Jump right in and process with Open Office 
    234         if (openoffice_doc_to_html($input_filename, $output_filestem)) { 
    235         return "html"; 
    236     } 
    237     else { 
    238         return "fail"; 
    239     } 
    240     } 
    241  
    242202    # Many .doc files are not in fact word documents! 
    243203    my $realtype = &find_docfile_type($input_filename); 
     
    262222        $success = &native_doc_to_html($input_filename, $output_filestem); 
    263223    } 
    264     elsif ($openoffice_scripting) { 
    265         $success = &openoffice_doc_to_html($input_filename, $output_filestem); 
    266     } 
    267224    else { 
    268225        $success = &doc_to_html($input_filename, $output_filestem);     
     
    288245    if ($windows_scripting) { 
    289246        $success = &native_doc_to_html($input_filename, $output_filestem); 
    290     } 
    291     elsif ($openoffice_scripting) { 
    292         $success = &openoffice_doc_to_html($input_filename, $output_filestem); 
    293247    } 
    294248    else { 
     
    402356    my $ppt_convert_type = ""; 
    403357 
    404     if ($openoffice_scripting) { 
    405     # Jump right in and process with Open Office 
    406         if (openoffice_doc_to_html($input_filename, $output_filestem)) { 
    407         return "html"; 
    408     } 
    409     else { 
    410         return "fail"; 
    411     } 
    412     } 
    413      
    414358    #if (!$output_type || $windows_scripting || ($output_type !~ m/html/i) || ($output_type !~ m/text/i)){ 
    415359    if ($windows_scripting && ($output_type !~ m/html/i) && ($output_type !~ m/text/i)){ 
     
    475419    my $success = 0; 
    476420 
    477     if ($openoffice_scripting) { 
    478     # Jump right in and process with Open Office 
    479         if (openoffice_doc_to_html($input_filename, $output_filestem)) { 
    480         return "html"; 
    481     } 
    482     else { 
    483         return "fail"; 
    484     } 
    485     } 
    486      
    487421    # Attempt conversion to HTML 
    488422    if (!$output_type || ($output_type =~ m/html/i)) { 
     
    897831} 
    898832 
    899 # Attempt to convert a word document to html with JODConvert scripting program 
    900 sub openoffice_doc_to_html { 
    901     my ($input_filename, $output_filestem) = @_; 
    902  
    903     if (-e "$output_filestem.html") { 
    904     print STDERR "    The conversion file:\n"; 
    905     print STDERR "      $output_filestem.html\n"; 
    906     print STDERR "    ... skipping\n"; 
    907     return 1; 
    908     } 
    909  
    910     my $oo_script_dir = &util::filename_cat($ENV{'GEXT_OPENOFFICE'}, "bin", "script"); 
    911     my $oo2html = &util::filename_cat($oo_script_dir,"oo2html"); 
    912     if (!-e $oo2html) { 
    913     print STDERR "Error: Unable to find 'oo2html' in: \n"; 
    914     print STDERR "       $oo_script_dir\n"; 
    915     print STDERR "       Is the OpenOffice extension to Greenstone installed?\n"; 
    916     return 0; 
    917     } 
    918  
    919     my $cmd = ""; 
    920     if ($timeout) {$cmd = "ulimit -t $timeout;";} 
    921     $cmd .=  "$oo2html \"$input_filename\" \"$output_filestem.html\""; 
    922  
    923     # redirecting STDERR 
    924     $cmd .= " 2> \"$output_filestem.err\"" 
    925     if ($ENV {'GSDLOS'} !~ m/^windows$/i || $is_winnt_2000); 
    926      
    927     # execute the command 
    928     $!=0; 
    929     if (system($cmd)!=0) 
    930     { 
    931     print STDERR "Error executing oo2html converter: $!\n"; 
    932     print STDERR "Command was: $cmd\n"; 
    933  
    934     if (-s "$output_filestem.err") { 
    935         open (ERRFILE, "<$output_filestem.err"); 
    936          
    937         my $write_to_fail_log=0; 
    938         if ($faillogfile ne "" && defined(open(FAILLOG,">>$faillogfile"))) 
    939         {$write_to_fail_log=1;} 
    940  
    941         my $line; 
    942         while ($line=<ERRFILE>) { 
    943         if ($line =~ m/\w/) { 
    944             print STDERR "$line"; 
    945             print FAILLOG "$line" if ($write_to_fail_log); 
    946         } 
    947         if ($line !~ m/startup error/) {next;} 
    948         print STDERR " (given an invalid .DOC file?)\n"; 
    949         print FAILLOG " (given an invalid .DOC file?)\n" 
    950         if ($write_to_fail_log); 
    951          
    952         } # while ERRFILE 
    953         close FAILLOG if ($write_to_fail_log); 
    954     } 
    955     return 0; # we can try any_to_text 
    956     } 
    957  
    958     # Was the conversion successful? 
    959     if (-s "$output_filestem.html") { 
    960     open(TMP, "$output_filestem.html"); 
    961     my $line = <TMP>; 
    962     close(TMP); 
    963     if ($line && $line =~ m/html/i) { 
    964         &util::rm("$output_filestem.err") if -e "$output_filestem.err"; 
    965         return 1; 
    966     } 
    967     } 
    968      
    969     # If here, an error of some sort occurred 
    970      
    971     &util::rm("$output_filestem.html") if -e "$output_filestem.html"; 
    972     if (-e "$output_filestem.err") { 
    973     if ($faillogfile ne "" && defined(open(FAILLOG,">>$faillogfile"))) { 
    974         open (ERRLOG,"$output_filestem.err"); 
    975         while (<ERRLOG>) {print FAILLOG $_;} 
    976         close FAILLOG; 
    977         close ERRLOG; 
    978     } 
    979     &util::rm("$output_filestem.err"); 
    980     } 
    981     return 0; 
    982 } 
    983  
    984833# Attempt to convert an RTF document to html with rtftohtml 
    985834sub rtf_to_html {