Changeset 24166

Show
Ignore:
Timestamp:
16.06.2011 19:16:15 (8 years ago)
Author:
ak19
Message:

2nd and tentatively final set of changes to get the new docx2html functionality to work on docx files. The changes have to do with error reporting when Word is not installed/can't be found/can't be instantiated, when the script is launched with the wrong number of args, and when the input file does not exist. WordPlugin now has docx as part of the default process_expression (even when OO is not installed).

Location:
main/trunk
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/binaries/windows/bin/docx2html.vbs

    r24164 r24166  
    11Option Explicit 
    22 
    3 'args = WScript.Arguments.Count 
    4 'If args < 2 then 
    5 '  WScript.Echo usage: args.vbs argument [input docx path] [output html path] 
    6 '  WScript.Quit 
    7 'end If 
    8 'WScript.Echo WScript.Arguments.Item(0) 
    9 'WScript.Echo WScript.Arguments.Item(1) 
     3' http://www.robvanderwoude.com/vbstech_automation_word.php 
     4' http://www.nilpo.com/2008/06/windows-scripting/reading-word-documents-in-wsh/ - for grabbing just the text (cleaned of Word mark-up) from a doc(x) 
     5' http://msdn.microsoft.com/en-us/library/3ca8tfek%28v=VS.85%29.aspx - VBScript Functions (CreateObject etc) 
    106 
    11 Doc2HTML WScript.Arguments.Item(0),WScript.Arguments.Item(1) 
    12 ' In terminal, run as: > docx2html.vbs C:\fullpath\to\input.docx C:\fullpath\to\output.html 
     7' Error Handling: 
     8' http://blogs.msdn.com/b/ericlippert/archive/2004/08/19/error-handling-in-vbscript-part-one.aspx 
     9' http://msdn.microsoft.com/en-us/library/53f3k80h%28v=VS.85%29.aspx  
    1310 
    1411 
    15 ' http://www.robvanderwoude.com/vbstech_automation_word.php 
    16 ' http://www.nilpo.com/2008/06/windows-scripting/reading-word-documents-in-wsh/ 
     12' To Do: 
     13' +1. error output on bad input to this file. And commit. 
     14' +1b. Active X error msg when trying to convert normal *.doc: only when windows scripting is on and Word not installed. 
     15' +1c. Make docx accepted by default as well. Changed WordPlugin. 
     16' 2. Try converting from other office types (xlsx, pptx) to html. They may use other constants for conversion filetypes 
     17' 3. gsConvert.pl's any_to_txt can be implemented for docx by getting all the text contents. Use a separate subroutine for this. Or use wdFormatUnicodeText as outputformat. 
     18' 4. Try out this script on Windows 7 to see whether WSH is active by default, as it is on XP and Vista. 
     19' 5. What kind of error occurs if any when user tries to convert docx on a machine with an old version of Word (pre-docx/pre-Word 2007)? 
     20' 6. Ask Dr Bainbridge whether this script can or should replace word2html, since this launches all versions of Word as well, I think. 
     21 
     22 
     23' gsConvert.pl expects error output to go to the console's STDERR  
     24' for which we need to launch this vbs with "CScript //Nologo" '(cannot use WScript if using StdErr 
     25' and //Nologo is needed to repress Microsoft logo text output which messes up error reporting) 
     26' http://www.devguru.com/technologies/wsh/quickref/wscript_StdErr.html 
     27Dim objStdErr, args 
     28Set objStdErr = WScript.StdErr 
     29 
     30args = WScript.Arguments.Count 
     31If args < 2 then 
     32  'WScript.Echo Usage: args.vbs argument [input docx path] [output html path] 
     33  objStdErr.Write ("ERROR. Usage: CScript //Nologo " & WScript.ScriptName & " [input office doc path] [output html path]" & vbCrLf) 
     34  WScript.Quit 
     35end If 
     36 
     37' Now run the conversion subroutine 
     38Doc2HTML WScript.Arguments.Item(0),WScript.Arguments.Item(1) 
     39    ' In terminal, run as: > docx2html.vbs C:\fullpath\to\input.docx C:\fullpath\to\output.html 
     40    ' In terminal, run as: > CScript //Nologo docx2html.vbs C:\fullpath\to\input.docx C:\fullpath\to\output.html 
     41    ' if you want echoed error output to go to console (instead of creating a popup) and to avoid 2 lines of MS logo. 
     42    ' Will be using WScript.StdErr object to make error output go to stderr of CScript console (can't launch with WScript). 
     43    ' http://www.devguru.com/technologies/wsh/quickref/wscript_StdErr.html 
     44 
    1745 
    1846Sub Doc2HTML( inFile, outHTML ) 
     
    5583    Const wdFormatXMLTemplateMacroEnabled     = 15 
    5684    Const wdFormatXPS                         = 18 
    57  
     85     
    5886    ' Create a File System object 
    5987    Set objFSO = CreateObject( "Scripting.FileSystemObject" ) 
    6088 
    61     ' Create a Word object 
     89    ' Create a Word object. Exit with error msg if not possible (such as when Word is not installed) 
     90    On Error Resume Next 
    6291    Set objWord = CreateObject( "Word.Application" ) 
     92    If CStr(Err.Number) = 429 Then  ' 429 is the error code for "ActiveX component can't create object"  
     93                                    ' http://msdn.microsoft.com/en-us/library/xe43cc8d%28v=VS.85%29.aspx         
     94        'WScript.Echo "Microsoft Word cannot be found -- document conversion cannot take place. Error #" & CStr(Err.Number) & ": " & Err.Description & "." & vbCrLf 
     95        objStdErr.Write ("ERROR: Windows-scripting failed. Document conversion cannot take place:" & vbCrLf)  
     96        objStdErr.Write ("   Microsoft Word cannot be found or cannot be launched. (Error #" & CStr(Err.Number) & ": " & Err.Description & "). " & vbCrLf)       
     97        objStdErr.Write ("   For converting the latest Office documents, install OpenOffice and Greenstone's OpenOffice extension. (Turn it on and turn off windows-scripting.)" & vbCrLf)  
     98        Exit Sub 
     99    End If 
    63100 
    64101    With objWord 
     
    71108            strFile = objFile.Path 
    72109        Else 
    73             WScript.Echo "FILE OPEN ERROR: The file does not exist" & vbCrLf 
     110            'WScript.Echo "FILE OPEN ERROR: The file does not exist" & vbCrLf 
     111            objStdErr.Write ("ERROR: Windows-scripting failed. Cannot open " & inFile & ". The file does not exist. ") 
    74112            ' Close Word 
    75113            .Quit 
     
    79117        'outHTML = objFSO.BuildPath( objFile.ParentFolder, _ 
    80118        '          objFSO.GetBaseName( objFile ) & ".html" ) 
    81     'outHTML = outFile 
    82119 
    83120        ' Open the Word document 
  • main/trunk/greenstone2/bin/script/gsConvert.pl

    r24164 r24166  
    786786    my ($input_filename, $output_filestem) = @_; 
    787787 
    788     my $vbScript = &util::filename_cat($ENV{'GSDLHOME'}, "bin", 
    789                        $ENV{'GSDLOS'}, "word2html"); 
     788    # build up the path to the doc-to-html conversion tool we're going to use 
     789    my $vbScript = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}); 
    790790 
    791791    if ($ENV{'GSDLOS'} =~ m/^windows$/i) { 
    792     # if windows scripting with docx input, use new VBscript 
    793     if($input_filename =~ m/docx$/i) { 
    794         $vbScript = "docx2html.vbs";  
    795     } else { # use the usual word2html conversion VB executable 
    796         $vbScript = "word2html"; 
    797     } 
    798     } 
     792        # if windows scripting with docx input, use new VBscript to get the local Word install (if 
     793        # any) to do the conversion, since docX can't be processed by word2html's windows_scripting 
     794         
     795        if($input_filename =~ m/docx$/i) {  # need to use full path to docx2html script,  
     796                                            # else script launch fails when there are error msgs 
     797            $vbScript = &util::filename_cat($vbScript, "docx2html.vbs");  
     798            $vbScript = "CScript //Nologo \"$vbScript\"";   # launch with CScript for error output in STDERR 
     799                                                            # //Nologo flag avoids Microsoft's opening/logo msgs 
     800        }  
     801        else {  # old doc versions. use the usual VB executable word2html for the 
     802                # conversion. Doesn't need full path, since bin\windows is on PATH           
     803            $vbScript = "word2html"; #$vbScript = "\"".&util::filename_cat($vbScript, "word2html")."\""; 
     804        } 
     805    }  
     806    else { # not windows 
     807        $vbScript = "\"".&util::filename_cat($vbScript, "word2html")."\""; 
     808    } 
    799809 
    800810    if (-e "$output_filestem.html") { 
     
    812822 
    813823    # redirecting STDERR 
    814     $cmd .= " 2> \"$output_filestem.err\"" 
    815     if ($ENV {'GSDLOS'} !~ m/^windows$/i || $is_winnt_2000); 
    816      
     824     
     825    $cmd .= " 2> \"$output_filestem.err\"" 
     826        if ($ENV {'GSDLOS'} !~ m/^windows$/i || $is_winnt_2000);     
     827    #print STDERR "@@@@@@@@@ cmd=$cmd\n"; 
     828     
    817829    # execute the command 
    818830    $!=0; 
     
    822834    if (-s "$output_filestem.err") { 
    823835        open (ERRFILE, "<$output_filestem.err"); 
    824          
     836         
    825837        my $write_to_fail_log=0; 
    826838        if ($faillogfile ne "" && defined(open(FAILLOG,">>$faillogfile"))) 
  • main/trunk/greenstone2/perllib/plugins/WordPlugin.pm

    r23751 r24166  
    239239    return q^(?i)\.(doc|dot|docx|odt|wpd)$^; 
    240240    } 
    241     return q^(?i)\.(doc|dot)$^; 
     241    return q^(?i)\.(docx?|dot)$^; 
    242242} 
    243243