1 | Option Explicit
|
---|
2 |
|
---|
3 | ' http://www.robvanderwoude.com/vbstech_automation_word.php
|
---|
4 | ' http://www.nilpo.com/2008/06/windows-scripting/reading-word-documents-in-wsh/ - for grabbing just the text (cleaned of Word mark-up) from a doc(x)
|
---|
5 | ' http://msdn.microsoft.com/en-us/library/3ca8tfek%28v=VS.85%29.aspx - VBScript Functions (CreateObject etc)
|
---|
6 | ' http://msdn.microsoft.com/en-us/library/aa220734%28v=office.11%29.aspx - SaveAs Method. Expand "WdSaveFormat" section to see all the default filetypes Office 2003+ can save as
|
---|
7 |
|
---|
8 | ' Error Handling:
|
---|
9 | ' http://blogs.msdn.com/b/ericlippert/archive/2004/08/19/error-handling-in-vbscript-part-one.aspx
|
---|
10 | ' http://msdn.microsoft.com/en-us/library/53f3k80h%28v=VS.85%29.aspx
|
---|
11 |
|
---|
12 |
|
---|
13 | ' To Do:
|
---|
14 | ' +1. error output on bad input to this file. And commit.
|
---|
15 | ' +1b. ActiveX error msg when trying to convert a normal *.doc: this occurs only when windows scripting is on and Word is not installed.
|
---|
16 | ' +1c. Make docx accepted by default as well. Changed WordPlugin.
|
---|
17 | ' 2. Try converting from other office types (xlsx, pptx) to html. They may use other constants for conversion filetypes
|
---|
18 | ' 3. gsConvert.pl's any_to_txt can be implemented for docx by getting all the text contents. Use a separate subroutine for this. Or use wdFormatUnicodeText as outputformat.
|
---|
19 | ' 4. Try out this script on Windows 7 to see whether WSH is active by default, as it is on XP and Vista.
|
---|
20 | ' 5. What kind of error occurs if any when user tries to convert docx on a machine with an old version of Word (pre-docx/pre-Word 2007)?
|
---|
21 | ' 6. Ask Dr Bainbridge whether this script can or shouldn't replace word2html, since this can launch all versions of word (not just 2007) I think.
|
---|
22 | ' Unless some commands have changed? Including for other Office apps, in which case word2html would remain the correct program to use for those cases.
|
---|
23 |
|
---|
24 |
|
---|
25 | ' gsConvert.pl expects error output to go to the console's STDERR
|
---|
26 | ' for which we need to launch this vbs with "CScript //Nologo" (WScript cannot be used when writing to StdErr,
|
---|
27 | ' and //Nologo is needed to suppress the Microsoft logo text output, which would otherwise mess up error reporting)
|
---|
28 | ' http://www.devguru.com/technologies/wsh/quickref/wscript_StdErr.html
|
---|
' Script entry point.
' Error output goes to the console's STDERR stream, which is only available
' when the script is hosted by CScript (not WScript).
Dim objStdErr, args
Set objStdErr = WScript.StdErr

' Two command-line arguments are required: the input document path and the
' output HTML path.
args = WScript.Arguments.Count
If args < 2 Then
    objStdErr.Write ("ERROR. Usage: CScript //Nologo " & WScript.ScriptName & " [input office doc path] [output html path]" & vbCrLf)
    ' Exit with a non-zero code so the calling process can tell that the
    ' script was invoked incorrectly (a bare WScript.Quit reports 0).
    WScript.Quit 1
End If

' Now run the conversion subroutine
Doc2HTML WScript.Arguments.Item(0), WScript.Arguments.Item(1)
41 | ' In terminal, run as: > docx2html.vbs C:\fullpath\to\input.docx C:\fullpath\to\output.html
|
---|
42 | ' In terminal, run as: > CScript //Nologo docx2html.vbs C:\fullpath\to\input.docx C:\fullpath\to\output.html
|
---|
43 | ' if you want echoed error output to go to console (instead of creating a popup) and to avoid 2 lines of MS logo.
|
---|
44 | ' Will be using WScript.StdErr object to make error output go to stderr of CScript console (can't launch with WScript).
|
---|
45 | ' http://www.devguru.com/technologies/wsh/quickref/wscript_StdErr.html
|
---|
46 |
|
---|
47 |
|
---|
Sub Doc2HTML( inFile, outHTML )
    ' Converts an office document to filtered HTML by automating Microsoft Word.
    '
    ' inFile  - path of the document to open in Word.
    ' outHTML - path of the HTML file to write. A relative path is resolved
    '           against the script's current directory before being handed
    '           to Word.
    '
    ' Failures (Word cannot be started, input file missing, document cannot be
    ' opened or saved) are reported on the script-level objStdErr stream, which
    ' must have been set before this Sub is called. The Sub never raises: it
    ' shuts down the Word instance it started and returns.
    '
    ' Based on the Doc2HTML subroutine written by Rob van der Woude
    ' http://www.robvanderwoude.com
    Dim objDoc, objFSO, objWord, strFile, strOut, errNum, errDesc

    ' WdSaveFormat values (see the SaveAs Method documentation for the full list).
    ' Only wdFormatFilteredHTML is used below; the others are kept for reference.
    Const wdFormatFilteredHTML = 10
    Const wdFormatHTML = 8
    Const wdFormatUnicodeText = 7

    ' WdSaveOptions value: close without saving and without prompting.
    Const wdDoNotSaveChanges = 0

    ' Create a File System object
    Set objFSO = CreateObject( "Scripting.FileSystemObject" )

    ' From here on errors are checked explicitly after each step that can fail,
    ' so that every failure produces a message on stderr instead of being lost.
    On Error Resume Next

    ' Create a Word object. Exit with error msg if not possible (such as when
    ' Word is not installed: error 429, "ActiveX component can't create object").
    Set objWord = CreateObject( "Word.Application" )
    If Err.Number <> 0 Then
        errNum = Err.Number
        errDesc = Err.Description
        objStdErr.Write ("ERROR: Windows-scripting failed. Document conversion cannot take place:" & vbCrLf)
        objStdErr.Write ("   Microsoft Word cannot be found or cannot be launched. (Error #" & CStr(errNum) & ": " & errDesc & "). " & vbCrLf)
        objStdErr.Write ("   For converting the latest Office documents, install OpenOffice and Greenstone's OpenOffice extension. (Turn it on and turn off windows-scripting.)" & vbCrLf)
        Exit Sub
    End If

    ' True: make Word visible; False: invisible
    objWord.Visible = False

    ' Check that the input document exists
    If Not objFSO.FileExists( inFile ) Then
        objStdErr.Write ("ERROR: Windows-scripting failed. Cannot open " & inFile & ". The file does not exist. ")
        ' Close Word
        objWord.Quit
        Exit Sub
    End If

    ' Hand Word full paths for both files
    strFile = objFSO.GetFile( inFile ).Path
    strOut = objFSO.GetAbsolutePathName( outHTML )

    ' Open the Word document, keeping the returned Document object rather than
    ' relying on whichever document happens to be active.
    Err.Clear
    Set objDoc = objWord.Documents.Open( strFile )
    If Err.Number <> 0 Then
        errNum = Err.Number
        errDesc = Err.Description
        objStdErr.Write ("ERROR: Windows-scripting failed. Cannot open " & strFile & ". (Error #" & CStr(errNum) & ": " & errDesc & "). " & vbCrLf)
        objWord.Quit
        Exit Sub
    End If

    ' Save as HTML -- http://msdn.microsoft.com/en-us/library/aa220734%28v=office.11%29.aspx
    objDoc.SaveAs strOut, wdFormatFilteredHTML
    If Err.Number <> 0 Then
        errNum = Err.Number
        errDesc = Err.Description
        objStdErr.Write ("ERROR: Windows-scripting failed. Cannot save " & strFile & " as " & strOut & ". (Error #" & CStr(errNum) & ": " & errDesc & "). " & vbCrLf)
        Err.Clear
    End If

    ' Close our document without prompting, then close Word
    objDoc.Close wdDoNotSaveChanges
    objWord.Quit
End Sub