source: main/trunk/binaries/windows/bin/docx2html.vbs@ 24164

Last change on this file since 24164 was 24164, checked in by ak19, 13 years ago

Docx-to-HTML conversion with Windows Scripting. Thanks to Veronica, who found a VBScript that can launch Word in the background and convert docx to HTML (for when the user has Word installed). Modified the script to accept command-line parameters and to run Word in invisible mode. gsConvert.pl now calls docx2html.vbs if the input file is a docx; otherwise it still calls the default word2html.exe VB program, as before, to do the Windows scripting.

File size: 3.4 KB
Line 
Option Explicit

' Script entry point: converts a Word document to HTML via Doc2HTML.
'
' Usage (in a terminal):
'   > docx2html.vbs C:\fullpath\to\input.docx C:\fullpath\to\output.html
'
' Argument 0: path of the input Word document.
' Argument 1: path of the HTML file to write.

' Guard against missing arguments; without this check, reading
' WScript.Arguments.Item(n) aborts with a "subscript out of range" error.
If WScript.Arguments.Count < 2 Then
    WScript.Echo "Usage: docx2html.vbs [input docx path] [output html path]"
    WScript.Quit 1
End If

Doc2HTML WScript.Arguments.Item(0), WScript.Arguments.Item(1)
13
14
15' http://www.robvanderwoude.com/vbstech_automation_word.php
16' http://www.nilpo.com/2008/06/windows-scripting/reading-word-documents-in-wsh/
17
Sub Doc2HTML( inFile, outHTML )
' Opens a Word document in an invisible Word instance, saves it as
' filtered HTML, then closes the document and quits that Word instance.
'
' Parameters:
'   inFile  - path of the Word document to convert. If it does not exist,
'             an error message is echoed and the subroutine returns
'             without starting Word.
'   outHTML - path of the HTML file to write. It is resolved to an absolute
'             path before being handed to Word.
'
' Errors:
'   If Word fails to open or save the document, the Word instance is still
'   shut down and the original error is then re-raised to the caller.
'
' Based on a script written by Rob van der Woude
' http://www.robvanderwoude.com
    Dim objDoc, objFSO, objWord, strFile, strOut
    Dim errNum, errSrc, errMsg

    Const wdFormatFilteredHTML = 10   ' WdSaveFormat: filtered HTML
    Const wdDoNotSaveChanges   = 0    ' WdSaveOptions: discard pending changes
    Const wdAlertsNone         = 0    ' WdAlertLevel: show no dialogs

    ' Create a File System object
    Set objFSO = CreateObject( "Scripting.FileSystemObject" )

    ' Validate the input before launching Word, so that a bad path
    ' does not cost a Word start-up and shut-down.
    If Not objFSO.FileExists( inFile ) Then
        WScript.Echo "FILE OPEN ERROR: The file does not exist" & vbCrLf
        Exit Sub
    End If
    strFile = objFSO.GetFile( inFile ).Path

    ' Resolve the output path here: Word would otherwise interpret a
    ' relative path against its own default folder.
    strOut = objFSO.GetAbsolutePathName( outHTML )

    ' Create a Word object; keep it invisible and silent, since a modal
    ' prompt on a hidden window could never be answered.
    Set objWord = CreateObject( "Word.Application" )
    objWord.Visible = False
    objWord.DisplayAlerts = wdAlertsNone

    ' Initialise to Nothing so the cleanup test below is valid even
    ' when Documents.Open fails.
    Set objDoc = Nothing

    ' From here on, trap errors so Word is always shut down.
    On Error Resume Next

    ' Open the document (ConfirmConversions:=False, ReadOnly:=True) and
    ' keep the returned object instead of relying on ActiveDocument.
    Set objDoc = objWord.Documents.Open( strFile, False, True )
    If Err.Number = 0 Then
        ' Save as HTML
        objDoc.SaveAs strOut, wdFormatFilteredHTML
    End If

    ' Remember any failure before the cleanup calls can overwrite Err.
    errNum = Err.Number
    errSrc = Err.Source
    errMsg = Err.Description
    Err.Clear

    ' Close our own document, then quit Word, without save prompts.
    If Not objDoc Is Nothing Then
        objDoc.Close wdDoNotSaveChanges
    End If
    objWord.Quit wdDoNotSaveChanges
    Set objDoc = Nothing
    Set objWord = Nothing

    On Error GoTo 0

    ' Propagate the original failure now that Word has been cleaned up.
    If errNum <> 0 Then
        Err.Raise errNum, errSrc, errMsg
    End If
End Sub
Note: See TracBrowser for help on using the repository browser.