source: main/trunk/binaries/windows/bin/pptextract.vbs@ 28353

Last change on this file since 28353 was 28353, checked in by ak19, 8 years ago

VBScript version of VB pptextract.exe/frm/vbp. VBScript is easier to maintain and debug. The VB version is older now and Vis Studio is not able to upgrade it without errors and packages extra required packages.

  • Property svn:executable set to *
File size: 10.2 KB
Line 
1'Option Explicit
2'Imports PowerPoint = Microsoft.Office.Interop.PowerPoint
3
4' Run as: CScript //Nologo <script> args
5' Without using the CScript at the start, it will try to use WScript for which WScript.StdErr is undefined/an invalid handle
6' http://stackoverflow.com/questions/774319/why-does-this-vbscript-give-me-an-error
7' It appears that the handle StdOut is only available when using a console host (cscript.exe) and not a windowed host (wscript.exe).
8' If you want the code to work, you have to use cscript.exe to run it.
9
10
11Dim args
12args = WScript.Arguments.Count
13If args < 2 Or args > 3 Then
14 'WScript.Echo Usage: args.vbs argument [input docx path] [output html path]
15 WScript.StdErr.Write ("ERROR. Usage: CScript //Nologo " & WScript.ScriptName & " [input ppt path] [output html path]" & vbCrLf)
16 WScript.StdErr.Write ("OR: CScript //Nologo " & WScript.ScriptName & " [-j(pg)/g(if)/p(ng)] [input ppt path] [output img path and filename prefix] " & vbCrLf)
17 WScript.Quit
18end If
19
20'set ppAPP to a power point application
21Dim ppApp
22Set ppApp = CreateObject("PowerPoint.Application")
23If CStr(Err.Number) = 429 Then ' 429 is the error code for "ActiveX component can't create object"
24 ' http://msdn.microsoft.com/en-us/library/xe43cc8d%28v=VS.85%29.aspx
25 WScript.StdErr.Write ("ERROR: Windows-scripting failed. ppt conversion cannot take place:" & vbCrLf)
26 WScript.StdErr.Write (" Microsoft Powerpoint cannot be found or cannot be launched. (Error #" & CStr(Err.Number) & ": " & Err.Description & "). " & vbCrLf)
27 WScript.StdErr.Write (" For converting the latest Office documents, install OpenOffice and Greenstone's OpenOffice extension. (Turn it on and turn off windows-scripting.)" & vbCrLf)
28 WScript.Quit -1 ' http://www.tek-tips.com/viewthread.cfm?qid=1297200
29End If
30
31' Declare COM interface constants for PPT File SaveAs types
32' http://include.wutils.com/com-dll/constants/constants-PowerPoint.htm
33' http://msdn.microsoft.com/en-us/library/ff746500.aspx
34 Const ppSaveAsPresentation = 1 '&H1
35 Const ppSaveAsPowerPoint7 = 2 '&H2
36 Const ppSaveAsPowerPoint4 = 3 '&H3
37 Const ppSaveAsPowerPoint3 = 4 '&H4
38 Const ppSaveAsTemplate = 5 '&H5
39 Const ppSaveAsRTF = 6 '&H6
40 Const ppSaveAsShow = 7 '&H7
41 Const ppSaveAsAddIn = 8 '&H8
42 Const ppSaveAsPowerPoint4FarEast = 10 '&HA
43 Const ppSaveAsDefault = 11 '&HB
44 Const ppSaveAsHTML = 12 '&HC
45 Const ppSaveAsHTMLv3 = 13 '&HD
46 Const ppSaveAsHTMLDual = 14 '&HE
47 Const ppSaveAsMetaFile = 15 '&HF
48 Const ppSaveAsGIF = 16 '&H10
49 Const ppSaveAsJPG = 17 '&H11
50 Const ppSaveAsPNG = 18 '&H12
51 Const ppSaveAsBMP = 19 '&H13
52 Const ppSaveAsWebArchive = 20 '&H14
53 Const ppSaveAsTIF = 21 '&H15
54 Const ppSaveAsPresForReview = 22 '&H16
55 Const ppSaveAsEMF = 23 '&H17
56
57' Now run the conversion subroutine
58
59If args = 2 Then
60PPTtoHTML WScript.Arguments.Item(0),WScript.Arguments.Item(1)
61Else
62PPTslidesToImgs WScript.Arguments.Item(0),WScript.Arguments.Item(1),WScript.Arguments.Item(2)
63End If
64
65' Based on http://stackoverflow.com/questions/12643024/can-i-automatically-convert-ppt-to-html
66' AFTER GETTING THIS SCRIPT TO RUN AT LAST, CONVERSION TO HTML STILL DOESN'T WORK, BECAUSE:
67' Although PPT 2010 could still save ppt as html using vb(script), see instructions at http://support.microsoft.com/kb/980553
68' for PPT 2013 that doesn't work anymore either. The option to save as html is simply no longer there.
69' Maybe we can convert to xml and then to html using a custom xsl stylesheet?
70' CONVERSION TO IMAGES SHOULD BE ABLE TO WORK, BUT STILL NEED TO CLEAN UP THAT FUNCTION TO GET THERE
71Sub PPTtoHTML(inFile, outHTML)
72 'ppApp.Visible = False ' Invalid Request: Hiding the application window is not allowed
73 ' Open the ppt document
74 ppApp.Presentations.Open inFile, 1, 0, 1 ', MsoTriState.msoTrue, MsoTriState.msoFalse, MsoTriState.msoFalse
75 Dim prsPres
76 Set prsPres = ppApp.ActivePresentation
77 'Call the SaveAs method of Presentation object and specify the format as HTML
78 prsPres.SaveAs outHTML, ppSaveAsHTML, 0 ' PowerPoint.PpSaveAsFileType.ppSaveAsHTML, MsoTriState.msoTrue
79 ' Tristate.msoFalse enum evaluates to 0, see http://msdn.microsoft.com/en-us/library/microsoft.visualbasic.tristate.aspx
80 'Close the Presentation object
81 prsPres.Close()
82 'Close the Application object
83 ppApp.Quit()
84
85End Sub
86
87' Porting pptextract.frm Visual Basic form that needs to be compiled to .exe into a VBscript (.vbs)
88' Converting PPT slides to images http://vbadud.blogspot.co.nz/2009/05/save-powerpoint-slides-as-images-using.html
89' Maybe helpful too: http://stackoverflow.com/questions/13057432/convert-all-worksheet-objects-to-images-in-powerpoint
90' http://msdn.microsoft.com/en-us/library/sdbcfyzh.aspx for logical operators
91' Like JScript, VBScript uses the FSO to read and write files: http://stackoverflow.com/questions/2198810/creating-and-writing-lines-to-a-file
92Sub PPTslidesToImgs(outputType, inFileName, outFileStem)
93 ' switch statement, http://msdn.microsoft.com/en-us/library/6ef9w614%28v=vs.84%29.aspx
94 WScript.StdErr.Write ("Output stem: " & outFileStem & vbCrLf)
95
96 Dim fso
97 Set fso = CreateObject("Scripting.FileSystemObject")
98
99 Dim outputDir, itemFile
100
101 outputDir = outFileStem 'outputDir = Left(outFileStem, InStrRev(outFileStem, "\")) ' outputDir = substring upto final \, no need to escape \ in vbscript
102 itemFile = Mid(outFileStem, InStrRev(outFileStem, "\")+1)
103
104 WScript.StdErr.Write ("outputDir: " & outFileStem & vbCrLf)
105
106 If Not fso.FolderExists(outputDir) Then
107 fso.CreateFolder(outputDir)
108 Else
109 WScript.StdErr.Write ("**** Folder " & outputDir & " Already exists" &vbCrLf)
110 End If
111
112 Select Case outputType
113 Case "-g" outputType = "gif"
114 Case "-gif" outputType = "gif"
115 Case "gif" outputType = "gif"
116 Case "-j" outputType = "jpg"
117 Case "-jpg" outputType = "jpg"
118 Case "jpg" outputType = "jpg"
119 Case "-p" outputType = "png"
120 Case "-png" outputType = "png"
121 Case "png" outputType = "png"
122 'Case "htm"
123 'Not developed for converting to html yet
124 'Currently, if the user choose to convert the PPT to the html file
125 'We will only allow users to use the open source method through Greenstone
126 End Select
127
128
129 'generate document_name.item file
130 itemFile = outFileStem + "\" + itemFile + ".item"
131
132 'Set item = fso.CreateTextFile(itemFile, 2, True) ' ForWriting = 2, Unicode = -1, see http://msdn.microsoft.com/en-us/library/314cz14s%28v=vs.84%29.aspx
133
134 ' Writing out to a file in UTF-8 http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
135 Dim item
136 Set item = CreateObject("ADODB.Stream")
137 item.CharSet = "utf-8"
138 item.Open
139
140 'WScript.StdErr.Write ("itemFile: " & itemFile & vbCrLf)
141
142
143'do stuff
144 Dim objPA
145 Set objPA = CreateObject("PowerPoint.Application")
146 objPA.Visible = True
147
148 Dim objPPTs
149 Set objPPTs = objPA.Presentations
150 objPPTs.Open (inFileName)
151
152 item.WriteText "<PagedDocument>", 1
153
154 n = 1
155 Dim slide_shape
156 For slide_count = 1 To objPPTs(1).Slides.Count
157 current_slide = objPPTs(1).Slides(slide_count).Name
158 'generate a text version
159 'Set text = fso.CreateTextFile(outputDir + "\Slide" + CStr(slide_count) + ".txt", ForWriting, True)
160 Dim text
161 Set text = CreateObject("ADODB.Stream") ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
162 text.CharSet = "utf-8"
163 text.Open
164 If (objPPTs(1).Slides(slide_count).Shapes.HasTitle) Then
165 slide_title = objPPTs(1).Slides(slide_count).Shapes.Title.TextFrame.TextRange
166 Else
167 slide_title = objPPTs(1).Slides(slide_count).Name
168 End If
169 slide_text = ""
170 found_text = True
171 For iShape = 1 To objPPTs(1).Slides(slide_count).Shapes.Count
172 If (objPPTs(1).Slides(slide_count).Shapes(iShape).TextFrame.HasText) Then
173 slide_text = objPPTs(1).Slides(slide_count).Shapes(iShape).TextFrame.TextRange.text
174 'MsgBox ("Slide_text:" + slide_text)
175 If slide_text <> "" Then
176 text.WriteText slide_text, 1
177 Else
178 text.WriteText "This slide has no text", 1
179 End If
180 End If
181 Next
182 text_file = outputDir + "\Slide" + CStr(slide_count) + ".txt"
183 text.SaveToFile text_file, 2 ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
184 text.Close
185
186 If objPPTs(1).Slides.Count >= 1 And slide_count < objPPTs(1).Slides.Count Then
187 next_slide = objPPTs(1).Slides(slide_count + 1).Name
188 Else
189 nextf = " "
190 End If
191
192 ' For the gif, png, jpg files:
193 current_slide = "Slide" + CStr(n)
194 If nextf <> " " Then
195 nextf = outputDir + "\" + "Slide" + CStr(n + 1)
196 End If
197 prevhtml = outputDir + "\" + current_slide + "." + outputType
198 If slide_text = "" Then
199 itemxml item, current_slide + "." + outputType, "", n, slide_title
200 Else
201 itemxml item, current_slide + "." + outputType, current_slide + ".txt", n, slide_title
202 End If
203
204 n = n + 1
205 item.WriteText " </Page>", 1
206 Next
207 i = 0
208 Select Case outputType
209 'Case "htm" 'ppts(1).SaveAs outputDir, ppSaveAsHTMLv3
210 Case "gif" objPPTs(1).SaveAs outputDir, ppSaveAsGIF
211 Case "jpg" objPPTs(1).SaveAs outputDir, ppSaveAsJPG
212 Case "png" objPPTs(1).SaveAs outputDir, ppSaveAsPNG
213 End Select
214
215 item.WriteText "</PagedDocument>", 1
216
217 item.SaveToFile itemFile, 2 ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
218 item.Close
219
220 objPPTs(1).Close
221 Set fso = Nothing
222 Set text = Nothing
223 Set item = Nothing
224 objPA.Quit
225End Sub
226
227
228
229Sub itemxml(out_item, thisfile, txtfile, num, slide_title)
230 out_item.WriteText " <Page pagenum=" + Chr(34) + CStr(num) + Chr(34) + " imgfile=" + Chr(34) + thisfile + Chr(34) + " txtfile=" + Chr(34) + txtfile + Chr(34) + ">", 1
231 If slide_title <> "" Then
232 out_item.WriteText " <Metadata name=" + Chr(34) + "Title" + Chr(34) + ">" + slide_title + "</Metadata>", 1
233 'MsgBox ("Title:" + slide_title)
234 End If
235End Sub
Note: See TracBrowser for help on using the repository browser.