source: main/trunk/binaries/windows/bin/pptextract.vbs@ 30572

Last change on this file since 30572 was 30572, checked in by ak19, 8 years ago

Removed stale files pptextract.exe, pptextract.frm, pptextract.vbp (visual basic project file), pptextract.vbw

  • Property svn:executable set to *
File size: 11.8 KB
RevLine 
[30572]1' This file is a port to VBScript from VB of the files pptextract.exe, pptextract.frm
2' (and pptextract.vbp, pptextract.vbw), which have been removed after revision 30571.
[28354]3
4' It was hard to upgrade the VB pptextract.frm form script in Visual Studio
5' to the current Visual Basic, and some packages it needed wouldn't install,
6' making it hard to compile up.
7
8' As this VBScript doesn't need to be compiled up, it may be easier to maintain.
9
10' For differences between VBScript and VB, see
11' http://msdn.microsoft.com/en-us/library/ms970436.aspx
12' http://www.htmlgoodies.com/beyond/asp/vbs-ref/article.php/3458611/Key-Differences-Between-VB-and-VB-Script.htm
13' (Note that VBScript does support reading and writing to files)
14
15
16
[28353]17'Option Explicit
18'Imports PowerPoint = Microsoft.Office.Interop.PowerPoint
19
20' Run as: CScript //Nologo <script> args
21' Without using the CScript at the start, it will try to use WScript for which WScript.StdErr is undefined/an invalid handle
22' http://stackoverflow.com/questions/774319/why-does-this-vbscript-give-me-an-error
23' It appears that the handle StdOut is only available when using a console host (cscript.exe) and not a windowed host (wscript.exe).
24' If you want the code to work, you have to use cscript.exe to run it.
25
[28354]26' This is a CScript (console-only). If launched in WScript mode, run as CScript anyway
27' From: http://stackoverflow.com/questions/4692542/force-a-vbs-to-run-using-cscript-instead-of-wscript
28Sub forceCScriptExecution
29 Dim Arg, Str
30 If Not LCase( Right( WScript.FullName, 12 ) ) = "\cscript.exe" Then
31 For Each Arg In WScript.Arguments
32 If InStr( Arg, " " ) Then Arg = """" & Arg & """"
33 Str = Str & " " & Arg
34 Next
35 CreateObject( "WScript.Shell" ).Run _
36 "cscript //nologo """ & _
37 WScript.ScriptFullName & _
38 """ " & Str
39 WScript.Quit
40 End If
41End Sub
42forceCScriptExecution
[28353]43
[28354]44' Where this script actually starts
[28353]45Dim args
46args = WScript.Arguments.Count
47If args < 2 Or args > 3 Then
48 'WScript.Echo Usage: args.vbs argument [input docx path] [output html path]
49 WScript.StdErr.Write ("ERROR. Usage: CScript //Nologo " & WScript.ScriptName & " [input ppt path] [output html path]" & vbCrLf)
50 WScript.StdErr.Write ("OR: CScript //Nologo " & WScript.ScriptName & " [-j(pg)/g(if)/p(ng)] [input ppt path] [output img path and filename prefix] " & vbCrLf)
51 WScript.Quit
52end If
53
54'set ppAPP to a power point application
55Dim ppApp
56Set ppApp = CreateObject("PowerPoint.Application")
57If CStr(Err.Number) = 429 Then ' 429 is the error code for "ActiveX component can't create object"
58 ' http://msdn.microsoft.com/en-us/library/xe43cc8d%28v=VS.85%29.aspx
59 WScript.StdErr.Write ("ERROR: Windows-scripting failed. ppt conversion cannot take place:" & vbCrLf)
60 WScript.StdErr.Write (" Microsoft Powerpoint cannot be found or cannot be launched. (Error #" & CStr(Err.Number) & ": " & Err.Description & "). " & vbCrLf)
61 WScript.StdErr.Write (" For converting the latest Office documents, install OpenOffice and Greenstone's OpenOffice extension. (Turn it on and turn off windows-scripting.)" & vbCrLf)
62 WScript.Quit -1 ' http://www.tek-tips.com/viewthread.cfm?qid=1297200
63End If
64
65' Declare COM interface constants for PPT File SaveAs types
66' http://include.wutils.com/com-dll/constants/constants-PowerPoint.htm
67' http://msdn.microsoft.com/en-us/library/ff746500.aspx
68 Const ppSaveAsPresentation = 1 '&H1
69 Const ppSaveAsPowerPoint7 = 2 '&H2
70 Const ppSaveAsPowerPoint4 = 3 '&H3
71 Const ppSaveAsPowerPoint3 = 4 '&H4
72 Const ppSaveAsTemplate = 5 '&H5
73 Const ppSaveAsRTF = 6 '&H6
74 Const ppSaveAsShow = 7 '&H7
75 Const ppSaveAsAddIn = 8 '&H8
76 Const ppSaveAsPowerPoint4FarEast = 10 '&HA
77 Const ppSaveAsDefault = 11 '&HB
78 Const ppSaveAsHTML = 12 '&HC
79 Const ppSaveAsHTMLv3 = 13 '&HD
80 Const ppSaveAsHTMLDual = 14 '&HE
81 Const ppSaveAsMetaFile = 15 '&HF
82 Const ppSaveAsGIF = 16 '&H10
83 Const ppSaveAsJPG = 17 '&H11
84 Const ppSaveAsPNG = 18 '&H12
85 Const ppSaveAsBMP = 19 '&H13
86 Const ppSaveAsWebArchive = 20 '&H14
87 Const ppSaveAsTIF = 21 '&H15
88 Const ppSaveAsPresForReview = 22 '&H16
89 Const ppSaveAsEMF = 23 '&H17
90
91' Now run the conversion subroutine
92
93If args = 2 Then
94PPTtoHTML WScript.Arguments.Item(0),WScript.Arguments.Item(1)
95Else
96PPTslidesToImgs WScript.Arguments.Item(0),WScript.Arguments.Item(1),WScript.Arguments.Item(2)
97End If
98
99' Based on http://stackoverflow.com/questions/12643024/can-i-automatically-convert-ppt-to-html
100' AFTER GETTING THIS SCRIPT TO RUN AT LAST, CONVERSION TO HTML STILL DOESN'T WORK, BECAUSE:
101' Although PPT 2010 could still save ppt as html using vb(script), see instructions at http://support.microsoft.com/kb/980553
102' for PPT 2013 that doesn't work anymore either. The option to save as html is simply no longer there.
103' Maybe we can convert to xml and then to html using a custom xsl stylesheet?
104' CONVERSION TO IMAGES SHOULD BE ABLE TO WORK, BUT STILL NEED TO CLEAN UP THAT FUNCTION TO GET THERE
105Sub PPTtoHTML(inFile, outHTML)
106 'ppApp.Visible = False ' Invalid Request: Hiding the application window is not allowed
107 ' Open the ppt document
108 ppApp.Presentations.Open inFile, 1, 0, 1 ', MsoTriState.msoTrue, MsoTriState.msoFalse, MsoTriState.msoFalse
109 Dim prsPres
110 Set prsPres = ppApp.ActivePresentation
111 'Call the SaveAs method of Presentation object and specify the format as HTML
112 prsPres.SaveAs outHTML, ppSaveAsHTML, 0 ' PowerPoint.PpSaveAsFileType.ppSaveAsHTML, MsoTriState.msoTrue
113 ' Tristate.msoFalse enum evaluates to 0, see http://msdn.microsoft.com/en-us/library/microsoft.visualbasic.tristate.aspx
114 'Close the Presentation object
115 prsPres.Close()
116 'Close the Application object
117 ppApp.Quit()
118
119End Sub
120
121' Porting pptextract.frm Visual Basic form that needs to be compiled to .exe into a VBscript (.vbs)
122' Converting PPT slides to images http://vbadud.blogspot.co.nz/2009/05/save-powerpoint-slides-as-images-using.html
123' Maybe helpful too: http://stackoverflow.com/questions/13057432/convert-all-worksheet-objects-to-images-in-powerpoint
124' http://msdn.microsoft.com/en-us/library/sdbcfyzh.aspx for logical operators
125' Like JScript, VBScript uses the FSO to read and write files: http://stackoverflow.com/questions/2198810/creating-and-writing-lines-to-a-file
126Sub PPTslidesToImgs(outputType, inFileName, outFileStem)
127 ' switch statement, http://msdn.microsoft.com/en-us/library/6ef9w614%28v=vs.84%29.aspx
[28354]128 'WScript.StdErr.Write ("Output stem: " & outFileStem & vbCrLf)
[28353]129
130 Dim fso
131 Set fso = CreateObject("Scripting.FileSystemObject")
132
133 Dim outputDir, itemFile
134
135 outputDir = outFileStem 'outputDir = Left(outFileStem, InStrRev(outFileStem, "\")) ' outputDir = substring upto final \, no need to escape \ in vbscript
136 itemFile = Mid(outFileStem, InStrRev(outFileStem, "\")+1)
137
[28354]138 'WScript.StdErr.Write ("outputDir: " & outFileStem & vbCrLf)
[28353]139
140 If Not fso.FolderExists(outputDir) Then
141 fso.CreateFolder(outputDir)
142 Else
143 WScript.StdErr.Write ("**** Folder " & outputDir & " Already exists" &vbCrLf)
144 End If
145
146 Select Case outputType
147 Case "-g" outputType = "gif"
148 Case "-gif" outputType = "gif"
149 Case "gif" outputType = "gif"
150 Case "-j" outputType = "jpg"
151 Case "-jpg" outputType = "jpg"
152 Case "jpg" outputType = "jpg"
153 Case "-p" outputType = "png"
154 Case "-png" outputType = "png"
155 Case "png" outputType = "png"
156 'Case "htm"
157 'Not developed for converting to html yet
158 'Currently, if the user choose to convert the PPT to the html file
159 'We will only allow users to use the open source method through Greenstone
160 End Select
161
162
163 'generate document_name.item file
164 itemFile = outFileStem + "\" + itemFile + ".item"
165
[28354]166 'Set item = fso.CreateTextFile(itemFile, 2, True) ' ForWriting = 2, default is Unicode = -1, see http://msdn.microsoft.com/en-us/library/314cz14s%28v=vs.84%29.aspx
167 ' The default file-write methods in VBScript all create UTF16 Little Endian (USC-2LE) files like Notepad's default, rather than the UTF-8 we want
168 ' Writing out to a file in UTF-8 is achieved as at: http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
[28353]169 Dim item
170 Set item = CreateObject("ADODB.Stream")
171 item.CharSet = "utf-8"
172 item.Open
173
174 'WScript.StdErr.Write ("itemFile: " & itemFile & vbCrLf)
175
176
177'do stuff
178 Dim objPA
179 Set objPA = CreateObject("PowerPoint.Application")
180 objPA.Visible = True
181
182 Dim objPPTs
183 Set objPPTs = objPA.Presentations
184 objPPTs.Open (inFileName)
185
186 item.WriteText "<PagedDocument>", 1
187
188 n = 1
189 Dim slide_shape
190 For slide_count = 1 To objPPTs(1).Slides.Count
191 current_slide = objPPTs(1).Slides(slide_count).Name
192 'generate a text version
193 'Set text = fso.CreateTextFile(outputDir + "\Slide" + CStr(slide_count) + ".txt", ForWriting, True)
194 Dim text
195 Set text = CreateObject("ADODB.Stream") ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
196 text.CharSet = "utf-8"
197 text.Open
198 If (objPPTs(1).Slides(slide_count).Shapes.HasTitle) Then
199 slide_title = objPPTs(1).Slides(slide_count).Shapes.Title.TextFrame.TextRange
200 Else
201 slide_title = objPPTs(1).Slides(slide_count).Name
202 End If
203 slide_text = ""
204 found_text = True
205 For iShape = 1 To objPPTs(1).Slides(slide_count).Shapes.Count
206 If (objPPTs(1).Slides(slide_count).Shapes(iShape).TextFrame.HasText) Then
207 slide_text = objPPTs(1).Slides(slide_count).Shapes(iShape).TextFrame.TextRange.text
208 'MsgBox ("Slide_text:" + slide_text)
209 If slide_text <> "" Then
210 text.WriteText slide_text, 1
211 Else
212 text.WriteText "This slide has no text", 1
213 End If
214 End If
215 Next
216 text_file = outputDir + "\Slide" + CStr(slide_count) + ".txt"
217 text.SaveToFile text_file, 2 ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
218 text.Close
219
220 If objPPTs(1).Slides.Count >= 1 And slide_count < objPPTs(1).Slides.Count Then
221 next_slide = objPPTs(1).Slides(slide_count + 1).Name
222 Else
223 nextf = " "
224 End If
225
226 ' For the gif, png, jpg files:
227 current_slide = "Slide" + CStr(n)
228 If nextf <> " " Then
229 nextf = outputDir + "\" + "Slide" + CStr(n + 1)
230 End If
231 prevhtml = outputDir + "\" + current_slide + "." + outputType
232 If slide_text = "" Then
233 itemxml item, current_slide + "." + outputType, "", n, slide_title
234 Else
235 itemxml item, current_slide + "." + outputType, current_slide + ".txt", n, slide_title
236 End If
237
238 n = n + 1
239 item.WriteText " </Page>", 1
240 Next
241 i = 0
242 Select Case outputType
243 'Case "htm" 'ppts(1).SaveAs outputDir, ppSaveAsHTMLv3
244 Case "gif" objPPTs(1).SaveAs outputDir, ppSaveAsGIF
245 Case "jpg" objPPTs(1).SaveAs outputDir, ppSaveAsJPG
246 Case "png" objPPTs(1).SaveAs outputDir, ppSaveAsPNG
247 End Select
248
249 item.WriteText "</PagedDocument>", 1
250
251 item.SaveToFile itemFile, 2 ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
252 item.Close
253
254 objPPTs(1).Close
255 Set fso = Nothing
256 Set text = Nothing
257 Set item = Nothing
258 objPA.Quit
259End Sub
260
261
262
263Sub itemxml(out_item, thisfile, txtfile, num, slide_title)
264 out_item.WriteText " <Page pagenum=" + Chr(34) + CStr(num) + Chr(34) + " imgfile=" + Chr(34) + thisfile + Chr(34) + " txtfile=" + Chr(34) + txtfile + Chr(34) + ">", 1
265 If slide_title <> "" Then
266 out_item.WriteText " <Metadata name=" + Chr(34) + "Title" + Chr(34) + ">" + slide_title + "</Metadata>", 1
267 'MsgBox ("Title:" + slide_title)
268 End If
269End Sub
Note: See TracBrowser for help on using the repository browser.