source: main/trunk/binaries/windows/bin/pptextract.vbs@ 28354

Last change on this file since 28354 was 28354, checked in by ak19, 11 years ago

This VBScript file is now replacing the old VB files (exe, frm form file and vbp project file). Otherwise this script was already working and writing out the item file and slide files, which were generated from converting ppt slides to txt, as UTF-8. This is where this script is preferable to the older VB executable, which had all the code that wrote out UTF-8 commented out. By default, file write methods in VB and VBScript write out UTF16 LE, which is not what Greenstone wants. Greenstone wants UTF-8. The differences from the last revision: 1. Added subroutine to run this script as CScript, which uses the console, in case this .vbs file (VBScript) is launched with WScript. 2. Better comments.

  • Property svn:executable set to *
File size: 11.8 KB
RevLine 
[28354]1' This file is a port to VBScript from VB of the pptextract.exe, pptextract.frm (and pptextract.vbp) files.
2
3' It was hard to upgrade the VB pptextract.frm form script in Visual Studio
4' to the current Visual Basic, and some packages it needed wouldn't install,
5' making it hard to compile up.
6
7' As this VBScript doesn't need to be compiled up, it may be easier to maintain.
8
9' For differences between VBScript and VB, see
10' http://msdn.microsoft.com/en-us/library/ms970436.aspx
11' http://www.htmlgoodies.com/beyond/asp/vbs-ref/article.php/3458611/Key-Differences-Between-VB-and-VB-Script.htm
12' (Note that VBScript does support reading and writing to files)
13
14
15
[28353]16'Option Explicit
17'Imports PowerPoint = Microsoft.Office.Interop.PowerPoint
18
19' Run as: CScript //Nologo <script> args
20' Without using the CScript at the start, it will try to use WScript for which WScript.StdErr is undefined/an invalid handle
21' http://stackoverflow.com/questions/774319/why-does-this-vbscript-give-me-an-error
22' It appears that the handle StdOut is only available when using a console host (cscript.exe) and not a windowed host (wscript.exe).
23' If you want the code to work, you have to use cscript.exe to run it.
24
[28354]25' This is a CScript (console-only). If launched in WScript mode, run as CScript anyway
26' From: http://stackoverflow.com/questions/4692542/force-a-vbs-to-run-using-cscript-instead-of-wscript
27Sub forceCScriptExecution
28 Dim Arg, Str
29 If Not LCase( Right( WScript.FullName, 12 ) ) = "\cscript.exe" Then
30 For Each Arg In WScript.Arguments
31 If InStr( Arg, " " ) Then Arg = """" & Arg & """"
32 Str = Str & " " & Arg
33 Next
34 CreateObject( "WScript.Shell" ).Run _
35 "cscript //nologo """ & _
36 WScript.ScriptFullName & _
37 """ " & Str
38 WScript.Quit
39 End If
40End Sub
41forceCScriptExecution
[28353]42
[28354]43' Where this script actually starts
[28353]44Dim args
45args = WScript.Arguments.Count
46If args < 2 Or args > 3 Then
47 'WScript.Echo Usage: args.vbs argument [input docx path] [output html path]
48 WScript.StdErr.Write ("ERROR. Usage: CScript //Nologo " & WScript.ScriptName & " [input ppt path] [output html path]" & vbCrLf)
49 WScript.StdErr.Write ("OR: CScript //Nologo " & WScript.ScriptName & " [-j(pg)/g(if)/p(ng)] [input ppt path] [output img path and filename prefix] " & vbCrLf)
50 WScript.Quit
51end If
52
53'set ppAPP to a power point application
54Dim ppApp
55Set ppApp = CreateObject("PowerPoint.Application")
56If CStr(Err.Number) = 429 Then ' 429 is the error code for "ActiveX component can't create object"
57 ' http://msdn.microsoft.com/en-us/library/xe43cc8d%28v=VS.85%29.aspx
58 WScript.StdErr.Write ("ERROR: Windows-scripting failed. ppt conversion cannot take place:" & vbCrLf)
59 WScript.StdErr.Write (" Microsoft Powerpoint cannot be found or cannot be launched. (Error #" & CStr(Err.Number) & ": " & Err.Description & "). " & vbCrLf)
60 WScript.StdErr.Write (" For converting the latest Office documents, install OpenOffice and Greenstone's OpenOffice extension. (Turn it on and turn off windows-scripting.)" & vbCrLf)
61 WScript.Quit -1 ' http://www.tek-tips.com/viewthread.cfm?qid=1297200
62End If
63
64' Declare COM interface constants for PPT File SaveAs types
65' http://include.wutils.com/com-dll/constants/constants-PowerPoint.htm
66' http://msdn.microsoft.com/en-us/library/ff746500.aspx
67 Const ppSaveAsPresentation = 1 '&H1
68 Const ppSaveAsPowerPoint7 = 2 '&H2
69 Const ppSaveAsPowerPoint4 = 3 '&H3
70 Const ppSaveAsPowerPoint3 = 4 '&H4
71 Const ppSaveAsTemplate = 5 '&H5
72 Const ppSaveAsRTF = 6 '&H6
73 Const ppSaveAsShow = 7 '&H7
74 Const ppSaveAsAddIn = 8 '&H8
75 Const ppSaveAsPowerPoint4FarEast = 10 '&HA
76 Const ppSaveAsDefault = 11 '&HB
77 Const ppSaveAsHTML = 12 '&HC
78 Const ppSaveAsHTMLv3 = 13 '&HD
79 Const ppSaveAsHTMLDual = 14 '&HE
80 Const ppSaveAsMetaFile = 15 '&HF
81 Const ppSaveAsGIF = 16 '&H10
82 Const ppSaveAsJPG = 17 '&H11
83 Const ppSaveAsPNG = 18 '&H12
84 Const ppSaveAsBMP = 19 '&H13
85 Const ppSaveAsWebArchive = 20 '&H14
86 Const ppSaveAsTIF = 21 '&H15
87 Const ppSaveAsPresForReview = 22 '&H16
88 Const ppSaveAsEMF = 23 '&H17
89
90' Now run the conversion subroutine
91
92If args = 2 Then
93PPTtoHTML WScript.Arguments.Item(0),WScript.Arguments.Item(1)
94Else
95PPTslidesToImgs WScript.Arguments.Item(0),WScript.Arguments.Item(1),WScript.Arguments.Item(2)
96End If
97
98' Based on http://stackoverflow.com/questions/12643024/can-i-automatically-convert-ppt-to-html
99' AFTER GETTING THIS SCRIPT TO RUN AT LAST, CONVERSION TO HTML STILL DOESN'T WORK, BECAUSE:
100' Although PPT 2010 could still save ppt as html using vb(script), see instructions at http://support.microsoft.com/kb/980553
101' for PPT 2013 that doesn't work anymore either. The option to save as html is simply no longer there.
102' Maybe we can convert to xml and then to html using a custom xsl stylesheet?
103' CONVERSION TO IMAGES SHOULD BE ABLE TO WORK, BUT STILL NEED TO CLEAN UP THAT FUNCTION TO GET THERE
104Sub PPTtoHTML(inFile, outHTML)
105 'ppApp.Visible = False ' Invalid Request: Hiding the application window is not allowed
106 ' Open the ppt document
107 ppApp.Presentations.Open inFile, 1, 0, 1 ', MsoTriState.msoTrue, MsoTriState.msoFalse, MsoTriState.msoFalse
108 Dim prsPres
109 Set prsPres = ppApp.ActivePresentation
110 'Call the SaveAs method of Presentation object and specify the format as HTML
111 prsPres.SaveAs outHTML, ppSaveAsHTML, 0 ' PowerPoint.PpSaveAsFileType.ppSaveAsHTML, MsoTriState.msoTrue
112 ' Tristate.msoFalse enum evaluates to 0, see http://msdn.microsoft.com/en-us/library/microsoft.visualbasic.tristate.aspx
113 'Close the Presentation object
114 prsPres.Close()
115 'Close the Application object
116 ppApp.Quit()
117
118End Sub
119
120' Porting pptextract.frm Visual Basic form that needs to be compiled to .exe into a VBscript (.vbs)
121' Converting PPT slides to images http://vbadud.blogspot.co.nz/2009/05/save-powerpoint-slides-as-images-using.html
122' Maybe helpful too: http://stackoverflow.com/questions/13057432/convert-all-worksheet-objects-to-images-in-powerpoint
123' http://msdn.microsoft.com/en-us/library/sdbcfyzh.aspx for logical operators
124' Like JScript, VBScript uses the FSO to read and write files: http://stackoverflow.com/questions/2198810/creating-and-writing-lines-to-a-file
125Sub PPTslidesToImgs(outputType, inFileName, outFileStem)
126 ' switch statement, http://msdn.microsoft.com/en-us/library/6ef9w614%28v=vs.84%29.aspx
[28354]127 'WScript.StdErr.Write ("Output stem: " & outFileStem & vbCrLf)
[28353]128
129 Dim fso
130 Set fso = CreateObject("Scripting.FileSystemObject")
131
132 Dim outputDir, itemFile
133
134 outputDir = outFileStem 'outputDir = Left(outFileStem, InStrRev(outFileStem, "\")) ' outputDir = substring upto final \, no need to escape \ in vbscript
135 itemFile = Mid(outFileStem, InStrRev(outFileStem, "\")+1)
136
[28354]137 'WScript.StdErr.Write ("outputDir: " & outFileStem & vbCrLf)
[28353]138
139 If Not fso.FolderExists(outputDir) Then
140 fso.CreateFolder(outputDir)
141 Else
142 WScript.StdErr.Write ("**** Folder " & outputDir & " Already exists" &vbCrLf)
143 End If
144
145 Select Case outputType
146 Case "-g" outputType = "gif"
147 Case "-gif" outputType = "gif"
148 Case "gif" outputType = "gif"
149 Case "-j" outputType = "jpg"
150 Case "-jpg" outputType = "jpg"
151 Case "jpg" outputType = "jpg"
152 Case "-p" outputType = "png"
153 Case "-png" outputType = "png"
154 Case "png" outputType = "png"
155 'Case "htm"
156 'Not developed for converting to html yet
157 'Currently, if the user choose to convert the PPT to the html file
158 'We will only allow users to use the open source method through Greenstone
159 End Select
160
161
162 'generate document_name.item file
163 itemFile = outFileStem + "\" + itemFile + ".item"
164
[28354]165 'Set item = fso.CreateTextFile(itemFile, 2, True) ' ForWriting = 2, default is Unicode = -1, see http://msdn.microsoft.com/en-us/library/314cz14s%28v=vs.84%29.aspx
166 ' The default file-write methods in VBScript all create UTF16 Little Endian (USC-2LE) files like Notepad's default, rather than the UTF-8 we want
167 ' Writing out to a file in UTF-8 is achieved as at: http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
[28353]168 Dim item
169 Set item = CreateObject("ADODB.Stream")
170 item.CharSet = "utf-8"
171 item.Open
172
173 'WScript.StdErr.Write ("itemFile: " & itemFile & vbCrLf)
174
175
176'do stuff
177 Dim objPA
178 Set objPA = CreateObject("PowerPoint.Application")
179 objPA.Visible = True
180
181 Dim objPPTs
182 Set objPPTs = objPA.Presentations
183 objPPTs.Open (inFileName)
184
185 item.WriteText "<PagedDocument>", 1
186
187 n = 1
188 Dim slide_shape
189 For slide_count = 1 To objPPTs(1).Slides.Count
190 current_slide = objPPTs(1).Slides(slide_count).Name
191 'generate a text version
192 'Set text = fso.CreateTextFile(outputDir + "\Slide" + CStr(slide_count) + ".txt", ForWriting, True)
193 Dim text
194 Set text = CreateObject("ADODB.Stream") ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
195 text.CharSet = "utf-8"
196 text.Open
197 If (objPPTs(1).Slides(slide_count).Shapes.HasTitle) Then
198 slide_title = objPPTs(1).Slides(slide_count).Shapes.Title.TextFrame.TextRange
199 Else
200 slide_title = objPPTs(1).Slides(slide_count).Name
201 End If
202 slide_text = ""
203 found_text = True
204 For iShape = 1 To objPPTs(1).Slides(slide_count).Shapes.Count
205 If (objPPTs(1).Slides(slide_count).Shapes(iShape).TextFrame.HasText) Then
206 slide_text = objPPTs(1).Slides(slide_count).Shapes(iShape).TextFrame.TextRange.text
207 'MsgBox ("Slide_text:" + slide_text)
208 If slide_text <> "" Then
209 text.WriteText slide_text, 1
210 Else
211 text.WriteText "This slide has no text", 1
212 End If
213 End If
214 Next
215 text_file = outputDir + "\Slide" + CStr(slide_count) + ".txt"
216 text.SaveToFile text_file, 2 ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
217 text.Close
218
219 If objPPTs(1).Slides.Count >= 1 And slide_count < objPPTs(1).Slides.Count Then
220 next_slide = objPPTs(1).Slides(slide_count + 1).Name
221 Else
222 nextf = " "
223 End If
224
225 ' For the gif, png, jpg files:
226 current_slide = "Slide" + CStr(n)
227 If nextf <> " " Then
228 nextf = outputDir + "\" + "Slide" + CStr(n + 1)
229 End If
230 prevhtml = outputDir + "\" + current_slide + "." + outputType
231 If slide_text = "" Then
232 itemxml item, current_slide + "." + outputType, "", n, slide_title
233 Else
234 itemxml item, current_slide + "." + outputType, current_slide + ".txt", n, slide_title
235 End If
236
237 n = n + 1
238 item.WriteText " </Page>", 1
239 Next
240 i = 0
241 Select Case outputType
242 'Case "htm" 'ppts(1).SaveAs outputDir, ppSaveAsHTMLv3
243 Case "gif" objPPTs(1).SaveAs outputDir, ppSaveAsGIF
244 Case "jpg" objPPTs(1).SaveAs outputDir, ppSaveAsJPG
245 Case "png" objPPTs(1).SaveAs outputDir, ppSaveAsPNG
246 End Select
247
248 item.WriteText "</PagedDocument>", 1
249
250 item.SaveToFile itemFile, 2 ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
251 item.Close
252
253 objPPTs(1).Close
254 Set fso = Nothing
255 Set text = Nothing
256 Set item = Nothing
257 objPA.Quit
258End Sub
259
260
261
262Sub itemxml(out_item, thisfile, txtfile, num, slide_title)
263 out_item.WriteText " <Page pagenum=" + Chr(34) + CStr(num) + Chr(34) + " imgfile=" + Chr(34) + thisfile + Chr(34) + " txtfile=" + Chr(34) + txtfile + Chr(34) + ">", 1
264 If slide_title <> "" Then
265 out_item.WriteText " <Metadata name=" + Chr(34) + "Title" + Chr(34) + ">" + slide_title + "</Metadata>", 1
266 'MsgBox ("Title:" + slide_title)
267 End If
268End Sub
Note: See TracBrowser for help on using the repository browser.