root/main/trunk/binaries/windows/bin/pptextract.vbs @ 30572

Revision 30572, 11.8 KB (checked in by ak19, 4 years ago)

Removed stale files pptextract.exe, pptextract.frm, pptextract.vbp (visual basic project file), pptextract.vbw

  • Property svn:executable set to *
Line 
1' This file is a port to VBScript from VB of the files pptextract.exe, pptextract.frm
2' (and pptextract.vbp, pptextract.vbw), which have been removed after revision 30571.
3
4' It was hard to upgrade the VB pptextract.frm form script in Visual Studio
5' to the current Visual Basic, and some packages it needed wouldn't install,
6' making it hard to compile up.
7
8' As this VBScript doesn't need to be compiled up, it may be easier to maintain.
9
10' For differences between VBScript and VB, see
11' http://msdn.microsoft.com/en-us/library/ms970436.aspx
12' http://www.htmlgoodies.com/beyond/asp/vbs-ref/article.php/3458611/Key-Differences-Between-VB-and-VB-Script.htm
13' (Note that VBScript does support reading and writing to files)
14
15
16
17'Option Explicit
18'Imports PowerPoint = Microsoft.Office.Interop.PowerPoint
19
20' Run as: CScript //Nologo <script> args
21' Without using the CScript at the start, it will try to use WScript for which WScript.StdErr is undefined/an invalid handle
22' http://stackoverflow.com/questions/774319/why-does-this-vbscript-give-me-an-error
23' It appears that the handle StdOut is only available when using a console host (cscript.exe) and not a windowed host (wscript.exe).
24' If you want the code to work, you have to use cscript.exe to run it.
25
26' This is a CScript (console-only). If launched in WScript mode, run as CScript anyway
27' From: http://stackoverflow.com/questions/4692542/force-a-vbs-to-run-using-cscript-instead-of-wscript
28Sub forceCScriptExecution
29    Dim Arg, Str
30    If Not LCase( Right( WScript.FullName, 12 ) ) = "\cscript.exe" Then
31        For Each Arg In WScript.Arguments
32            If InStr( Arg, " " ) Then Arg = """" & Arg & """"
33            Str = Str & " " & Arg
34        Next
35        CreateObject( "WScript.Shell" ).Run _
36            "cscript //nologo """ & _
37            WScript.ScriptFullName & _
38            """ " & Str
39        WScript.Quit
40    End If
41End Sub
42forceCScriptExecution
43
44' Where this script actually starts
45Dim args
46args = WScript.Arguments.Count
47If args < 2 Or args > 3 Then
48  'WScript.Echo Usage: args.vbs argument [input docx path] [output html path]
49  WScript.StdErr.Write ("ERROR. Usage: CScript //Nologo " & WScript.ScriptName & " [input ppt path] [output html path]" & vbCrLf)
50  WScript.StdErr.Write ("OR: CScript //Nologo " & WScript.ScriptName & " [-j(pg)/g(if)/p(ng)] [input ppt path] [output img path and filename prefix] " & vbCrLf)
51  WScript.Quit
52end If
53
54'set ppAPP to a power point application
55Dim ppApp
56Set ppApp = CreateObject("PowerPoint.Application")
57If CStr(Err.Number) = 429 Then  ' 429 is the error code for "ActiveX component can't create object"
58                                ' http://msdn.microsoft.com/en-us/library/xe43cc8d%28v=VS.85%29.aspx               
59    WScript.StdErr.Write ("ERROR: Windows-scripting failed. ppt conversion cannot take place:" & vbCrLf)
60    WScript.StdErr.Write ("   Microsoft Powerpoint cannot be found or cannot be launched. (Error #" & CStr(Err.Number) & ": " & Err.Description & "). " & vbCrLf)       
61    WScript.StdErr.Write ("   For converting the latest Office documents, install OpenOffice and Greenstone's OpenOffice extension. (Turn it on and turn off windows-scripting.)" & vbCrLf)
62    WScript.Quit -1 ' http://www.tek-tips.com/viewthread.cfm?qid=1297200
63End If
64
65' Declare COM interface constants for PPT File SaveAs types
66' http://include.wutils.com/com-dll/constants/constants-PowerPoint.htm
67' http://msdn.microsoft.com/en-us/library/ff746500.aspx
68  Const ppSaveAsPresentation = 1  '&H1
69  Const ppSaveAsPowerPoint7 = 2  '&H2
70  Const ppSaveAsPowerPoint4 = 3  '&H3
71  Const ppSaveAsPowerPoint3 = 4  '&H4
72  Const ppSaveAsTemplate = 5  '&H5
73  Const ppSaveAsRTF = 6  '&H6
74  Const ppSaveAsShow = 7  '&H7
75  Const ppSaveAsAddIn = 8  '&H8
76  Const ppSaveAsPowerPoint4FarEast = 10  '&HA
77  Const ppSaveAsDefault = 11  '&HB
78  Const ppSaveAsHTML = 12  '&HC
79  Const ppSaveAsHTMLv3 = 13  '&HD
80  Const ppSaveAsHTMLDual = 14  '&HE
81  Const ppSaveAsMetaFile = 15  '&HF
82  Const ppSaveAsGIF = 16  '&H10
83  Const ppSaveAsJPG = 17  '&H11
84  Const ppSaveAsPNG = 18  '&H12
85  Const ppSaveAsBMP = 19  '&H13
86  Const ppSaveAsWebArchive = 20  '&H14
87  Const ppSaveAsTIF = 21  '&H15
88  Const ppSaveAsPresForReview = 22  '&H16
89  Const ppSaveAsEMF = 23  '&H17
90 
91' Now run the conversion subroutine
92
93If args = 2 Then
94PPTtoHTML WScript.Arguments.Item(0),WScript.Arguments.Item(1)
95Else
96PPTslidesToImgs WScript.Arguments.Item(0),WScript.Arguments.Item(1),WScript.Arguments.Item(2)
97End If
98
99' Based on http://stackoverflow.com/questions/12643024/can-i-automatically-convert-ppt-to-html
100' AFTER GETTING THIS SCRIPT TO RUN AT LAST, CONVERSION TO HTML STILL DOESN'T WORK, BECAUSE:
101' Although PPT 2010 could still save ppt as html using vb(script), see instructions at http://support.microsoft.com/kb/980553
102' for PPT 2013 that doesn't work anymore either. The option to save as html is simply no longer there.
103' Maybe we can convert to xml and then to html using a custom xsl stylesheet?
104' CONVERSION TO IMAGES SHOULD BE ABLE TO WORK, BUT STILL NEED TO CLEAN UP THAT FUNCTION TO GET THERE
105Sub PPTtoHTML(inFile, outHTML)
106    'ppApp.Visible = False ' Invalid Request: Hiding the application window is not allowed
107    ' Open the ppt document
108    ppApp.Presentations.Open inFile, 1, 0, 1 ', MsoTriState.msoTrue, MsoTriState.msoFalse, MsoTriState.msoFalse
109    Dim prsPres
110    Set prsPres = ppApp.ActivePresentation
111    'Call the SaveAs method of Presentation object and specify the format as HTML
112    prsPres.SaveAs outHTML, ppSaveAsHTML, 0 ' PowerPoint.PpSaveAsFileType.ppSaveAsHTML, MsoTriState.msoTrue
113            ' Tristate.msoFalse enum evaluates to 0, see http://msdn.microsoft.com/en-us/library/microsoft.visualbasic.tristate.aspx
114    'Close the Presentation object
115    prsPres.Close()
116    'Close the Application object
117    ppApp.Quit()
118
119End Sub
120
121' Porting pptextract.frm Visual Basic form that needs to be compiled to .exe into a VBscript (.vbs)
122' Converting PPT slides to images http://vbadud.blogspot.co.nz/2009/05/save-powerpoint-slides-as-images-using.html
123' Maybe helpful too: http://stackoverflow.com/questions/13057432/convert-all-worksheet-objects-to-images-in-powerpoint
124' http://msdn.microsoft.com/en-us/library/sdbcfyzh.aspx for logical operators
125' Like JScript, VBScript uses the FSO to read and write files: http://stackoverflow.com/questions/2198810/creating-and-writing-lines-to-a-file
126Sub PPTslidesToImgs(outputType, inFileName, outFileStem)
127    ' switch statement, http://msdn.microsoft.com/en-us/library/6ef9w614%28v=vs.84%29.aspx
128    'WScript.StdErr.Write ("Output stem: " & outFileStem & vbCrLf)
129   
130    Dim fso
131    Set fso = CreateObject("Scripting.FileSystemObject")
132   
133    Dim outputDir, itemFile
134   
135    outputDir = outFileStem 'outputDir = Left(outFileStem, InStrRev(outFileStem, "\")) ' outputDir = substring upto final \, no need to escape \ in vbscript   
136    itemFile = Mid(outFileStem, InStrRev(outFileStem, "\")+1)
137   
138    'WScript.StdErr.Write ("outputDir: " & outFileStem & vbCrLf)   
139   
140    If Not fso.FolderExists(outputDir) Then
141       fso.CreateFolder(outputDir) 
142    Else
143        WScript.StdErr.Write ("**** Folder " & outputDir & " Already exists" &vbCrLf)   
144    End If 
145   
146    Select Case outputType 
147        Case "-g"     outputType = "gif"
148        Case "-gif"   outputType = "gif"
149        Case "gif"    outputType = "gif"
150        Case "-j"     outputType = "jpg"
151        Case "-jpg"   outputType = "jpg"
152        Case "jpg"    outputType = "jpg"
153        Case "-p"     outputType = "png"
154        Case "-png"   outputType = "png"
155        Case "png"    outputType = "png"
156        'Case "htm"
157        'Not developed for converting to html yet
158        'Currently, if the user choose to convert the PPT to the html file
159        'We will only allow users to use the open source method through Greenstone     
160    End Select
161
162   
163    'generate document_name.item file
164    itemFile = outFileStem + "\" + itemFile + ".item"
165   
166    'Set item = fso.CreateTextFile(itemFile, 2, True) ' ForWriting = 2, default is Unicode = -1, see http://msdn.microsoft.com/en-us/library/314cz14s%28v=vs.84%29.aspx
167    ' The default file-write methods in VBScript all create UTF16 Little Endian (USC-2LE) files like Notepad's default, rather than the UTF-8 we want
168    ' Writing out to a file in UTF-8 is achieved as at: http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6   
169    Dim item
170    Set item = CreateObject("ADODB.Stream")
171    item.CharSet = "utf-8"
172    item.Open
173   
174    'WScript.StdErr.Write ("itemFile: " & itemFile & vbCrLf)
175
176
177'do stuff
178    Dim objPA
179    Set objPA = CreateObject("PowerPoint.Application")
180    objPA.Visible = True
181   
182    Dim objPPTs
183    Set objPPTs = objPA.Presentations
184    objPPTs.Open (inFileName)
185   
186    item.WriteText "<PagedDocument>", 1
187   
188    n = 1
189    Dim slide_shape
190    For slide_count = 1 To objPPTs(1).Slides.Count
191        current_slide = objPPTs(1).Slides(slide_count).Name
192        'generate a text version
193        'Set text = fso.CreateTextFile(outputDir + "\Slide" + CStr(slide_count) + ".txt", ForWriting, True)
194        Dim text
195        Set text = CreateObject("ADODB.Stream") ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
196        text.CharSet = "utf-8"
197        text.Open
198        If (objPPTs(1).Slides(slide_count).Shapes.HasTitle) Then
199            slide_title = objPPTs(1).Slides(slide_count).Shapes.Title.TextFrame.TextRange
200        Else
201            slide_title = objPPTs(1).Slides(slide_count).Name
202        End If
203        slide_text = ""
204        found_text = True
205        For iShape = 1 To objPPTs(1).Slides(slide_count).Shapes.Count
206            If (objPPTs(1).Slides(slide_count).Shapes(iShape).TextFrame.HasText) Then
207                slide_text = objPPTs(1).Slides(slide_count).Shapes(iShape).TextFrame.TextRange.text
208                'MsgBox ("Slide_text:" + slide_text)
209                If slide_text <> "" Then
210                    text.WriteText slide_text, 1
211                Else
212                    text.WriteText "This slide has no text", 1
213                End If
214            End If
215        Next
216        text_file = outputDir + "\Slide" + CStr(slide_count) + ".txt"
217        text.SaveToFile text_file, 2 ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
218        text.Close
219       
220        If objPPTs(1).Slides.Count >= 1 And slide_count < objPPTs(1).Slides.Count Then
221            next_slide = objPPTs(1).Slides(slide_count + 1).Name
222        Else
223            nextf = " "
224        End If
225       
226        ' For the gif, png, jpg files:
227        current_slide = "Slide" + CStr(n)
228        If nextf <> " " Then
229            nextf = outputDir + "\" + "Slide" + CStr(n + 1)
230        End If
231        prevhtml = outputDir + "\" + current_slide + "." + outputType
232        If slide_text = "" Then
233           itemxml item, current_slide + "." + outputType, "", n, slide_title
234        Else
235           itemxml item, current_slide + "." + outputType, current_slide + ".txt", n, slide_title
236        End If
237       
238        n = n + 1
239        item.WriteText "   </Page>", 1
240    Next
241    i = 0
242    Select Case outputType
243        'Case "htm" 'ppts(1).SaveAs outputDir, ppSaveAsHTMLv3
244        Case "gif" objPPTs(1).SaveAs outputDir, ppSaveAsGIF
245        Case "jpg" objPPTs(1).SaveAs outputDir, ppSaveAsJPG
246        Case "png" objPPTs(1).SaveAs outputDir, ppSaveAsPNG
247    End Select
248               
249    item.WriteText "</PagedDocument>", 1
250   
251    item.SaveToFile itemFile, 2 ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
252    item.Close
253   
254    objPPTs(1).Close
255    Set fso = Nothing
256    Set text = Nothing
257    Set item = Nothing
258    objPA.Quit
259End Sub
260
261
262
263Sub itemxml(out_item, thisfile, txtfile, num, slide_title)
264    out_item.WriteText "   <Page pagenum=" + Chr(34) + CStr(num) + Chr(34) + " imgfile=" + Chr(34) + thisfile + Chr(34) + " txtfile=" + Chr(34) + txtfile + Chr(34) + ">", 1   
265    If slide_title <> "" Then
266        out_item.WriteText "      <Metadata name=" + Chr(34) + "Title" + Chr(34) + ">" + slide_title + "</Metadata>", 1
267        'MsgBox ("Title:" + slide_title)       
268    End If
269End Sub
Note: See TracBrowser for help on using the browser.