root/main/trunk/binaries/windows/bin/pptextract.vbs @ 28353

Revision 28353, 10.2 KB (checked in by ak19, 6 years ago)

VBScript version of VB pptextract.exe/frm/vbp. VBScript is easier to maintain and debug. The VB version is older now and Vis Studio is not able to upgrade it without errors and packages extra required packages.

  • Property svn:executable set to *
Line 
1'Option Explicit
2'Imports PowerPoint = Microsoft.Office.Interop.PowerPoint
3
4' Run as: CScript //Nologo <script> args
5' Without using the CScript at the start, it will try to use WScript for which WScript.StdErr is undefined/an invalid handle
6' http://stackoverflow.com/questions/774319/why-does-this-vbscript-give-me-an-error
7' It appears that the handle StdOut is only available when using a console host (cscript.exe) and not a windowed host (wscript.exe).
8' If you want the code to work, you have to use cscript.exe to run it.
9
10
11Dim args
12args = WScript.Arguments.Count
13If args < 2 Or args > 3 Then
14  'WScript.Echo Usage: args.vbs argument [input docx path] [output html path]
15  WScript.StdErr.Write ("ERROR. Usage: CScript //Nologo " & WScript.ScriptName & " [input ppt path] [output html path]" & vbCrLf)
16  WScript.StdErr.Write ("OR: CScript //Nologo " & WScript.ScriptName & " [-j(pg)/g(if)/p(ng)] [input ppt path] [output img path and filename prefix] " & vbCrLf)
17  WScript.Quit
18end If
19
20'set ppAPP to a power point application
21Dim ppApp
22Set ppApp = CreateObject("PowerPoint.Application")
23If CStr(Err.Number) = 429 Then  ' 429 is the error code for "ActiveX component can't create object"
24                                ' http://msdn.microsoft.com/en-us/library/xe43cc8d%28v=VS.85%29.aspx               
25    WScript.StdErr.Write ("ERROR: Windows-scripting failed. ppt conversion cannot take place:" & vbCrLf)
26    WScript.StdErr.Write ("   Microsoft Powerpoint cannot be found or cannot be launched. (Error #" & CStr(Err.Number) & ": " & Err.Description & "). " & vbCrLf)       
27    WScript.StdErr.Write ("   For converting the latest Office documents, install OpenOffice and Greenstone's OpenOffice extension. (Turn it on and turn off windows-scripting.)" & vbCrLf)
28    WScript.Quit -1 ' http://www.tek-tips.com/viewthread.cfm?qid=1297200
29End If
30
31' Declare COM interface constants for PPT File SaveAs types
32' http://include.wutils.com/com-dll/constants/constants-PowerPoint.htm
33' http://msdn.microsoft.com/en-us/library/ff746500.aspx
34  Const ppSaveAsPresentation = 1  '&H1
35  Const ppSaveAsPowerPoint7 = 2  '&H2
36  Const ppSaveAsPowerPoint4 = 3  '&H3
37  Const ppSaveAsPowerPoint3 = 4  '&H4
38  Const ppSaveAsTemplate = 5  '&H5
39  Const ppSaveAsRTF = 6  '&H6
40  Const ppSaveAsShow = 7  '&H7
41  Const ppSaveAsAddIn = 8  '&H8
42  Const ppSaveAsPowerPoint4FarEast = 10  '&HA
43  Const ppSaveAsDefault = 11  '&HB
44  Const ppSaveAsHTML = 12  '&HC
45  Const ppSaveAsHTMLv3 = 13  '&HD
46  Const ppSaveAsHTMLDual = 14  '&HE
47  Const ppSaveAsMetaFile = 15  '&HF
48  Const ppSaveAsGIF = 16  '&H10
49  Const ppSaveAsJPG = 17  '&H11
50  Const ppSaveAsPNG = 18  '&H12
51  Const ppSaveAsBMP = 19  '&H13
52  Const ppSaveAsWebArchive = 20  '&H14
53  Const ppSaveAsTIF = 21  '&H15
54  Const ppSaveAsPresForReview = 22  '&H16
55  Const ppSaveAsEMF = 23  '&H17
56 
57' Now run the conversion subroutine
58
59If args = 2 Then
60PPTtoHTML WScript.Arguments.Item(0),WScript.Arguments.Item(1)
61Else
62PPTslidesToImgs WScript.Arguments.Item(0),WScript.Arguments.Item(1),WScript.Arguments.Item(2)
63End If
64
65' Based on http://stackoverflow.com/questions/12643024/can-i-automatically-convert-ppt-to-html
66' AFTER GETTING THIS SCRIPT TO RUN AT LAST, CONVERSION TO HTML STILL DOESN'T WORK, BECAUSE:
67' Although PPT 2010 could still save ppt as html using vb(script), see instructions at http://support.microsoft.com/kb/980553
68' for PPT 2013 that doesn't work anymore either. The option to save as html is simply no longer there.
69' Maybe we can convert to xml and then to html using a custom xsl stylesheet?
70' CONVERSION TO IMAGES SHOULD BE ABLE TO WORK, BUT STILL NEED TO CLEAN UP THAT FUNCTION TO GET THERE
71Sub PPTtoHTML(inFile, outHTML)
72    'ppApp.Visible = False ' Invalid Request: Hiding the application window is not allowed
73    ' Open the ppt document
74    ppApp.Presentations.Open inFile, 1, 0, 1 ', MsoTriState.msoTrue, MsoTriState.msoFalse, MsoTriState.msoFalse
75    Dim prsPres
76    Set prsPres = ppApp.ActivePresentation
77    'Call the SaveAs method of Presentation object and specify the format as HTML
78    prsPres.SaveAs outHTML, ppSaveAsHTML, 0 ' PowerPoint.PpSaveAsFileType.ppSaveAsHTML, MsoTriState.msoTrue
79            ' Tristate.msoFalse enum evaluates to 0, see http://msdn.microsoft.com/en-us/library/microsoft.visualbasic.tristate.aspx
80    'Close the Presentation object
81    prsPres.Close()
82    'Close the Application object
83    ppApp.Quit()
84
85End Sub
86
87' Porting pptextract.frm Visual Basic form that needs to be compiled to .exe into a VBscript (.vbs)
88' Converting PPT slides to images http://vbadud.blogspot.co.nz/2009/05/save-powerpoint-slides-as-images-using.html
89' Maybe helpful too: http://stackoverflow.com/questions/13057432/convert-all-worksheet-objects-to-images-in-powerpoint
90' http://msdn.microsoft.com/en-us/library/sdbcfyzh.aspx for logical operators
91' Like JScript, VBScript uses the FSO to read and write files: http://stackoverflow.com/questions/2198810/creating-and-writing-lines-to-a-file
92Sub PPTslidesToImgs(outputType, inFileName, outFileStem)
93    ' switch statement, http://msdn.microsoft.com/en-us/library/6ef9w614%28v=vs.84%29.aspx
94    WScript.StdErr.Write ("Output stem: " & outFileStem & vbCrLf)
95   
96    Dim fso
97    Set fso = CreateObject("Scripting.FileSystemObject")
98   
99    Dim outputDir, itemFile
100   
101    outputDir = outFileStem 'outputDir = Left(outFileStem, InStrRev(outFileStem, "\")) ' outputDir = substring upto final \, no need to escape \ in vbscript   
102    itemFile = Mid(outFileStem, InStrRev(outFileStem, "\")+1)
103   
104    WScript.StdErr.Write ("outputDir: " & outFileStem & vbCrLf)
105   
106    If Not fso.FolderExists(outputDir) Then
107       fso.CreateFolder(outputDir) 
108    Else
109        WScript.StdErr.Write ("**** Folder " & outputDir & " Already exists" &vbCrLf)   
110    End If 
111   
112    Select Case outputType 
113        Case "-g"     outputType = "gif"
114        Case "-gif"   outputType = "gif"
115        Case "gif"    outputType = "gif"
116        Case "-j"     outputType = "jpg"
117        Case "-jpg"   outputType = "jpg"
118        Case "jpg"    outputType = "jpg"
119        Case "-p"     outputType = "png"
120        Case "-png"   outputType = "png"
121        Case "png"    outputType = "png"
122        'Case "htm"
123        'Not developed for converting to html yet
124        'Currently, if the user choose to convert the PPT to the html file
125        'We will only allow users to use the open source method through Greenstone     
126    End Select
127
128   
129    'generate document_name.item file
130    itemFile = outFileStem + "\" + itemFile + ".item"
131   
132    'Set item = fso.CreateTextFile(itemFile, 2, True) ' ForWriting = 2, Unicode = -1, see http://msdn.microsoft.com/en-us/library/314cz14s%28v=vs.84%29.aspx
133   
134    ' Writing out to a file in UTF-8 http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
135    Dim item
136    Set item = CreateObject("ADODB.Stream")
137    item.CharSet = "utf-8"
138    item.Open
139   
140    'WScript.StdErr.Write ("itemFile: " & itemFile & vbCrLf)
141
142
143'do stuff
144    Dim objPA
145    Set objPA = CreateObject("PowerPoint.Application")
146    objPA.Visible = True
147   
148    Dim objPPTs
149    Set objPPTs = objPA.Presentations
150    objPPTs.Open (inFileName)
151   
152    item.WriteText "<PagedDocument>", 1
153   
154    n = 1
155    Dim slide_shape
156    For slide_count = 1 To objPPTs(1).Slides.Count
157        current_slide = objPPTs(1).Slides(slide_count).Name
158        'generate a text version
159        'Set text = fso.CreateTextFile(outputDir + "\Slide" + CStr(slide_count) + ".txt", ForWriting, True)
160        Dim text
161        Set text = CreateObject("ADODB.Stream") ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
162        text.CharSet = "utf-8"
163        text.Open
164        If (objPPTs(1).Slides(slide_count).Shapes.HasTitle) Then
165            slide_title = objPPTs(1).Slides(slide_count).Shapes.Title.TextFrame.TextRange
166        Else
167            slide_title = objPPTs(1).Slides(slide_count).Name
168        End If
169        slide_text = ""
170        found_text = True
171        For iShape = 1 To objPPTs(1).Slides(slide_count).Shapes.Count
172            If (objPPTs(1).Slides(slide_count).Shapes(iShape).TextFrame.HasText) Then
173                slide_text = objPPTs(1).Slides(slide_count).Shapes(iShape).TextFrame.TextRange.text
174                'MsgBox ("Slide_text:" + slide_text)
175                If slide_text <> "" Then
176                    text.WriteText slide_text, 1
177                Else
178                    text.WriteText "This slide has no text", 1
179                End If
180            End If
181        Next
182        text_file = outputDir + "\Slide" + CStr(slide_count) + ".txt"
183        text.SaveToFile text_file, 2 ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
184        text.Close
185       
186        If objPPTs(1).Slides.Count >= 1 And slide_count < objPPTs(1).Slides.Count Then
187            next_slide = objPPTs(1).Slides(slide_count + 1).Name
188        Else
189            nextf = " "
190        End If
191       
192        ' For the gif, png, jpg files:
193        current_slide = "Slide" + CStr(n)
194        If nextf <> " " Then
195            nextf = outputDir + "\" + "Slide" + CStr(n + 1)
196        End If
197        prevhtml = outputDir + "\" + current_slide + "." + outputType
198        If slide_text = "" Then
199           itemxml item, current_slide + "." + outputType, "", n, slide_title
200        Else
201           itemxml item, current_slide + "." + outputType, current_slide + ".txt", n, slide_title
202        End If
203       
204        n = n + 1
205        item.WriteText "   </Page>", 1
206    Next
207    i = 0
208    Select Case outputType
209        'Case "htm" 'ppts(1).SaveAs outputDir, ppSaveAsHTMLv3
210        Case "gif" objPPTs(1).SaveAs outputDir, ppSaveAsGIF
211        Case "jpg" objPPTs(1).SaveAs outputDir, ppSaveAsJPG
212        Case "png" objPPTs(1).SaveAs outputDir, ppSaveAsPNG
213    End Select
214               
215    item.WriteText "</PagedDocument>", 1
216   
217    item.SaveToFile itemFile, 2 ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
218    item.Close
219   
220    objPPTs(1).Close
221    Set fso = Nothing
222    Set text = Nothing
223    Set item = Nothing
224    objPA.Quit
225End Sub
226
227
228
229Sub itemxml(out_item, thisfile, txtfile, num, slide_title)
230    out_item.WriteText "   <Page pagenum=" + Chr(34) + CStr(num) + Chr(34) + " imgfile=" + Chr(34) + thisfile + Chr(34) + " txtfile=" + Chr(34) + txtfile + Chr(34) + ">", 1   
231    If slide_title <> "" Then
232        out_item.WriteText "      <Metadata name=" + Chr(34) + "Title" + Chr(34) + ">" + slide_title + "</Metadata>", 1
233        'MsgBox ("Title:" + slide_title)       
234    End If
235End Sub
Note: See TracBrowser for help on using the browser.