root/main/trunk/binaries/windows/bin/pptextract.vbs @ 28354

Revision 28354, 11.8 KB (checked in by ak19, 6 years ago)

This VBScript file is now replacing the old VB files (exe, frm form file and vbp project file). Otherwise this script was already working and writing out the item file and slide files, which were generated from converting ppt slides to txt, as UTF-8. This is where this script is preferable to the older VB executable, which had all the code that wrote out UTF-8 commented out. By default, file write methods in VB and VBScript write out UTF16 LE, which is not what Greenstone wants. Greenstone wants UTF-8. The differences from the last revision: 1. Added subroutine to run this script as CScript, which uses the console, in case this .vbs file (VBScript) is launched with WScript. 2. Better comments.

  • Property svn:executable set to *
Line 
1' This file is a port to VBScript from VB of the pptextract.exe, pptextract.frm (and pptextract.vbp) files.
2
3' It was hard to upgrade the VB pptextract.frm form script in Visual Studio
4' to the current Visual Basic, and some packages it needed wouldn't install,
5' making it hard to compile up.
6
7' As this VBScript doesn't need to be compiled up, it may be easier to maintain.
8
9' For differences between VBScript and VB, see
10' http://msdn.microsoft.com/en-us/library/ms970436.aspx
11' http://www.htmlgoodies.com/beyond/asp/vbs-ref/article.php/3458611/Key-Differences-Between-VB-and-VB-Script.htm
12' (Note that VBScript does support reading and writing to files)
13
14
15
16'Option Explicit
17'Imports PowerPoint = Microsoft.Office.Interop.PowerPoint
18
19' Run as: CScript //Nologo <script> args
20' Without using the CScript at the start, it will try to use WScript for which WScript.StdErr is undefined/an invalid handle
21' http://stackoverflow.com/questions/774319/why-does-this-vbscript-give-me-an-error
22' It appears that the handle StdOut is only available when using a console host (cscript.exe) and not a windowed host (wscript.exe).
23' If you want the code to work, you have to use cscript.exe to run it.
24
25' This is a CScript (console-only). If launched in WScript mode, run as CScript anyway
26' From: http://stackoverflow.com/questions/4692542/force-a-vbs-to-run-using-cscript-instead-of-wscript
27Sub forceCScriptExecution
28    Dim Arg, Str
29    If Not LCase( Right( WScript.FullName, 12 ) ) = "\cscript.exe" Then
30        For Each Arg In WScript.Arguments
31            If InStr( Arg, " " ) Then Arg = """" & Arg & """"
32            Str = Str & " " & Arg
33        Next
34        CreateObject( "WScript.Shell" ).Run _
35            "cscript //nologo """ & _
36            WScript.ScriptFullName & _
37            """ " & Str
38        WScript.Quit
39    End If
40End Sub
41forceCScriptExecution
42
43' Where this script actually starts
44Dim args
45args = WScript.Arguments.Count
46If args < 2 Or args > 3 Then
47  'WScript.Echo Usage: args.vbs argument [input docx path] [output html path]
48  WScript.StdErr.Write ("ERROR. Usage: CScript //Nologo " & WScript.ScriptName & " [input ppt path] [output html path]" & vbCrLf)
49  WScript.StdErr.Write ("OR: CScript //Nologo " & WScript.ScriptName & " [-j(pg)/g(if)/p(ng)] [input ppt path] [output img path and filename prefix] " & vbCrLf)
50  WScript.Quit
51end If
52
53'set ppAPP to a power point application
54Dim ppApp
55Set ppApp = CreateObject("PowerPoint.Application")
56If CStr(Err.Number) = 429 Then  ' 429 is the error code for "ActiveX component can't create object"
57                                ' http://msdn.microsoft.com/en-us/library/xe43cc8d%28v=VS.85%29.aspx               
58    WScript.StdErr.Write ("ERROR: Windows-scripting failed. ppt conversion cannot take place:" & vbCrLf)
59    WScript.StdErr.Write ("   Microsoft Powerpoint cannot be found or cannot be launched. (Error #" & CStr(Err.Number) & ": " & Err.Description & "). " & vbCrLf)       
60    WScript.StdErr.Write ("   For converting the latest Office documents, install OpenOffice and Greenstone's OpenOffice extension. (Turn it on and turn off windows-scripting.)" & vbCrLf)
61    WScript.Quit -1 ' http://www.tek-tips.com/viewthread.cfm?qid=1297200
62End If
63
64' Declare COM interface constants for PPT File SaveAs types
65' http://include.wutils.com/com-dll/constants/constants-PowerPoint.htm
66' http://msdn.microsoft.com/en-us/library/ff746500.aspx
67  Const ppSaveAsPresentation = 1  '&H1
68  Const ppSaveAsPowerPoint7 = 2  '&H2
69  Const ppSaveAsPowerPoint4 = 3  '&H3
70  Const ppSaveAsPowerPoint3 = 4  '&H4
71  Const ppSaveAsTemplate = 5  '&H5
72  Const ppSaveAsRTF = 6  '&H6
73  Const ppSaveAsShow = 7  '&H7
74  Const ppSaveAsAddIn = 8  '&H8
75  Const ppSaveAsPowerPoint4FarEast = 10  '&HA
76  Const ppSaveAsDefault = 11  '&HB
77  Const ppSaveAsHTML = 12  '&HC
78  Const ppSaveAsHTMLv3 = 13  '&HD
79  Const ppSaveAsHTMLDual = 14  '&HE
80  Const ppSaveAsMetaFile = 15  '&HF
81  Const ppSaveAsGIF = 16  '&H10
82  Const ppSaveAsJPG = 17  '&H11
83  Const ppSaveAsPNG = 18  '&H12
84  Const ppSaveAsBMP = 19  '&H13
85  Const ppSaveAsWebArchive = 20  '&H14
86  Const ppSaveAsTIF = 21  '&H15
87  Const ppSaveAsPresForReview = 22  '&H16
88  Const ppSaveAsEMF = 23  '&H17
89 
90' Now run the conversion subroutine
91
92If args = 2 Then
93PPTtoHTML WScript.Arguments.Item(0),WScript.Arguments.Item(1)
94Else
95PPTslidesToImgs WScript.Arguments.Item(0),WScript.Arguments.Item(1),WScript.Arguments.Item(2)
96End If
97
98' Based on http://stackoverflow.com/questions/12643024/can-i-automatically-convert-ppt-to-html
99' AFTER GETTING THIS SCRIPT TO RUN AT LAST, CONVERSION TO HTML STILL DOESN'T WORK, BECAUSE:
100' Although PPT 2010 could still save ppt as html using vb(script), see instructions at http://support.microsoft.com/kb/980553
101' for PPT 2013 that doesn't work anymore either. The option to save as html is simply no longer there.
102' Maybe we can convert to xml and then to html using a custom xsl stylesheet?
103' CONVERSION TO IMAGES SHOULD BE ABLE TO WORK, BUT STILL NEED TO CLEAN UP THAT FUNCTION TO GET THERE
104Sub PPTtoHTML(inFile, outHTML)
105    'ppApp.Visible = False ' Invalid Request: Hiding the application window is not allowed
106    ' Open the ppt document
107    ppApp.Presentations.Open inFile, 1, 0, 1 ', MsoTriState.msoTrue, MsoTriState.msoFalse, MsoTriState.msoFalse
108    Dim prsPres
109    Set prsPres = ppApp.ActivePresentation
110    'Call the SaveAs method of Presentation object and specify the format as HTML
111    prsPres.SaveAs outHTML, ppSaveAsHTML, 0 ' PowerPoint.PpSaveAsFileType.ppSaveAsHTML, MsoTriState.msoTrue
112            ' Tristate.msoFalse enum evaluates to 0, see http://msdn.microsoft.com/en-us/library/microsoft.visualbasic.tristate.aspx
113    'Close the Presentation object
114    prsPres.Close()
115    'Close the Application object
116    ppApp.Quit()
117
118End Sub
119
120' Porting pptextract.frm Visual Basic form that needs to be compiled to .exe into a VBscript (.vbs)
121' Converting PPT slides to images http://vbadud.blogspot.co.nz/2009/05/save-powerpoint-slides-as-images-using.html
122' Maybe helpful too: http://stackoverflow.com/questions/13057432/convert-all-worksheet-objects-to-images-in-powerpoint
123' http://msdn.microsoft.com/en-us/library/sdbcfyzh.aspx for logical operators
124' Like JScript, VBScript uses the FSO to read and write files: http://stackoverflow.com/questions/2198810/creating-and-writing-lines-to-a-file
125Sub PPTslidesToImgs(outputType, inFileName, outFileStem)
126    ' switch statement, http://msdn.microsoft.com/en-us/library/6ef9w614%28v=vs.84%29.aspx
127    'WScript.StdErr.Write ("Output stem: " & outFileStem & vbCrLf)
128   
129    Dim fso
130    Set fso = CreateObject("Scripting.FileSystemObject")
131   
132    Dim outputDir, itemFile
133   
134    outputDir = outFileStem 'outputDir = Left(outFileStem, InStrRev(outFileStem, "\")) ' outputDir = substring upto final \, no need to escape \ in vbscript   
135    itemFile = Mid(outFileStem, InStrRev(outFileStem, "\")+1)
136   
137    'WScript.StdErr.Write ("outputDir: " & outFileStem & vbCrLf)   
138   
139    If Not fso.FolderExists(outputDir) Then
140       fso.CreateFolder(outputDir) 
141    Else
142        WScript.StdErr.Write ("**** Folder " & outputDir & " Already exists" &vbCrLf)   
143    End If 
144   
145    Select Case outputType 
146        Case "-g"     outputType = "gif"
147        Case "-gif"   outputType = "gif"
148        Case "gif"    outputType = "gif"
149        Case "-j"     outputType = "jpg"
150        Case "-jpg"   outputType = "jpg"
151        Case "jpg"    outputType = "jpg"
152        Case "-p"     outputType = "png"
153        Case "-png"   outputType = "png"
154        Case "png"    outputType = "png"
155        'Case "htm"
156        'Not developed for converting to html yet
157        'Currently, if the user choose to convert the PPT to the html file
158        'We will only allow users to use the open source method through Greenstone     
159    End Select
160
161   
162    'generate document_name.item file
163    itemFile = outFileStem + "\" + itemFile + ".item"
164   
165    'Set item = fso.CreateTextFile(itemFile, 2, True) ' ForWriting = 2, default is Unicode = -1, see http://msdn.microsoft.com/en-us/library/314cz14s%28v=vs.84%29.aspx
166    ' The default file-write methods in VBScript all create UTF16 Little Endian (USC-2LE) files like Notepad's default, rather than the UTF-8 we want
167    ' Writing out to a file in UTF-8 is achieved as at: http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6   
168    Dim item
169    Set item = CreateObject("ADODB.Stream")
170    item.CharSet = "utf-8"
171    item.Open
172   
173    'WScript.StdErr.Write ("itemFile: " & itemFile & vbCrLf)
174
175
176'do stuff
177    Dim objPA
178    Set objPA = CreateObject("PowerPoint.Application")
179    objPA.Visible = True
180   
181    Dim objPPTs
182    Set objPPTs = objPA.Presentations
183    objPPTs.Open (inFileName)
184   
185    item.WriteText "<PagedDocument>", 1
186   
187    n = 1
188    Dim slide_shape
189    For slide_count = 1 To objPPTs(1).Slides.Count
190        current_slide = objPPTs(1).Slides(slide_count).Name
191        'generate a text version
192        'Set text = fso.CreateTextFile(outputDir + "\Slide" + CStr(slide_count) + ".txt", ForWriting, True)
193        Dim text
194        Set text = CreateObject("ADODB.Stream") ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
195        text.CharSet = "utf-8"
196        text.Open
197        If (objPPTs(1).Slides(slide_count).Shapes.HasTitle) Then
198            slide_title = objPPTs(1).Slides(slide_count).Shapes.Title.TextFrame.TextRange
199        Else
200            slide_title = objPPTs(1).Slides(slide_count).Name
201        End If
202        slide_text = ""
203        found_text = True
204        For iShape = 1 To objPPTs(1).Slides(slide_count).Shapes.Count
205            If (objPPTs(1).Slides(slide_count).Shapes(iShape).TextFrame.HasText) Then
206                slide_text = objPPTs(1).Slides(slide_count).Shapes(iShape).TextFrame.TextRange.text
207                'MsgBox ("Slide_text:" + slide_text)
208                If slide_text <> "" Then
209                    text.WriteText slide_text, 1
210                Else
211                    text.WriteText "This slide has no text", 1
212                End If
213            End If
214        Next
215        text_file = outputDir + "\Slide" + CStr(slide_count) + ".txt"
216        text.SaveToFile text_file, 2 ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
217        text.Close
218       
219        If objPPTs(1).Slides.Count >= 1 And slide_count < objPPTs(1).Slides.Count Then
220            next_slide = objPPTs(1).Slides(slide_count + 1).Name
221        Else
222            nextf = " "
223        End If
224       
225        ' For the gif, png, jpg files:
226        current_slide = "Slide" + CStr(n)
227        If nextf <> " " Then
228            nextf = outputDir + "\" + "Slide" + CStr(n + 1)
229        End If
230        prevhtml = outputDir + "\" + current_slide + "." + outputType
231        If slide_text = "" Then
232           itemxml item, current_slide + "." + outputType, "", n, slide_title
233        Else
234           itemxml item, current_slide + "." + outputType, current_slide + ".txt", n, slide_title
235        End If
236       
237        n = n + 1
238        item.WriteText "   </Page>", 1
239    Next
240    i = 0
241    Select Case outputType
242        'Case "htm" 'ppts(1).SaveAs outputDir, ppSaveAsHTMLv3
243        Case "gif" objPPTs(1).SaveAs outputDir, ppSaveAsGIF
244        Case "jpg" objPPTs(1).SaveAs outputDir, ppSaveAsJPG
245        Case "png" objPPTs(1).SaveAs outputDir, ppSaveAsPNG
246    End Select
247               
248    item.WriteText "</PagedDocument>", 1
249   
250    item.SaveToFile itemFile, 2 ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
251    item.Close
252   
253    objPPTs(1).Close
254    Set fso = Nothing
255    Set text = Nothing
256    Set item = Nothing
257    objPA.Quit
258End Sub
259
260
261
262Sub itemxml(out_item, thisfile, txtfile, num, slide_title)
263    out_item.WriteText "   <Page pagenum=" + Chr(34) + CStr(num) + Chr(34) + " imgfile=" + Chr(34) + thisfile + Chr(34) + " txtfile=" + Chr(34) + txtfile + Chr(34) + ">", 1   
264    If slide_title <> "" Then
265        out_item.WriteText "      <Metadata name=" + Chr(34) + "Title" + Chr(34) + ">" + slide_title + "</Metadata>", 1
266        'MsgBox ("Title:" + slide_title)       
267    End If
268End Sub
Note: See TracBrowser for help on using the browser.