[28354] | 1 | ' This file is a port to VBScript from VB of the pptextract.exe, pptextract.frm (and pptextract.vbp) files.
|
---|
| 2 |
|
---|
| 3 | ' It was hard to upgrade the VB pptextract.frm form script in Visual Studio
|
---|
| 4 | ' to the current Visual Basic, and some packages it needed wouldn't install,
|
---|
| 5 | ' making it hard to compile up.
|
---|
| 6 |
|
---|
| 7 | ' As this VBScript doesn't need to be compiled up, it may be easier to maintain.
|
---|
| 8 |
|
---|
| 9 | ' For differences between VBScript and VB, see
|
---|
| 10 | ' http://msdn.microsoft.com/en-us/library/ms970436.aspx
|
---|
| 11 | ' http://www.htmlgoodies.com/beyond/asp/vbs-ref/article.php/3458611/Key-Differences-Between-VB-and-VB-Script.htm
|
---|
| 12 | ' (Note that VBScript does support reading and writing to files)
|
---|
| 13 |
|
---|
| 14 |
|
---|
| 15 |
|
---|
[28353] | 16 | 'Option Explicit
|
---|
| 17 | 'Imports PowerPoint = Microsoft.Office.Interop.PowerPoint
|
---|
| 18 |
|
---|
| 19 | ' Run as: CScript //Nologo <script> args
|
---|
| 20 | ' Without using the CScript at the start, it will try to use WScript for which WScript.StdErr is undefined/an invalid handle
|
---|
| 21 | ' http://stackoverflow.com/questions/774319/why-does-this-vbscript-give-me-an-error
|
---|
| 22 | ' It appears that the handle StdOut is only available when using a console host (cscript.exe) and not a windowed host (wscript.exe).
|
---|
| 23 | ' If you want the code to work, you have to use cscript.exe to run it.
|
---|
| 24 |
|
---|
[28354] | 25 | ' This is a CScript (console-only). If launched in WScript mode, run as CScript anyway
|
---|
| 26 | ' From: http://stackoverflow.com/questions/4692542/force-a-vbs-to-run-using-cscript-instead-of-wscript
|
---|
| 27 | Sub forceCScriptExecution
|
---|
| 28 | Dim Arg, Str
|
---|
| 29 | If Not LCase( Right( WScript.FullName, 12 ) ) = "\cscript.exe" Then
|
---|
| 30 | For Each Arg In WScript.Arguments
|
---|
| 31 | If InStr( Arg, " " ) Then Arg = """" & Arg & """"
|
---|
| 32 | Str = Str & " " & Arg
|
---|
| 33 | Next
|
---|
| 34 | CreateObject( "WScript.Shell" ).Run _
|
---|
| 35 | "cscript //nologo """ & _
|
---|
| 36 | WScript.ScriptFullName & _
|
---|
| 37 | """ " & Str
|
---|
| 38 | WScript.Quit
|
---|
| 39 | End If
|
---|
| 40 | End Sub
|
---|
| 41 | forceCScriptExecution
|
---|
[28353] | 42 |
|
---|
[28354] | 43 | ' Where this script actually starts
|
---|
[28353] | 44 | Dim args
|
---|
| 45 | args = WScript.Arguments.Count
|
---|
| 46 | If args < 2 Or args > 3 Then
|
---|
| 47 | 'WScript.Echo Usage: args.vbs argument [input docx path] [output html path]
|
---|
| 48 | WScript.StdErr.Write ("ERROR. Usage: CScript //Nologo " & WScript.ScriptName & " [input ppt path] [output html path]" & vbCrLf)
|
---|
| 49 | WScript.StdErr.Write ("OR: CScript //Nologo " & WScript.ScriptName & " [-j(pg)/g(if)/p(ng)] [input ppt path] [output img path and filename prefix] " & vbCrLf)
|
---|
| 50 | WScript.Quit
|
---|
| 51 | end If
|
---|
| 52 |
|
---|
| 53 | 'set ppAPP to a power point application
|
---|
| 54 | Dim ppApp
|
---|
| 55 | Set ppApp = CreateObject("PowerPoint.Application")
|
---|
| 56 | If CStr(Err.Number) = 429 Then ' 429 is the error code for "ActiveX component can't create object"
|
---|
| 57 | ' http://msdn.microsoft.com/en-us/library/xe43cc8d%28v=VS.85%29.aspx
|
---|
| 58 | WScript.StdErr.Write ("ERROR: Windows-scripting failed. ppt conversion cannot take place:" & vbCrLf)
|
---|
| 59 | WScript.StdErr.Write (" Microsoft Powerpoint cannot be found or cannot be launched. (Error #" & CStr(Err.Number) & ": " & Err.Description & "). " & vbCrLf)
|
---|
| 60 | WScript.StdErr.Write (" For converting the latest Office documents, install OpenOffice and Greenstone's OpenOffice extension. (Turn it on and turn off windows-scripting.)" & vbCrLf)
|
---|
| 61 | WScript.Quit -1 ' http://www.tek-tips.com/viewthread.cfm?qid=1297200
|
---|
| 62 | End If
|
---|
| 63 |
|
---|
| 64 | ' Declare COM interface constants for PPT File SaveAs types
|
---|
| 65 | ' http://include.wutils.com/com-dll/constants/constants-PowerPoint.htm
|
---|
| 66 | ' http://msdn.microsoft.com/en-us/library/ff746500.aspx
|
---|
| 67 | Const ppSaveAsPresentation = 1 '&H1
|
---|
| 68 | Const ppSaveAsPowerPoint7 = 2 '&H2
|
---|
| 69 | Const ppSaveAsPowerPoint4 = 3 '&H3
|
---|
| 70 | Const ppSaveAsPowerPoint3 = 4 '&H4
|
---|
| 71 | Const ppSaveAsTemplate = 5 '&H5
|
---|
| 72 | Const ppSaveAsRTF = 6 '&H6
|
---|
| 73 | Const ppSaveAsShow = 7 '&H7
|
---|
| 74 | Const ppSaveAsAddIn = 8 '&H8
|
---|
| 75 | Const ppSaveAsPowerPoint4FarEast = 10 '&HA
|
---|
| 76 | Const ppSaveAsDefault = 11 '&HB
|
---|
| 77 | Const ppSaveAsHTML = 12 '&HC
|
---|
| 78 | Const ppSaveAsHTMLv3 = 13 '&HD
|
---|
| 79 | Const ppSaveAsHTMLDual = 14 '&HE
|
---|
| 80 | Const ppSaveAsMetaFile = 15 '&HF
|
---|
| 81 | Const ppSaveAsGIF = 16 '&H10
|
---|
| 82 | Const ppSaveAsJPG = 17 '&H11
|
---|
| 83 | Const ppSaveAsPNG = 18 '&H12
|
---|
| 84 | Const ppSaveAsBMP = 19 '&H13
|
---|
| 85 | Const ppSaveAsWebArchive = 20 '&H14
|
---|
| 86 | Const ppSaveAsTIF = 21 '&H15
|
---|
| 87 | Const ppSaveAsPresForReview = 22 '&H16
|
---|
| 88 | Const ppSaveAsEMF = 23 '&H17
|
---|
| 89 |
|
---|
| 90 | ' Now run the conversion subroutine
|
---|
| 91 |
|
---|
| 92 | If args = 2 Then
|
---|
| 93 | PPTtoHTML WScript.Arguments.Item(0),WScript.Arguments.Item(1)
|
---|
| 94 | Else
|
---|
| 95 | PPTslidesToImgs WScript.Arguments.Item(0),WScript.Arguments.Item(1),WScript.Arguments.Item(2)
|
---|
| 96 | End If
|
---|
| 97 |
|
---|
| 98 | ' Based on http://stackoverflow.com/questions/12643024/can-i-automatically-convert-ppt-to-html
|
---|
| 99 | ' AFTER GETTING THIS SCRIPT TO RUN AT LAST, CONVERSION TO HTML STILL DOESN'T WORK, BECAUSE:
|
---|
| 100 | ' Although PPT 2010 could still save ppt as html using vb(script), see instructions at http://support.microsoft.com/kb/980553
|
---|
| 101 | ' for PPT 2013 that doesn't work anymore either. The option to save as html is simply no longer there.
|
---|
| 102 | ' Maybe we can convert to xml and then to html using a custom xsl stylesheet?
|
---|
| 103 | ' CONVERSION TO IMAGES SHOULD BE ABLE TO WORK, BUT STILL NEED TO CLEAN UP THAT FUNCTION TO GET THERE
|
---|
| 104 | Sub PPTtoHTML(inFile, outHTML)
|
---|
| 105 | 'ppApp.Visible = False ' Invalid Request: Hiding the application window is not allowed
|
---|
| 106 | ' Open the ppt document
|
---|
| 107 | ppApp.Presentations.Open inFile, 1, 0, 1 ', MsoTriState.msoTrue, MsoTriState.msoFalse, MsoTriState.msoFalse
|
---|
| 108 | Dim prsPres
|
---|
| 109 | Set prsPres = ppApp.ActivePresentation
|
---|
| 110 | 'Call the SaveAs method of Presentation object and specify the format as HTML
|
---|
| 111 | prsPres.SaveAs outHTML, ppSaveAsHTML, 0 ' PowerPoint.PpSaveAsFileType.ppSaveAsHTML, MsoTriState.msoTrue
|
---|
| 112 | ' Tristate.msoFalse enum evaluates to 0, see http://msdn.microsoft.com/en-us/library/microsoft.visualbasic.tristate.aspx
|
---|
| 113 | 'Close the Presentation object
|
---|
| 114 | prsPres.Close()
|
---|
| 115 | 'Close the Application object
|
---|
| 116 | ppApp.Quit()
|
---|
| 117 |
|
---|
| 118 | End Sub
|
---|
| 119 |
|
---|
| 120 | ' Porting pptextract.frm Visual Basic form that needs to be compiled to .exe into a VBscript (.vbs)
|
---|
| 121 | ' Converting PPT slides to images http://vbadud.blogspot.co.nz/2009/05/save-powerpoint-slides-as-images-using.html
|
---|
| 122 | ' Maybe helpful too: http://stackoverflow.com/questions/13057432/convert-all-worksheet-objects-to-images-in-powerpoint
|
---|
| 123 | ' http://msdn.microsoft.com/en-us/library/sdbcfyzh.aspx for logical operators
|
---|
| 124 | ' Like JScript, VBScript uses the FSO to read and write files: http://stackoverflow.com/questions/2198810/creating-and-writing-lines-to-a-file
|
---|
| 125 | Sub PPTslidesToImgs(outputType, inFileName, outFileStem)
|
---|
| 126 | ' switch statement, http://msdn.microsoft.com/en-us/library/6ef9w614%28v=vs.84%29.aspx
|
---|
[28354] | 127 | 'WScript.StdErr.Write ("Output stem: " & outFileStem & vbCrLf)
|
---|
[28353] | 128 |
|
---|
| 129 | Dim fso
|
---|
| 130 | Set fso = CreateObject("Scripting.FileSystemObject")
|
---|
| 131 |
|
---|
| 132 | Dim outputDir, itemFile
|
---|
| 133 |
|
---|
| 134 | outputDir = outFileStem 'outputDir = Left(outFileStem, InStrRev(outFileStem, "\")) ' outputDir = substring upto final \, no need to escape \ in vbscript
|
---|
| 135 | itemFile = Mid(outFileStem, InStrRev(outFileStem, "\")+1)
|
---|
| 136 |
|
---|
[28354] | 137 | 'WScript.StdErr.Write ("outputDir: " & outFileStem & vbCrLf)
|
---|
[28353] | 138 |
|
---|
| 139 | If Not fso.FolderExists(outputDir) Then
|
---|
| 140 | fso.CreateFolder(outputDir)
|
---|
| 141 | Else
|
---|
| 142 | WScript.StdErr.Write ("**** Folder " & outputDir & " Already exists" &vbCrLf)
|
---|
| 143 | End If
|
---|
| 144 |
|
---|
| 145 | Select Case outputType
|
---|
| 146 | Case "-g" outputType = "gif"
|
---|
| 147 | Case "-gif" outputType = "gif"
|
---|
| 148 | Case "gif" outputType = "gif"
|
---|
| 149 | Case "-j" outputType = "jpg"
|
---|
| 150 | Case "-jpg" outputType = "jpg"
|
---|
| 151 | Case "jpg" outputType = "jpg"
|
---|
| 152 | Case "-p" outputType = "png"
|
---|
| 153 | Case "-png" outputType = "png"
|
---|
| 154 | Case "png" outputType = "png"
|
---|
| 155 | 'Case "htm"
|
---|
| 156 | 'Not developed for converting to html yet
|
---|
| 157 | 'Currently, if the user choose to convert the PPT to the html file
|
---|
| 158 | 'We will only allow users to use the open source method through Greenstone
|
---|
| 159 | End Select
|
---|
| 160 |
|
---|
| 161 |
|
---|
| 162 | 'generate document_name.item file
|
---|
| 163 | itemFile = outFileStem + "\" + itemFile + ".item"
|
---|
| 164 |
|
---|
[28354] | 165 | 'Set item = fso.CreateTextFile(itemFile, 2, True) ' ForWriting = 2, default is Unicode = -1, see http://msdn.microsoft.com/en-us/library/314cz14s%28v=vs.84%29.aspx
|
---|
| 166 | ' The default file-write methods in VBScript all create UTF16 Little Endian (USC-2LE) files like Notepad's default, rather than the UTF-8 we want
|
---|
| 167 | ' Writing out to a file in UTF-8 is achieved as at: http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
|
---|
[28353] | 168 | Dim item
|
---|
| 169 | Set item = CreateObject("ADODB.Stream")
|
---|
| 170 | item.CharSet = "utf-8"
|
---|
| 171 | item.Open
|
---|
| 172 |
|
---|
| 173 | 'WScript.StdErr.Write ("itemFile: " & itemFile & vbCrLf)
|
---|
| 174 |
|
---|
| 175 |
|
---|
| 176 | 'do stuff
|
---|
| 177 | Dim objPA
|
---|
| 178 | Set objPA = CreateObject("PowerPoint.Application")
|
---|
| 179 | objPA.Visible = True
|
---|
| 180 |
|
---|
| 181 | Dim objPPTs
|
---|
| 182 | Set objPPTs = objPA.Presentations
|
---|
| 183 | objPPTs.Open (inFileName)
|
---|
| 184 |
|
---|
| 185 | item.WriteText "<PagedDocument>", 1
|
---|
| 186 |
|
---|
| 187 | n = 1
|
---|
| 188 | Dim slide_shape
|
---|
| 189 | For slide_count = 1 To objPPTs(1).Slides.Count
|
---|
| 190 | current_slide = objPPTs(1).Slides(slide_count).Name
|
---|
| 191 | 'generate a text version
|
---|
| 192 | 'Set text = fso.CreateTextFile(outputDir + "\Slide" + CStr(slide_count) + ".txt", ForWriting, True)
|
---|
| 193 | Dim text
|
---|
| 194 | Set text = CreateObject("ADODB.Stream") ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
|
---|
| 195 | text.CharSet = "utf-8"
|
---|
| 196 | text.Open
|
---|
| 197 | If (objPPTs(1).Slides(slide_count).Shapes.HasTitle) Then
|
---|
| 198 | slide_title = objPPTs(1).Slides(slide_count).Shapes.Title.TextFrame.TextRange
|
---|
| 199 | Else
|
---|
| 200 | slide_title = objPPTs(1).Slides(slide_count).Name
|
---|
| 201 | End If
|
---|
| 202 | slide_text = ""
|
---|
| 203 | found_text = True
|
---|
| 204 | For iShape = 1 To objPPTs(1).Slides(slide_count).Shapes.Count
|
---|
| 205 | If (objPPTs(1).Slides(slide_count).Shapes(iShape).TextFrame.HasText) Then
|
---|
| 206 | slide_text = objPPTs(1).Slides(slide_count).Shapes(iShape).TextFrame.TextRange.text
|
---|
| 207 | 'MsgBox ("Slide_text:" + slide_text)
|
---|
| 208 | If slide_text <> "" Then
|
---|
| 209 | text.WriteText slide_text, 1
|
---|
| 210 | Else
|
---|
| 211 | text.WriteText "This slide has no text", 1
|
---|
| 212 | End If
|
---|
| 213 | End If
|
---|
| 214 | Next
|
---|
| 215 | text_file = outputDir + "\Slide" + CStr(slide_count) + ".txt"
|
---|
| 216 | text.SaveToFile text_file, 2 ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
|
---|
| 217 | text.Close
|
---|
| 218 |
|
---|
| 219 | If objPPTs(1).Slides.Count >= 1 And slide_count < objPPTs(1).Slides.Count Then
|
---|
| 220 | next_slide = objPPTs(1).Slides(slide_count + 1).Name
|
---|
| 221 | Else
|
---|
| 222 | nextf = " "
|
---|
| 223 | End If
|
---|
| 224 |
|
---|
| 225 | ' For the gif, png, jpg files:
|
---|
| 226 | current_slide = "Slide" + CStr(n)
|
---|
| 227 | If nextf <> " " Then
|
---|
| 228 | nextf = outputDir + "\" + "Slide" + CStr(n + 1)
|
---|
| 229 | End If
|
---|
| 230 | prevhtml = outputDir + "\" + current_slide + "." + outputType
|
---|
| 231 | If slide_text = "" Then
|
---|
| 232 | itemxml item, current_slide + "." + outputType, "", n, slide_title
|
---|
| 233 | Else
|
---|
| 234 | itemxml item, current_slide + "." + outputType, current_slide + ".txt", n, slide_title
|
---|
| 235 | End If
|
---|
| 236 |
|
---|
| 237 | n = n + 1
|
---|
| 238 | item.WriteText " </Page>", 1
|
---|
| 239 | Next
|
---|
| 240 | i = 0
|
---|
| 241 | Select Case outputType
|
---|
| 242 | 'Case "htm" 'ppts(1).SaveAs outputDir, ppSaveAsHTMLv3
|
---|
| 243 | Case "gif" objPPTs(1).SaveAs outputDir, ppSaveAsGIF
|
---|
| 244 | Case "jpg" objPPTs(1).SaveAs outputDir, ppSaveAsJPG
|
---|
| 245 | Case "png" objPPTs(1).SaveAs outputDir, ppSaveAsPNG
|
---|
| 246 | End Select
|
---|
| 247 |
|
---|
| 248 | item.WriteText "</PagedDocument>", 1
|
---|
| 249 |
|
---|
| 250 | item.SaveToFile itemFile, 2 ' http://stackoverflow.com/questions/10450156/write-text-file-in-appending-utf-8-encoded-in-vb6
|
---|
| 251 | item.Close
|
---|
| 252 |
|
---|
| 253 | objPPTs(1).Close
|
---|
| 254 | Set fso = Nothing
|
---|
| 255 | Set text = Nothing
|
---|
| 256 | Set item = Nothing
|
---|
| 257 | objPA.Quit
|
---|
| 258 | End Sub
|
---|
| 259 |
|
---|
| 260 |
|
---|
| 261 |
|
---|
| 262 | Sub itemxml(out_item, thisfile, txtfile, num, slide_title)
|
---|
| 263 | out_item.WriteText " <Page pagenum=" + Chr(34) + CStr(num) + Chr(34) + " imgfile=" + Chr(34) + thisfile + Chr(34) + " txtfile=" + Chr(34) + txtfile + Chr(34) + ">", 1
|
---|
| 264 | If slide_title <> "" Then
|
---|
| 265 | out_item.WriteText " <Metadata name=" + Chr(34) + "Title" + Chr(34) + ">" + slide_title + "</Metadata>", 1
|
---|
| 266 | 'MsgBox ("Title:" + slide_title)
|
---|
| 267 | End If
|
---|
| 268 | End Sub |
---|