Changeset 37646
- Timestamp: 2023-04-11T00:54:55+12:00 (13 months ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/prepare/xlsx-to-csv--thewillow-directorysheet.py
r37645 r37646 8 8 import gdown 9 9 import openpyxl 10 import urllib.request 11 12 10 #import urllib.request 11 12 # from urllib.request import urlopen 13 14 import requests 13 15 14 16 #import argparse … … 22 24 downloads_outputdir = "downloads/" 23 25 26 #import ssl 27 #ssl._create_default_https_context = ssl._create_unverified_context 24 28 25 29 … … 122 126 else: 123 127 col_pos = extra_heading_rec["colpos"] 124 index_pos = col_pos - 1; 125 worksheet.insert_cols(idx=index_pos) 128 worksheet.insert_cols(col_pos,1) 126 129 worksheet.cell(row=1,column=col_pos).value = extra_heading 127 130 … … 157 160 158 161 159 def downloadURLDIY(url,ofilename): 160 print("*** downloadURLDIY() -- currently untested") 161 162 # Based on: 163 # https://stackoverflow.com/questions/7243750/download-file-from-web-in-python-3 164 165 166 # Download the file from `url` and save it locally under `file_name`: 167 with urllib.request.urlopen(url) as response, open(ofilename, 'wb') as out_file: 168 data = response.read() # a `bytes` object 169 170 # Assume we're working with raw bytes that are UTF-8 171 # If not, then need to decode data 172 # Something along the lines of ... 
173 # text = data.decode('utf-8') # a `str`; this step can't be used if data is binary 174 175 out_file.write(data) 162 163 def downloadURL(url): 164 # Based on 'requests' example on 165 # https://www.codingem.com/python-download-file-from-url/ 166 167 print("Downloading url:") 168 print(" '" + url +"'") 169 170 ofile = url.rsplit('/', 1)[-1] # everything after the last '/' 171 file_ext = os.path.splitext(ofile)[1] 172 173 # Looking for a simple file extension to the URL, otherwise assume HTML 174 175 if file_ext == "": 176 ofile += ".html" 177 178 ofilename = os.path.join(downloads_outputdir,ofile) 179 180 # Do the actual downloading, and saving to file 181 response = requests.get(url) 182 open(ofilename, "wb").write(response.content) 183 184 # # The follow works for HTML or binary data 185 # with urlopen(url) as fin: 186 # data = fin.read() 187 188 # with open(ofilename, 'wb') as fout: 189 # fout.write(data) 190 191 192 # # Download the file from `url` and save it locally under `ofilename`: 193 # with urllib.request.urlopen(url) as response, open(ofilename, 'wb') as out_file: 194 # data = response.read() # a `bytes` object 195 # out_file.write(data) 196 197 198 return ofilename 176 199 177 200 178 201 def downloadGoogleDoc(url): 179 180 202 181 203 print("Downloading Google Doc url:") … … 196 218 197 219 os.rename(downloaded_filename,dl_identifier_filename) 198 220 221 return dl_identifier_filename 199 222 200 223 … … 222 245 223 246 if opt_hyperlink != None: 247 224 248 opt_hyperlink_colpos = HeadingsTransformation["OptHyperlink"]["colpos"] 225 249 doc_url = opt_hyperlink.target 226 250 worksheet.cell(row=row_i, column=opt_hyperlink_colpos, value=doc_url) 227 228 downloaded_filename = downloadGoogleDoc(doc_url) 229 251 252 if (doc_url.startswith("https://drive.google.com") or doc_url.startswith("https://docs.google.com")): 253 downloaded_filename = downloadGoogleDoc(doc_url) 254 255 else: 256 # If no filename extension, then assumes HTML, and adds this on as the 
file extension 257 downloaded_filename = downloadURL(doc_url) 258 230 259 orig_file = os.path.basename(downloaded_filename) 231 260 orig_filename_colpos = HeadingsTransformation["OrigFilename"]["colpos"] 232 261 worksheet.cell(row=row_i, column=orig_filename_colpos, value=orig_file) 233 262 234 fileRenameToDLIdentifier(downloaded_filename,dl_identifier_str) 263 renamed_filename = fileRenameToDLIdentifier(downloaded_filename,dl_identifier_str) 264 265 filename_colpos = HeadingsTransformation["Filename"]["colpos"] 266 worksheet.cell(row=row_i, column=filename_colpos, value=renamed_filename) 267 235 268 236 269 dl_identifier += 1
Note: See TracChangeset for help on using the changeset viewer.