Context Navigation

← Previous Changeset
Next Changeset →

Changeset 37646

Timestamp:

2023-04-11T00:54:55+12:00 (13 months ago)

Author:

davidb

Message:

Further coding improvements

File:

: 1 edited

gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/prepare/xlsx-to-csv--thewillow-directorysheet.py (modified) (6 diffs)

Legend:

: Unmodified
: Added
: Removed

gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/prepare/xlsx-to-csv--thewillow-directorysheet.py

-              r37645
+              r37646
 import gdown
 import openpyxl
+import urllib.request
+#import urllib.request
+# from urllib.request import urlopen
+import requests
 #import argparse
 …
 downloads_outputdir = "downloads/"
+#import ssl
+#ssl._create_default_https_context = ssl._create_unverified_context
 …
         else:
             col_pos = extra_heading_rec["colpos"]
+            index_pos = col_pos - 1;
+            worksheet.insert_cols(idx=index_pos)
+            worksheet.insert_cols(col_pos,1)
             worksheet.cell(row=1,column=col_pos).value = extra_heading
 …
+def downloadURLDIY(url,ofilename):
+    print("*** downloadURLDIY() -- currently untested")
+    # Based on:
+    #    https://stackoverflow.com/questions/7243750/download-file-from-web-in-python-3
+    # Download the file from `url` and save it locally under `file_name`:
+    with urllib.request.urlopen(url) as response, open(ofilename, 'wb') as out_file:
+        data = response.read() # a `bytes` object
+        # Assume we're working with raw bytes that are UTF-8
+        # If not, then need to decode data
+        # Something along the lines of ...
+        # text = data.decode('utf-8') # a `str`; this step can't be used if data is binary
+        out_file.write(data)
+def downloadURL(url):
+    # Based on 'requests' example on
+    #    https://www.codingem.com/python-download-file-from-url/
+    print("Downloading url:")
+    print("  '" + url +"'")
+    ofile = url.rsplit('/', 1)[-1] # everything after the last '/'
+    file_ext = os.path.splitext(ofile)[1]
+    # Looking for a simple file extension to the URL, otherwise assume HTML
+    if file_ext == "":
+        ofile += ".html"
+    ofilename = os.path.join(downloads_outputdir,ofile)
+    # Do the actual downloading, and saving to file
+    response = requests.get(url)
+    open(ofilename, "wb").write(response.content)
+#    # The follow works for HTML or binary data
+#    with urlopen(url) as fin:
+#        data = fin.read()
+#    with open(ofilename, 'wb') as fout:
+#        fout.write(data)
+#    # Download the file from `url` and save it locally under `ofilename`:
+#    with urllib.request.urlopen(url) as response, open(ofilename, 'wb') as out_file:
+#       data = response.read() # a `bytes` object
+#        out_file.write(data)
+    return ofilename
 def downloadGoogleDoc(url):
     print("Downloading Google Doc url:")
 …
     os.rename(downloaded_filename,dl_identifier_filename)
+    return dl_identifier_filename
 …
         if opt_hyperlink != None:
             opt_hyperlink_colpos = HeadingsTransformation["OptHyperlink"]["colpos"]
             doc_url = opt_hyperlink.target
             worksheet.cell(row=row_i, column=opt_hyperlink_colpos, value=doc_url)
+            downloaded_filename = downloadGoogleDoc(doc_url)
+            if (doc_url.startswith("https://drive.google.com") or doc_url.startswith("https://docs.google.com")):
+                downloaded_filename = downloadGoogleDoc(doc_url)
+            else:
+                # If no filename extension, then assumes HTML, and adds this on as the file extension
+                downloaded_filename = downloadURL(doc_url)
             orig_file = os.path.basename(downloaded_filename)
             orig_filename_colpos = HeadingsTransformation["OrigFilename"]["colpos"]
             worksheet.cell(row=row_i, column=orig_filename_colpos, value=orig_file)
+            fileRenameToDLIdentifier(downloaded_filename,dl_identifier_str)
+            renamed_filename = fileRenameToDLIdentifier(downloaded_filename,dl_identifier_str)
+            filename_colpos = HeadingsTransformation["Filename"]["colpos"]
+            worksheet.cell(row=row_i, column=filename_colpos, value=renamed_filename)
         dl_identifier += 1

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 37646

Legend:

gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/prepare/xlsx-to-csv--thewillow-directorysheet.py

Download in other formats: