Ignore:
Timestamp:
2023-09-10T17:13:54+12:00 (9 months ago)
Author:
davidb
Message:

Changes after the latest version of the Google spreadsheet was passed on

Location:
gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/prepare
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/prepare/02-XLSX-TO-GSFRIENDLY-CSV.sh

    r37726 r38072  
    11#!/bin/bash
    22
    3 echo ""
    4 echo "Away to delete: downloads/*"
    5 echo ""
     3echo "----"
     4echo "- Away to delete: downloads/*"
     5echo "----"
    66
    77echo "[Continuing in 5 seconds ... press enter to continue immediately]"
     
    1414echo "[... finished sleep]"
    1515
    16 echo "Removing: downloads/*"
     16echo ""
     17echo "**** Removing: downloads/*"
    1718/bin/rm -rf downloads
    1819mkdir downloads
    1920
     21echo ""
     22echo "Running xlsx-to-csv--thewillow-directorysheet.py"
     23#echo "With a transcript of stdout and stderr saved to: 02-output.log"
    2024
    2125python3 xlsx-to-csv--thewillow-directorysheet.py
     26
     27# ??? >02-output.log 2>&1 &
     28
     29# ????? 2>&1 | tee 02-output.log
  • gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/prepare/xlsx-to-csv--thewillow-directorysheet.py

    r37831 r38072  
    3030    "Ref. No."          : { "gsheading": "RefNum"         },
    3131    "Title"             : { "gsheading": "Title"          },
    32     "Creator"           : { "gsheading": "Creator"        },
     32    "Document ID"       : { "gsheading": "WCADocumentID"  },
     33    "Contributor"       : { "gsheading": "Creator"        },
    3334    "Description"       : { "gsheading": "Description"    },
    3435    "Creation Date"     : { "gsheading": "CreationDate"   },
     
    212213    download_filename = gdown.download(url, output=downloads_outputdir, fuzzy=True, quiet=True)
    213214
    214     print("Downloaded filename:", download_filename)
     215    if download_filename != None:
     216        print("Downloaded filename:", download_filename)
    215217
    216218    return download_filename
     
    319321            downloaded_filename = downloadGoogleDoc(doc_url)
    320322
    321             orig_file = os.path.basename(downloaded_filename)
    322 
    323             # Determine if this will be the primary (doc) file, or an assoicated file
    324             orig_filename_colpos = HeadingsTransformation["OrigFilename"]["colpos"]
    325             orig_filename_value = worksheet.cell(row=row_i, column=orig_filename_colpos).value
    326 
    327             downloaded_filename = optConvertDocToPdf(downloaded_filename)
    328            
    329             if orig_filename_value is None or orig_filename_value == "":
    330                 # Make this the primary file/doc
    331                 worksheet.cell(row=row_i, column=orig_filename_colpos, value=orig_file)
    332 
    333                 renamed_filename = fileRenameToDLIdentifier(downloaded_filename,dl_identifier_str)
    334 
    335                 filename_colpos = HeadingsTransformation["Filename"]["colpos"]
    336                 worksheet.cell(row=row_i, column=filename_colpos, value=renamed_filename)               
     323            if downloaded_filename == None:
     324                print("====", file=sys.stderr)
     325                print("= Warning: Failed to download URL:", file=sys.stderr)
     326                print("=   " + doc_url, file=sys.stderr)
     327                print("= Processing row " + str(row_i) + " as a document with only metadata", file=sys.stderr)
     328                print("====", file=sys.stderr)
     329
    337330            else:
    338                 orig_drivename_colpos = HeadingsTransformation["OrigDrivename"]["colpos"]
     331                orig_file = os.path.basename(downloaded_filename)
     332
     333                # Determine if this will be the primary (doc) file, or an associated file
     334                orig_filename_colpos = HeadingsTransformation["OrigFilename"]["colpos"]
     335                orig_filename_value = worksheet.cell(row=row_i, column=orig_filename_colpos).value
     336
     337                downloaded_filename = optConvertDocToPdf(downloaded_filename)
     338           
     339                if orig_filename_value is None or orig_filename_value == "":
     340                    # Make this the primary file/doc
     341                    worksheet.cell(row=row_i, column=orig_filename_colpos, value=orig_file)
     342                   
     343                    renamed_filename = fileRenameToDLIdentifier(downloaded_filename,dl_identifier_str)
     344                   
     345                    filename_colpos = HeadingsTransformation["Filename"]["colpos"]
     346                    worksheet.cell(row=row_i, column=filename_colpos, value=renamed_filename)               
     347                else:
     348                    orig_drivename_colpos = HeadingsTransformation["OrigDrivename"]["colpos"]
    339349               
    340                 worksheet.cell(row=row_i, column=orig_drivename_colpos, value=orig_file)
    341 
    342                 dl_identifier_assocwith_str = "tw-contrib-%04d-assocwith" % dl_identifier
     350                    worksheet.cell(row=row_i, column=orig_drivename_colpos, value=orig_file)
     351                   
     352                    dl_identifier_assocwith_str = "tw-contrib-%04d-assocwith" % dl_identifier
    343353               
    344                 renamed_filename = fileRenameToDLIdentifier(downloaded_filename,dl_identifier_assocwith_str)
    345 
    346                 # We store this in the spreasheet mostly for auditing purposes
    347                 # The actual 'assocwith' DL behaviour is triggered by the plugin option,
    348                 # driven by a regular expression matching on the two filenames
    349                 filename_assocwith_colpos = HeadingsTransformation["FilenameAssocwith"]["colpos"]
    350                 worksheet.cell(row=row_i, column=filename_assocwith_colpos, value=renamed_filename)               
     354                    renamed_filename = fileRenameToDLIdentifier(downloaded_filename,dl_identifier_assocwith_str)
     355
     356                    # We store this in the spreadsheet mostly for auditing purposes
     357                    # The actual 'assocwith' DL behaviour is triggered by the plugin option,
     358                    # driven by a regular expression matching on the two filenames
     359                    filename_assocwith_colpos = HeadingsTransformation["FilenameAssocwith"]["colpos"]
     360                    worksheet.cell(row=row_i, column=filename_assocwith_colpos, value=renamed_filename)               
    351361
    352362        dl_identifier += 1
Note: See TracChangeset for help on using the changeset viewer.