Ignore:
Timestamp:
2023-10-15T23:58:43+13:00 (8 months ago)
Author:
davidb
Message:

Changes in response to running with the latest version of the Google drive sheet

Location:
gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/prepare
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/prepare/02-XLSX-TO-GSFRIENDLY-CSV.sh

    r38072 r38313  
    99echo "****"
    1010echo "!!!! Press ^C to abort !!!!"
     11echo "Delete?: [Y/n]"
    1112echo "****"
    12 read -p "" -t 5
     13read -p "" -t 5 do_delete
    1314
    1415echo "[... finished sleep]"
    1516
    1617echo ""
    17 echo "**** Removing: downloads/*"
    18 /bin/rm -rf downloads
    19 mkdir downloads
     18
     19if [ "x$do_delete" = "x" ] || [ "$do_delete" = "y" ] || [ "$do_delete" = "Y" ] ; then
     20    echo "**** Removing: downloads/*"
     21    /bin/rm -rf downloads
     22    mkdir downloads
     23else
     24    echo "Prompt response: No"
     25    echo "Skipping delete of: downloads/*"   
     26fi
    2027
    2128echo ""
  • gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/prepare/xlsx-to-csv--thewillow-directorysheet.py

    r38072 r38313  
    6161   
    6262
     63ObjectTypeTransformation = {
     64    "Audio Recording"               : "Audio",
     65    "Interview Transcription"       : "Interview",
     66    "Newspaper Article"             : "Newspaper",
     67    "Newspaper Article (Microfilm)" : "Newspaper",
     68    "Online Article"                : "Article",
     69    "Photograph"                    : "Photograph",
     70    "Written Recollection"          : "Memories"
     71}
     72   
     73   
    6374
    6475def trimLastRow(worksheet):
     
    161172            worksheet.cell(row=1,column=colpos, value=gs_heading)
    162173
     174def transformObjectTypeValues(worksheet, num_rows,num_cols):
     175   
     176    # Skip header
     177    row_i = 2
     178
     179    object_type_col_positions = [
     180        "Object Type 1",
     181        "Object Type 2",
     182        "Object Type 3"
     183    ]
     184           
     185    while row_i<=num_rows:
     186
     187        for object_type_heading in object_type_col_positions:
     188           
     189            object_type_colpos = HeadingsTransformation[object_type_heading]["colpos"]
     190           
     191            cell_val = worksheet.cell(row=row_i,column=object_type_colpos).value
     192           
     193            if cell_val is not None:
     194                gs_cell_val = ObjectTypeTransformation[cell_val.strip()]
     195                if gs_cell_val is not None:
     196                    worksheet.cell(row=row_i,column=object_type_colpos, value=gs_cell_val)
     197
     198        row_i += 1               
    163199
    164200
     
    185221    ofilename = os.path.join(downloads_outputdir,ofile)
    186222
    187     # Do the actual downloading, and saving to file
    188     response = requests.get(url)
    189     open(ofilename, "wb").write(response.content)
    190 
    191 #    # The follow works for HTML or binary data
    192 #    with urlopen(url) as fin:
    193 #        data = fin.read()
    194 
    195 #    with open(ofilename, 'wb') as fout:
    196 #        fout.write(data)
     223    if os.path.exists(ofilename):
     224        print("  Skipping as file already downloaded")
     225    else:   
     226        # Do the actual downloading, and saving to file
     227        response = requests.get(url)
     228        open(ofilename, "wb").write(response.content)
     229
     230#        # The follow works for HTML or binary data
     231#        with urlopen(url) as fin:
     232#            data = fin.read()
     233
     234#        with open(ofilename, 'wb') as fout:
     235#            fout.write(data)
    197236   
    198237   
     
    458497    transformHeadings(worksheet,num_cols)
    459498
     499    print("  Transforming Excel spreadsheet 'Object Type' values to Greenstone friendly ones (for indexing)")
     500    transformObjectTypeValues(worksheet,num_rows,num_cols)
     501
    460502    print("  Processing values")
    461503
Note: See TracChangeset for help on using the changeset viewer.