Changeset 38313 for gs3-installations/thewillow
- Timestamp:
- 2023-10-15T23:58:43+13:00 (8 months ago)
- Location:
- gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/prepare
- Files:
- 1 added
- 2 edited
Legend:
- Unmodified
- Added
- Removed
gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/prepare/02-XLSX-TO-GSFRIENDLY-CSV.sh
r38072 r38313 9 9 echo "****" 10 10 echo "!!!! Press ^C to abort !!!!" 11 echo "Delete?: [Y/n]" 11 12 echo "****" 12 read -p "" -t 5 13 read -p "" -t 5 do_delete 13 14 14 15 echo "[... finished sleep]" 15 16 16 17 echo "" 17 echo "**** Removing: downloads/*" 18 /bin/rm -rf downloads 19 mkdir downloads 18 19 if [ "x$do_delete" = "x" ] || [ "$do_delete" = "y" ] || [ "$do_delete" = "Y" ] ; then 20 echo "**** Removing: downloads/*" 21 /bin/rm -rf downloads 22 mkdir downloads 23 else 24 echo "Prompt response: No" 25 echo "Skipping delete of: downloads/*" 26 fi 20 27 21 28 echo "" -
gs3-installations/thewillow/trunk/sites/thewillow/collect/community-contributions/prepare/xlsx-to-csv--thewillow-directorysheet.py
r38072 r38313 61 61 62 62 63 ObjectTypeTransformation = { 64 "Audio Recording" : "Audio", 65 "Interview Transcription" : "Interview", 66 "Newspaper Article" : "Newspaper", 67 "Newspaper Article (Microfilm)" : "Newspaper", 68 "Online Article" : "Article", 69 "Photograph" : "Photograph", 70 "Written Recollection" : "Memories" 71 } 72 73 63 74 64 75 def trimLastRow(worksheet): … … 161 172 worksheet.cell(row=1,column=colpos, value=gs_heading) 162 173 174 def transformObjectTypeValues(worksheet, num_rows,num_cols): 175 176 # Skip header 177 row_i = 2 178 179 object_type_col_positions = [ 180 "Object Type 1", 181 "Object Type 2", 182 "Object Type 3" 183 ] 184 185 while row_i<=num_rows: 186 187 for object_type_heading in object_type_col_positions: 188 189 object_type_colpos = HeadingsTransformation[object_type_heading]["colpos"] 190 191 cell_val = worksheet.cell(row=row_i,column=object_type_colpos).value 192 193 if cell_val is not None: 194 gs_cell_val = ObjectTypeTransformation[cell_val.strip()] 195 if gs_cell_val is not None: 196 worksheet.cell(row=row_i,column=object_type_colpos, value=gs_cell_val) 197 198 row_i += 1 163 199 164 200 … … 185 221 ofilename = os.path.join(downloads_outputdir,ofile) 186 222 187 # Do the actual downloading, and saving to file 188 response = requests.get(url) 189 open(ofilename, "wb").write(response.content) 190 191 # # The follow works for HTML or binary data 192 # with urlopen(url) as fin: 193 # data = fin.read() 194 195 # with open(ofilename, 'wb') as fout: 196 # fout.write(data) 223 if os.path.exists(ofilename): 224 print(" Skipping as file already downloaded") 225 else: 226 # Do the actual downloading, and saving to file 227 response = requests.get(url) 228 open(ofilename, "wb").write(response.content) 229 230 # # The follow works for HTML or binary data 231 # with urlopen(url) as fin: 232 # data = fin.read() 233 234 # with open(ofilename, 'wb') as fout: 235 # fout.write(data) 197 236 198 237 … … 458 497 
transformHeadings(worksheet,num_cols) 459 498 499 print(" Transforming Excel spreadsheet 'Object Type' values to Greenstone friendly ones (for indexing)") 500 transformObjectTypeValues(worksheet,num_rows,num_cols) 501 460 502 print(" Processing values") 461 503
Note:
See TracChangeset
for help on using the changeset viewer.