#!/usr/bin/env python3 import csv import datetime import openpyxl import re print("") print("***** don't forget to:") print(" pip install openpyxl") print("") xlsx_ifilename="HMS-Catalogue.xlsx" csv_ofilename ="HMS-Catalogue.csv" workbook = openpyxl.load_workbook(xlsx_ifilename) worksheet = workbook['Table'] # Useful example # https://stackoverflow.com/questions/35597346/python-3-and-openpyxl-formatting-a-list-of-dates-when-writing-to-a-file # No,Date,S'n/Conc,Artist,Composer,Work,Venue,Attendance,Review,Notes IMID = 1 Date = 2 ProgAndPage = 3 Artist = 4 Composer = 5 Work = 6 Venue = 7 Attendance = 8 Review = 9 Notes = 10 GSFriendlyID = 11 GSFriendlyDate = 12 GSFriendlyProg = 13 GSFriendlyPage = 14 GSFriendlyArtist = 15 worksheet.cell(row=1,column=ProgAndPage, value="ProgAndPage") worksheet.cell(row=1,column=GSFriendlyDate, value="GSFriendlyDate") worksheet.cell(row=1,column=GSFriendlyProg, value="GSFriendlyProg") worksheet.cell(row=1,column=GSFriendlyPage, value="GSFriendlyPage") worksheet.cell(row=1,column=GSFriendlyArtist, value="GSFriendlyArtist") num_rows = worksheet.max_row; # Tweak the data # (skip the first row with headers) row_i = 2 print("Converting Excel spreadsheet to CSV (+ Greenstone friendly fields)") while row_i<=num_rows: print(" Processing row: " + str(row_i)) # # Massage Intermuse ID # id_cell = worksheet.cell(row=row_i,column=IMID) id_value = id_cell.value gs_id_value_str = "D"+id_value worksheet.cell(row=row_i,column=GSFriendlyID, value=gs_id_value_str) # # Massage Date # date_cell = worksheet.cell(row=row_i,column=Date) date_value = date_cell.value #print(repr(date_value)) date_str = date_value.strftime('%d-%m-%Y') gs_date_str = date_value.strftime('%Y%m%d') worksheet.cell(row=row_i,column=Date, value=date_str) worksheet.cell(row=row_i,column=GSFriendlyDate, value=gs_date_str) # # Split ProgAndPage into two separate parts # progandpage_cell = worksheet.cell(row=row_i,column=ProgAndPage) progandpage_value = progandpage_cell.value prog_page_array = progandpage_value.split("/") if (len(prog_page_array) == 2): [prog_value,page_value] = prog_page_array; worksheet.cell(row=row_i,column=GSFriendlyProg, value=prog_value) worksheet.cell(row=row_i,column=GSFriendlyPage, value=page_value) # # Massage Artist # artist_cell = worksheet.cell(row=row_i,column=Artist) artist_value = artist_cell.value gs_artist_value = re.sub("\s+\(.*?\)","",artist_value) worksheet.cell(row=row_i,column=GSFriendlyArtist, value=gs_artist_value) row_i = row_i + 1 # print(repr(date_value)) # formated_date_value=date_value.strftime("%Y%m/%d") # print(formatted_date_value) # date_value_without_time = date_value.split(" ",1)[0] # date_value_without_hypens = date_value_without_time.replace('-','') # print(date_value_without_time) ob = csv.writer(open(csv_ofilename,'w', newline = "")) for r in worksheet.rows: row = [a.value for a in r] ob.writerow(row)