[37070] | 1 | #!/usr/bin/env python3
|
---|
| 2 |
|
---|
| 3 | import csv
|
---|
| 4 | import datetime
|
---|
| 5 | import openpyxl
|
---|
| 6 | import re
|
---|
| 7 |
|
---|
| 8 |
|
---|
| 9 | print("")
|
---|
| 10 | print("***** don't forget to:")
|
---|
| 11 | print(" pip install openpyxl")
|
---|
| 12 | print("")
|
---|
| 13 |
|
---|
| 14 | xlsx_ifilename="HMS-Catalogue.xlsx"
|
---|
| 15 | csv_ofilename ="HMS-Catalogue.csv"
|
---|
| 16 |
|
---|
| 17 |
|
---|
| 18 | workbook = openpyxl.load_workbook(xlsx_ifilename)
|
---|
| 19 | worksheet = workbook['Table']
|
---|
| 20 |
|
---|
| 21 |
|
---|
| 22 | # Useful example
|
---|
| 23 | # https://stackoverflow.com/questions/35597346/python-3-and-openpyxl-formatting-a-list-of-dates-when-writing-to-a-file
|
---|
| 24 |
|
---|
| 25 |
|
---|
| 26 | # No,Date,S'n/Conc,Artist,Composer,Work,Venue,Attendance,Review,Notes
|
---|
| 27 | IMID = 1
|
---|
| 28 | Date = 2
|
---|
| 29 | ProgAndPage = 3
|
---|
| 30 | Artist = 4
|
---|
| 31 | Composer = 5
|
---|
| 32 | Work = 6
|
---|
| 33 | Venue = 7
|
---|
| 34 | Attendance = 8
|
---|
| 35 | Review = 9
|
---|
| 36 | Notes = 10
|
---|
| 37 |
|
---|
| 38 | GSFriendlyID = 11
|
---|
| 39 | GSFriendlyDate = 12
|
---|
| 40 | GSFriendlyProg = 13
|
---|
| 41 | GSFriendlyPage = 14
|
---|
| 42 | GSFriendlyArtist = 15
|
---|
| 43 |
|
---|
| 44 |
|
---|
| 45 | worksheet.cell(row=1,column=ProgAndPage, value="ProgAndPage")
|
---|
| 46 | worksheet.cell(row=1,column=GSFriendlyDate, value="GSFriendlyDate")
|
---|
| 47 | worksheet.cell(row=1,column=GSFriendlyProg, value="GSFriendlyProg")
|
---|
| 48 | worksheet.cell(row=1,column=GSFriendlyPage, value="GSFriendlyPage")
|
---|
| 49 | worksheet.cell(row=1,column=GSFriendlyArtist, value="GSFriendlyArtist")
|
---|
| 50 |
|
---|
| 51 |
|
---|
| 52 | num_rows = worksheet.max_row;
|
---|
| 53 |
|
---|
| 54 | # Tweak the data
|
---|
| 55 |
|
---|
| 56 | # (skip the first row with headers)
|
---|
| 57 | row_i = 2
|
---|
| 58 |
|
---|
| 59 | print("Converting Excel spreadsheet to CSV (+ Greenstone friendly fields)")
|
---|
| 60 |
|
---|
| 61 | while row_i<=num_rows:
|
---|
| 62 |
|
---|
| 63 | print(" Processing row: " + str(row_i))
|
---|
| 64 |
|
---|
| 65 | #
|
---|
| 66 | # Massage Intermuse ID
|
---|
| 67 | #
|
---|
| 68 |
|
---|
| 69 | id_cell = worksheet.cell(row=row_i,column=IMID)
|
---|
| 70 | id_value = id_cell.value
|
---|
| 71 | gs_id_value_str = "D"+id_value
|
---|
| 72 |
|
---|
| 73 | worksheet.cell(row=row_i,column=GSFriendlyID, value=gs_id_value_str)
|
---|
| 74 |
|
---|
| 75 | #
|
---|
| 76 | # Massage Date
|
---|
| 77 | #
|
---|
| 78 | date_cell = worksheet.cell(row=row_i,column=Date)
|
---|
| 79 | date_value = date_cell.value
|
---|
| 80 |
|
---|
| 81 | #print(repr(date_value))
|
---|
| 82 |
|
---|
| 83 | date_str = date_value.strftime('%d-%m-%Y')
|
---|
| 84 | gs_date_str = date_value.strftime('%Y%m%d')
|
---|
| 85 |
|
---|
| 86 | worksheet.cell(row=row_i,column=Date, value=date_str)
|
---|
| 87 | worksheet.cell(row=row_i,column=GSFriendlyDate, value=gs_date_str)
|
---|
| 88 |
|
---|
| 89 |
|
---|
| 90 | #
|
---|
| 91 | # Split ProgAndPage into two separate parts
|
---|
| 92 | #
|
---|
| 93 | progandpage_cell = worksheet.cell(row=row_i,column=ProgAndPage)
|
---|
| 94 | progandpage_value = progandpage_cell.value
|
---|
| 95 | prog_page_array = progandpage_value.split("/")
|
---|
| 96 | if (len(prog_page_array) == 2):
|
---|
| 97 | [prog_value,page_value] = prog_page_array;
|
---|
| 98 |
|
---|
| 99 | worksheet.cell(row=row_i,column=GSFriendlyProg, value=prog_value)
|
---|
| 100 | worksheet.cell(row=row_i,column=GSFriendlyPage, value=page_value)
|
---|
| 101 |
|
---|
| 102 | #
|
---|
| 103 | # Massage Artist
|
---|
| 104 | #
|
---|
| 105 | artist_cell = worksheet.cell(row=row_i,column=Artist)
|
---|
| 106 | artist_value = artist_cell.value
|
---|
| 107 |
|
---|
| 108 | gs_artist_value = re.sub("\s+\(.*?\)","",artist_value)
|
---|
| 109 |
|
---|
| 110 | worksheet.cell(row=row_i,column=GSFriendlyArtist, value=gs_artist_value)
|
---|
| 111 |
|
---|
| 112 | row_i = row_i + 1
|
---|
| 113 |
|
---|
| 114 |
|
---|
| 115 | # print(repr(date_value))
|
---|
| 116 |
|
---|
| 117 | # formated_date_value=date_value.strftime("%Y%m/%d")
|
---|
| 118 | # print(formatted_date_value)
|
---|
| 119 |
|
---|
| 120 | # date_value_without_time = date_value.split(" ",1)[0]
|
---|
| 121 | # date_value_without_hypens = date_value_without_time.replace('-','')
|
---|
| 122 |
|
---|
| 123 | # print(date_value_without_time)
|
---|
| 124 |
|
---|
| 125 |
|
---|
| 126 |
|
---|
| 127 |
|
---|
| 128 | ob = csv.writer(open(csv_ofilename,'w', newline = ""))
|
---|
| 129 |
|
---|
| 130 | for r in worksheet.rows:
|
---|
| 131 | row = [a.value for a in r]
|
---|
| 132 | ob.writerow(row)
|
---|