1 | #!/usr/bin/env python3
|
---|
2 |
|
---|
3 | import csv
|
---|
4 | import datetime
|
---|
5 | import openpyxl
|
---|
6 | import re
|
---|
7 |
|
---|
8 |
|
---|
9 | print("")
|
---|
10 | print("***** don't forget to:")
|
---|
11 | print(" pip install openpyxl")
|
---|
12 | print("")
|
---|
13 |
|
---|
14 | xlsx_ifilename="HMS-Catalogue.xlsx"
|
---|
15 | csv_ofilename ="HMS-Catalogue.csv"
|
---|
16 |
|
---|
17 |
|
---|
18 | workbook = openpyxl.load_workbook(xlsx_ifilename)
|
---|
19 | worksheet = workbook['Table']
|
---|
20 |
|
---|
21 |
|
---|
22 | # Useful example
|
---|
23 | # https://stackoverflow.com/questions/35597346/python-3-and-openpyxl-formatting-a-list-of-dates-when-writing-to-a-file
|
---|
24 |
|
---|
25 |
|
---|
26 | # No,Date,S'n/Conc,Artist,Composer,Work,Venue,Attendance,Review,Notes
|
---|
27 | IMID = 1
|
---|
28 | Date = 2
|
---|
29 | ProgAndPage = 3
|
---|
30 | Artist = 4
|
---|
31 | Composer = 5
|
---|
32 | Work = 6
|
---|
33 | Venue = 7
|
---|
34 | Attendance = 8
|
---|
35 | Review = 9
|
---|
36 | Notes = 10
|
---|
37 |
|
---|
38 | GSFriendlyID = 11
|
---|
39 | GSFriendlyDate = 12
|
---|
40 | GSFriendlyProg = 13
|
---|
41 | GSFriendlyPage = 14
|
---|
42 | GSFriendlyArtist = 15
|
---|
43 |
|
---|
44 |
|
---|
45 | worksheet.cell(row=1,column=ProgAndPage, value="ProgAndPage")
|
---|
46 | worksheet.cell(row=1,column=GSFriendlyDate, value="GSFriendlyDate")
|
---|
47 | worksheet.cell(row=1,column=GSFriendlyProg, value="GSFriendlyProg")
|
---|
48 | worksheet.cell(row=1,column=GSFriendlyPage, value="GSFriendlyPage")
|
---|
49 | worksheet.cell(row=1,column=GSFriendlyArtist, value="GSFriendlyArtist")
|
---|
50 |
|
---|
51 |
|
---|
52 | num_rows = worksheet.max_row;
|
---|
53 |
|
---|
54 | # Tweak the data
|
---|
55 |
|
---|
56 | # (skip the first row with headers)
|
---|
57 | row_i = 2
|
---|
58 |
|
---|
59 | print("Converting Excel spreadsheet to CSV (+ Greenstone friendly fields)")
|
---|
60 |
|
---|
61 | while row_i<=num_rows:
|
---|
62 |
|
---|
63 | print(" Processing row: " + str(row_i))
|
---|
64 |
|
---|
65 | #
|
---|
66 | # Massage Intermuse ID
|
---|
67 | #
|
---|
68 |
|
---|
69 | id_cell = worksheet.cell(row=row_i,column=IMID)
|
---|
70 | id_value = id_cell.value
|
---|
71 | gs_id_value_str = "D"+id_value
|
---|
72 |
|
---|
73 | worksheet.cell(row=row_i,column=GSFriendlyID, value=gs_id_value_str)
|
---|
74 |
|
---|
75 | #
|
---|
76 | # Massage Date
|
---|
77 | #
|
---|
78 | date_cell = worksheet.cell(row=row_i,column=Date)
|
---|
79 | date_value = date_cell.value
|
---|
80 |
|
---|
81 | #print(repr(date_value))
|
---|
82 |
|
---|
83 | date_str = date_value.strftime('%d-%m-%Y')
|
---|
84 | gs_date_str = date_value.strftime('%Y%m%d')
|
---|
85 |
|
---|
86 | worksheet.cell(row=row_i,column=Date, value=date_str)
|
---|
87 | worksheet.cell(row=row_i,column=GSFriendlyDate, value=gs_date_str)
|
---|
88 |
|
---|
89 |
|
---|
90 | #
|
---|
91 | # Split ProgAndPage into two separate parts
|
---|
92 | #
|
---|
93 | progandpage_cell = worksheet.cell(row=row_i,column=ProgAndPage)
|
---|
94 | progandpage_value = progandpage_cell.value
|
---|
95 | prog_page_array = progandpage_value.split("/")
|
---|
96 | if (len(prog_page_array) == 2):
|
---|
97 | [prog_value,page_value] = prog_page_array;
|
---|
98 |
|
---|
99 | worksheet.cell(row=row_i,column=GSFriendlyProg, value=prog_value)
|
---|
100 | worksheet.cell(row=row_i,column=GSFriendlyPage, value=page_value)
|
---|
101 |
|
---|
102 | #
|
---|
103 | # Massage Artist
|
---|
104 | #
|
---|
105 | artist_cell = worksheet.cell(row=row_i,column=Artist)
|
---|
106 | artist_value = artist_cell.value
|
---|
107 |
|
---|
108 | gs_artist_value = re.sub("\s+\(.*?\)","",artist_value)
|
---|
109 |
|
---|
110 | worksheet.cell(row=row_i,column=GSFriendlyArtist, value=gs_artist_value)
|
---|
111 |
|
---|
112 | row_i = row_i + 1
|
---|
113 |
|
---|
114 |
|
---|
115 | # print(repr(date_value))
|
---|
116 |
|
---|
117 | # formated_date_value=date_value.strftime("%Y%m/%d")
|
---|
118 | # print(formatted_date_value)
|
---|
119 |
|
---|
120 | # date_value_without_time = date_value.split(" ",1)[0]
|
---|
121 | # date_value_without_hypens = date_value_without_time.replace('-','')
|
---|
122 |
|
---|
123 | # print(date_value_without_time)
|
---|
124 |
|
---|
125 |
|
---|
126 |
|
---|
127 |
|
---|
128 | ob = csv.writer(open(csv_ofilename,'w', newline = ""))
|
---|
129 |
|
---|
130 | for r in worksheet.rows:
|
---|
131 | row = [a.value for a in r]
|
---|
132 | ob.writerow(row)
|
---|