source: gs3-installations/intermuse/trunk/sites/intermuse/collect/performers/prepare/xlsx-to-csv.py@ 37123

Last change on this file since 37123 was 37123, checked in by davidb, 15 months ago

Fixed ommision in CSV output header

  • Property svn:executable set to *
File size: 3.3 KB
Line 
1#!/usr/bin/env python3
2
3import csv
4import datetime
5import openpyxl
6import re
7
8
9print("")
10print("***** don't forget to:")
11print(" pip install openpyxl")
12print("")
13
14xlsx_ifilename="HMS-Catalogue.xlsx"
15csv_ofilename ="HMS-Catalogue.csv"
16
17
18workbook = openpyxl.load_workbook(xlsx_ifilename)
19worksheet = workbook['Table']
20
21
22# Useful example
23# https://stackoverflow.com/questions/35597346/python-3-and-openpyxl-formatting-a-list-of-dates-when-writing-to-a-file
24
25
26# No,Date,S'n/Conc,Artist,Composer,Work,Venue,Attendance,Review,Notes
27IMID = 1
28Date = 2
29ProgAndPage = 3
30Artist = 4
31Composer = 5
32Work = 6
33Venue = 7
34Attendance = 8
35Review = 9
36Notes = 10
37
38GSFriendlyID = 11
39GSFriendlyDate = 12
40GSFriendlyProg = 13
41GSFriendlyPage = 14
42GSFriendlyArtist = 15
43
44
45worksheet.cell(row=1,column=ProgAndPage, value="ProgAndPage")
46worksheet.cell(row=1,column=GSFriendlyID, value="GSFriendlyID")
47worksheet.cell(row=1,column=GSFriendlyDate, value="GSFriendlyDate")
48worksheet.cell(row=1,column=GSFriendlyProg, value="GSFriendlyProg")
49worksheet.cell(row=1,column=GSFriendlyPage, value="GSFriendlyPage")
50worksheet.cell(row=1,column=GSFriendlyArtist, value="GSFriendlyArtist")
51
52
53num_rows = worksheet.max_row;
54
55# Tweak the data
56
57# (skip the first row with headers)
58row_i = 2
59
60print("Converting Excel spreadsheet to CSV (+ Greenstone friendly fields)")
61
62while row_i<=num_rows:
63
64 print(" Processing row: " + str(row_i))
65
66 #
67 # Massage Intermuse ID
68 #
69
70 id_cell = worksheet.cell(row=row_i,column=IMID)
71 id_value = id_cell.value
72 gs_id_value_str = "D"+id_value
73
74 worksheet.cell(row=row_i,column=GSFriendlyID, value=gs_id_value_str)
75
76 #
77 # Massage Date
78 #
79 date_cell = worksheet.cell(row=row_i,column=Date)
80 date_value = date_cell.value
81
82 #print(repr(date_value))
83
84 date_str = date_value.strftime('%d-%m-%Y')
85 gs_date_str = date_value.strftime('%Y%m%d')
86
87 worksheet.cell(row=row_i,column=Date, value=date_str)
88 worksheet.cell(row=row_i,column=GSFriendlyDate, value=gs_date_str)
89
90
91 #
92 # Split ProgAndPage into two separate parts
93 #
94 progandpage_cell = worksheet.cell(row=row_i,column=ProgAndPage)
95 progandpage_value = progandpage_cell.value
96 prog_page_array = progandpage_value.split("/")
97 if (len(prog_page_array) == 2):
98 [prog_value,page_value] = prog_page_array;
99
100 worksheet.cell(row=row_i,column=GSFriendlyProg, value=prog_value)
101 worksheet.cell(row=row_i,column=GSFriendlyPage, value=page_value)
102
103 #
104 # Massage Artist
105 #
106 artist_cell = worksheet.cell(row=row_i,column=Artist)
107 artist_value = artist_cell.value
108
109 gs_artist_value = re.sub("\s+\(.*?\)","",artist_value)
110
111 worksheet.cell(row=row_i,column=GSFriendlyArtist, value=gs_artist_value)
112
113 row_i = row_i + 1
114
115
116# print(repr(date_value))
117
118# formated_date_value=date_value.strftime("%Y%m/%d")
119# print(formatted_date_value)
120
121# date_value_without_time = date_value.split(" ",1)[0]
122# date_value_without_hypens = date_value_without_time.replace('-','')
123
124# print(date_value_without_time)
125
126
127
128
129ob = csv.writer(open(csv_ofilename,'w', newline = ""))
130
131for r in worksheet.rows:
132 row = [a.value for a in r]
133 ob.writerow(row)
Note: See TracBrowser for help on using the repository browser.