source: gs3-installations/intermuse/trunk/sites/intermuse/collect/performers/prepare/xlsx-to-csv.py@ 37070

Last change on this file since 37070 was 37070, checked in by davidb, 16 months ago

Converts a MS native XSLX file to CSV, also taking the time to fix up some of the values, and introduce new columsn, helpfuo to GS3

  • Property svn:executable set to *
File size: 3.2 KB
Line 
1#!/usr/bin/env python3
2
3import csv
4import datetime
5import openpyxl
6import re
7
8
9print("")
10print("***** don't forget to:")
11print(" pip install openpyxl")
12print("")
13
14xlsx_ifilename="HMS-Catalogue.xlsx"
15csv_ofilename ="HMS-Catalogue.csv"
16
17
18workbook = openpyxl.load_workbook(xlsx_ifilename)
19worksheet = workbook['Table']
20
21
22# Useful example
23# https://stackoverflow.com/questions/35597346/python-3-and-openpyxl-formatting-a-list-of-dates-when-writing-to-a-file
24
25
26# No,Date,S'n/Conc,Artist,Composer,Work,Venue,Attendance,Review,Notes
27IMID = 1
28Date = 2
29ProgAndPage = 3
30Artist = 4
31Composer = 5
32Work = 6
33Venue = 7
34Attendance = 8
35Review = 9
36Notes = 10
37
38GSFriendlyID = 11
39GSFriendlyDate = 12
40GSFriendlyProg = 13
41GSFriendlyPage = 14
42GSFriendlyArtist = 15
43
44
45worksheet.cell(row=1,column=ProgAndPage, value="ProgAndPage")
46worksheet.cell(row=1,column=GSFriendlyDate, value="GSFriendlyDate")
47worksheet.cell(row=1,column=GSFriendlyProg, value="GSFriendlyProg")
48worksheet.cell(row=1,column=GSFriendlyPage, value="GSFriendlyPage")
49worksheet.cell(row=1,column=GSFriendlyArtist, value="GSFriendlyArtist")
50
51
52num_rows = worksheet.max_row;
53
54# Tweak the data
55
56# (skip the first row with headers)
57row_i = 2
58
59print("Converting Excel spreadsheet to CSV (+ Greenstone friendly fields)")
60
61while row_i<=num_rows:
62
63 print(" Processing row: " + str(row_i))
64
65 #
66 # Massage Intermuse ID
67 #
68
69 id_cell = worksheet.cell(row=row_i,column=IMID)
70 id_value = id_cell.value
71 gs_id_value_str = "D"+id_value
72
73 worksheet.cell(row=row_i,column=GSFriendlyID, value=gs_id_value_str)
74
75 #
76 # Massage Date
77 #
78 date_cell = worksheet.cell(row=row_i,column=Date)
79 date_value = date_cell.value
80
81 #print(repr(date_value))
82
83 date_str = date_value.strftime('%d-%m-%Y')
84 gs_date_str = date_value.strftime('%Y%m%d')
85
86 worksheet.cell(row=row_i,column=Date, value=date_str)
87 worksheet.cell(row=row_i,column=GSFriendlyDate, value=gs_date_str)
88
89
90 #
91 # Split ProgAndPage into two separate parts
92 #
93 progandpage_cell = worksheet.cell(row=row_i,column=ProgAndPage)
94 progandpage_value = progandpage_cell.value
95 prog_page_array = progandpage_value.split("/")
96 if (len(prog_page_array) == 2):
97 [prog_value,page_value] = prog_page_array;
98
99 worksheet.cell(row=row_i,column=GSFriendlyProg, value=prog_value)
100 worksheet.cell(row=row_i,column=GSFriendlyPage, value=page_value)
101
102 #
103 # Massage Artist
104 #
105 artist_cell = worksheet.cell(row=row_i,column=Artist)
106 artist_value = artist_cell.value
107
108 gs_artist_value = re.sub("\s+\(.*?\)","",artist_value)
109
110 worksheet.cell(row=row_i,column=GSFriendlyArtist, value=gs_artist_value)
111
112 row_i = row_i + 1
113
114
115# print(repr(date_value))
116
117# formated_date_value=date_value.strftime("%Y%m/%d")
118# print(formatted_date_value)
119
120# date_value_without_time = date_value.split(" ",1)[0]
121# date_value_without_hypens = date_value_without_time.replace('-','')
122
123# print(date_value_without_time)
124
125
126
127
128ob = csv.writer(open(csv_ofilename,'w', newline = ""))
129
130for r in worksheet.rows:
131 row = [a.value for a in r]
132 ob.writerow(row)
Note: See TracBrowser for help on using the repository browser.