source: gs3-installations/intermuse/trunk/sites/intermuse/collect/performers/prepare/xlsx-to-csv.py

Last change on this file was 37220, checked in by davidb, 15 months ago

Message about openpyxl now handled in bash script

  • Property svn:executable set to *
File size: 3.2 KB
Line 
1#!/usr/bin/env python3
2
3import csv
4import datetime
5import openpyxl
6import re
7
8
9
10xlsx_ifilename="HMS-Catalogue.xlsx"
11csv_ofilename ="HMS-Catalogue.csv"
12
13
14workbook = openpyxl.load_workbook(xlsx_ifilename)
15worksheet = workbook['Table']
16
17
18# Useful example
19# https://stackoverflow.com/questions/35597346/python-3-and-openpyxl-formatting-a-list-of-dates-when-writing-to-a-file
20
21
22# No,Date,S'n/Conc,Artist,Composer,Work,Venue,Attendance,Review,Notes
23IMID = 1
24Date = 2
25ProgAndPage = 3
26Artist = 4
27Composer = 5
28Work = 6
29Venue = 7
30Attendance = 8
31Review = 9
32Notes = 10
33
34GSFriendlyID = 11
35GSFriendlyDate = 12
36GSFriendlyProg = 13
37GSFriendlyPage = 14
38GSFriendlyArtist = 15
39
40
41worksheet.cell(row=1,column=ProgAndPage, value="ProgAndPage")
42worksheet.cell(row=1,column=GSFriendlyID, value="GSFriendlyID")
43worksheet.cell(row=1,column=GSFriendlyDate, value="GSFriendlyDate")
44worksheet.cell(row=1,column=GSFriendlyProg, value="GSFriendlyProg")
45worksheet.cell(row=1,column=GSFriendlyPage, value="GSFriendlyPage")
46worksheet.cell(row=1,column=GSFriendlyArtist, value="GSFriendlyArtist")
47
48
49num_rows = worksheet.max_row;
50
51# Tweak the data
52
53# (skip the first row with headers)
54row_i = 2
55
56print("Converting Excel spreadsheet to CSV (+ Greenstone friendly fields)")
57
58while row_i<=num_rows:
59
60 print(" Processing row: " + str(row_i))
61
62 #
63 # Massage Intermuse ID
64 #
65
66 id_cell = worksheet.cell(row=row_i,column=IMID)
67 id_value = id_cell.value
68 gs_id_value_str = "D"+id_value
69
70 worksheet.cell(row=row_i,column=GSFriendlyID, value=gs_id_value_str)
71
72 #
73 # Massage Date
74 #
75 date_cell = worksheet.cell(row=row_i,column=Date)
76 date_value = date_cell.value
77
78 #print(repr(date_value))
79
80 date_str = date_value.strftime('%d-%m-%Y')
81 gs_date_str = date_value.strftime('%Y%m%d')
82
83 worksheet.cell(row=row_i,column=Date, value=date_str)
84 worksheet.cell(row=row_i,column=GSFriendlyDate, value=gs_date_str)
85
86
87 #
88 # Split ProgAndPage into two separate parts
89 #
90 progandpage_cell = worksheet.cell(row=row_i,column=ProgAndPage)
91 progandpage_value = progandpage_cell.value
92 prog_page_array = progandpage_value.split("/")
93 if (len(prog_page_array) == 2):
94 [prog_value,page_value] = prog_page_array;
95
96 worksheet.cell(row=row_i,column=GSFriendlyProg, value=prog_value)
97 worksheet.cell(row=row_i,column=GSFriendlyPage, value=page_value)
98
99 #
100 # Massage Artist
101 #
102 artist_cell = worksheet.cell(row=row_i,column=Artist)
103 artist_value = artist_cell.value
104
105 gs_artist_value = re.sub("\s+\(.*?\)","",artist_value)
106
107 worksheet.cell(row=row_i,column=GSFriendlyArtist, value=gs_artist_value)
108
109 row_i = row_i + 1
110
111
112# print(repr(date_value))
113
114# formated_date_value=date_value.strftime("%Y%m/%d")
115# print(formatted_date_value)
116
117# date_value_without_time = date_value.split(" ",1)[0]
118# date_value_without_hypens = date_value_without_time.replace('-','')
119
120# print(date_value_without_time)
121
122
123
124
125ob = csv.writer(open(csv_ofilename,'w', newline = ""))
126
127for r in worksheet.rows:
128 row = [a.value for a in r]
129 ob.writerow(row)
Note: See TracBrowser for help on using the repository browser.