source: gs3-installations/whakatohea-dl/trunk/sites/wmtb/bin/script/metamaster-xlsx-to-csv.py@ 37416

Last change on this file since 37416 was 37416, checked in by davidb, 14 months ago

Commandline scripts designed to work with UnknownConverterPlugin

  • Property svn:executable set to *
File size: 4.5 KB
Line 
1#!/usr/bin/env python3
2
3import os
4import sys
5
6import csv
7import openpyxl
8import re
9
10import os
11import re
12import sys
13
14#import argparse
15
16
17
18
19
20argc=len(sys.argv)
21progname= sys.argv[0]
22
23if argc == 1 or argc == 2:
24 print("",file=sys.stderr)
25 print("Usage: ", file=sys.stderr)
26 print(" " + progname + " sheet-name metamaster.xslx [metamaster.csv]",file=sys.stderr)
27 print("",file=sys.stderr)
28 print("For example:",file=sys.stderr)
29 print(" " + progname + " Image Metamaster.xlsx",file=sys.stderr)
30 print("",file=sys.stderr)
31 sys.exit(1)
32
33sheet_name = sys.argv[1]
34xlsx_ifilename = sys.argv[2]
35
36if argc == 3:
37 csv_ofilename= os.path.splitext(xlsx_ifilename)[0]+'.csv'
38else:
39 csv_ofilename = sys.argv[2]
40
41
42if os.path.exists(xlsx_ifilename) and os.path.exists(csv_ofilename):
43
44 diff_time = os.stat(csv_ofilename).st_mtime - os.stat(xlsx_ifilename).st_mtime
45 if diff_time >= 0:
46 print("CSV file newer than XSLX file => no need to convert")
47 sys.exit(0)
48
49workbook = openpyxl.load_workbook(xlsx_ifilename)
50worksheet = workbook[sheet_name]
51
52
53# No,Date,S'n/Conc,Artist,Composer,Work,Venue,Attendance,Review,Notes
54IMID = 1
55Date = 2
56ProgAndPage = 3
57Artist = 4
58Composer = 5
59Work = 6
60Venue = 7
61Attendance = 8
62Review = 9
63Notes = 10
64
65GSFriendlyID = 11
66GSFriendlyDate = 12
67GSFriendlyProg = 13
68GSFriendlyPage = 14
69GSFriendlyArtist = 15
70
71
72#worksheet.cell(row=1,column=ProgAndPage, value="ProgAndPage")
73#worksheet.cell(row=1,column=GSFriendlyID, value="GSFriendlyID")
74#worksheet.cell(row=1,column=GSFriendlyDate, value="GSFriendlyDate")
75#worksheet.cell(row=1,column=GSFriendlyProg, value="GSFriendlyProg")
76#worksheet.cell(row=1,column=GSFriendlyPage, value="GSFriendlyPage")
77#worksheet.cell(row=1,column=GSFriendlyArtist, value="GSFriendlyArtist")
78
79
80num_rows = worksheet.max_row;
81
82# Tweak the data
83
84# (skip the first row with headers)
85row_i = 2
86
87#print("Converting Excel spreadsheet to CSV (+ Greenstone friendly fields)")
88print("Converting Excel spreadsheet to CSV")
89
90while row_i<=num_rows:
91
92 print(" Processing row: " + str(row_i))
93
94 #
95 # Massage Intermuse ID
96 #
97
98# id_cell = worksheet.cell(row=row_i,column=IMID)
99# id_value = id_cell.value
100# gs_id_value_str = "D"+id_value
101
102# worksheet.cell(row=row_i,column=GSFriendlyID, value=gs_id_value_str)
103
104 #
105 # Massage Date
106 #
107# date_cell = worksheet.cell(row=row_i,column=Date)
108# date_value = date_cell.value
109
110 #print(repr(date_value))
111
112# date_str = date_value.strftime('%d-%m-%Y')
113# gs_date_str = date_value.strftime('%Y%m%d')
114
115# worksheet.cell(row=row_i,column=Date, value=date_str)
116# worksheet.cell(row=row_i,column=GSFriendlyDate, value=gs_date_str)
117
118
119 #
120 # Split ProgAndPage into two separate parts
121 #
122# progandpage_cell = worksheet.cell(row=row_i,column=ProgAndPage)
123# progandpage_value = progandpage_cell.value
124# prog_page_array = progandpage_value.split("/")
125# if (len(prog_page_array) == 2):
126# [prog_value,page_value] = prog_page_array;
127
128# worksheet.cell(row=row_i,column=GSFriendlyProg, value=prog_value)
129# worksheet.cell(row=row_i,column=GSFriendlyPage, value=page_value)
130
131 #
132 # Massage Artist
133 #
134# artist_cell = worksheet.cell(row=row_i,column=Artist)
135# artist_value = artist_cell.value
136
137# gs_artist_value = re.sub("\s+\(.*?\)","",artist_value)
138
139# worksheet.cell(row=row_i,column=GSFriendlyArtist, value=gs_artist_value)
140
141 row_i = row_i + 1
142
143
144
145
146
147ob = csv.writer(open(csv_ofilename,'w', newline = ""))
148
149for r in worksheet.rows:
150 row = [a.value for a in r]
151 ob.writerow(row)
152
153
154
155
156
157
158
159
160
161
162#if __name__ == "__main__":
163
164# parser = argparse.ArgumentParser()
165# parser.add_argument('--sheetname', help="The name of the sheet within the Excel file to extractc data from")
166# parser.add_argument('--votingtype', choices=["J","T", "JT"], help="Filter to only J=Jury, T=Tele cast votes, JT=Combined jury and tele votes")
167# parser.add_argument('input-file.xlsx')
168# parser.add_argument('output-file.json', nargs='?')
169
170# args = parser.parse_args()
171
172# sheetname = getattr(args,'sheetname');
173# voting_type = getattr(args,'votingtype');
174
175# excel_input_filename = getattr(args,'input-file.xlsx');
176# json_output_filename = getattr(args,'output-file.json');
177
178# if (json_output_filename == None):
179# json_output_filename = os.path.splitext(excel_input_filename)[0]+'.json'
180
181
Note: See TracBrowser for help on using the repository browser.