1 | #!/usr/bin/env python3
|
---|
2 |
|
---|
3 | import os
|
---|
4 | import sys
|
---|
5 |
|
---|
6 | import csv
|
---|
7 | import openpyxl
|
---|
8 | import re
|
---|
9 |
|
---|
10 | import os
|
---|
11 | import re
|
---|
12 | import sys
|
---|
13 |
|
---|
14 | #import argparse
|
---|
15 |
|
---|
16 |
|
---|
17 |
|
---|
18 |
|
---|
19 |
|
---|
20 | argc=len(sys.argv)
|
---|
21 | progname= sys.argv[0]
|
---|
22 |
|
---|
23 | if argc == 1 or argc == 2:
|
---|
24 | print("",file=sys.stderr)
|
---|
25 | print("Usage: ", file=sys.stderr)
|
---|
26 | print(" " + progname + " sheet-name metamaster.xslx [metamaster.csv]",file=sys.stderr)
|
---|
27 | print("",file=sys.stderr)
|
---|
28 | print("For example:",file=sys.stderr)
|
---|
29 | print(" " + progname + " Image Metamaster.xlsx",file=sys.stderr)
|
---|
30 | print("",file=sys.stderr)
|
---|
31 | sys.exit(1)
|
---|
32 |
|
---|
33 | sheet_name = sys.argv[1]
|
---|
34 | xlsx_ifilename = sys.argv[2]
|
---|
35 |
|
---|
36 | if argc == 3:
|
---|
37 | csv_ofilename= os.path.splitext(xlsx_ifilename)[0]+'.csv'
|
---|
38 | else:
|
---|
39 | csv_ofilename = sys.argv[2]
|
---|
40 |
|
---|
41 |
|
---|
42 | if os.path.exists(xlsx_ifilename) and os.path.exists(csv_ofilename):
|
---|
43 |
|
---|
44 | diff_time = os.stat(csv_ofilename).st_mtime - os.stat(xlsx_ifilename).st_mtime
|
---|
45 | if diff_time >= 0:
|
---|
46 | print("CSV file newer than XSLX file => no need to convert")
|
---|
47 | sys.exit(0)
|
---|
48 |
|
---|
49 | workbook = openpyxl.load_workbook(xlsx_ifilename)
|
---|
50 | worksheet = workbook[sheet_name]
|
---|
51 |
|
---|
52 |
|
---|
53 | # No,Date,S'n/Conc,Artist,Composer,Work,Venue,Attendance,Review,Notes
|
---|
54 | IMID = 1
|
---|
55 | Date = 2
|
---|
56 | ProgAndPage = 3
|
---|
57 | Artist = 4
|
---|
58 | Composer = 5
|
---|
59 | Work = 6
|
---|
60 | Venue = 7
|
---|
61 | Attendance = 8
|
---|
62 | Review = 9
|
---|
63 | Notes = 10
|
---|
64 |
|
---|
65 | GSFriendlyID = 11
|
---|
66 | GSFriendlyDate = 12
|
---|
67 | GSFriendlyProg = 13
|
---|
68 | GSFriendlyPage = 14
|
---|
69 | GSFriendlyArtist = 15
|
---|
70 |
|
---|
71 |
|
---|
72 | #worksheet.cell(row=1,column=ProgAndPage, value="ProgAndPage")
|
---|
73 | #worksheet.cell(row=1,column=GSFriendlyID, value="GSFriendlyID")
|
---|
74 | #worksheet.cell(row=1,column=GSFriendlyDate, value="GSFriendlyDate")
|
---|
75 | #worksheet.cell(row=1,column=GSFriendlyProg, value="GSFriendlyProg")
|
---|
76 | #worksheet.cell(row=1,column=GSFriendlyPage, value="GSFriendlyPage")
|
---|
77 | #worksheet.cell(row=1,column=GSFriendlyArtist, value="GSFriendlyArtist")
|
---|
78 |
|
---|
79 |
|
---|
80 | num_rows = worksheet.max_row;
|
---|
81 |
|
---|
82 | # Tweak the data
|
---|
83 |
|
---|
84 | # (skip the first row with headers)
|
---|
85 | row_i = 2
|
---|
86 |
|
---|
87 | #print("Converting Excel spreadsheet to CSV (+ Greenstone friendly fields)")
|
---|
88 | print("Converting Excel spreadsheet to CSV")
|
---|
89 |
|
---|
90 | while row_i<=num_rows:
|
---|
91 |
|
---|
92 | print(" Processing row: " + str(row_i))
|
---|
93 |
|
---|
94 | #
|
---|
95 | # Massage Intermuse ID
|
---|
96 | #
|
---|
97 |
|
---|
98 | # id_cell = worksheet.cell(row=row_i,column=IMID)
|
---|
99 | # id_value = id_cell.value
|
---|
100 | # gs_id_value_str = "D"+id_value
|
---|
101 |
|
---|
102 | # worksheet.cell(row=row_i,column=GSFriendlyID, value=gs_id_value_str)
|
---|
103 |
|
---|
104 | #
|
---|
105 | # Massage Date
|
---|
106 | #
|
---|
107 | # date_cell = worksheet.cell(row=row_i,column=Date)
|
---|
108 | # date_value = date_cell.value
|
---|
109 |
|
---|
110 | #print(repr(date_value))
|
---|
111 |
|
---|
112 | # date_str = date_value.strftime('%d-%m-%Y')
|
---|
113 | # gs_date_str = date_value.strftime('%Y%m%d')
|
---|
114 |
|
---|
115 | # worksheet.cell(row=row_i,column=Date, value=date_str)
|
---|
116 | # worksheet.cell(row=row_i,column=GSFriendlyDate, value=gs_date_str)
|
---|
117 |
|
---|
118 |
|
---|
119 | #
|
---|
120 | # Split ProgAndPage into two separate parts
|
---|
121 | #
|
---|
122 | # progandpage_cell = worksheet.cell(row=row_i,column=ProgAndPage)
|
---|
123 | # progandpage_value = progandpage_cell.value
|
---|
124 | # prog_page_array = progandpage_value.split("/")
|
---|
125 | # if (len(prog_page_array) == 2):
|
---|
126 | # [prog_value,page_value] = prog_page_array;
|
---|
127 |
|
---|
128 | # worksheet.cell(row=row_i,column=GSFriendlyProg, value=prog_value)
|
---|
129 | # worksheet.cell(row=row_i,column=GSFriendlyPage, value=page_value)
|
---|
130 |
|
---|
131 | #
|
---|
132 | # Massage Artist
|
---|
133 | #
|
---|
134 | # artist_cell = worksheet.cell(row=row_i,column=Artist)
|
---|
135 | # artist_value = artist_cell.value
|
---|
136 |
|
---|
137 | # gs_artist_value = re.sub("\s+\(.*?\)","",artist_value)
|
---|
138 |
|
---|
139 | # worksheet.cell(row=row_i,column=GSFriendlyArtist, value=gs_artist_value)
|
---|
140 |
|
---|
141 | row_i = row_i + 1
|
---|
142 |
|
---|
143 |
|
---|
144 |
|
---|
145 |
|
---|
146 |
|
---|
147 | ob = csv.writer(open(csv_ofilename,'w', newline = ""))
|
---|
148 |
|
---|
149 | for r in worksheet.rows:
|
---|
150 | row = [a.value for a in r]
|
---|
151 | ob.writerow(row)
|
---|
152 |
|
---|
153 |
|
---|
154 |
|
---|
155 |
|
---|
156 |
|
---|
157 |
|
---|
158 |
|
---|
159 |
|
---|
160 |
|
---|
161 |
|
---|
162 | #if __name__ == "__main__":
|
---|
163 |
|
---|
164 | # parser = argparse.ArgumentParser()
|
---|
165 | # parser.add_argument('--sheetname', help="The name of the sheet within the Excel file to extractc data from")
|
---|
166 | # parser.add_argument('--votingtype', choices=["J","T", "JT"], help="Filter to only J=Jury, T=Tele cast votes, JT=Combined jury and tele votes")
|
---|
167 | # parser.add_argument('input-file.xlsx')
|
---|
168 | # parser.add_argument('output-file.json', nargs='?')
|
---|
169 |
|
---|
170 | # args = parser.parse_args()
|
---|
171 |
|
---|
172 | # sheetname = getattr(args,'sheetname');
|
---|
173 | # voting_type = getattr(args,'votingtype');
|
---|
174 |
|
---|
175 | # excel_input_filename = getattr(args,'input-file.xlsx');
|
---|
176 | # json_output_filename = getattr(args,'output-file.json');
|
---|
177 |
|
---|
178 | # if (json_output_filename == None):
|
---|
179 | # json_output_filename = os.path.splitext(excel_input_filename)[0]+'.json'
|
---|
180 |
|
---|
181 |
|
---|