source: main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel/xlsx-to-jsonmetadata.py@ 34893

Last change on this file since 34893 was 34893, checked in by davidb, 3 years ago

Voting totals added in

  • Property svn:executable set to *
File size: 7.7 KB
Line 
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5import os
6import re
7import sys
8import json
9
10import argparse
11import openpyxl
12
13import util
14import xlsxutil
15
16jury_from_countries = {}
17tele_from_countries = {}
18
19#def fileset_voting_for_esc_country_in_year(data_hashmap):
20#
21# # Only generating a single fileset record here, however
22# # the Greenstone format allows for this to be an array
23# # of fileset entries => return [ fileset ]
24#
25# fileset = {}
26#
27# return [ fileset ]
28
29
30#def filter_finalist_votes(entry):
31# return entry.get('(semi-) final') == "f" and entry.get('Duplicate') == None
32#
33#def sortkey_year_to_country(entry):
34# return str(entry.get('Year'))+entry.get('To country')
35
36
37def create_to_country_voting_groups(data_hashmap_array):
38
39 # Example values for header-names
40 # (semi-) final: f, sf
41 # Jury or Televoting: J, T
42 # Year: 1975
43 # To country: Belgium
44 # From country: Belgium
45 # Edition: 1975f, 1975sf
46
47
48 # Filter down to just the voting results concerning finals
49 data_hashmap_array_finals = list(filter(xlsxutil.filter_finalist_votes, data_hashmap_array))
50
51 # Sort so array entries are grouped by the country receiving the votes in a given year
52 data_hashmap_array_finals.sort(key=xlsxutil.sortkey_year_to_country)
53
54
55 # Debug output
56 #
57 # for data_hashmap in data_hashmap_array_finals:
58 # print(data_hashmap.get('To country'), data_hashmap.get('Year'), data_hashmap.get('Points'), "(Points from " + data_hashmap.get('From country') + ")")
59
60
61 # Build array of country groups
62 # A country grouping includes all the votes that country receive that year
63 country_groups = []
64
65 prev_data_hashmap = data_hashmap_array_finals[0]
66 country_group = [ ]
67
68 i = 1
69 num_finals = len(data_hashmap_array_finals)
70
71 while (i < num_finals):
72 country_group.append(prev_data_hashmap)
73 data_hashmap = data_hashmap_array_finals[i]
74
75 if (data_hashmap.get('To country') != prev_data_hashmap.get('To country')):
76 # moving on to a new country group
77 country_groups.append(country_group)
78 country_group = [ ]
79
80 prev_data_hashmap = data_hashmap
81 i = i + 1
82
83 country_group.append(prev_data_hashmap)
84 country_groups.append(country_group)
85
86 return country_groups
87
88
89def fileset_voting_for_esc_country_in_year(to_country_year_votes):
90
91 # Looking to build data-structure (for output as JSON) in the form
92 # { "FileSet":
93 # [
94 # { "FileName": "France1991\.nul" },
95 # { "Description":
96 # {
97 # "Metadata":
98 # [
99 # { "name": "Germany-J", "content": 12 }, # J = Jury Vote
100 # { "name": "Germany-T", "content": 6 }, # T = Televote (if present)
101 # ...
102 # ]
103 # }
104 # }
105 # ]
106 # }
107
108 fileset_array = []
109
110 metadata_array = []
111
112 jury_metadata_vals = []
113 tele_metadata_vals = []
114
115 to_country_jury_total = 0
116 to_country_tele_total = 0
117
118 for to_country_year_vote in to_country_year_votes:
119 to_country = to_country_year_vote.get('To country')
120 year = to_country_year_vote.get('Year')
121 from_country = to_country_year_vote.get('From country')
122 vote_type = to_country_year_vote.get('Jury or Televoting')
123 points = to_country_year_vote.get('Points')
124
125 id_from_country = re.sub(r'\s+', '', from_country)
126
127 voting_rec = { "name": id_from_country+"-"+vote_type, "content": points }
128
129 metadata_array.append(voting_rec)
130
131 if (vote_type == "J"):
132 jury_metadata_vals.append(id_from_country+"-J")
133 jury_from_countries[id_from_country] = 1
134 to_country_jury_total = to_country_jury_total + points
135
136 elif (vote_type == "T"):
137 tele_metadata_vals.append(id_from_country+"-T")
138 tele_from_countries[id_from_country] = 1
139 to_country_tele_total = to_country_tele_total + points
140 else:
141 util.eprint("Warning: Unrecognized voting type: " + vote_type)
142
143 if (len(jury_metadata_vals)>0):
144 metadata_array.append({ "name": "JuryVotesJSON", "content": json.dumps(jury_metadata_vals) })
145 metadata_array.append({ "name": "JuryVotesTotal", "content": to_country_jury_total})
146 if (len(tele_metadata_vals)>0):
147 metadata_array.append({ "name": "TeleVotesJSON", "content": json.dumps(tele_metadata_vals) })
148 metadata_array.append({ "name": "TeleVotesTotal", "content": to_country_tele_total})
149
150 id_to_country = to_country_year_votes[0].get('To country')
151 id_to_country = re.sub(r'\s+', '', id_to_country)
152 id_year = to_country_year_votes[0].get('Year');
153 id = id_to_country + str(id_year);
154 filename_id = id + "\\.nul"
155
156 fileset = {
157 "FileSet" : [
158 { "FileName": filename_id, },
159 { "Description" : { "Metadata" : metadata_array } }
160 ]
161 }
162
163 return fileset
164
165
166if __name__ == "__main__":
167
168 parser = argparse.ArgumentParser()
169 parser.add_argument('input-file.xlsx')
170 parser.add_argument('output-file.json', nargs='?')
171 parser.add_argument('--sheetname')
172
173 args = parser.parse_args()
174
175 excel_input_filename = getattr(args,'input-file.xlsx');
176 json_output_filename = getattr(args,'output-file.json');
177 sheetname = getattr(args,'sheetname');
178
179 if (json_output_filename == None):
180 json_output_filename = os.path.splitext(excel_input_filename)[0]+'.json'
181
182 worksheet = xlsxutil.load_xslx_sheet(excel_input_filename,sheetname)
183
184 data_hashmap_array = xlsxutil.convert_worksheet_to_hashmaps(worksheet)
185
186 print("Number of data rows: " + str(len(data_hashmap_array)))
187
188
189 to_country_year_voting_groups = create_to_country_voting_groups(data_hashmap_array)
190
191 # Debug output
192 #
193 # print(to_country_year_voting_groups)
194
195 # Next step is to express the grouped to-country voting data
196 # in the Greenstone JSON metadata format:
197
198 # { "DirectoryMetadata":
199 # [
200 # { "FileSet":
201 # [
202 # { "FileName": "France1991\.nul" },
203 # { "Description":
204 # {
205 # "Metadata":
206 # [
207 # { "name": "Germany-J", "content": "12" }, # J = Jury Vote
208 # ...
209 # ]
210 # }
211 # }
212 # ]
213 # }
214 # ...
215 # ]
216 # }
217
218 directory_metadata = []
219
220 print("Creating Greenstone JSON voting metadata for:")
221 for to_country_year_votes in to_country_year_voting_groups:
222
223 fileset = fileset_voting_for_esc_country_in_year(to_country_year_votes)
224 directory_metadata.append(fileset)
225
226 filename_id = fileset.get('FileSet')[0].get('FileName')
227 num_countries_voting_data = len(fileset.get('FileSet')[1].get('Description').get('Metadata'))
228
229 print(" " + filename_id.ljust(28) + ": " + str(num_countries_voting_data) + " votes")
230
231
232 print("")
233 print("")
234 print("For, e.g., '<display><format>' section of collectionConfig.xml:")
235 print("")
236 print(" <gsf:headMetaTags>")
237
238 for from_country in sorted(jury_from_countries.keys()):
239 print(" <gsf:metadata name=\""+from_country+"-J\" />")
240
241 print("")
242 for from_country in sorted(tele_from_countries.keys()):
243 print(" <gsf:metadata name=\""+from_country+"-T\" />")
244
245 print(" </gsf:headMetaTags>")
246 print("")
247
248 greenstone_metadata_json = { "DirectoryMetadata": directory_metadata }
249
250 with open(json_output_filename, 'w') as outfile:
251 json.dump(greenstone_metadata_json, outfile, indent=2)
Note: See TracBrowser for help on using the repository browser.