Changeset 34960 for main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel/xlsx-tocountry-jsonmetadata.py
- Timestamp:
- 2021-03-09T23:30:43+13:00 (3 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel/xlsx-tocountry-jsonmetadata.py
r34957 r34960 17 17 jury_from_countries = {} 18 18 tele_from_countries = {} 19 20 19 comb_from_countries = {} 20 sing_from_countries = {} 21 22 # 1997: 23 # Televoting by 24 # Austria, Germany, Sweeden, Switzerland, United Kingdom 25 26 # 1998-2000: Televote, with Jury voting as backup plan 27 28 # 2001-2002: All televote, or 50-50 mix if country so chooses, Jury backup 29 # 2001: Jury: Bosnia & Herzegovina, Turkey, Russia 30 # 2001: Mix: Croatia, Greece, Malta 31 # 32 # 2002: Jury: Bosnia & Herzegovina, Turkey, Russia, FYR Macedonia, Romania 33 # 2002: Mix: Cyprus, Greece, Spain, Croatia, Finland, Malta, Slovenia, Lithuania 34 35 # 2003: back to all Televote, with Jury voting as backup 36 # 2004-2008: same as 2003 37 38 # 2009-2012: Combined, except San Marino which was 100% Jury (due to size), Jury as backup 39 40 # 2013-2015: As before, but how two types of votes combined different 41 42 # 2016- Jury and Tele given separately, added in 43 44 def XXXXvoting_mode(year): 45 46 vote_mode = None 47 48 if (year>=2013): 49 # Separate voting: Jury and Tele 50 vote_mode = "Separated" 51 elif ((year>=2001) and (year<=2012)): 52 vote_mode = "Combined" 53 else: 54 vote_mode = "Single" 55 56 return vote_mode 57 58 59 def XXXXnuanced_voting_type(country,year): 60 61 vote_type = None 62 63 if (year>=2013): 64 # Separate voting: Jury and Tele 65 vote_type = "JT" 66 67 elif ((year>=2009) and (year<=2012)): 68 if (country=="San Marino"): 69 vote_type = "J" 70 else: 71 # Combined Jury and Tele 72 vote_type = "C" 73 elif ((year>=2003) and (year<=2008)): 74 vote_type = "T" 75 elif (year==2002): 76 jury_list = [ "Bosnia & Herzegovina", "Turkey", "Russia", "FYR Macedonia", "Romania"] 77 comb_list = [ "Cyprus", "Greece", "Spain", "Croatia", "Finland", "Malta", "Slovenia", "Lithuania" ] 78 if (country in jury_list): 79 vote_type = "J" 80 elif (country in comb_list): 81 vote_type = "C" 82 else: 83 vote_type = "T" 84 elif (year==2001): 85 jury_list = [ "Bosnia & Herzegovina", "Turkey", "Russia" ] 86 comb_list = [ "Croatia", "Greece", "Malta" ] 87 if (country in jury_list): 88 vote_type = "J" 89 elif (country in comb_list): 90 vote_type = "C" 91 elif ((year>=1998) and (year<=2000)): 92 vote_type = "T" 93 elif (year==1997): 94 tele_list = [ "Austria", "Germany", "Sweeden", "Switzerland", "United Kingdom" ] 95 if (country in tele_list): 96 vote_type = "T" 97 else: 98 vote_type = "J" 99 else: 100 vote_type = "J" 101 102 return vote_type 103 104 105 21 106 def create_to_country_voting_groups(data_hashmap_array): 22 107 … … 68 153 69 154 70 def fileset_voting_for_esc_country_in_year(to_country_year_votes,countrylabel_to_uri_mapping): 155 def create_to_year_then_country_voting_groups(data_hashmap_array): 156 157 # Example values for header-names 158 # (semi-) final: f, sf 159 # Jury or Televoting: J, T 160 # Year: 1975, ... 161 # To country: Belgium 162 # From country: Belgium 163 # Edition: 1975f, 1975sf 164 165 # Build array of year groups 166 # A year group contains within it a country grouping that in turn 167 # includes all the votes that country receive that year 168 year_groups = [] 169 170 # Rely on sorting as done in first line of create_to_country_voting_groups, 171 # which is the (innermost) place where the sorting is needed 172 country_groups = create_to_country_voting_groups(data_hashmap_array) 173 174 prev_to_country_votes = country_groups[0] 175 year_group = [ ] 176 177 i = 1 178 num_rows = len(country_groups) 179 180 while (i < num_rows): 181 year_group.append(prev_to_country_votes) 182 to_country_votes = country_groups[i] 183 184 if (to_country_votes[0].get('Year') != prev_to_country_votes[0].get('Year')): 185 # moving on to a new year group 186 ## print("**** year change from:" + str(to_country_votes[0].get('Year')) + " to " + str(prev_to_country_votes[0].get('Year'))) 187 year_groups.append(year_group) 188 year_group = [ ] 189 190 prev_to_country_votes = to_country_votes 191 i = i + 1 192 193 year_group.append(prev_to_country_votes) 194 year_groups.append(year_group) 195 196 print("****") 197 print("Number of years voting is available for: " + str(len(year_groups))); 198 print("****") 199 200 return year_groups 201 202 203 204 def augment_voting_for_esc_country_in_year(to_country_year_votes,countrylabel_to_uri_mapping): 205 206 jury_metadata_vals = [] 207 tele_metadata_vals = [] 208 comb_metadata_vals = [] 209 sing_metadata_vals = [] 210 211 to_country_jury_total = 0 212 to_country_tele_total = 0 213 to_country_comb_total = 0 214 to_country_sing_total = 0 215 216 to_country_grand_total = 0 217 218 # Use the first record to be a representative for 'top level' (tl) 219 # metadata about the votes cast 'To country' 220 tl_rec = to_country_year_votes[0] 221 222 tl_to_country = tl_rec.get('To country') 223 tl_year = tl_rec.get('Year') 224 tl_final = tl_rec.get("(semi-) final") == "f" 225 226 tl_to_country_id = re.sub(r'\s+', '', tl_to_country) + str(tl_year) 227 228 tl_rec["Final"] = tl_final 229 230 # tl_to_country_lc = tl_to_country.lower() 231 tl_to_country_uri = countrylabel_to_uri_mapping.get(tl_to_country) 232 if (tl_to_country_uri != None): 233 tl_rec["CountryDBURI"] = tl_to_country_uri 234 else: 235 util.eprint("Warning: Failed to find DBpedia Country URI match for: " + tl_to_country) 236 237 for to_country_year_vote in to_country_year_votes: 238 # to_country = to_country_year_vote.get('To country') 239 year = to_country_year_vote.get('Year') 240 from_country = to_country_year_vote.get('From country') 241 vote_type = to_country_year_vote.get('Jury or Televoting') 242 points = to_country_year_vote.get('Points') 243 244 id_from_country = re.sub(r'\s+', '', from_country) 245 246 # tl_rec[id_from_country+"-"+vote_type] = points 247 248 vote_mode = xlsxutil.voting_mode(year) 249 nuanced_vote_type = xlsxutil.nuanced_voting_type(from_country,year) 250 251 if (vote_mode == "Separated"): 252 253 if (vote_type == "J"): 254 jury_metadata_vals.append(id_from_country+"-J") 255 jury_from_countries[id_from_country] = 1 256 to_country_jury_total = to_country_jury_total + points 257 258 elif (vote_type == "T"): 259 tele_metadata_vals.append(id_from_country+"-T") 260 tele_from_countries[id_from_country] = 1 261 to_country_tele_total = to_country_tele_total + points 262 else: 263 util.eprint("Warning: Unrecognized voting type: " + vote_type) 264 265 elif (vote_mode == "Combined"): 266 comb_metadata_vals.append(id_from_country+"-C") 267 comb_from_countries[id_from_country] = 1 268 to_country_comb_total = to_country_comb_total + points 269 else: 270 # Single 271 sing_metadata_vals.append(id_from_country+"-S") 272 sing_from_countries[id_from_country] = 1 273 to_country_sing_total = to_country_sing_total + points 274 275 to_country_grand_total = to_country_grand_total + points 276 277 tl_rec["VoteMode"] = vote_mode 278 279 if (len(jury_metadata_vals)>0): 280 tl_rec["JuryVotesJSON"] = jury_metadata_vals 281 tl_rec["JuryVotesTotal"] = to_country_jury_total 282 if (len(tele_metadata_vals)>0): 283 tl_rec["TeleVotesJSON"] = tele_metadata_vals 284 tl_rec["TeleVotesTotal"] = to_country_tele_total 285 if (len(comb_metadata_vals)>0): 286 tl_rec["CombVotesJSON"] = comb_metadata_vals 287 tl_rec["CombVotesTotal"] = to_country_comb_total 288 if (len(sing_metadata_vals)>0): 289 tl_rec["SingVotesJSON"] = sing_metadata_vals 290 tl_rec["SingVotesTotal"] = to_country_sing_total 291 292 tl_rec["VoteGrandTotal"] = to_country_grand_total 293 294 def fileset_voting_for_esc_country_in_year(to_country_year_votes): 71 295 72 296 # Looking to build data-structure (for output as JSON) in the form … … 89 313 metadata_array = [] 90 314 91 jury_metadata_vals = []92 tele_metadata_vals = []93 94 to_country_jury_total = 095 to_country_tele_total = 0315 # jury_metadata_vals = [] 316 # tele_metadata_vals = [] 317 318 # to_country_jury_total = 0 319 # to_country_tele_total = 0 96 320 97 321 # Use the first record to be a representative for 'top level' (tl) … … 101 325 tl_to_country = tl_rec.get('To country') 102 326 tl_year = tl_rec.get('Year') 103 tl_final = tl_rec.get("(semi-) final") == "f"327 # tl_final = tl_rec.get("(semi-) final") == "f" 104 328 105 329 tl_to_country_id = re.sub(r'\s+', '', tl_to_country) + str(tl_year) 106 330 331 tl_final = tl_rec["Final"] 332 tl_to_country_uri = tl_rec.get("CountryDBURI") 333 334 vote_mode = tl_rec.get("VoteMode") 335 336 jury_metadata_vals = tl_rec.get("JuryVotesJSON") 337 tele_metadata_vals = tl_rec.get("TeleVotesJSON") 338 comb_metadata_vals = tl_rec.get("CombVotesJSON") 339 sing_metadata_vals = tl_rec.get("SingVotesJSON") 340 341 to_country_jury_total = tl_rec.get("JuryVotesTotal") 342 to_country_tele_total = tl_rec.get("TeleVotesTotal") 343 to_country_comb_total = tl_rec.get("CombVotesTotal") 344 to_country_sing_total = tl_rec.get("SingVotesTotal") 345 346 to_country_grand_total = tl_rec.get("VoteGrandTotal") 347 to_country_finishing_pos = tl_rec.get("FinishingPos") 348 107 349 # Country and Year are set as metadata elsewhere so don't need 108 350 # to set them here -- however, do want DBpedia *Country* *URI* … … 115 357 metadata_array.append({ "name": "Final", "content": tl_final }) 116 358 117 # tl_to_country_lc = tl_to_country.lower()118 tl_to_country_uri = countrylabel_to_uri_mapping.get(tl_to_country)359 ## tl_to_country_lc = tl_to_country.lower() 360 #tl_to_country_uri = countrylabel_to_uri_mapping.get(tl_to_country) 119 361 if (tl_to_country_uri != None): 120 362 metadata_array.append({ "name": "CountryDBURI","content": tl_to_country_uri }) 121 else:122 util.eprint("Warning: Failed to find DBpedia Country URI match for: " + tl_to_country)123 363 # else: 364 # util.eprint("Warning: Failed to find DBpedia Country URI match for: " + tl_to_country) 365 124 366 for to_country_year_vote in to_country_year_votes: 125 367 to_country = to_country_year_vote.get('To country') … … 135 377 metadata_array.append(voting_rec) 136 378 137 if (vote_type == "J"): 138 jury_metadata_vals.append(id_from_country+"-J") 139 jury_from_countries[id_from_country] = 1 140 to_country_jury_total = to_country_jury_total + points 141 142 elif (vote_type == "T"): 143 tele_metadata_vals.append(id_from_country+"-T") 144 tele_from_countries[id_from_country] = 1 145 to_country_tele_total = to_country_tele_total + points 146 else: 147 util.eprint("Warning: Unrecognized voting type: " + vote_type) 148 149 if (len(jury_metadata_vals)>0): 379 # if (vote_type == "J"): 380 # jury_metadata_vals.append(id_from_country+"-J") 381 # jury_from_countries[id_from_country] = 1 382 # to_country_jury_total = to_country_jury_total + points 383 # 384 # elif (vote_type == "T"): 385 # tele_metadata_vals.append(id_from_country+"-T") 386 # tele_from_countries[id_from_country] = 1 387 # to_country_tele_total = to_country_tele_total + points 388 # else: 389 # util.eprint("Warning: Unrecognized voting type: " + vote_type) 390 391 metadata_array.append({ "name": "VoteMode", "content": vote_mode }) 392 393 if ((jury_metadata_vals != None) and len(jury_metadata_vals)>0): 150 394 metadata_array.append({ "name": "JuryVotesJSON", "content": json.dumps(jury_metadata_vals) }) 151 395 metadata_array.append({ "name": "JuryVotesTotal", "content": to_country_jury_total}) 152 if ( len(tele_metadata_vals)>0):396 if ((tele_metadata_vals != None) and len(tele_metadata_vals)>0): 153 397 metadata_array.append({ "name": "TeleVotesJSON", "content": json.dumps(tele_metadata_vals) }) 154 398 metadata_array.append({ "name": "TeleVotesTotal", "content": to_country_tele_total}) 155 399 if ((comb_metadata_vals != None) and len(comb_metadata_vals)>0): 400 metadata_array.append({ "name": "CombVotesJSON", "content": json.dumps(comb_metadata_vals) }) 401 metadata_array.append({ "name": "CombVotesTotal", "content": to_country_comb_total}) 402 if ((sing_metadata_vals != None) and len(sing_metadata_vals)>0): 403 metadata_array.append({ "name": "SingVotesJSON", "content": json.dumps(sing_metadata_vals) }) 404 metadata_array.append({ "name": "SingVotesTotal", "content": to_country_sing_total}) 405 406 metadata_array.append({ "name": "VoteGrandTotal", "content": to_country_grand_total}) 407 metadata_array.append({ "name": "FinishingPos", "content": to_country_finishing_pos}) 408 156 409 filename_id = tl_to_country_id + "\\.nul" 157 410 … … 193 446 194 447 for to_country_year_votes in to_country_year_voting_groups: 195 196 fileset = fileset_voting_for_esc_country_in_year(to_country_year_votes,countrylabel_to_uri_mapping) 448 449 augment_voting_for_esc_country_in_year(to_country_year_votes,countrylabel_to_uri_mapping) 450 fileset = fileset_voting_for_esc_country_in_year(to_country_year_votes) 197 451 directory_metadata.append(fileset) 198 452 … … 201 455 202 456 print(" " + filename_id.ljust(28) + ": " + str(num_countries_voting_data) + " votes") 457 458 459 greenstone_metadata_json = { "DirectoryMetadata": directory_metadata } 460 461 return greenstone_metadata_json 462 463 464 465 def augment_voting_in_year_then_country(to_country_year_votes,countrylabel_to_uri_mapping): 466 467 for to_year_votes in to_year_then_country_voting_groups: 468 469 for to_country_year_votes in to_year_votes: 470 471 augment_voting_for_esc_country_in_year(to_country_year_votes,countrylabel_to_uri_mapping) 472 473 474 to_year_votes.sort(key=lambda v: v[0].get("VoteGrandTotal"), reverse=True) 475 to_year_num_votes = len(to_year_votes) 476 for i in range(to_year_num_votes): 477 finishing_pos = i+1 478 to_year_votes[i][0]["FinishingPos"] = finishing_pos 479 480 481 def gs_yc_directory_metadata(to_year_then_country_voting_groups): 482 # Next step is to express the grouped to-country voting data 483 # in the Greenstone JSON metadata format: 484 485 # { "DirectoryMetadata": 486 # [ 487 # { "FileSet": 488 # [ 489 # { "FileName": "France1991\.nul" }, 490 # { "Description": 491 # { 492 # "Metadata": 493 # [ 494 # { "name": "Germany-J", "content": "12" }, # J = Jury Vote 495 # ... 496 # ] 497 # } 498 # } 499 # ] 500 # } 501 # ... 502 # ] 503 # } 504 505 directory_metadata = [] 506 507 for to_year_votes in to_year_then_country_voting_groups: 508 509 for to_country_year_votes in to_year_votes: 510 511 ## augment_voting_for_esc_country_in_year(to_country_year_votes,countrylabel_to_uri_mapping) 512 fileset = fileset_voting_for_esc_country_in_year(to_country_year_votes) 513 directory_metadata.append(fileset) 514 515 filename_id = fileset.get('FileSet')[0].get('FileName') 516 num_countries_voting_data = len(fileset.get('FileSet')[1].get('Description').get('Metadata')) 517 518 print(" " + filename_id.ljust(28) + ": " + str(num_countries_voting_data) + " votes") 203 519 204 520 … … 223 539 print(" <gsf:metadata name=\""+from_country+"-T\" />") 224 540 541 print() 542 for from_country in sorted(comb_from_countries.keys()): 543 print(" <gsf:metadata name=\""+from_country+"-C\" />") 544 545 print() 546 for from_country in sorted(sing_from_countries.keys()): 547 print(" <gsf:metadata name=\""+from_country+"-S\" />") 548 225 549 print(" </gsf:headMetaTags>") 226 550 … … 271 595 272 596 273 to_country_year_voting_groups = create_to_country_voting_groups(data_hashmap_array_filtered) 597 # to_country_year_voting_groups = create_to_country_voting_groups(data_hashmap_array_filtered) 598 to_year_then_country_voting_groups = create_to_year_then_country_voting_groups(data_hashmap_array_filtered) 274 599 275 600 # Debug output 276 601 # 277 # print(to_ country_year_voting_groups)602 # print(to_year_then_country_voting_groups) 278 603 279 604 print() 280 605 print("Generating Greenstone JSON to-country voting metadata received by:") 281 greenstone_metadata_json = gs_directory_metadata(to_country_year_voting_groups,countrylabel_to_uri_mapping) 606 ##greenstone_metadata_json = gs_directory_metadata(to_country_year_voting_groups,countrylabel_to_uri_mapping) 607 augment_voting_in_year_then_country(to_year_then_country_voting_groups,countrylabel_to_uri_mapping) 608 greenstone_metadata_json = gs_yc_directory_metadata(to_year_then_country_voting_groups) 282 609 283 610 print("Saving output as: " + json_output_filename)
Note:
See TracChangeset
for help on using the changeset viewer.