Ignore:
Timestamp:
2021-03-09T23:30:43+13:00 (3 years ago)
Author:
davidb
Message:

Changes associated with adding in Combined and Single Voting totals; adjustments to DL based on Terhi's feedback

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel/xlsx-tocountry-jsonmetadata.py

    r34957 r34960  
    1717jury_from_countries = {}
    1818tele_from_countries = {}
    19 
    20 
     19comb_from_countries = {}
     20sing_from_countries = {}
     21
     22# 1997:
     23# Televoting by
     24#  Austria, Germany, Sweden, Switzerland, United Kingdom
     25
     26# 1998-2000: Televote, with Jury voting as backup plan
     27
     28# 2001-2002: All televote, or 50-50 mix if country so chooses, Jury backup
     29#    2001: Jury: Bosnia & Herzegovina, Turkey, Russia
     30#    2001: Mix:  Croatia, Greece, Malta
     31#
     32#    2002: Jury: Bosnia & Herzegovina, Turkey, Russia, FYR Macedonia, Romania
     33#    2002: Mix:  Cyprus, Greece, Spain, Croatia, Finland, Malta, Slovenia, Lithuania
     34
     35# 2003: back to all Televote, with Jury voting as backup
     36# 2004-2008: same as 2003
     37
     38# 2009-2012: Combined, except San Marino which was 100% Jury (due to size), Jury as backup
     39
     40# 2013-2015: As before, but the two types of votes were combined differently
     41
     42# 2016-      Jury and Tele given separately, added in
     43
     44def XXXXvoting_mode(year):
     45
     46    vote_mode = None
     47
     48    if (year>=2013):
     49        # Separate voting: Jury and Tele
     50        vote_mode = "Separated"
     51    elif ((year>=2001) and (year<=2012)):
     52        vote_mode = "Combined"
     53    else:
     54        vote_mode = "Single"
     55
     56    return vote_mode
     57
     58
     59def XXXXnuanced_voting_type(country,year):
     60
     61    vote_type = None
     62
     63    if (year>=2013):
     64        # Separate voting: Jury and Tele
     65        vote_type = "JT"
     66       
     67    elif ((year>=2009) and (year<=2012)):
     68        if (country=="San Marino"):
     69            vote_type = "J"
     70        else:
     71            # Combined Jury and Tele
     72            vote_type = "C"
     73    elif ((year>=2003) and (year<=2008)):
     74        vote_type = "T"
     75    elif (year==2002):
     76        jury_list = [ "Bosnia & Herzegovina", "Turkey", "Russia", "FYR Macedonia", "Romania"]
     77        comb_list = [ "Cyprus", "Greece", "Spain", "Croatia", "Finland", "Malta", "Slovenia", "Lithuania" ]
     78        if (country in jury_list):
     79            vote_type = "J"
     80        elif (country in comb_list):
     81            vote_type = "C"
     82        else:
     83            vote_type = "T"
     84    elif (year==2001):
     85        jury_list = [ "Bosnia & Herzegovina", "Turkey", "Russia" ]
     86        comb_list = [ "Croatia", "Greece", "Malta" ]
     87        if (country in jury_list):
     88            vote_type = "J"
     89        elif (country in comb_list):
     90            vote_type = "C"
     91    elif ((year>=1998) and (year<=2000)):
     92        vote_type = "T"
     93    elif (year==1997):
     94        tele_list = [ "Austria", "Germany", "Sweeden", "Switzerland", "United Kingdom" ]
     95        if (country in tele_list):
     96            vote_type = "T"
     97        else:
     98            vote_type = "J"
     99    else:
     100        vote_type = "J"
     101
     102    return vote_type
     103
     104
     105           
    21106def create_to_country_voting_groups(data_hashmap_array):
    22107
     
    68153
    69154
    70 def fileset_voting_for_esc_country_in_year(to_country_year_votes,countrylabel_to_uri_mapping):
     155def create_to_year_then_country_voting_groups(data_hashmap_array):
     156
     157    # Example values for header-names
     158    #   (semi-) final:      f, sf
     159    #   Jury or Televoting: J, T
     160    #   Year:               1975, ...
     161    #   To country:         Belgium
     162    #   From country:       Belgium
     163    #   Edition:            1975f, 1975sf
     164
     165    # Build array of year groups
     166    #   A year group contains within it a country grouping that in turn
     167    #   includes all the votes that country receive that year
     168    year_groups = []
     169
     170    # Rely on sorting as done in first line of create_to_country_voting_groups,
     171    # which is the (innermost) place where the sorting is needed
     172    country_groups = create_to_country_voting_groups(data_hashmap_array)
     173   
     174    prev_to_country_votes = country_groups[0]
     175    year_group = [ ]
     176
     177    i = 1
     178    num_rows = len(country_groups)
     179
     180    while (i < num_rows):       
     181        year_group.append(prev_to_country_votes)
     182        to_country_votes = country_groups[i]
     183
     184        if (to_country_votes[0].get('Year') != prev_to_country_votes[0].get('Year')):
     185            # moving on to a new year group
     186##            print("**** year change from:" + str(to_country_votes[0].get('Year')) + " to " + str(prev_to_country_votes[0].get('Year')))
     187            year_groups.append(year_group)
     188            year_group = [ ]
     189           
     190        prev_to_country_votes = to_country_votes
     191        i = i + 1
     192       
     193    year_group.append(prev_to_country_votes)       
     194    year_groups.append(year_group)
     195
     196    print("****")
     197    print("Number of years voting is available for: " + str(len(year_groups)));
     198    print("****")
     199   
     200    return year_groups
     201
     202
     203
     204def augment_voting_for_esc_country_in_year(to_country_year_votes,countrylabel_to_uri_mapping):
     205
     206    jury_metadata_vals = []
     207    tele_metadata_vals = []
     208    comb_metadata_vals = []
     209    sing_metadata_vals = []
     210
     211    to_country_jury_total = 0
     212    to_country_tele_total = 0
     213    to_country_comb_total = 0
     214    to_country_sing_total = 0
     215
     216    to_country_grand_total = 0
     217   
     218    # Use the first record to be a representative for 'top level' (tl)
     219    # metadata about the votes cast 'To country'
     220    tl_rec = to_country_year_votes[0]
     221
     222    tl_to_country = tl_rec.get('To country')
     223    tl_year       = tl_rec.get('Year')
     224    tl_final      = tl_rec.get("(semi-) final") == "f"   
     225
     226    tl_to_country_id = re.sub(r'\s+', '', tl_to_country) + str(tl_year)
     227
     228    tl_rec["Final"] = tl_final
     229
     230    # tl_to_country_lc = tl_to_country.lower()
     231    tl_to_country_uri = countrylabel_to_uri_mapping.get(tl_to_country)
     232    if (tl_to_country_uri != None):
     233        tl_rec["CountryDBURI"] = tl_to_country_uri
     234    else:
     235        util.eprint("Warning: Failed to find DBpedia Country URI match for: " + tl_to_country)
     236               
     237    for to_country_year_vote in to_country_year_votes:
     238#        to_country   = to_country_year_vote.get('To country')
     239        year         = to_country_year_vote.get('Year')
     240        from_country = to_country_year_vote.get('From country')
     241        vote_type    = to_country_year_vote.get('Jury or Televoting')
     242        points       = to_country_year_vote.get('Points')
     243
     244        id_from_country = re.sub(r'\s+', '', from_country)
     245           
     246#        tl_rec[id_from_country+"-"+vote_type] = points
     247
     248        vote_mode = xlsxutil.voting_mode(year)
     249        nuanced_vote_type = xlsxutil.nuanced_voting_type(from_country,year)
     250
     251        if (vote_mode == "Separated"):
     252           
     253            if (vote_type == "J"):
     254                jury_metadata_vals.append(id_from_country+"-J")
     255                jury_from_countries[id_from_country] = 1
     256                to_country_jury_total = to_country_jury_total + points
     257           
     258            elif (vote_type == "T"):
     259                tele_metadata_vals.append(id_from_country+"-T")
     260                tele_from_countries[id_from_country] = 1
     261                to_country_tele_total = to_country_tele_total + points
     262            else:           
     263                util.eprint("Warning: Unrecognized voting type: " + vote_type)
     264
     265        elif (vote_mode == "Combined"):
     266            comb_metadata_vals.append(id_from_country+"-C")
     267            comb_from_countries[id_from_country] = 1
     268            to_country_comb_total = to_country_comb_total + points
     269        else:
     270            # Single
     271            sing_metadata_vals.append(id_from_country+"-S")
     272            sing_from_countries[id_from_country] = 1
     273            to_country_sing_total = to_country_sing_total + points
     274
     275        to_country_grand_total = to_country_grand_total + points
     276           
     277    tl_rec["VoteMode"] = vote_mode
     278       
     279    if (len(jury_metadata_vals)>0):
     280        tl_rec["JuryVotesJSON"] = jury_metadata_vals
     281        tl_rec["JuryVotesTotal"] = to_country_jury_total
     282    if (len(tele_metadata_vals)>0):
     283        tl_rec["TeleVotesJSON"] = tele_metadata_vals
     284        tl_rec["TeleVotesTotal"] = to_country_tele_total
     285    if (len(comb_metadata_vals)>0):
     286        tl_rec["CombVotesJSON"] = comb_metadata_vals
     287        tl_rec["CombVotesTotal"] = to_country_comb_total
     288    if (len(sing_metadata_vals)>0):
     289        tl_rec["SingVotesJSON"] = sing_metadata_vals
     290        tl_rec["SingVotesTotal"] = to_country_sing_total
     291
     292    tl_rec["VoteGrandTotal"] = to_country_grand_total
     293
     294def fileset_voting_for_esc_country_in_year(to_country_year_votes):
    71295
    72296    # Looking to build data-structure (for output as JSON) in the form
     
    89313    metadata_array = []
    90314
    91     jury_metadata_vals = []
    92     tele_metadata_vals = []
    93 
    94     to_country_jury_total = 0
    95     to_country_tele_total = 0
     315#    jury_metadata_vals = []
     316#    tele_metadata_vals = []
     317
     318#    to_country_jury_total = 0
     319#    to_country_tele_total = 0
    96320
    97321    # Use the first record to be a representative for 'top level' (tl)
     
    101325    tl_to_country = tl_rec.get('To country')
    102326    tl_year       = tl_rec.get('Year')
    103     tl_final      = tl_rec.get("(semi-) final") == "f"   
     327    # tl_final      = tl_rec.get("(semi-) final") == "f"   
    104328
    105329    tl_to_country_id = re.sub(r'\s+', '', tl_to_country) + str(tl_year)
    106330
     331    tl_final = tl_rec["Final"]
     332    tl_to_country_uri = tl_rec.get("CountryDBURI")
     333
     334    vote_mode = tl_rec.get("VoteMode")
     335   
     336    jury_metadata_vals = tl_rec.get("JuryVotesJSON")
     337    tele_metadata_vals = tl_rec.get("TeleVotesJSON")
     338    comb_metadata_vals = tl_rec.get("CombVotesJSON")
     339    sing_metadata_vals = tl_rec.get("SingVotesJSON")
     340
     341    to_country_jury_total = tl_rec.get("JuryVotesTotal")
     342    to_country_tele_total = tl_rec.get("TeleVotesTotal")
     343    to_country_comb_total = tl_rec.get("CombVotesTotal")
     344    to_country_sing_total = tl_rec.get("SingVotesTotal")
     345
     346    to_country_grand_total = tl_rec.get("VoteGrandTotal")
     347    to_country_finishing_pos = tl_rec.get("FinishingPos")
     348   
    107349    # Country and Year are set as metadata elsewhere so don't need
    108350    # to set them here -- however, do want DBpedia *Country* *URI*
     
    115357    metadata_array.append({ "name": "Final",  "content": tl_final })
    116358
    117     # tl_to_country_lc = tl_to_country.lower()
    118     tl_to_country_uri = countrylabel_to_uri_mapping.get(tl_to_country)
     359    ## tl_to_country_lc = tl_to_country.lower()
     360    #tl_to_country_uri = countrylabel_to_uri_mapping.get(tl_to_country)
    119361    if (tl_to_country_uri != None):
    120362        metadata_array.append({ "name": "CountryDBURI","content": tl_to_country_uri })
    121     else:
    122         util.eprint("Warning: Failed to find DBpedia Country URI match for: " + tl_to_country)
    123                
     363#    else:
     364#        util.eprint("Warning: Failed to find DBpedia Country URI match for: " + tl_to_country)
     365
    124366    for to_country_year_vote in to_country_year_votes:
    125367        to_country   = to_country_year_vote.get('To country')
     
    135377        metadata_array.append(voting_rec)
    136378
    137         if (vote_type == "J"):
    138             jury_metadata_vals.append(id_from_country+"-J")
    139             jury_from_countries[id_from_country] = 1
    140             to_country_jury_total = to_country_jury_total + points
    141            
    142         elif (vote_type == "T"):
    143             tele_metadata_vals.append(id_from_country+"-T")
    144             tele_from_countries[id_from_country] = 1
    145             to_country_tele_total = to_country_tele_total + points
    146         else:           
    147             util.eprint("Warning: Unrecognized voting type: " + vote_type)
    148 
    149     if (len(jury_metadata_vals)>0):
     379#        if (vote_type == "J"):
     380#            jury_metadata_vals.append(id_from_country+"-J")
     381#            jury_from_countries[id_from_country] = 1
     382#            to_country_jury_total = to_country_jury_total + points
     383#           
     384#        elif (vote_type == "T"):
     385#            tele_metadata_vals.append(id_from_country+"-T")
     386#            tele_from_countries[id_from_country] = 1
     387#            to_country_tele_total = to_country_tele_total + points
     388#        else:           
     389#            util.eprint("Warning: Unrecognized voting type: " + vote_type)
     390
     391    metadata_array.append({ "name": "VoteMode", "content": vote_mode })
     392 
     393    if ((jury_metadata_vals != None) and len(jury_metadata_vals)>0):
    150394        metadata_array.append({ "name": "JuryVotesJSON", "content": json.dumps(jury_metadata_vals) })
    151395        metadata_array.append({ "name": "JuryVotesTotal", "content": to_country_jury_total})
    152     if (len(tele_metadata_vals)>0):
     396    if ((tele_metadata_vals != None) and len(tele_metadata_vals)>0):
    153397        metadata_array.append({ "name": "TeleVotesJSON", "content": json.dumps(tele_metadata_vals) })
    154398        metadata_array.append({ "name": "TeleVotesTotal", "content": to_country_tele_total})
    155                          
     399    if ((comb_metadata_vals != None) and len(comb_metadata_vals)>0):
     400        metadata_array.append({ "name": "CombVotesJSON", "content": json.dumps(comb_metadata_vals) })
     401        metadata_array.append({ "name": "CombVotesTotal", "content": to_country_comb_total})
     402    if ((sing_metadata_vals != None) and len(sing_metadata_vals)>0):
     403        metadata_array.append({ "name": "SingVotesJSON", "content": json.dumps(sing_metadata_vals) })
     404        metadata_array.append({ "name": "SingVotesTotal", "content": to_country_sing_total})
     405
     406    metadata_array.append({ "name": "VoteGrandTotal", "content": to_country_grand_total})
     407    metadata_array.append({ "name": "FinishingPos", "content": to_country_finishing_pos})
     408   
    156409    filename_id = tl_to_country_id + "\\.nul"
    157410
     
    193446       
    194447    for to_country_year_votes in to_country_year_voting_groups:
    195        
    196         fileset = fileset_voting_for_esc_country_in_year(to_country_year_votes,countrylabel_to_uri_mapping)
     448
     449        augment_voting_for_esc_country_in_year(to_country_year_votes,countrylabel_to_uri_mapping)
     450        fileset = fileset_voting_for_esc_country_in_year(to_country_year_votes)
    197451        directory_metadata.append(fileset)
    198452
     
    201455       
    202456        print("  " + filename_id.ljust(28) + ": " + str(num_countries_voting_data) + " votes")
     457
     458
     459    greenstone_metadata_json = { "DirectoryMetadata": directory_metadata }
     460
     461    return greenstone_metadata_json
     462
     463
     464
     465def augment_voting_in_year_then_country(to_country_year_votes,countrylabel_to_uri_mapping):
     466
     467    for to_year_votes in to_year_then_country_voting_groups:
     468
     469        for to_country_year_votes in to_year_votes:
     470
     471            augment_voting_for_esc_country_in_year(to_country_year_votes,countrylabel_to_uri_mapping)
     472
     473
     474        to_year_votes.sort(key=lambda v: v[0].get("VoteGrandTotal"), reverse=True)
     475        to_year_num_votes = len(to_year_votes)
     476        for i in range(to_year_num_votes):
     477            finishing_pos = i+1
     478            to_year_votes[i][0]["FinishingPos"] = finishing_pos
     479
     480           
     481def gs_yc_directory_metadata(to_year_then_country_voting_groups):
     482    # Next step is to express the grouped to-country voting data
     483    # in the Greenstone JSON metadata format:
     484   
     485    # { "DirectoryMetadata":
     486    #   [
     487    #     { "FileSet":
     488    #       [
     489    #         { "FileName": "France1991\.nul" },
     490    #         { "Description":
     491    #           {
     492    #             "Metadata":
     493    #              [
     494    #                { "name": "Germany-J", "content": "12" }, # J = Jury Vote
     495    #                  ...
     496    #              ]
     497    #           }
     498    #         }
     499    #       ]
     500    #     }
     501    #     ...
     502    #    ]
     503    #  }
     504
     505    directory_metadata = []
     506
     507    for to_year_votes in to_year_then_country_voting_groups:
     508       
     509        for to_country_year_votes in to_year_votes:
     510
     511##            augment_voting_for_esc_country_in_year(to_country_year_votes,countrylabel_to_uri_mapping)
     512            fileset = fileset_voting_for_esc_country_in_year(to_country_year_votes)
     513            directory_metadata.append(fileset)
     514
     515            filename_id = fileset.get('FileSet')[0].get('FileName')
     516            num_countries_voting_data = len(fileset.get('FileSet')[1].get('Description').get('Metadata'))
     517           
     518            print("  " + filename_id.ljust(28) + ": " + str(num_countries_voting_data) + " votes")
    203519
    204520
     
    223539        print("    <gsf:metadata name=\""+from_country+"-T\" />")
    224540
     541    print()
     542    for from_country in sorted(comb_from_countries.keys()):
     543        print("    <gsf:metadata name=\""+from_country+"-C\" />")
     544
     545    print()
     546    for from_country in sorted(sing_from_countries.keys()):
     547        print("    <gsf:metadata name=\""+from_country+"-S\" />")
     548       
    225549    print("  </gsf:headMetaTags>")
    226550
     
    271595
    272596
    273     to_country_year_voting_groups = create_to_country_voting_groups(data_hashmap_array_filtered)
     597    # to_country_year_voting_groups = create_to_country_voting_groups(data_hashmap_array_filtered)
     598    to_year_then_country_voting_groups = create_to_year_then_country_voting_groups(data_hashmap_array_filtered)
    274599
    275600    # Debug output
    276601    #
    277     # print(to_country_year_voting_groups)
     602    # print(to_year_then_country_voting_groups)
    278603   
    279604    print()
    280605    print("Generating Greenstone JSON to-country voting metadata received by:")
    281     greenstone_metadata_json = gs_directory_metadata(to_country_year_voting_groups,countrylabel_to_uri_mapping)
     606    ##greenstone_metadata_json = gs_directory_metadata(to_country_year_voting_groups,countrylabel_to_uri_mapping)
     607    augment_voting_in_year_then_country(to_year_then_country_voting_groups,countrylabel_to_uri_mapping)
     608    greenstone_metadata_json = gs_yc_directory_metadata(to_year_then_country_voting_groups)
    282609   
    283610    print("Saving output as: " + json_output_filename)   
Note: See TracChangeset for help on using the changeset viewer.