Changeset 34910


Ignore:
Timestamp:
2021-02-22T23:38:30+13:00 (3 years ago)
Author:
davidb
Message:

Completion of initial work on supporting from-country voting to produce docs in the DL; written, then tested by building the collection. This is the result after debugging.

Location:
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel/xlsx-fromcountry-jsonmetadata.py

    r34908 r34910  
    77import sys
    88import json
    9 #import csv
    109
    1110import argparse
     
    1716#jury_from_countries = {}
    1817#tele_from_countries = {}
    19 
    2018   
    2119
     
    3129
    3230   
    33     # Filter down to just the voting results concerning finals
    34     data_hashmap_array_finals = list(filter(xlsxutil.filter_finalist_votes, data_hashmap_array))
    35 
    3631    # Sort so array entries are grouped by the country casting the votes in a given year
    37     data_hashmap_array_finals.sort(key=xlsxutil.sortkey_year_from_country)
     32    data_hashmap_array.sort(key=xlsxutil.sortkey_year_from_country)
    3833
    3934
    4035    # Debug output
    4136    #
    42     # for data_hashmap in data_hashmap_array_finals:
     37    # for data_hashmap in data_hashmap_array:
    4338    #    print(data_hashmap.get('From country'), data_hashmap.get('Year'), data_hashmap.get('Points'), "(Points to " + data_hashmap.get('To country') + ")")
    4439
     
    4843    country_groups = []
    4944
    50     prev_data_hashmap = data_hashmap_array_finals[0]
     45    prev_data_hashmap = data_hashmap_array[0]
    5146    country_group = [ ]
    5247
    5348    i = 1
    54     num_finals = len(data_hashmap_array_finals)
    55 
    56     while (i < num_finals):       
     49    num_rows = len(data_hashmap_array)
     50
     51    while (i < num_rows):       
    5752        country_group.append(prev_data_hashmap)
    58         data_hashmap = data_hashmap_array_finals[i]
     53        data_hashmap = data_hashmap_array[i]
    5954
    6055        if (data_hashmap.get('From country') != prev_data_hashmap.get('From country')):
     
    6964    country_groups.append(country_group)
    7065
    71     # Debug output
     66    return country_groups
     67
     68
     69def debug_print_country_groups(country_groups):
    7270   
    7371    for from_country_voting_array in country_groups:
     
    7876
    7977        for to_country_entry in from_country_voting_array:
    80             print("  ", to_country_entry.get('Year'), to_country_entry.get('Points'),
    81                   " ("+to_country_entry.get('Jury or Televoting')+") ",
    82                   " Points to " + to_country_entry.get('To country'))
    83 
    84 
    85     return country_groups
    86 
    87 
    88 def fileset_voting_for_esc_country_in_year(to_country_year_votes):
     78            print("  "+ str(to_country_entry.get('Year')) + str(to_country_entry.get('Points')).rjust(3)
     79                  +" ("+to_country_entry.get('Jury or Televoting')+") "
     80                  +" Points to " + to_country_entry.get('To country'))
     81
     82   
     83def gs_fileset_voting_by_esc_country_in_year(from_country_year_votes, nul_output_dir_name):
    8984
    9085    # Looking to build data-structure (for output as JSON) in the form
    9186    # { "FileSet":
    9287    #   [
    93     #     { "FileName": "France1991\.nul" },
     88    #     { "FileName": "FromCountry-France1991-J\.nul" }, // The country that cast the votes (J=Jury, T=Tele)
    9489    #     { "Description":
    9590    #       {
    9691    #         "Metadata":
    9792    #           [
    98     #             { "name": "Germany-J", "content": 12 }, # J = Jury Vote
    99     #             { "name": "Germany-T", "content": 6 },  # T = Televote (if present)
     93    #             { "name": "Germany", "content": 12 },
     94    #             { "name": "Denmark", "content": 6 }, 
    10095    #             ...
    10196    #           ]
     
    106101
    107102
    108 
    109     # Scan all voting to build up complete list of all countries that
    110     # either cast or received votes
    111 
    112     all_from_countries_year = []
    113     all_to_countries_tj     = []
    114    
    115     for from_country_voting_array in country_groups:
    116         from_country = from_country_voting_array[0].get('From country')
    117         year = from_country_voting_array[0].get('Year')
    118 
    119         ### Remove spaces!!!
    120         from_country_year = from_country + "-" + year
    121 
    122         all_from_countries_year[from_country_year] = 1
    123        
    124         for to_country_entry in from_country_voting_array:
    125             to_country = to_country_entry.get('To country')
    126             vote_type = to_country_entry.get('Jury or Televoting')
    127 
    128             to_country_vote_type = to_country+"-"+vote_type
    129            
    130             all_to_countries[to_country_vote_type] = 1
    131 
    132 
    133     from_country_to_hashmap = []
    134    
    135    
    136     for to_country_entry in from_country_voting_array:
    137         print("  ", to_country_entry.get('Year'), to_country_entry.get('Points'),
    138                   " ("+to_country_entry.get('Jury or Televoting')+") ",
    139                   " Points to " + to_country_entry.get('To country'))
    140            
    141 
    142     csv_header_array = sorted(all_to_countries_tj.keys())
    143 
    144            
    145     csv_ofile = open(csv_filename, 'wb')
    146 
    147     csv_wr  = csv.writer(csv_ofile, quoting=csv.QUOTE_ALL)
    148 
    149            
    150 #            csv_wr.writerow(filtered_utf8_row)
    151 
    152    
    153 
    154     csv_ofile.close()
    155    
    156     fileset_array = []
    157    
     103    # Debug output
     104   
     105#    for to_country_entry in from_country_year_votes:
     106#       print("  ", to_country_entry.get('Year'), to_country_entry.get('Points'),
     107#                 " ("+to_country_entry.get('Jury or Televoting')+") ",
     108#                 " Points to " + to_country_entry.get('To country'))
     109           
    158110    metadata_array = []
    159 
    160     jury_metadata_vals = []
    161     tele_metadata_vals = []
    162 
    163     to_country_jury_total = 0
    164     to_country_tele_total = 0
    165    
    166     for to_country_year_vote in to_country_year_votes:
    167         to_country   = to_country_year_vote.get('To country')
    168         year         = to_country_year_vote.get('Year')
    169         from_country = to_country_year_vote.get('From country')
    170         vote_type    = to_country_year_vote.get('Jury or Televoting')
    171         points       = to_country_year_vote.get('Points')
    172 
    173         id_from_country = re.sub(r'\s+', '', from_country)
    174            
    175         voting_rec = { "name": id_from_country+"-"+vote_type, "content": points }
     111    metadata_country_vals = []
     112    metadata_points_vals  = []
     113
     114    from_country_total = 0
     115
     116    # Use the first record to be a representative for 'top level' (tl)
     117    # metadata about the voting 'From country'
     118    tl_rec = from_country_year_votes[0]
     119    tl_from_country = tl_rec.get('From country')
     120    tl_year         = tl_rec.get('Year')
     121    tl_vote_type    = tl_rec.get('Jury or Televoting')
     122    tl_from_country_id = "FromCountry-" + re.sub(r'\s+', '', tl_from_country) + str(tl_year) + "-" + tl_vote_type
     123
     124    metadata_array.append({"name": "Identifier",      "content" : tl_from_country_id})
     125    metadata_array.append({"name": "FromCountry",     "content" : tl_from_country})
     126    metadata_array.append({"name": "FromCountryYear", "content" : tl_year})
     127    metadata_array.append({"name": "FromCountryType", "content" : tl_vote_type})
     128   
     129    for from_country_year_vote in from_country_year_votes:
     130        to_country   = from_country_year_vote.get('To country')
     131        year         = from_country_year_vote.get('Year')
     132        from_country = from_country_year_vote.get('From country')
     133        vote_type    = from_country_year_vote.get('Jury or Televoting')
     134        points       = from_country_year_vote.get('Points')
     135
     136        to_country_year_id = re.sub(r'\s+', '', to_country) + str(year)
     137           
     138        voting_rec = { "name": to_country_year_id, "content": points }
    176139
    177140        metadata_array.append(voting_rec)
    178 
    179         if (vote_type == "J"):
    180             jury_metadata_vals.append(id_from_country+"-J")
    181             jury_from_countries[id_from_country] = 1
    182             to_country_jury_total = to_country_jury_total + points
    183            
    184         elif (vote_type == "T"):
    185             tele_metadata_vals.append(id_from_country+"-T")
    186             tele_from_countries[id_from_country] = 1
    187             to_country_tele_total = to_country_tele_total + points
    188         else:           
    189             util.eprint("Warning: Unrecognized voting type: " + vote_type)
    190 
    191     if (len(jury_metadata_vals)>0):
    192         metadata_array.append({ "name": "JuryVotesJSON", "content": json.dumps(jury_metadata_vals) })
    193         metadata_array.append({ "name": "JuryVotesTotal", "content": to_country_jury_total})
    194     if (len(tele_metadata_vals)>0):
    195         metadata_array.append({ "name": "TeleVotesJSON", "content": json.dumps(tele_metadata_vals) })
    196         metadata_array.append({ "name": "TeleVotesTotal", "content": to_country_tele_total})
    197                          
    198     id_to_country = to_country_year_votes[0].get('To country')
    199     id_to_country = re.sub(r'\s+', '', id_to_country)   
    200     id_year = to_country_year_votes[0].get('Year');
    201     id = id_to_country + str(id_year);
    202 <    filename_id = id + "\\.nul"
    203 
     141        metadata_country_vals.append(to_country)
     142        metadata_points_vals.append(points)
     143       
     144        from_country_total = from_country_total + points
     145           
     146       
     147    if (len(metadata_country_vals)>0):
     148        metadata_array.append({ "name": "VotesCountryJSON-"+tl_vote_type,  "content": json.dumps(metadata_country_vals) })
     149        metadata_array.append({ "name": "VotesPointsJSON-"+tl_vote_type,  "content": json.dumps(metadata_points_vals) })
     150        # metadata_array.append({ "name": "VotesTotal-"+tl_vote_type, "content": from_country_total})
     151
     152
     153    # id encodes from-country, year, and vote_type
     154    filename_id = tl_from_country_id + "\\.nul"
     155
     156    nul_filename = os.path.join(nul_output_dir_name,tl_from_country_id+".nul");
     157
     158    print("Creating: " + nul_filename)
     159    with open(nul_filename, 'w') as outfile:
     160        outfile.write("")
     161   
    204162    fileset = {
    205163        "FileSet" : [           
     
    211169    return fileset
    212170
    213    
     171
     172def gs_directory_metadata(from_country_year_voting_groups):
     173
     174    # Express the grouped from-country voting data
     175    # in the Greenstone JSON metadata format:
     176   
     177    # { "DirectoryMetadata":
     178    #   [
     179    #     { "FileSet":
     180    #       [
     181    #         { "FileName": "FromCountry-France1991-J\.nul" },
     182    #         { "Description":
     183    #           {
     184    #             "Metadata":
     185    #              [
     186    #                { "name": "Germany", "content": "12" },
     187    #                  ...
     188    #              ]
     189    #           }
     190    #         }
     191    #       ]
     192    #     }
     193    #     ...
     194    #    ]
     195    #  }
     196
     197    nul_output_dir_name = os.path.dirname(json_output_filename)
     198   
     199    directory_metadata = []
     200    for from_country_year_votes in from_country_year_voting_groups:
     201
     202        fileset = gs_fileset_voting_by_esc_country_in_year(from_country_year_votes, nul_output_dir_name)
     203        directory_metadata.append(fileset)
     204
     205        filename_id = fileset.get('FileSet')[0].get('FileName')
     206        num_countries_voting_data = len(fileset.get('FileSet')[1].get('Description').get('Metadata'))
     207       
     208        print("  " + filename_id.ljust(28) + ": " + str(num_countries_voting_data) + " votes")
     209       
     210
     211    greenstone_metadata_json = { "DirectoryMetadata": directory_metadata }
     212
     213    return greenstone_metadata_json
     214
     215
     216       
    214217if __name__ == "__main__":
    215218
    216219    parser = argparse.ArgumentParser()
    217220    parser.add_argument('--sheetname', help="The name of the sheet within the Excel file to extractc data from")
    218     parser.add_argument('--voting-type', choices=['J','T'], help="Filter to only J=Jury or T=Tele cast votes")
     221    parser.add_argument('--votingtype', choices=['J','T'], help="Filter to only J=Jury or T=Tele cast votes")
    219222    parser.add_argument('input-file.xlsx')
    220     parser.add_argument('output-file.csv', nargs='?')
     223    parser.add_argument('output-file.json', nargs='?')
    221224   
    222225    args = parser.parse_args()
    223226
    224227    sheetname = getattr(args,'sheetname');
    225     if (csv_output_filename == None):
    226         csv_output_filename = os.path.splitext(excel_input_filename)[0]+'.csv'
    227 
    228     voting_type = getattr(args,'voting-type');
     228    voting_type = getattr(args,'votingtype');
    229229   
    230230    excel_input_filename = getattr(args,'input-file.xlsx');
    231     csv_output_filename = getattr(args,'output-file.csv');
     231    json_output_filename = getattr(args,'output-file.json');
     232
     233    if (json_output_filename == None):
     234        json_output_filename = os.path.splitext(excel_input_filename)[0]+'.json'
    232235   
    233236       
     
    236239    data_hashmap_array = xlsxutil.convert_worksheet_to_hashmaps(worksheet)
    237240   
    238     print("Number of data rows in Excel file: " + str(len(data_hashmap_array)))
     241    print("Number of data rows in Excel file:   " + str(len(data_hashmap_array)))
    239242
    240243    # Filter down to just the voting results concerning finals
    241     data_hashmap_array_finals = list(filter(xlsxutil.filter_finalist_votes, data_hashmap_array))
    242 
    243     print("Number of finalist voting data rows: " + str(len(data_hashmap_array_finals)))
     244    data_hashmap_array_filtered = list(filter(xlsxutil.filter_finalist_votes, data_hashmap_array))
     245
     246    print("Number of finalist voting data rows: " + str(len(data_hashmap_array_filtered)))
    244247
    245248    if voting_type != None:
    246249        # Further filter down by the type of voting results cast
    247250        if (voting_type == "J"):
    248             data_hashmap_array_finals = list(filter(xlsxutil.filter_jury_votes, data_hashmap_array_finals))
    249             print("Number Jury cast data rows: " + str(len(data_hashmap_array_finals)))
     251            data_hashmap_array_filtered = list(filter(xlsxutil.filter_jury_votes, data_hashmap_array_filtered))
     252            print("Number Jury cast data rows:          " + str(len(data_hashmap_array_filtered)))
    250253        else:
    251254            # Must be "T"
    252             data_hashmap_array_finals = list(filter(xlsxutil.filter_tele_votes, data_hashmap_array_finals))
    253             print("Number Jury cast data rows: " + str(len(data_hashmap_array_finals)))
    254 
    255 
    256 
    257     from_country_year_voting_groups = create_from_country_voting_groups(data_hashmap_array_finals)
     255            data_hashmap_array_filtered = list(filter(xlsxutil.filter_tele_votes, data_hashmap_array_filtered))
     256            print("Number Televoting cast data rows:    " + str(len(data_hashmap_array_filtered)))
     257
     258    print()
     259
     260    from_country_year_voting_groups = create_from_country_voting_groups(data_hashmap_array_filtered)
     261    debug_print_country_groups(from_country_year_voting_groups)
    258262
    259263    # Debug output
    260264    #
    261     #print(from_country_year_voting_groups)
    262    
    263     # Next step is to express the grouped from-country voting data
    264     # as a CSV file in the form used by Greenstone's CSVPlugin
    265    
    266    
    267 #    directory_metadata = []
    268        
    269 #    print("Creating Greenstone JSON voting metadata for:")
    270 #    for from_country_year_votes in from_country_year_voting_groups:
    271 #       
    272 #        fileset = fileset_voting_for_esc_country_in_year(to_country_year_votes)
    273 #        directory_metadata.append(fileset)
    274 #
    275 #        filename_id = fileset.get('FileSet')[0].get('FileName')
    276 #        num_countries_voting_data = len(fileset.get('FileSet')[1].get('Description').get('Metadata'))
    277 #       
    278 #        print("  " + filename_id.ljust(28) + ": " + str(num_countries_voting_data) + " votes")
    279 
    280 
    281 #    greenstone_metadata_json = { "DirectoryMetadata": directory_metadata }
    282 #
    283 #    with open(json_output_filename, 'w') as outfile:
    284 #        json.dump(greenstone_metadata_json, outfile, indent=2)
     265    # print(from_country_year_voting_groups)
     266
     267    print()
     268    print("Generating Greenstone JSON from-country voting metadata for:")
     269    greenstone_metadata_json = gs_directory_metadata(from_country_year_voting_groups)
     270   
     271    print("Saving output as: " + json_output_filename)   
     272    xlsxutil.save_greenstone_json_metadata(greenstone_metadata_json,json_output_filename)
     273
     274    print()
     275       
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel/xlsx-tocountry-jsonmetadata.py

    r34907 r34910  
    1717tele_from_countries = {}
    1818
    19 #def fileset_voting_for_esc_country_in_year(data_hashmap):
    20 #
    21 #    # Only generating a single fileset record here, however
    22 #    # the Greenstone format allows for this to be an array
    23 #    # of fileset entries => return [ fileset ]
    24 #   
    25 #    fileset = {}
    26 #   
    27 #    return [ fileset ]
    28 
    29 
    30 #def filter_finalist_votes(entry):
    31 #    return entry.get('(semi-) final') == "f" and entry.get('Duplicate') == None
    32 #
    33 #def sortkey_year_to_country(entry):
    34 #    return str(entry.get('Year'))+entry.get('To country')
    35 
    36 
    37 def create_to_country_voting_groups(data_hashmap_array_finals):
     19
     20def create_to_country_voting_groups(data_hashmap_array):
    3821
    3922    # Example values for header-names
     
    4629
    4730   
    48 #    # Filter down to just the voting results concerning finals
    49 #    data_hashmap_array_finals = list(filter(xlsxutil.filter_finalist_votes, data_hashmap_array))
    50 
    5131    # Sort so array entries are grouped by the country receiving the votes in a given year
    52     data_hashmap_array_finals.sort(key=xlsxutil.sortkey_year_to_country)
     32    data_hashmap_array.sort(key=xlsxutil.sortkey_year_to_country)
    5333
    5434
    5535    # Debug output
    5636    #
    57     # for data_hashmap in data_hashmap_array_finals:
     37    # for data_hashmap in data_hashmap_array:
    5838    #    print(data_hashmap.get('To country'), data_hashmap.get('Year'), data_hashmap.get('Points'), "(Points from " + data_hashmap.get('From country') + ")")
    5939
     
    6343    country_groups = []
    6444
    65     prev_data_hashmap = data_hashmap_array_finals[0]
     45    prev_data_hashmap = data_hashmap_array[0]
    6646    country_group = [ ]
    6747
    6848    i = 1
    69     num_finals = len(data_hashmap_array_finals)
    70 
    71     while (i < num_finals):       
     49    num_rows = len(data_hashmap_array)
     50
     51    while (i < num_rows):       
    7252        country_group.append(prev_data_hashmap)
    73         data_hashmap = data_hashmap_array_finals[i]
     53        data_hashmap = data_hashmap_array[i]
    7454
    7555        if (data_hashmap.get('To country') != prev_data_hashmap.get('To country')):
     
    10686    # }
    10787
    108     fileset_array = []
    109    
    11088    metadata_array = []
    11189
     
    11593    to_country_jury_total = 0
    11694    to_country_tele_total = 0
     95
     96    # Use the first record to be a representative for 'top level' (tl)
     97    # metadata about the votes cast 'To country'
     98    tl_rec = to_country_year_votes[0]
     99    tl_to_country = tl_rec.get('To country')
     100    tl_year       = tl_rec.get('Year')
     101    # tl_vote_type  = tl_rec.get('Jury or Televoting')
     102    tl_to_country_id = re.sub(r'\s+', '', tl_to_country) + str(tl_year)
    117103   
    118104    for to_country_year_vote in to_country_year_votes:
     
    148134        metadata_array.append({ "name": "TeleVotesTotal", "content": to_country_tele_total})
    149135                         
    150     id_to_country = to_country_year_votes[0].get('To country')
    151     id_to_country = re.sub(r'\s+', '', id_to_country)   
    152     id_year = to_country_year_votes[0].get('Year');
    153     id = id_to_country + str(id_year);
    154     filename_id = id + "\\.nul"
     136    # id_to_country = to_country_year_votes[0].get('To country')
     137    # id_to_country = re.sub(r'\s+', '', id_to_country)   
     138    #id_year = to_country_year_votes[0].get('Year');
     139    #id = id_to_country + str(id_year);
     140
     141    filename_id = tl_to_country_id + "\\.nul"
    155142
    156143    fileset = {
     
    163150    return fileset
    164151
    165    
    166 if __name__ == "__main__":
    167 
    168     parser = argparse.ArgumentParser()
    169     parser.add_argument('input-file.xlsx')
    170     parser.add_argument('output-file.json', nargs='?')
    171     parser.add_argument('--sheetname')
    172    
    173     args = parser.parse_args()
    174 
    175     excel_input_filename = getattr(args,'input-file.xlsx');
    176     json_output_filename = getattr(args,'output-file.json');
    177     sheetname = getattr(args,'sheetname');
    178    
    179     if (json_output_filename == None):
    180         json_output_filename = os.path.splitext(excel_input_filename)[0]+'.json'
    181        
    182     worksheet = xlsxutil.load_xslx_sheet(excel_input_filename,sheetname)
    183 
    184     data_hashmap_array = xlsxutil.convert_worksheet_to_hashmaps(worksheet)
    185 
    186     print("Number of data rows: " + str(len(data_hashmap_array)))
    187 
    188 
    189     # Filter down to just the voting results concerning finals
    190     data_hashmap_array_finals = list(filter(xlsxutil.filter_finalist_votes, data_hashmap_array))
    191 
    192     print("Number of finalist voting data rows: " + str(len(data_hashmap_array_finals)))
    193    
    194     to_country_year_voting_groups = create_to_country_voting_groups(data_hashmap_array_finals)
    195 
    196     # Debug output
    197     #
    198     # print(to_country_year_voting_groups)
    199    
     152
     153def gs_directory_metadata(to_country_year_voting_groups):
    200154    # Next step is to express the grouped to-country voting data
    201155    # in the Greenstone JSON metadata format:
     
    223177    directory_metadata = []
    224178       
    225     print("Creating Greenstone JSON voting metadata for:")
    226179    for to_country_year_votes in to_country_year_voting_groups:
    227180       
     
    234187        print("  " + filename_id.ljust(28) + ": " + str(num_countries_voting_data) + " votes")
    235188
    236  
    237     print("")
    238     print("")
     189
     190    greenstone_metadata_json = { "DirectoryMetadata": directory_metadata }
     191
     192    return greenstone_metadata_json
     193
     194
     195def display_gs_head_metadata_tags():
     196
     197    print()
     198    print()
    239199    print("For, e.g., '<display><format>' section of collectionConfig.xml:")
    240     print("")
     200    print()
    241201    print("  <gsf:headMetaTags>")
    242202
     
    244204        print("    <gsf:metadata name=\""+from_country+"-J\" />")
    245205
    246     print("")
     206    print()
    247207    for from_country in sorted(tele_from_countries.keys()):
    248208        print("    <gsf:metadata name=\""+from_country+"-T\" />")
    249209
    250210    print("  </gsf:headMetaTags>")
    251     print("")
    252 
    253     greenstone_metadata_json = { "DirectoryMetadata": directory_metadata }
    254 
    255     with open(json_output_filename, 'w') as outfile:
    256         json.dump(greenstone_metadata_json, outfile, indent=2)
     211
     212   
     213if __name__ == "__main__":
     214
     215    parser = argparse.ArgumentParser()
     216    parser.add_argument('--sheetname', help="The name of the sheet within the Excel file to extractc data from")
     217    parser.add_argument('--votingtype', choices=['J','T'], help="Filter to only J=Jury or T=Tele cast votes")       
     218    parser.add_argument('input-file.xlsx')
     219    parser.add_argument('output-file.json', nargs='?')
     220
     221   
     222    args = parser.parse_args()
     223
     224    sheetname = getattr(args,'sheetname');
     225    voting_type = getattr(args,'votingtype');
     226
     227    excel_input_filename = getattr(args,'input-file.xlsx');
     228    json_output_filename = getattr(args,'output-file.json');
     229   
     230    if (json_output_filename == None):
     231        json_output_filename = os.path.splitext(excel_input_filename)[0]+'.json'
     232       
     233    worksheet = xlsxutil.load_xslx_sheet(excel_input_filename,sheetname)
     234
     235    data_hashmap_array = xlsxutil.convert_worksheet_to_hashmaps(worksheet)
     236
     237    print("Number of data rows in Excel file:   " + str(len(data_hashmap_array)))
     238
     239    # Filter down to just the voting results concerning finals
     240    data_hashmap_array_filtered = list(filter(xlsxutil.filter_finalist_votes, data_hashmap_array))
     241
     242   
     243    print("Number of finalist voting data rows: " + str(len(data_hashmap_array_filtered)))
     244
     245    if voting_type != None:
     246        # Further filter down by the type of voting results cast
     247        if (voting_type == "J"):
     248            data_hashmap_array_filtered = list(filter(xlsxutil.filter_jury_votes, data_hashmap_array_filtered))
     249            print("Number Jury cast data rows:          " + str(len(data_hashmap_array_filtered)))
     250        else:
     251            # Must be "T"
     252            data_hashmap_array_filtered = list(filter(xlsxutil.filter_tele_votes, data_hashmap_array_filtered))
     253            print("Number Televoting cast data rows:    " + str(len(data_hashmap_array_filtered)))
     254
     255
     256    to_country_year_voting_groups = create_to_country_voting_groups(data_hashmap_array_filtered)
     257
     258    # Debug output
     259    #
     260    # print(to_country_year_voting_groups)
     261   
     262    print()
     263    print("Generating Greenstone JSON to-country voting metadata received by:")
     264    greenstone_metadata_json = gs_directory_metadata(to_country_year_voting_groups)
     265   
     266    print("Saving output as: " + json_output_filename)   
     267    xlsxutil.save_greenstone_json_metadata(greenstone_metadata_json,json_output_filename)
     268
     269    display_gs_head_metadata_tags()
     270   
     271    print()   
     272
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel/xlsxutil.py

    r34892 r34910  
    22import os
    33import sys
     4import json
    45
    56import openpyxl
     
    3637
    3738def filter_finalist_votes(entry):
    38     return entry.get('(semi-) final') == "f" and entry.get('Duplicate') == None
     39    return entry.get("(semi-) final") == "f" and entry.get('Duplicate') == None
     40
     41def filter_jury_votes(entry):
     42    return entry.get("Jury or Televoting") == "J" and entry.get('Duplicate') == None
     43
     44def filter_tele_votes(entry):
     45    return entry.get("Jury or Televoting") == "T" and entry.get('Duplicate') == None
     46
    3947
    4048def sortkey_year_to_country(entry):
     
    141149    print ""
    142150    return data_hashmap_array
     151
     152def save_greenstone_json_metadata(greenstone_metadata_json,json_output_filename):
     153
     154    with open(json_output_filename, 'w') as outfile:
     155        json.dump(greenstone_metadata_json, outfile, indent=2)
Note: See TracChangeset for help on using the changeset viewer.