Changeset 34908


Ignore:
Timestamp:
2021-02-21T15:06:11+13:00 (3 years ago)
Author:
davidb
Message:

Name change to better reflect what the script does

File:
1 moved

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel/xlsx-fromcountry-jsonmetadata.py

    r34907 r34908  
    77import sys
    88import json
     9#import csv
    910
    1011import argparse
     
    3940    # Debug output
    4041    #
    41     for data_hashmap in data_hashmap_array_finals:
    42         print(data_hashmap.get('From country'), data_hashmap.get('Year'), data_hashmap.get('Points'), "(Points to " + data_hashmap.get('To country') + ")")
     42    # for data_hashmap in data_hashmap_array_finals:
     43    #    print(data_hashmap.get('From country'), data_hashmap.get('Year'), data_hashmap.get('Points'), "(Points to " + data_hashmap.get('To country') + ")")
    4344
    4445
     
    6768    country_group.append(prev_data_hashmap)       
    6869    country_groups.append(country_group)
     70
     71    # Debug output
     72   
     73    for from_country_voting_array in country_groups:
     74        # from_country_voting_array => Votes cast by given country (in given year) to other countries
     75
     76        from_country_name = from_country_voting_array[0].get('From country')
     77        print(from_country_name+":")
     78
     79        for to_country_entry in from_country_voting_array:
     80            print("  ", to_country_entry.get('Year'), to_country_entry.get('Points'),
     81                  " ("+to_country_entry.get('Jury or Televoting')+") ",
     82                  " Points to " + to_country_entry.get('To country'))
     83
    6984
    7085    return country_groups
     
    90105    # }
    91106
     107
     108
     109    # Scan all voting to build up a complete list of all countries that
     110    # either cast or received votes
     111
     112    all_from_countries_year = []
     113    all_to_countries_tj     = []
     114   
     115    for from_country_voting_array in country_groups:
     116        from_country = from_country_voting_array[0].get('From country')
     117        year = from_country_voting_array[0].get('Year')
     118
     119        ### Remove spaces!!!
     120        from_country_year = from_country + "-" + year
     121
     122        all_from_countries_year[from_country_year] = 1
     123       
     124        for to_country_entry in from_country_voting_array:
     125            to_country = to_country_entry.get('To country')
     126            vote_type = to_country_entry.get('Jury or Televoting')
     127
     128            to_country_vote_type = to_country+"-"+vote_type
     129           
     130            all_to_countries[to_country_vote_type] = 1
     131
     132
     133    from_country_to_hashmap = []
     134   
     135   
     136    for to_country_entry in from_country_voting_array:
     137        print("  ", to_country_entry.get('Year'), to_country_entry.get('Points'),
     138                  " ("+to_country_entry.get('Jury or Televoting')+") ",
     139                  " Points to " + to_country_entry.get('To country'))
     140           
     141
     142    csv_header_array = sorted(all_to_countries_tj.keys())
     143
     144           
     145    csv_ofile = open(csv_filename, 'wb')
     146
     147    csv_wr  = csv.writer(csv_ofile, quoting=csv.QUOTE_ALL)
     148
     149           
     150#            csv_wr.writerow(filtered_utf8_row)
     151
     152   
     153
     154    csv_ofile.close()
     155   
    92156    fileset_array = []
    93157   
     
    136200    id_year = to_country_year_votes[0].get('Year');
    137201    id = id_to_country + str(id_year);
    138     filename_id = id + "\\.nul"
     202<    filename_id = id + "\\.nul"
    139203
    140204    fileset = {
     
    151215
    152216    parser = argparse.ArgumentParser()
     217    parser.add_argument('--sheetname', help="The name of the sheet within the Excel file to extractc data from")
     218    parser.add_argument('--voting-type', choices=['J','T'], help="Filter to only J=Jury or T=Tele cast votes")
    153219    parser.add_argument('input-file.xlsx')
    154220    parser.add_argument('output-file.csv', nargs='?')
    155     parser.add_argument('--sheetname')
    156221   
    157222    args = parser.parse_args()
    158223
     224    sheetname = getattr(args,'sheetname');
     225    if (csv_output_filename == None):
     226        csv_output_filename = os.path.splitext(excel_input_filename)[0]+'.csv'
     227
     228    voting_type = getattr(args,'voting-type');
     229   
    159230    excel_input_filename = getattr(args,'input-file.xlsx');
    160231    csv_output_filename = getattr(args,'output-file.csv');
    161     sheetname = getattr(args,'sheetname');
    162    
    163     if (csv_output_filename == None):
    164         csv_output_filename = os.path.splitext(excel_input_filename)[0]+'.csv'
     232   
    165233       
    166234    worksheet = xlsxutil.load_xslx_sheet(excel_input_filename,sheetname)
    167235
    168236    data_hashmap_array = xlsxutil.convert_worksheet_to_hashmaps(worksheet)
    169 
    170     print("Number of data rows: " + str(len(data_hashmap_array)))
    171 
    172    
    173     from_country_year_voting_groups = create_from_country_voting_groups(data_hashmap_array)
     237   
     238    print("Number of data rows in Excel file: " + str(len(data_hashmap_array)))
     239
     240    # Filter down to just the voting results concerning finals
     241    data_hashmap_array_finals = list(filter(xlsxutil.filter_finalist_votes, data_hashmap_array))
     242
     243    print("Number of finalist voting data rows: " + str(len(data_hashmap_array_finals)))
     244
     245    if voting_type != None:
     246        # Further filter down by the type of voting results cast
     247        if (voting_type == "J"):
     248            data_hashmap_array_finals = list(filter(xlsxutil.filter_jury_votes, data_hashmap_array_finals))
     249            print("Number Jury cast data rows: " + str(len(data_hashmap_array_finals)))
     250        else:
     251            # Must be "T"
     252            data_hashmap_array_finals = list(filter(xlsxutil.filter_tele_votes, data_hashmap_array_finals))
     253            print("Number Jury cast data rows: " + str(len(data_hashmap_array_finals)))
     254
     255
     256
     257    from_country_year_voting_groups = create_from_country_voting_groups(data_hashmap_array_finals)
    174258
    175259    # Debug output
    176260    #
    177     print(from_country_year_voting_groups)
     261    #print(from_country_year_voting_groups)
    178262   
    179263    # Next step is to express the grouped from-country voting data
    180     # in the Greenstone JSON metadata format:
    181    
    182 
     264    # as a CSV file in the form used by Greenstone's CSVPlugin
     265   
    183266   
    184267#    directory_metadata = []
Note: See TracChangeset for help on using the changeset viewer.