#!/usr/bin/env python from __future__ import print_function import os import re import sys import json import argparse import openpyxl import util import xlsxutil jury_from_countries = {} tele_from_countries = {} def create_to_country_voting_groups(data_hashmap_array): # Example values for header-names # (semi-) final: f, sf # Jury or Televoting: J, T # Year: 1975, ... # To country: Belgium # From country: Belgium # Edition: 1975f, 1975sf # Sort so array entries are grouped by the country receiving the votes in a given year data_hashmap_array.sort(key=xlsxutil.sortkey_year_to_country) # Debug output # # for data_hashmap in data_hashmap_array: # print(data_hashmap.get('To country'), data_hashmap.get('Year'), data_hashmap.get('Points'), "(Points from " + data_hashmap.get('From country') + ")") # Build array of country groups # A country grouping includes all the votes that country receive that year country_groups = [] prev_data_hashmap = data_hashmap_array[0] country_group = [ ] i = 1 num_rows = len(data_hashmap_array) while (i < num_rows): country_group.append(prev_data_hashmap) data_hashmap = data_hashmap_array[i] if (data_hashmap.get('To country') != prev_data_hashmap.get('To country')): # moving on to a new country group country_groups.append(country_group) country_group = [ ] prev_data_hashmap = data_hashmap i = i + 1 country_group.append(prev_data_hashmap) country_groups.append(country_group) return country_groups def fileset_voting_for_esc_country_in_year(to_country_year_votes): # Looking to build data-structure (for output as JSON) in the form # { "FileSet": # [ # { "FileName": "France1991\.nul" }, # { "Description": # { # "Metadata": # [ # { "name": "Germany-J", "content": 12 }, # J = Jury Vote # { "name": "Germany-T", "content": 6 }, # T = Televote (if present) # ... # ] # } # } # ] # } metadata_array = [] jury_metadata_vals = [] tele_metadata_vals = [] to_country_jury_total = 0 to_country_tele_total = 0 # Use the first record to be a representative for 'top level' (tl) # metadata about the votes cast 'To country' tl_rec = to_country_year_votes[0] tl_to_country = tl_rec.get('To country') tl_year = tl_rec.get('Year') tl_final = tl_rec.get("(semi-) final") == "f" tl_to_country_id = re.sub(r'\s+', '', tl_to_country) + str(tl_year) metadata_array.append({ "name": "Final","content": tl_final }) for to_country_year_vote in to_country_year_votes: to_country = to_country_year_vote.get('To country') year = to_country_year_vote.get('Year') from_country = to_country_year_vote.get('From country') vote_type = to_country_year_vote.get('Jury or Televoting') points = to_country_year_vote.get('Points') id_from_country = re.sub(r'\s+', '', from_country) voting_rec = { "name": id_from_country+"-"+vote_type, "content": points } metadata_array.append(voting_rec) if (vote_type == "J"): jury_metadata_vals.append(id_from_country+"-J") jury_from_countries[id_from_country] = 1 to_country_jury_total = to_country_jury_total + points elif (vote_type == "T"): tele_metadata_vals.append(id_from_country+"-T") tele_from_countries[id_from_country] = 1 to_country_tele_total = to_country_tele_total + points else: util.eprint("Warning: Unrecognized voting type: " + vote_type) if (len(jury_metadata_vals)>0): metadata_array.append({ "name": "JuryVotesJSON", "content": json.dumps(jury_metadata_vals) }) metadata_array.append({ "name": "JuryVotesTotal", "content": to_country_jury_total}) if (len(tele_metadata_vals)>0): metadata_array.append({ "name": "TeleVotesJSON", "content": json.dumps(tele_metadata_vals) }) metadata_array.append({ "name": "TeleVotesTotal", "content": to_country_tele_total}) filename_id = tl_to_country_id + "\\.nul" fileset = { "FileSet" : [ { "FileName": filename_id, }, { "Description" : { "Metadata" : metadata_array } } ] } return fileset def gs_directory_metadata(to_country_year_voting_groups): # Next step is to express the grouped to-country voting data # in the Greenstone JSON metadata format: # { "DirectoryMetadata": # [ # { "FileSet": # [ # { "FileName": "France1991\.nul" }, # { "Description": # { # "Metadata": # [ # { "name": "Germany-J", "content": "12" }, # J = Jury Vote # ... # ] # } # } # ] # } # ... # ] # } directory_metadata = [] for to_country_year_votes in to_country_year_voting_groups: fileset = fileset_voting_for_esc_country_in_year(to_country_year_votes) directory_metadata.append(fileset) filename_id = fileset.get('FileSet')[0].get('FileName') num_countries_voting_data = len(fileset.get('FileSet')[1].get('Description').get('Metadata')) print(" " + filename_id.ljust(28) + ": " + str(num_countries_voting_data) + " votes") greenstone_metadata_json = { "DirectoryMetadata": directory_metadata } return greenstone_metadata_json def display_gs_head_metadata_tags(): print() print() print("For, e.g., '' section of collectionConfig.xml:") print() print(" ") for from_country in sorted(jury_from_countries.keys()): print(" ") print() for from_country in sorted(tele_from_countries.keys()): print(" ") print(" ") if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('--sheetname', help="The name of the sheet within the Excel file to extractc data from") parser.add_argument('--votingtype', choices=['J','T'], help="Filter to only J=Jury or T=Tele cast votes") parser.add_argument('input-file.xlsx') parser.add_argument('output-file.json', nargs='?') args = parser.parse_args() sheetname = getattr(args,'sheetname'); voting_type = getattr(args,'votingtype'); excel_input_filename = getattr(args,'input-file.xlsx'); json_output_filename = getattr(args,'output-file.json'); if (json_output_filename == None): json_output_filename = os.path.splitext(excel_input_filename)[0]+'.json' worksheet = xlsxutil.load_xslx_sheet(excel_input_filename,sheetname) data_hashmap_array = xlsxutil.convert_worksheet_to_hashmaps(worksheet) print("Number of data rows in Excel file: " + str(len(data_hashmap_array))) # Filter down to just the voting results concerning finals data_hashmap_array_filtered = list(filter(xlsxutil.filter_finalist_votes, data_hashmap_array)) print("Number of finalist voting data rows: " + str(len(data_hashmap_array_filtered))) if voting_type != None: # Further filter down by the type of voting results cast if (voting_type == "J"): data_hashmap_array_filtered = list(filter(xlsxutil.filter_jury_votes, data_hashmap_array_filtered)) print("Number Jury cast data rows: " + str(len(data_hashmap_array_filtered))) else: # Must be "T" data_hashmap_array_filtered = list(filter(xlsxutil.filter_tele_votes, data_hashmap_array_filtered)) print("Number Televoting cast data rows: " + str(len(data_hashmap_array_filtered))) to_country_year_voting_groups = create_to_country_voting_groups(data_hashmap_array_filtered) # Debug output # # print(to_country_year_voting_groups) print() print("Generating Greenstone JSON to-country voting metadata received by:") greenstone_metadata_json = gs_directory_metadata(to_country_year_voting_groups) print("Saving output as: " + json_output_filename) xlsxutil.save_greenstone_json_metadata(greenstone_metadata_json,json_output_filename) display_gs_head_metadata_tags() print()