Changeset 34891


Ignore:
Timestamp:
2021-02-16T23:19:47+13:00 (3 years ago)
Author:
davidb
Message:

Some refactoring of where routines should be based

Location:
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel/xlsx-to-jsonmetadata.py

    r34876 r34891  
    1111import openpyxl
    1212
     13import util
    1314import xlsxutil
    1415
     
    1617tele_from_countries = {}
    1718
    18 def eprint(*args, **kwargs):
    19     print(*args, file=sys.stderr, **kwargs)
    20    
    21 def fileset_voting_for_esc_country_in_year(data_hashmap):
    22 
    23     # Only generating a single fileset record here, however
    24     # the Greenstone format allows for this to be an array
    25     # of fileset entries => return [ fileset ]
    26    
    27     fileset = {}
    28    
    29     return [ fileset ]
    30 
    31 
    32 def filter_finalist_votes(entry):
    33     return entry.get('(semi-) final') == "f" and entry.get('Duplicate') == None
    34 
    35 def sortkey_year_to_country(entry):
    36     return str(entry.get('Year'))+entry.get('To country')
     19#def fileset_voting_for_esc_country_in_year(data_hashmap):
     20#
     21#    # Only generating a single fileset record here, however
     22#    # the Greenstone format allows for this to be an array
     23#    # of fileset entries => return [ fileset ]
     24#   
     25#    fileset = {}
     26#   
     27#    return [ fileset ]
     28
     29
     30#def filter_finalist_votes(entry):
     31#    return entry.get('(semi-) final') == "f" and entry.get('Duplicate') == None
     32#
     33#def sortkey_year_to_country(entry):
     34#    return str(entry.get('Year'))+entry.get('To country')
    3735
    3836
     
    4947   
    5048    # Filter down to just the voting results concerning finals
    51     data_hashmap_array_finals = list(filter(filter_finalist_votes, data_hashmap_array))
     49    data_hashmap_array_finals = list(filter(xlsxutil.filter_finalist_votes, data_hashmap_array))
    5250
    5351    # Sort so array entries are grouped by the country receiving the votes in a given year
    54     data_hashmap_array_finals.sort(key=sortkey_year_to_country)
     52    data_hashmap_array_finals.sort(key=xlsxutil.sortkey_year_to_country)
    5553
    5654
     
    114112    jury_metadata_vals = []
    115113    tele_metadata_vals = []
     114
     115    to_country_jury_total = 0
     116    to_country_tele_total = 0
    116117   
    117118    for to_country_year_vote in to_country_year_votes:
     
    131132            jury_metadata_vals.append(id_from_country+"-J")
    132133            jury_from_countries[id_from_country] = 1
     134            to_country_jury_total = to_country_jury_total + points
    133135           
    134136        elif (vote_type == "T"):
    135137            tele_metadata_vals.append(id_from_country+"-T")
    136138            tele_from_countries[id_from_country] = 1
     139            to_country_tele_total = to_country_tele_total + points
    137140        else:           
    138             eprint("Warning: Unrecognized voting type: " + vote_type)
     141            util.eprint("Warning: Unrecognized voting type: " + vote_type)
    139142
    140143    if (len(jury_metadata_vals)>0):
    141144        metadata_array.append({ "name": "JuryVotesJSON", "content": json.dumps(jury_metadata_vals) })
     145        metadata_array.append({ "name": "JuryVotesTotal", "content": to_country_jury_total})
    142146    if (len(tele_metadata_vals)>0):
    143147        metadata_array.append({ "name": "TeleVotesJSON", "content": json.dumps(tele_metadata_vals) })
     148        metadata_array.append({ "name": "TeleVotesTotal", "content": to_country_tele_total})
    144149                         
    145150    id_to_country = to_country_year_votes[0].get('To country')
     
    224229        print("  " + filename_id.ljust(28) + ": " + str(num_countries_voting_data) + " votes")
    225230
    226 
    227     print("")
     231 
     232    print("")
     233    print("")
     234    print("For, e.g., '<display><format>' section of collectionConfig.xml:")
     235    print("")
     236    print("  <gsf:headMetaTags>")
     237
    228238    for from_country in sorted(jury_from_countries.keys()):
    229         print("<gsf:metadata name=\""+from_country+"-J\" />")
     239        print("    <gsf:metadata name=\""+from_country+"-J\" />")
    230240
    231241    print("")
    232242    for from_country in sorted(tele_from_countries.keys()):
    233         print("<gsf:metadata name=\""+from_country+"-T\" />")
     243        print("    <gsf:metadata name=\""+from_country+"-T\" />")
     244
     245    print("  </gsf:headMetaTags>")
     246    print("")
    234247
    235248    greenstone_metadata_json = { "DirectoryMetadata": directory_metadata }
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel/xlsxutil.py

    r34875 r34891  
    11
    22import os
    3 #import re
    43import sys
    54
    65import openpyxl
    7 
    8 
    9 
     6   
    107## Fix up names to match those used in DBpedia ESC
    118
    129# In Excel data:
    13 #   The Netherlands
     10#   The Netherlands (also in one case 'The Netherands')
    1411#   Serbia & Montenegro
    1512#   Bosnia & Herzegovina
     
    2118#   Serbia and Montenegro
    2219#   Bosnia and Herzegovina
    23 #
    24 # Some values noted in ESC that potentially need to be changed at SPARQL query point
     20#   Macedonia
     21
     22#*****
     23# TODO:
     24# Some values noted in ESC that potentially need to be changed when querying via the SPARQL query point
    2525#   Republic of Macedonia (2005)
    2626#   Federal Republic of Yugoslavia (1992)
     
    3434    "F.Y.R. Macedonia":     "Macedonia"
    3535}
     36
     37def filter_finalist_votes(entry):
     38    return entry.get('(semi-) final') == "f" and entry.get('Duplicate') == None
     39
     40def sortkey_year_to_country(entry):
     41    return str(entry.get('Year'))+entry.get('To country')
     42
    3643
    3744def load_xslx_sheet(excel_input_filename,sheetname):
Note: See TracChangeset for help on using the changeset viewer.