Changeset 34891
- Timestamp:
- 2021-02-16T23:19:47+13:00 (3 years ago)
- Location:
- main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel
- Files:
  - 1 added
  - 2 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel/xlsx-to-jsonmetadata.py
r34876 r34891 11 11 import openpyxl 12 12 13 import util 13 14 import xlsxutil 14 15 … … 16 17 tele_from_countries = {} 17 18 18 def eprint(*args, **kwargs): 19 print(*args, file=sys.stderr, **kwargs) 20 21 def fileset_voting_for_esc_country_in_year(data_hashmap): 22 23 # Only generating a single fileset record here, however 24 # the Greenstone format allows for this to be an array 25 # of fileset entries => return [ fileset ] 26 27 fileset = {} 28 29 return [ fileset ] 30 31 32 def filter_finalist_votes(entry): 33 return entry.get('(semi-) final') == "f" and entry.get('Duplicate') == None 34 35 def sortkey_year_to_country(entry): 36 return str(entry.get('Year'))+entry.get('To country') 19 #def fileset_voting_for_esc_country_in_year(data_hashmap): 20 # 21 # # Only generating a single fileset record here, however 22 # # the Greenstone format allows for this to be an array 23 # # of fileset entries => return [ fileset ] 24 # 25 # fileset = {} 26 # 27 # return [ fileset ] 28 29 30 #def filter_finalist_votes(entry): 31 # return entry.get('(semi-) final') == "f" and entry.get('Duplicate') == None 32 # 33 #def sortkey_year_to_country(entry): 34 # return str(entry.get('Year'))+entry.get('To country') 37 35 38 36 … … 49 47 50 48 # Filter down to just the voting results concerning finals 51 data_hashmap_array_finals = list(filter( filter_finalist_votes, data_hashmap_array))49 data_hashmap_array_finals = list(filter(xlsxutil.filter_finalist_votes, data_hashmap_array)) 52 50 53 51 # Sort so array entries are grouped by the country receiving the votes in a given year 54 data_hashmap_array_finals.sort(key= sortkey_year_to_country)52 data_hashmap_array_finals.sort(key=xlsxutil.sortkey_year_to_country) 55 53 56 54 … … 114 112 jury_metadata_vals = [] 115 113 tele_metadata_vals = [] 114 115 to_country_jury_total = 0 116 to_country_tele_total = 0 116 117 117 118 for to_country_year_vote in to_country_year_votes: … … 131 132 jury_metadata_vals.append(id_from_country+"-J") 132 133 
jury_from_countries[id_from_country] = 1 134 to_country_jury_total = to_country_jury_total + points 133 135 134 136 elif (vote_type == "T"): 135 137 tele_metadata_vals.append(id_from_country+"-T") 136 138 tele_from_countries[id_from_country] = 1 139 to_country_tele_total = to_country_tele_total + points 137 140 else: 138 eprint("Warning: Unrecognized voting type: " + vote_type)141 util.eprint("Warning: Unrecognized voting type: " + vote_type) 139 142 140 143 if (len(jury_metadata_vals)>0): 141 144 metadata_array.append({ "name": "JuryVotesJSON", "content": json.dumps(jury_metadata_vals) }) 145 metadata_array.append({ "name": "JuryVotesTotal", "content": to_country_jury_total}) 142 146 if (len(tele_metadata_vals)>0): 143 147 metadata_array.append({ "name": "TeleVotesJSON", "content": json.dumps(tele_metadata_vals) }) 148 metadata_array.append({ "name": "TeleVotesTotal", "content": to_country_tele_total}) 144 149 145 150 id_to_country = to_country_year_votes[0].get('To country') … … 224 229 print(" " + filename_id.ljust(28) + ": " + str(num_countries_voting_data) + " votes") 225 230 226 227 print("") 231 232 print("") 233 print("") 234 print("For, e.g., '<display><format>' section of collectionConfig.xml:") 235 print("") 236 print(" <gsf:headMetaTags>") 237 228 238 for from_country in sorted(jury_from_countries.keys()): 229 print(" <gsf:metadata name=\""+from_country+"-J\" />")239 print(" <gsf:metadata name=\""+from_country+"-J\" />") 230 240 231 241 print("") 232 242 for from_country in sorted(tele_from_countries.keys()): 233 print("<gsf:metadata name=\""+from_country+"-T\" />") 243 print(" <gsf:metadata name=\""+from_country+"-T\" />") 244 245 print(" </gsf:headMetaTags>") 246 print("") 234 247 235 248 greenstone_metadata_json = { "DirectoryMetadata": directory_metadata } -
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/voting-excel/xlsxutil.py
r34875 r34891 1 1 2 2 import os 3 #import re4 3 import sys 5 4 6 5 import openpyxl 7 8 9 6 10 7 ## Fix up names to match those used in DBpedia ESC 11 8 12 9 # In Excel data: 13 # The Netherlands 10 # The Netherlands (also in one case 'The Netherands') 14 11 # Serbia & Montenegro 15 12 # Bosnia & Herzegovina … … 21 18 # Serbia and Montenegro 22 19 # Bosnia and Herzegovina 23 # 24 # Some values noted in ESC that potentially need to be changed at SPARQL query point 20 # Macedonia 21 22 #***** 23 # TODO: 24 # Some values noted in ESC that potentially need to be changed when queries via SPARQL query point 25 25 # Republic of Macedonia (2005) 26 26 # Federal Republic of Yugoslavia (1992) … … 34 34 "F.Y.R. Macedonia": "Macedonia" 35 35 } 36 37 def filter_finalist_votes(entry): 38 return entry.get('(semi-) final') == "f" and entry.get('Duplicate') == None 39 40 def sortkey_year_to_country(entry): 41 return str(entry.get('Year'))+entry.get('To country') 42 36 43 37 44 def load_xslx_sheet(excel_input_filename,sheetname):
Note:
See TracChangeset
for help on using the changeset viewer.