Changeset 34976
- Timestamp:
- 2021-03-26T17:46:12+13:00 (3 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/errata-categories/esc-wikipedia-download-and-process-votes.py
r34975 r34976 4 4 5 5 import os 6 import re 6 7 7 8 import bs4 … … 27 28 28 29 table_header = table_rows[0] 29 # print("table header = " + repr(table_header))30 30 31 31 header_to_vals = {} … … 38 38 header_label = "Language" 39 39 40 # print("header: '" + header_label+"'")41 42 40 headers.append(header_label) 43 41 header_to_vals[header_label] = [] … … 54 52 return header_to_vals 55 53 56 def convert_cols_to_country_recs(header_to_vals ):54 def convert_cols_to_country_recs(header_to_vals,year): 57 55 58 56 country_recs = {} 59 57 60 for country in header_to_vals.get("Country"): 58 for country_tag in header_to_vals.get("Country"): 59 # print("@@@@@ td = " + repr(country_tag)); 60 61 country = country_tag.find("a",href=re.compile(r"^.*" + str(year) + r"$")).string 62 # print("**** country = " + country) 63 61 64 country_recs[country] = {} 62 65 … … 68 71 69 72 for l in range(0,len(vals)): 70 country = header_to_vals.get("Country")[l] 73 country_tag = header_to_vals.get("Country")[l] 74 country_flag_img = country_tag.find("img") 75 country = country_tag.find("a",href=re.compile(r"^.*" + str(year) + r"$")).string 76 71 77 val = vals[l] 72 78 print("@@@ val = " + repr(val)) 79 80 if key == "Artist": 81 a_val = val.find("a") 82 if (a_val is not None): 83 val = a_val 84 elif key == "Song": 85 a_val = val.find("a") 86 if (a_val is not None): 87 val = a_val 88 elif key == "Language": 89 a_val = val.find("a") 90 if (a_val is not None): 91 val = a_val 92 elif key == "Place": 93 span_val = val.find("span") 94 if (span_val is not None): 95 val = span_val 96 97 for inner_val in val.contents: 98 if (inner_val.string and re.search("[^\s]",inner_val.string)): 99 val = inner_val 100 break 101 102 val = val.string.strip() 103 104 #print("country = " + country); 105 #print("key = " + key); 106 #print("*** storing: " + country + "[" + key + "] = " + val) 107 73 108 country_recs[country][key] = val 74 109 75 110 return country_recs 76 111 112 def output_country_recs(country_recs): 113 114 for country_name in country_recs.keys(): 115 country_rec = country_recs.get(country_name) 116 117 print("[" + country_name + "]") 118 119 for metadata_key in country_rec.keys(): 120 metadata_val = country_rec.get(metadata_key) 121 print(" " + metadata_key + " = " + repr(metadata_val)) 122 77 123 def process_category_page(year): 78 124 esc_wiki_page = "Eurovision_Song_Contest_" + str(year) … … 118 164 header_to_vals = html_tablerows_to_hashmap(table_rows) 119 165 120 convert_cols_to_country_recs(header_to_vals) 121 166 country_recs = convert_cols_to_country_recs(header_to_vals,year) 167 168 print() 169 print("==========") 170 171 output_country_recs(country_recs) 122 172 123 173
Note:
See TracChangeset
for help on using the changeset viewer.