- Timestamp:
- 2021-03-25T18:47:20+13:00 (3 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/errata-categories/esc-wikipedia-download-and-process-votes.py
# Diff-view context preceding these definitions, kept verbatim:
#   r34974 r34975 24 24 f.close() 25 25


def html_tablerows_to_hashmap(table_rows):
    """Turn the rows of an HTML table into a column-oriented dict.

    The first row supplies the column labels (its ``th`` cells); every
    following row contributes its ``td`` cells, in order, to the matching
    column.

    Args:
        table_rows: Sequence of row elements exposing ``find_all(name)``;
            header cells must also expose ``contents`` (and ``get_text()``
            when their first child is not plain text).
            NOTE(review): presumably BeautifulSoup ``<tr>`` tags -- confirm
            against the caller.

    Returns:
        dict mapping each header label to the list of cell objects found in
        that column.  The cells are stored as-is (not converted to text).
        An empty ``table_rows`` yields ``{}``.
    """
    if not table_rows:
        # No header row at all: nothing to map (previously an IndexError).
        return {}

    header_to_vals = {}
    headers = []

    for header in table_rows[0].find_all("th"):
        contents = header.contents
        first = contents[0] if contents else ""
        if not isinstance(first, str):
            # First child is a nested element (e.g. a link), which has no
            # usable .strip(); fall back to the cell's text content.
            first = header.get_text()

        header_label = first.strip()
        if header_label == "Language(s)":
            header_label = "Language"

        headers.append(header_label)
        header_to_vals[header_label] = []

    print(" Headers = " + ",".join(header_to_vals.keys()))

    for row in table_rows[1:]:
        # zip() stops at the shorter side, so a row carrying more cells than
        # there are headers no longer raises IndexError; the surplus cells
        # are ignored.
        for header_label, val in zip(headers, row.find_all("td")):
            header_to_vals[header_label].append(val)

    return header_to_vals


def convert_cols_to_country_recs(header_to_vals):
    """Pivot column-oriented table data into one record per country.

    Args:
        header_to_vals: dict mapping a column label to the list of values in
            that column.  The ``"Country"`` column supplies the record keys,
            so its values must be hashable.

    Returns:
        dict mapping each country value to a dict of
        ``{column label: value}`` for every non-"Country" column.  Values
        are paired with countries by position; positions lacking a
        counterpart on either side are skipped.  Returns ``{}`` when there
        is no "Country" column.
    """
    # A missing "Country" column used to surface as a TypeError (iterating
    # None); treat it as "no records".
    countries = header_to_vals.get("Country") or []

    country_recs = {}
    for country in countries:
        country_recs[country] = {}

    for key, vals in header_to_vals.items():
        if key == "Country":
            continue

        # zip() guards against a column that is longer than the Country
        # column, which previously raised IndexError.
        for country, val in zip(countries, vals):
            country_recs[country][key] = val

    return country_recs


# Remainder of this diff-view line: a partially elided hunk of
# process_category_page, kept verbatim because the definition is not fully
# visible here:
#   27 76 28 77 def process_category_page(year): … … 50 99 results_heading = None
#   51 100 for fr_id in final_result_ids: 52 101 102 if ((year == 1996) and (fr_id == "Final")):
#   103 continue 104 53 105 results_text_span = esc_year_soup.find("span",id=fr_id)
#   54 106 if (results_text_span is not None):
#   55 107 print(" Found Final Results heading with id: " + fr_id);
#   56 results_heading = results_text_span.parent 108 results_heading = results_text_span.parent
#   109 # print("**** parent tag: " + results_heading.name); 57 110 break 58 111 … …
#   61 114 results_table = results_heading.findNext('table')
#   62 115 table_rows = results_table.find_all('tr');
#   116 print(" " + esc_wiki_page_file + ": number of rows in Results table = " + str(len(table_rows))) 63 117
64 print(esc_wiki_page_file + ": number of rows in Results table = " + str(len(table_rows)))118 header_to_vals = html_tablerows_to_hashmap(table_rows) 65 119 120 convert_cols_to_country_recs(header_to_vals) 121 122 66 123 67 124 if __name__ == "__main__":
Note:
See TracChangeset
for help on using the changeset viewer.