Changeset 34975 for main


Ignore:
Timestamp:
2021-03-25T18:47:20+13:00 (3 years ago)
Author:
davidb
Message:

Digging into the country results table

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/errata-categories/esc-wikipedia-download-and-process-votes.py

    r34974 r34975  
    2424    f.close()
    2525
     26def html_tablerows_to_hashmap(table_rows):
     27
     28    table_header = table_rows[0]
     29    # print("table header = " + repr(table_header))
     30
     31    header_to_vals = {}
     32    headers = []
     33       
     34    header_cols = table_header.find_all("th");
     35    for header in header_cols:
     36        header_label = header.contents[0].strip()
     37        if (header_label == "Language(s)"):
     38            header_label = "Language"
     39           
     40        # print("header: '" + header_label+"'")
     41
     42        headers.append(header_label)
     43        header_to_vals[header_label] = []
     44
     45    print("  Headers = " + ",".join(header_to_vals.keys()))
    2646   
     47    for y in range(1, len(table_rows)):
     48        tds = table_rows[y].find_all("td");
     49        for x in range(0,len(tds)):
     50            val = tds[x]
     51            header_label = headers[x]
     52            header_to_vals[header_label].append(val)
     53           
     54    return header_to_vals
     55
     56def convert_cols_to_country_recs(header_to_vals):
     57
     58    country_recs = {}
     59
     60    for country in header_to_vals.get("Country"):
     61        country_recs[country] = {}
     62                   
     63    for key in header_to_vals.keys():
     64        if (key == "Country"):
     65            continue
     66
     67        vals = header_to_vals.get(key)
     68       
     69        for l in range(0,len(vals)):
     70            country = header_to_vals.get("Country")[l]
     71            val = vals[l]
     72
     73            country_recs[country][key] = val
     74
     75    return country_recs
    2776
    2877def process_category_page(year):
     
    5099    results_heading = None
    51100    for fr_id in final_result_ids:
    52        
     101
     102        if ((year == 1996) and (fr_id == "Final")):
     103            continue
     104           
    53105        results_text_span = esc_year_soup.find("span",id=fr_id)
    54106        if (results_text_span is not None):
    55107            print("  Found Final Results heading with id: " + fr_id);
    56             results_heading = results_text_span.parent       
     108            results_heading = results_text_span.parent
     109            # print("**** parent tag: " + results_heading.name);
    57110            break
    58111
     
    61114    results_table = results_heading.findNext('table')
    62115    table_rows = results_table.find_all('tr');
     116    print("  " + esc_wiki_page_file + ": number of rows in Results table = " + str(len(table_rows)))
    63117
    64     print(esc_wiki_page_file + ": number of rows in Results table = " + str(len(table_rows)))
     118    header_to_vals = html_tablerows_to_hashmap(table_rows)
    65119
     120    convert_cols_to_country_recs(header_to_vals)
     121   
     122   
    66123   
    67124if __name__ == "__main__":
Note: See TracChangeset for help on using the changeset viewer.