Changeset 35032 for main


Ignore:
Timestamp:
2021-04-05T15:48:36+12:00 (3 years ago)
Author:
davidb
Message:

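Optional extra_metadata argument added to process_article_page() and passed through to convert_cols_to_country_year_recs(), so that callers can seed every country/year record with additional fields (e.g. {"Final": True}).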

Location:
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/errata-categories
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/errata-categories/esc-wikipedia-download-and-detect-missing-cat-entries.py

    r35018 r35032  
    197197
    198198    greenstone_metadata_json = gs_directory_metadata(missing_year_countries,json_output_filename)
     199    print("Saving output as: " + json_output_filename)
    199200    save_greenstone_json_metadata(greenstone_metadata_json,json_output_filename)
    200201   
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/errata-categories/esc-wikipedia-download-and-process-votes.py

    r35018 r35032  
    150150        article_year_html = escwikipedia.retrieve_article_page(year)
    151151        country_year_recs = escwikipedia.process_article_page(article_year_html,year,final_result_ids,
    152                                                                       stop_at_first=True)
     152                                                              stop_at_first=True,
     153                                                              extra_metadata={"Final": True})
     154
    153155
    154156        all_country_year_recs.append(country_year_recs)
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/errata-categories/escwikipedia.py

    r35014 r35032  
    136136
    137137
    138 def convert_cols_to_country_year_recs(header_to_vals,year,accumulated_country_year_recs):
     138def convert_cols_to_country_year_recs(header_to_vals,year,accumulated_country_year_recs,extra_metadata):
    139139
    140140    a_href_re = re.compile(r"^.*" + str(year) + r"$")
     
    158158        country_year = country + str(year)
    159159
    160         this_country_year_rec = { "Country": country, "Year": year }
     160        this_country_year_rec = {}
     161        if (extra_metadata is not None):
     162            this_country_year_rec = extra_metadata.copy()
     163           
     164        this_country_year_rec["Country"] = country
     165        this_country_year_rec["Year"]    = year
    161166       
    162167        for key in header_to_vals.keys():
    163168            if (key == "Country"):
    164                 country_flag_img = country_tag.find("img")
    165                 #print(repr(country_flag_img))
    166                 #flag_img = country_flag_img.src
    167                 #if (flag_img is None):
    168                 #    print("***** Flag image empty!!!!")
    169                    
     169                country_flag_img = country_tag.find("img")                   
    170170                this_country_year_rec["FlagImg"] = str(country_flag_img)
    171171                continue
     
    189189                if (span_val is not None):
    190190                    val = span_val
     191            elif key == "Points":
     192                key = "VoteGrandTotal"
    191193
    192194            for inner_val in val.contents:
     
    259261    return esc_article_year_html
    260262
    261 def process_article_page(esc_article_year_html,year,result_ids, stop_at_first):
     263def process_article_page(esc_article_year_html,year,result_ids, stop_at_first, extra_metadata=None):
    262264
    263265    country_year_recs = {}
     
    294296        header_to_vals = html_tablerows_to_hashmap(table_rows)
    295297
    296         convert_cols_to_country_year_recs(header_to_vals,year,country_year_recs)
     298        convert_cols_to_country_year_recs(header_to_vals,year,country_year_recs, extra_metadata)
    297299
    298300    print("==========")
Note: See TracChangeset for help on using the changeset viewer.