Changeset 35032
- Timestamp:
- 2021-04-05T15:48:36+12:00 (3 years ago)
- Location:
- main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/errata-categories
- Files:
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/errata-categories/esc-wikipedia-download-and-detect-missing-cat-entries.py
r35018 r35032 197 197 198 198 greenstone_metadata_json = gs_directory_metadata(missing_year_countries,json_output_filename) 199 print("Saving output as: " + json_output_filename) 199 200 save_greenstone_json_metadata(greenstone_metadata_json,json_output_filename) 200 201 -
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/errata-categories/esc-wikipedia-download-and-process-votes.py
r35018 r35032 150 150 article_year_html = escwikipedia.retrieve_article_page(year) 151 151 country_year_recs = escwikipedia.process_article_page(article_year_html,year,final_result_ids, 152 stop_at_first=True) 152 stop_at_first=True, 153 extra_metadata={"Final": True}) 154 153 155 154 156 all_country_year_recs.append(country_year_recs) -
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/errata-categories/escwikipedia.py
r35014 r35032 136 136 137 137 138 def convert_cols_to_country_year_recs(header_to_vals,year,accumulated_country_year_recs ):138 def convert_cols_to_country_year_recs(header_to_vals,year,accumulated_country_year_recs,extra_metadata): 139 139 140 140 a_href_re = re.compile(r"^.*" + str(year) + r"$") … … 158 158 country_year = country + str(year) 159 159 160 this_country_year_rec = { "Country": country, "Year": year } 160 this_country_year_rec = {} 161 if (extra_metadata is not None): 162 this_country_year_rec = extra_metadata.copy() 163 164 this_country_year_rec["Country"] = country 165 this_country_year_rec["Year"] = year 161 166 162 167 for key in header_to_vals.keys(): 163 168 if (key == "Country"): 164 country_flag_img = country_tag.find("img") 165 #print(repr(country_flag_img)) 166 #flag_img = country_flag_img.src 167 #if (flag_img is None): 168 # print("***** Flag image empty!!!!") 169 169 country_flag_img = country_tag.find("img") 170 170 this_country_year_rec["FlagImg"] = str(country_flag_img) 171 171 continue … … 189 189 if (span_val is not None): 190 190 val = span_val 191 elif key == "Points": 192 key = "VoteGrandTotal" 191 193 192 194 for inner_val in val.contents: … … 259 261 return esc_article_year_html 260 262 261 def process_article_page(esc_article_year_html,year,result_ids, stop_at_first ):263 def process_article_page(esc_article_year_html,year,result_ids, stop_at_first, extra_metadata=None): 262 264 263 265 country_year_recs = {} … … 294 296 header_to_vals = html_tablerows_to_hashmap(table_rows) 295 297 296 convert_cols_to_country_year_recs(header_to_vals,year,country_year_recs )298 convert_cols_to_country_year_recs(header_to_vals,year,country_year_recs, extra_metadata) 297 299 298 300 print("==========")
Note: See TracChangeset for help on using the changeset viewer.