Changeset 35127


Ignore:
Timestamp:
2021-04-27T14:11:45+12:00 (3 years ago)
Author:
davidb
Message:

Code to extract ESC Logo for year

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/errata-categories/escwikipedia.py

    r35117 r35127  
    302302    ## debug_output_country_year_recs(country_year_recs)
    303303
     304    # Splice in logo image for that year into each country_year_rec
     305    #infobox_table = esc_year_soup.find("table",{"class": "infobox"})
     306    #infobox_td = infobox_table.find("td",{"class": "infobox-image"})
     307    #infobox_img = infobox_td.find("img")
     308
     309    #infobox_img = esc_year_soup.select("table.infobox td.infobox-image img")
     310
     311    #infobox_img = esc_year_soup.select("table.infobox td.infobox-image img")
     312    #infobox_img = esc_year_soup.select("table.infobox td.infobox-image")
     313    # infobox_img = esc_year_soup.select("table.infobox")
     314
     315    #infobox_table = esc_year_soup.find("table",{"class": "infobox"})
     316    #infobox_img = infobox_table.tbody.tr[0].td[0].a.img
     317
     318    # Looks like Wikipedia has changed some of its infobox CSS
     319    # If processing newer downloads, then the following is probably the select
     320    # statement to use
     321
     322    # new school!
     323    #infobox_img = esc_year_soup.select("table.infobox td.infobox-image img")
     324
     325    # old school
     326    infobox_imgs = esc_year_soup.select("table.infobox tr td a.image img")
     327   
     328    if (len(infobox_imgs) == 0):
     329        print("****")
     330        print("****!!! No ESC Logo image found!!!")
     331        print("****")
     332    else:
     333        # Some pages include additional image graphics, such as a map showing country entrants
     334        # => Want the first one
     335        infobox_logo_img = infobox_imgs[0];
     336
     337        for country_year_key in country_year_recs.keys():
     338            country_year_rec = country_year_recs.get(country_year_key)
     339           
     340            country_year_rec["YearLogoImg"] = str(infobox_logo_img)
     341
     342           
    304343    return country_year_recs
    305344
Note: See TracChangeset for help on using the changeset viewer.