Changeset 34845


Ignore:
Timestamp:
2021-02-14T15:09:45+13:00 (3 years ago)
Author:
davidb
Message:

Explicit filters added to disambiguate song titles/entrants that DBpedia maps to identical integer values

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/errata/local--countries-in-esc-by-year-after-1956--with-errata.sparql

    r34831 r34845  
    5555  BIND (IF(isIRI(?song),?song,?song_errata_uri) AS ?song_robust)
    5656
    57 
    58 #    BIND(REPLACE(REPLACE(str(?entrant_robust), "^.*/", "", "i"),"_"," ","i") AS ?entrant_label_raw).
    59 #    # Create more DL friendly entrant-title metadata from Wikipedia entrant-uri/label
    60 #    BIND(REPLACE(?entrant_label_raw, "\\s*\\(.*?singer\\)$", "", "i") AS ?entrant_label_p1).
    61 #    BIND(REPLACE(?entrant_label_p1, "\\s*\\(.*?duo\\)$", "", "i") AS ?entrant_label_p2).
    62 #    BIND(REPLACE(?entrant_label_p2, "\\s*\\(.*?band\\)$", "", "i") AS ?entrant_label_p3).
    63 #    BIND(REPLACE(?entrant_label_p3, "\\s*\\(.*?group\\)$", "", "i") AS ?entrant_label_p4).
    64 #    BIND(REPLACE(?entrant_label_p4, "\\s*\\(.*?musician\\)$", "", "i") AS ?entrant_label).
    65 
    6657  # Create more DL friendly entrant-title metadata from Wikipedia entrant-uri/label
    6758  BIND(REPLACE(REPLACE(str(?entrant_robust), "^.*/(.*?)(_+\\(.*(singer|duo|band|group|musician)\\))?$", "$1"),"_"," ") AS ?entrant_label).
    6859
    69 
    70 #    BIND(REPLACE(REPLACE(str(?song_robust), "^.*/", "", "i"),"_"," ","i") AS ?song_label_raw).
    71 #    # Create more DL friendly song-title metadata from Wikipedia song-uri/label
    72 #    BIND(REPLACE(?song_label_raw, "\\s*\\(.*?song\\)$", "", "i") AS ?song_label).
    73 ##    BIND(REPLACE(?song_label_p1, "\\s*\\(.*?band\\)$", "", "i") AS ?song_label_p2).
    74 ##    BIND(REPLACE(?song_label_p2, "\\s*\\(.*?duo\\)$", "", "i") AS ?song_label).
    75 
    7660  # Create more DL friendly song-title metadata from Wikipedia song-uri/label
    7761  BIND(REPLACE(REPLACE(str(?song_robust), "^.*/(.*?)(_+\\(.*song\\))?$", "$1"),"_"," ") AS ?song_label).
     62
     63# DBpedia incorrectly maps some songs/entrants to integer numbers.
     64# Most of these can be fixed with the usual errata approach;
     65# however, there are a few examples that map to identical
     66# integer values, and therefore are not unique.
     67#
     68# The chosen solution to address this is to include some
     69# bespoke filtering to explicitly rule in the song+entrant combination
     70# we want (i.e., the filter has the effect of ruling out the
     71# (incorrect) combination we don't want).
     72
     73#dbr:Macedonia_in_the_Eurovision_Song_Contest_2000
     74# song: "100"^^xsd:integer
     75#   dbr:100%25_te_ljubam
     76#   dbr:XXL_\(Macedonian_band\)
     77#
     78#dbr:Luxembourg_in_the_Eurovision_Song_Contest_1984
     79# song: "100"^^xsd:integer
     80#   dbr:100%25_d%27amour
     81#   dbr:Sophie_Carle
     82
     83#dbr:Belarus_in_the_Eurovision_Song_Contest_2010
     84# entrant: "3"^^xsd:integer
     85#   dbr:3%2B2_\(band\)
     86#   dbr:Butterflies_\(3%2B2_song\)
     87#
     88#dbr:Netherlands_in_the_Eurovision_Song_Contest_2011
     89# entrant: "3"^^xsd:integer
     90#   dbr:3JS
     91#   dbr:Je_vecht_nooit_alleen
     92   
     93  FILTER(
     94    (?song != "100"^^xsd:integer && ?entrant != "3"^^xsd:integer)
     95    ||
     96    (?song = "100"^^xsd:integer
     97      && ?song_robust = dbr:100%25_te_ljubam && ?entrant = dbr:XXL_\(Macedonian_band\))
     98    ||
     99    (?song = "100"^^xsd:integer
     100      && ?song_robust = dbr:100%25_d%27amour && ?entrant = dbr:Sophie_Carle)
     101    ||
     102    (?entrant = "3"^^xsd:integer
     103      && ?entrant_robust = dbr:3\+2_\(band\) && ?song_robust = dbr:Butterflies_\(3\+2_song\))
     104    ||
     105    (?entrant = "3"^^xsd:integer
     106      && ?entrant_robust = dbr:3JS && ?song_robust = dbr:Je_vecht_nooit_alleen)
     107  ).
     108
     109#      && ?entrant_robust = dbr:3%2B2_\(band\) && ?song_robust = dbr:Butterflies_\(3%2B2_song\))
     110
    78111
    79112  SERVICE <https://dbpedia.demo.openlinksw.com/sparql> {
Note: See TracChangeset for help on using the changeset viewer.