Changeset 35862 for main/trunk
- Timestamp: 2022-01-03T15:45:40+13:00 (2 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/spotify-musicbrainz/eurosparqlify.py
r35141 r35862 47 47 return "http://motools.sourceforge.net/keys/keys.owl#" + keys[key] + modes[mode] 48 48 49 def get_dbp_results( ):49 def get_dbp_results(start_year,end_year): 50 50 euroSparql = SPARQLWrapper(euroEndpoint) 51 51 euroSparql.setReturnFormat(JSON) … … 53 53 dbpSparql.setReturnFormat(JSON) 54 54 55 opt_filter_terms = [] 56 if (start_year is not None): 57 opt_filter_terms.append(" FILTER ( <http://www.w3.org/2001/XMLSchema#integer>(?entryYear) >= " + str(start_year) + ") ."); 58 59 if (end_year is not None): 60 opt_filter_terms.append(" FILTER ( <http://www.w3.org/2001/XMLSchema#integer>(?entryYear) <= " + str(end_year) + ") ."); 61 62 opt_filter = "" 63 64 if (len(opt_filter_terms) > 0): 65 opt_filter_terms.insert(0,"bind( REPLACE(str(?entryUri), \".*(\\\\d{4})\", \"$1\") AS ?entryYear).") 66 67 opt_filter = "\n".join(opt_filter_terms) 68 69 # bind( REPLACE(str(?entryUri), ".*(\\d{4})", "$1") AS ?entryYear). 70 71 # prefix xsd: <http://www.w3.org/2001/XMLSchema#> 72 # FILTER ( http://www.w3.org/2001/XMLSchema\#integer(?entryYear) >= 2014). 73 # FILTER ( http://www.w3.org/2001/XMLSchema\#integer(?entryYear) <= 2016). 74 75 76 55 77 # how many entry - song pairings are there? 56 euroSparql.setQuery("""57 SELECT (COUNT(DISTINCT *) as ?count) WHERE { 58 GRAPH <https://so-we-must-think.space/greenstone3/eurovision-library/collection/eurovision> { 78 count_query_str = """ 79 SELECT (COUNT(DISTINCT *) as ?count) WHERE {{ 80 GRAPH <https://so-we-must-think.space/greenstone3/eurovision-library/collection/eurovision> {{ 59 81 ?entryUri <http://greenstone.org/gsdlextracted#song_robust_uri> ?songUri . 
60 } 61 } 62 """) 82 {0} 83 }} 84 }} 85 """.format(opt_filter) 86 87 88 euroSparql.setQuery(count_query_str) 89 63 90 countResult = euroSparql.query().convert() 64 91 count = int(countResult["results"]["bindings"][0]["count"]["value"]) 65 92 93 print("Number of matching entries: " + str(count)) 94 sleep(3) 95 66 96 # retrieve entry-song pairings and corresponding dbpedia matches IN CHUNKS OF 500 67 97 n=0 68 98 dbpBindings = [] 69 99 while n < count: 70 euroSparql.setQuery("""71 SELECT DISTINCT ?entryUri ?songUri WHERE { 72 graph <https://so-we-must-think.space/greenstone3/eurovision-library/collection/eurovision> { 100 query_offset_n_str = """ 101 SELECT DISTINCT ?entryUri ?songUri WHERE {{ 102 graph <https://so-we-must-think.space/greenstone3/eurovision-library/collection/eurovision> {{ 73 103 ?entryUri <http://greenstone.org/gsdlextracted#song_robust_uri> ?songUri . 74 } 75 } LIMIT 500 OFFSET """ + str(n) + """ 76 """) 104 {0} 105 }} 106 }} LIMIT 500 OFFSET {1} 107 """.format(opt_filter,str(n)) 108 109 euroSparql.setQuery(query_offset_n_str) 77 110 78 111 print("Issuing Greenstone SPARQL query with n=", n) … … 216 249 217 250 parser = argparse.ArgumentParser() 251 252 parser.add_argument('-sy', '--startyear', type=int, help="Limit entries to be from this year and onwards") 253 parser.add_argument('-ey', '--endyear', type=int, help="Limit entries to be up to this year") 254 218 255 parser.add_argument('-mb', '--musicbrainz', action='store_true', help="Query the MusicBrainz API") 219 256 parser.add_argument('-aa', '--audioanalysis', action='store_true', help="Include audio analysis features in output data") … … 224 261 args = parser.parse_args() 225 262 263 start_year = getattr(args,'startyear'); 264 end_year = getattr(args,'endyear'); 265 226 266 query_musicbrainz = getattr(args,'musicbrainz'); 227 267 output_audio_analysis = getattr(args,'audioanalysis'); … … 232 272 233 273 234 dbp_results = get_dbp_results( );274 dbp_results = 
get_dbp_results(start_year,end_year); 235 275 outcomes = process_dbp_results(dbp_results, query_musicbrainz, output_audio_analysis) 236 276 metadata_records = outcomes[0]
Note: See TracChangeset for help on using the changeset viewer.