Changeset 35862 for main/trunk


Ignore:
Timestamp:
2022-01-03T15:45:40+13:00 (2 years ago)
Author:
davidb
Message:

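Extended get_dbp_results() to support an optional start year and end year (new --startyear / --endyear command-line options), which add FILTER terms to the SPARQL queries so that only entries within that year range are counted and retrieved.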

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/prepare/spotify-musicbrainz/eurosparqlify.py

    r35141 r35862  
    4747    return "http://motools.sourceforge.net/keys/keys.owl#" + keys[key] + modes[mode]
    4848
    49 def get_dbp_results():
     49def get_dbp_results(start_year,end_year):
    5050    euroSparql = SPARQLWrapper(euroEndpoint)
    5151    euroSparql.setReturnFormat(JSON)
     
    5353    dbpSparql.setReturnFormat(JSON)
    5454
     55    opt_filter_terms = []
     56    if (start_year is not None):
     57        opt_filter_terms.append("      FILTER ( <http://www.w3.org/2001/XMLSchema#integer>(?entryYear) >= " + str(start_year) + ") .");
     58
     59    if (end_year is not None):
     60        opt_filter_terms.append("      FILTER ( <http://www.w3.org/2001/XMLSchema#integer>(?entryYear) <= " + str(end_year) + ") .");
     61
     62    opt_filter = ""
     63
     64    if (len(opt_filter_terms) > 0):
     65        opt_filter_terms.insert(0,"bind( REPLACE(str(?entryUri), \".*(\\\\d{4})\", \"$1\") AS ?entryYear).")
     66           
     67        opt_filter = "\n".join(opt_filter_terms)
     68           
     69        # bind( REPLACE(str(?entryUri), ".*(\\d{4})", "$1") AS ?entryYear).
     70
     71        # prefix xsd:    <http://www.w3.org/2001/XMLSchema#>
     72            #    FILTER ( http://www.w3.org/2001/XMLSchema\#integer(?entryYear) >= 2014).
     73            # FILTER ( http://www.w3.org/2001/XMLSchema\#integer(?entryYear) <= 2016).
     74
     75   
     76
    5577    # how many entry - song pairings are there?
    56     euroSparql.setQuery("""
    57       SELECT (COUNT(DISTINCT *) as ?count) WHERE {
    58         GRAPH <https://so-we-must-think.space/greenstone3/eurovision-library/collection/eurovision> {
     78    count_query_str = """
     79      SELECT (COUNT(DISTINCT *) as ?count) WHERE {{
     80        GRAPH <https://so-we-must-think.space/greenstone3/eurovision-library/collection/eurovision> {{
    5981          ?entryUri <http://greenstone.org/gsdlextracted#song_robust_uri> ?songUri .
    60         }
    61       }
    62     """)
     82          {0}
     83        }}
     84      }}
     85    """.format(opt_filter)
     86
     87   
     88    euroSparql.setQuery(count_query_str)
     89   
    6390    countResult = euroSparql.query().convert()
    6491    count = int(countResult["results"]["bindings"][0]["count"]["value"])
    6592
     93    print("Number of matching entries: " + str(count))
     94    sleep(3)
     95   
    6696    # retrieve entry-song pairings and corresponding dbpedia matches IN CHUNKS OF 500
    6797    n=0
    6898    dbpBindings = []
    6999    while n < count:
    70         euroSparql.setQuery("""
    71           SELECT DISTINCT ?entryUri ?songUri WHERE {
    72             graph <https://so-we-must-think.space/greenstone3/eurovision-library/collection/eurovision> {
     100        query_offset_n_str = """
     101          SELECT DISTINCT ?entryUri ?songUri WHERE {{
     102            graph <https://so-we-must-think.space/greenstone3/eurovision-library/collection/eurovision> {{
    73103              ?entryUri <http://greenstone.org/gsdlextracted#song_robust_uri> ?songUri .
    74             }
    75           } LIMIT 500 OFFSET """ + str(n) + """
    76         """)
     104              {0}
     105            }}
     106          }} LIMIT 500 OFFSET {1}
     107        """.format(opt_filter,str(n))
     108       
     109        euroSparql.setQuery(query_offset_n_str)
    77110
    78111        print("Issuing Greenstone SPARQL query with n=", n)
     
    216249
    217250    parser = argparse.ArgumentParser()
     251
     252    parser.add_argument('-sy', '--startyear', type=int, help="Limit entries to be from this year and onwards")
     253    parser.add_argument('-ey', '--endyear',   type=int, help="Limit entries to be up to this year")
     254
    218255    parser.add_argument('-mb', '--musicbrainz',   action='store_true', help="Query the MusicBrainz API")
    219256    parser.add_argument('-aa', '--audioanalysis', action='store_true', help="Include audio analysis features in output data")
     
    224261    args = parser.parse_args()
    225262
     263    start_year = getattr(args,'startyear');
     264    end_year   = getattr(args,'endyear');
     265       
    226266    query_musicbrainz     = getattr(args,'musicbrainz');
    227267    output_audio_analysis = getattr(args,'audioanalysis');
     
    232272
    233273   
    234     dbp_results = get_dbp_results();
     274    dbp_results = get_dbp_results(start_year,end_year);
    235275    outcomes = process_dbp_results(dbp_results, query_musicbrainz, output_audio_analysis)
    236276    metadata_records = outcomes[0]
Note: See TracChangeset for help on using the changeset viewer.