#!/usr/bin/python
"""Download Jamendo track metadata, one JSON file per track ID.

Usage:
    <script> [input_file [output_dir]]

``input_file`` is a JSON file containing a list of Jamendo track IDs and
``output_dir`` is the directory that receives one ``<track id>.json`` file
per track.  Tracks whose output file already exists are skipped, so an
interrupted run can simply be restarted.
"""

import contextlib
import json
import os
import os.path
import sys
import urllib

try:
    # Python 3 location of urlopen.
    from urllib.request import urlopen
except ImportError:
    # Python 2 fallback, which is what this script originally targeted.
    from urllib import urlopen

base_url = "http://api.jamendo.com/v3.0/tracks/"
base_url_args = base_url + "?client_id=54cc3f68&format=jsonpretty&audioformat=mp32&audiodlformat=flac"

# OK, there are also the include fields 'licenses' and 'lyrics', but for GCUX
# they are currently considered unnecessary.
base_url_args = base_url_args + "&include=musicinfo+stats"

DEFAULT_INPUT_FILE = "jamendo-evened-out-10000-dataset-trackids.json"
DEFAULT_OUTPUT_DIR = "download-json-evened-out"


def parse_args(argv):
    """Return ``(input_file, output_dir)`` taken from an argv-style list.

    ``argv[1]`` is used as the input file when one or two arguments follow
    the program name; ``argv[2]`` is used as the output directory only when
    exactly two arguments follow it.  Any other argument count falls back to
    the defaults (this mirrors the script's historical behaviour, including
    for more than two arguments).
    """
    argc = len(argv)
    input_file = argv[1] if argc in (2, 3) else DEFAULT_INPUT_FILE
    output_dir = argv[2] if argc == 3 else DEFAULT_OUTPUT_DIR
    return input_file, output_dir


def ensure_output_dir(output_dir):
    """Create ``output_dir`` (and any missing parents) if it does not exist."""
    if not os.path.isdir(output_dir):
        print("Creating directory: " + output_dir)
        # makedirs rather than mkdir so a nested output path also works.
        os.makedirs(output_dir)


def load_track_ids(input_file):
    """Return the parsed JSON content of ``input_file``.

    The file is expected to hold a JSON list of track IDs.  It is opened in
    a ``with`` block so the handle is closed even if parsing fails.
    """
    with open(input_file) as json_ifile:
        return json.load(json_ifile)


def build_download_url(track_id):
    """Return the API URL that requests the metadata of ``track_id``."""
    # "%s" formatting accepts both string and numeric IDs.
    return base_url_args + "&id=" + "%s" % track_id


def fetch_url(url):
    """Return the raw response body of ``url``.

    ``contextlib.closing`` is used because the Python 2 ``urlopen`` result is
    not a context manager, and the handle must be closed either way.
    """
    with contextlib.closing(urlopen(url)) as url_handle:
        return url_handle.read()


def download_tracks(track_ids, output_dir, fetch=fetch_url):
    """Download the metadata of every track in ``track_ids``.

    Args:
        track_ids: iterable of track IDs (strings or numbers).
        output_dir: existing directory receiving ``<track id>.json`` files.
        fetch: callable taking a URL and returning the response body as a
            byte string; defaults to a real HTTP download.

    Returns:
        The number of files that were downloaded (skipped tracks excluded).
    """
    downloaded = 0
    for jid in track_ids:
        track_id = "%s" % jid
        output_filename = os.path.join(output_dir, track_id + ".json")

        if os.path.isfile(output_filename):
            print(" Skipping Track ID " + track_id + " as downloaded JSON file already exists")
            continue

        download_url = build_download_url(track_id)
        print("Downloading: " + download_url)

        # Fetch the whole body before opening the output file so a failed
        # download does not leave an empty file that later runs would skip.
        json_data = fetch(download_url)

        # Binary mode: the body is stored exactly as received (bytes on
        # Python 3, no newline translation on Windows).
        with open(output_filename, "wb") as json_ofile:
            json_ofile.write(json_data)
        downloaded += 1

    return downloaded


def main(argv=None):
    """Run the downloader using ``argv`` (defaults to ``sys.argv``)."""
    if argv is None:
        argv = sys.argv
    input_file, output_dir = parse_args(argv)

    # Same order as before: create the directory first, then read the IDs.
    ensure_output_dir(output_dir)
    evened_out_jamendo_ids = load_track_ids(input_file)
    download_tracks(evened_out_jamendo_ids, output_dir)


if __name__ == "__main__":
    main()