#!/usr/bin/python
"""Download Jamendo track metadata as a series of JSON chunk files.

The script pages through the Jamendo v3.0 ``tracks`` endpoint, ``chunk_size``
results at a time, and stores every raw response as
``<output_dir>/nc-sa-chunk-NNN.json``.  Chunks whose file already exists are
skipped, so an interrupted run can simply be restarted.

Usage::

    script.py [START_OFFSET]

``START_OFFSET`` is an optional non-negative track offset (e.g. 171800) at
which to start; it defaults to 0.
"""

import contextlib
import json
import os
import os.path
import sys
import urllib

try:
    import urllib.request  # Python 3: urlopen lives in the submodule
    _urlopen = urllib.request.urlopen
except ImportError:
    _urlopen = urllib.urlopen  # Python 2


chunk_size = 200

base_url = "http://api.jamendo.com/v3.0/tracks/"
base_url_args = base_url + "?client_id=54cc3f68&format=json&audioformat=mp32&audiodlformat=flac&ccnc=1&ccsa=1"

# OK, there are also the include fields 'licenses' and 'lyrics' but for GCUX,
# these are currently considered unnecessary
base_url_args = base_url_args + "&include=musicinfo+stats"

chunk_url = base_url_args + "&limit=" + str(chunk_size)

output_dir = "download-json-all"

# Number of unparsable responses tolerated before the run is abandoned.
max_errors = 10


def parse_start_offset(argv):
    """Return the starting track offset requested on the command line.

    Args:
        argv: the full argument vector (``sys.argv`` style, program name
            first).

    Returns:
        ``int(argv[1])`` when exactly one argument was supplied, otherwise 0.

    Raises:
        ValueError: if the argument is not an integer or is negative.
    """
    if len(argv) != 2:
        return 0
    offset = int(argv[1])
    if offset < 0:
        raise ValueError("start offset must be >= 0, got " + argv[1])
    return offset


def chunk_path(chunk_count):
    """Return the output file path used for chunk number ``chunk_count``."""
    return os.path.join(output_dir, "nc-sa-chunk-{0:03d}.json".format(chunk_count))


def results_in_chunk(json_data):
    """Parse one API response, print its status and return its result count.

    Args:
        json_data: the raw response body (bytes or text).

    Returns:
        The ``results_count`` value from the response's ``headers`` object.

    Raises:
        ValueError, KeyError, TypeError: if the body is not JSON of the
            expected shape.
    """
    if isinstance(json_data, bytes):
        # Python 3 hands back bytes; json.loads only accepts them from 3.6 on.
        json_data = json_data.decode("utf-8")
    headers = json.loads(json_data)[u'headers']
    print(" Status: " + headers[u"status"])
    return headers[u'results_count']


def main():
    """Download every chunk that is not already present in ``output_dir``.

    The loop ends when a response reports a result count different from
    ``chunk_size`` or after ``max_errors`` responses could not be processed.
    Download errors raised by ``urlopen`` are not caught and abort the run.
    """
    if not os.path.isdir(output_dir):
        print("Creating directory: " + output_dir)
        os.mkdir(output_dir)

    offset = parse_start_offset(sys.argv)
    # Keep the chunk number (and therefore the file name) in step with the
    # offset, so starting at e.g. 171800 writes chunk 859 rather than chunk 0.
    chunk_count = offset // chunk_size
    error_count = 0
    more_to_download = True

    while more_to_download:
        output_filename = chunk_path(chunk_count)

        if os.path.isfile(output_filename):
            print("Skipping Offset = " + str(offset) + " as downloaded file already exists")
            offset += chunk_size
            chunk_count += 1
            continue

        download_url = chunk_url + "&offset=" + str(offset)
        print("Downloading: " + download_url)
        with contextlib.closing(_urlopen(download_url)) as download_url_handle:
            json_data = download_url_handle.read()

        # NOTE: the response is saved before it is validated, so a malformed
        # response stays on disk and is skipped by later runs.
        with open(output_filename, "wb") as json_ofile:
            json_ofile.write(json_data)

        try:
            results_count = results_in_chunk(json_data)
            offset += results_count
            if results_count != chunk_size:
                more_to_download = False
        except Exception:
            # Best effort: count the failure and move on, but let
            # KeyboardInterrupt / SystemExit through.
            print("Warning: failed to process Offset = " + str(offset) + " (chunk = " + str(chunk_count) + ")")
            print("Assuming failed block was standard size (" + str(chunk_size) + ")")
            offset += chunk_size
            error_count += 1
            if error_count >= max_errors:
                more_to_download = False

        chunk_count += 1

    print("=====")
    print("Retrieved {0} Non-Copyright Share-Alike tracks from Jamendo".format(offset))
    print("=====")


if __name__ == "__main__":
    main()
---|