#!/usr/bin/python
"""Download Jamendo track metadata as a series of numbered JSON chunk files.

Usage:
    <this script> [START_OFFSET]

Every request asks the Jamendo v3.0 ``tracks`` endpoint for ``chunk_size``
tracks (filtered with ``ccnc=true&ccsa=true``) and stores the raw response in
``<output_dir>/nc-sa-chunk-NNN.json``.  A chunk whose file already exists is
skipped, so an interrupted run can be restarted.  START_OFFSET (for example
171800) is the track offset to begin from; it defaults to 0.

The code is written to run under both Python 2 and Python 3.
"""

import contextlib
import json
import os
import os.path
import sys
import urllib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2


# Number of tracks requested per API call (sent as the "limit" argument).
chunk_size = 200

base_url = "http://api.jamendo.com/v3.0/tracks/"
base_url_args = base_url + "?client_id=54cc3f68&format=jsonpretty&audioformat=mp32&audiodlformat=flac&ccnc=true&ccsa=true"

# The include fields 'licenses' and 'lyrics' also exist, but for GCUX these
# are currently considered unnecessary.
base_url_args = base_url_args + "&include=musicinfo+stats"

chunk_url = base_url_args + "&limit=" + str(chunk_size)

output_dir = "download-json-all"

# Stop downloading once this many chunks have failed to parse.
max_errors = 10


def _parse_start_offset(argv):
    """Return the starting track offset requested on the command line.

    ``argv`` follows the ``sys.argv`` convention (program name first).  When
    it holds exactly one argument, that argument is parsed as the offset;
    otherwise the offset is 0.

    Raises:
        ValueError: the argument is not an integer, or it is negative.
    """
    if len(argv) != 2:
        return 0
    start = int(argv[1])
    if start < 0:
        raise ValueError("offset must not be negative: " + argv[1])
    return start


def _chunk_filename(chunk_count):
    """Return the path of the file that stores chunk number ``chunk_count``."""
    return os.path.join(output_dir, "nc-sa-chunk-{0:03d}.json".format(chunk_count))


def _fetch_url(url):
    """Return the raw body served at ``url``, closing the connection afterwards."""
    with contextlib.closing(urlopen(url)) as handle:
        return handle.read()


def _read_results_count(json_data):
    """Print the status found in a response and return its ``results_count``.

    Raises:
        ValueError: ``json_data`` is not valid UTF-8 encoded JSON.
        KeyError, TypeError: the expected ``headers`` fields are missing or
            have an unexpected type.
    """
    if isinstance(json_data, bytes):
        # Decode explicitly: json.loads() only accepts bytes from Python 3.6.
        json_data = json_data.decode("utf-8")
    data = json.loads(json_data)

    headers = data[u'headers']
    print(" Status: " + headers[u"status"])

    return headers[u'results_count']


def download_chunks(offset=0, fetch=None):
    """Download every chunk from ``offset`` onwards into ``output_dir``.

    Args:
        offset: track offset of the first chunk to consider.  The first chunk
            number is ``offset // chunk_size``, so chunk N always holds the
            tracks that start at offset ``N * chunk_size``; an offset that is
            a multiple of ``chunk_size`` is therefore expected.
        fetch: callable taking a URL and returning the raw response body.
            Defaults to a real HTTP download.

    Returns:
        The offset reached when downloading stopped.  Skipped chunks, and
        chunks that could not be parsed, are counted as ``chunk_size`` tracks
        each.

    Downloading stops after the first chunk that reports a ``results_count``
    other than ``chunk_size``, or once ``max_errors`` chunks have failed to
    parse.  Errors raised by ``fetch`` itself are not caught.
    """
    if fetch is None:
        fetch = _fetch_url

    if not os.path.isdir(output_dir):
        print("Creating directory: " + output_dir)
        os.mkdir(output_dir)

    chunk_count = offset // chunk_size
    error_count = 0
    more_to_download = True

    while more_to_download:
        output_filename = _chunk_filename(chunk_count)

        if os.path.isfile(output_filename):
            print("Skipping Offset = " + str(offset) + " as downloaded file already exists")
            offset += chunk_size
            chunk_count += 1
            continue

        download_url = chunk_url + "&offset=" + str(offset)
        print("Downloading: " + download_url)
        json_data = fetch(download_url)

        # The response is stored before it is parsed, so a malformed reply is
        # still kept on disk.  Binary mode writes the bytes exactly as received.
        with open(output_filename, "wb") as json_ofile:
            json_ofile.write(json_data)

        try:
            results_count = _read_results_count(json_data)
            offset += results_count
        except (ValueError, KeyError, TypeError):
            print("Warning: failed to process Offset = " + str(offset) + " (chunk = " + str(chunk_count) + ")")
            print("Assuming failed block was standard size (" + str(chunk_size) + ")")
            offset += chunk_size
            error_count += 1

            if error_count >= max_errors:
                more_to_download = False
        else:
            # A chunk that is not full means the end of the data was reached.
            if results_count != chunk_size:
                more_to_download = False

        chunk_count += 1

    return offset


def main(argv=None):
    """Run the download and print a summary; return the process exit status."""
    if argv is None:
        argv = sys.argv

    try:
        start_offset = _parse_start_offset(argv)
    except ValueError as err:
        sys.stderr.write("Invalid start offset: {0}\n".format(err))
        return 1

    total = download_chunks(start_offset)

    print("=====")
    print("Retrieved {0} Non-Commercial Share-Alike tracks from Jamendo".format(total))
    print("=====")
    return 0


if __name__ == "__main__":
    sys.exit(main())
---|