Context Navigation

get-nc-sa-all.py@ 30006

Last change on this file since 30006 was 30006, checked in by davidb, 9 years ago
Changed to use 'true' rather than '1', which is what the API declares the type of this attribute to be
Property svn:executable set to ``*
File size: 2.3 KB

Line
#!/usr/bin/python
"""Download Jamendo track metadata as a series of JSON chunk files.

The Jamendo v3.0 ``tracks`` endpoint is queried with the ``ccnc=true`` and
``ccsa=true`` filters, ``chunk_size`` results at a time.  Each raw response is
saved as ``nc-sa-chunk-NNN.json`` inside ``output_dir``; chunk files that
already exist are skipped, so an interrupted run can simply be restarted.

Usage:
    get-nc-sa-all.py [start_offset]

``start_offset`` (optional, e.g. 171800) is the track offset to start from.
The chunk number used for file names is derived from it.
"""

import json
import os
import os.path
import urllib
import sys

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2


chunk_size = 200

base_url = "http://api.jamendo.com/v3.0/tracks/"
base_url_args = base_url + "?client_id=54cc3f68&format=jsonpretty&audioformat=mp32&audiodlformat=flac&ccnc=true&ccsa=true"

# There are also the include fields 'licenses' and 'lyrics', but for GCUX
# these are currently considered unnecessary.
base_url_args = base_url_args + "&include=musicinfo+stats"

chunk_url = base_url_args + "&limit=" + str(chunk_size)

output_dir = "download-json-all"

# Give up once this many chunks have failed to parse.
max_errors = 10


def parse_start_offset(argv):
    """Return the ``(offset, chunk_count)`` pair the download starts from.

    ``argv`` is the full argument vector (program name first).  With exactly
    one extra argument, that argument is the starting track offset and the
    chunk number is derived from it, so that file names stay aligned with the
    offsets of a run that started from zero.  Otherwise both values are 0.

    Raises ValueError if the argument is not a non-negative integer.
    """
    if len(argv) != 2:
        return 0, 0

    offset = int(argv[1])
    if offset < 0:
        raise ValueError("start offset must be non-negative: " + argv[1])

    return offset, offset // chunk_size


def chunk_filename(chunk_count):
    """Return the path of the JSON file that stores chunk ``chunk_count``."""
    return os.path.join(output_dir, "nc-sa-chunk-{0:03d}.json".format(chunk_count))


def fetch(download_url):
    """Return the raw response body of ``download_url``, closing the handle."""
    download_url_handle = urlopen(download_url)
    try:
        return download_url_handle.read()
    finally:
        download_url_handle.close()


def main(argv):
    """Download chunks until a short chunk is seen or too many errors occur."""
    if not os.path.isdir(output_dir):
        print("Creating directory: " + output_dir)
        os.mkdir(output_dir)

    offset, chunk_count = parse_start_offset(argv)
    error_count = 0
    more_to_download = True

    while more_to_download:
        output_filename = chunk_filename(chunk_count)

        if os.path.isfile(output_filename):
            print("Skipping Offset = " + str(offset) + " as downloaded file already exists")
            offset += chunk_size
            chunk_count += 1
            continue

        download_url = chunk_url + "&offset=" + str(offset)
        print("Downloading: " + download_url)
        json_data = fetch(download_url)

        # Binary mode: the payload is stored exactly as received.
        with open(output_filename, "wb") as json_ofile:
            json_ofile.write(json_data)

        try:
            data = json.loads(json_data.decode("utf-8"))

            headers = data[u'headers']
            print(" Status: " + headers[u"status"])

            results_count = headers[u'results_count']
            offset += results_count

            # A chunk that is not full means the end of the listing was reached.
            if results_count != chunk_size:
                more_to_download = False
        except (KeyError, TypeError, ValueError):
            # Only malformed or unexpected responses are tolerated here;
            # KeyboardInterrupt and SystemExit still stop the script.
            print("Warning: failed to process Offset = " + str(offset) + " (chunk = " + str(chunk_count) + ")")
            print("Assuming failed block was standard size (" + str(chunk_size) + ")")
            offset += chunk_size
            error_count += 1

            if error_count >= max_errors:
                more_to_download = False

        chunk_count += 1

    print("=====")
    print("Retrieved {0} Non-Copyright Share-Alike tracks from Jamendo".format(offset))
    print("=====")


if __name__ == "__main__":
    main(sys.argv)

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: other-projects/mirex/grand-challenge/generate-jamendo-dataset/scripts-2015/get-nc-sa-all.py@ 30006

Download in other formats: