source: other-projects/mirex/grand-challenge/generate-jamendo-dataset/scripts-2015/get-audio-download-mp3.py@ 30005

Last change on this file since 30005 was 30005, checked in by davidb, 9 years ago

Initial cut at scripts for generating a 'boosted' genre set of files

  • Property svn:executable set to *
File size: 1.2 KB
RevLine 
[30005]1#!/usr/bin/python
2
3import os
4import os.path
5
6import json
7import sys
8
# Directory holding the JSON listing files.  It is taken from the command
# line only when exactly one argument is supplied; otherwise the default is
# used.  ("download-json" was the earlier default.)
input_dir = sys.argv[1] if len(sys.argv) == 2 else "download-json-musicinfo"

# Root directory that receives the MP3 files, bucketed into one
# sub-directory per thousand track ids.
output_dir = "download-audio"


def track_paths(track_id, audio_root):
    """Return ``(prefix_dir, mp3_path)`` for *track_id* under *audio_root*.

    The prefix directory is named after ``track_id // 1000`` and the file is
    ``<track_id>.mp3`` inside it.
    """
    prefix_dir = os.path.join(audio_root, str(track_id // 1000))
    return prefix_dir, os.path.join(prefix_dir, str(track_id) + ".mp3")


def ensure_dir(path):
    """Create *path*, including missing parents, unless it already exists."""
    if not os.path.isdir(path):
        print("Creating " + path)
        # makedirs rather than mkdir: the output root itself may not exist
        # yet on a first run, and mkdir would fail on the missing parent.
        os.makedirs(path)


def download_audio(url, dest):
    """Fetch *url* into *dest* using wget and return True on success.

    The data is written to ``dest + ".part"`` and only renamed to *dest*
    when wget exits with status 0.  A failed or interrupted transfer
    therefore never leaves a file at *dest* that a later run would mistake
    for a completed download.
    """
    # Imported here so the block does not rely on the file-level imports.
    import subprocess

    partial = dest + ".part"
    # An argument list (no shell) keeps quotes or shell metacharacters in the
    # URL from being interpreted; "--" stops a URL that starts with "-" from
    # being parsed as a wget option.
    argv = ["wget", "-O", partial, "--", url]
    print("Downloading:" + " ".join(argv))
    try:
        status = subprocess.call(argv)
    except OSError as err:
        # wget missing or not executable: report it and carry on, as the
        # shell-based invocation would have done.
        print("Could not run wget: " + str(err))
        status = -1

    if status == 0:
        os.rename(partial, dest)
        return True

    if os.path.exists(partial):
        os.remove(partial)
    print("Download failed: " + url)
    return False


def fetch_listing(json_filename, audio_root):
    """Download every track named in one JSON listing file.

    The file must contain an object with a ``results`` list whose records
    carry an ``id`` and an ``audio`` download URL.

    Returns a ``(downloaded, skipped, failed)`` tuple of counts.
    """
    # Binary mode lets the json module detect the encoding itself instead of
    # depending on the locale's default text encoding.
    with open(json_filename, "rb") as handle:
        listing = json.load(handle)

    downloaded = skipped = failed = 0
    for rec in listing["results"]:
        track_id = int(rec["id"])
        prefix_dir, mp3_path = track_paths(track_id, audio_root)
        ensure_dir(prefix_dir)

        if os.path.isfile(mp3_path):
            print("Already downloaded: " + mp3_path + " => skipping")
            skipped += 1
        elif download_audio(rec["audio"], mp3_path):
            downloaded += 1
        else:
            failed += 1
    return downloaded, skipped, failed


def fetch_all(listing_dir, audio_root):
    """Process every regular file in *listing_dir* as a JSON listing.

    Sub-directories of *listing_dir* are ignored.  Returns the summed
    ``(downloaded, skipped, failed)`` counts over all listings.
    """
    totals = [0, 0, 0]
    for name in os.listdir(listing_dir):
        path = os.path.join(listing_dir, name)
        if not os.path.isfile(path):
            continue
        for slot, count in enumerate(fetch_listing(path, audio_root)):
            totals[slot] += count
    return tuple(totals)


if __name__ == "__main__":
    fetch_all(input_dir, output_dir)
42
Note: See TracBrowser for help on using the repository browser.