root/other-projects/mirex/grand-challenge/generate-jamendo-dataset/scripts-2015/get-audio-download-mp3.py @ 30005

Revision 30005, 1.2 KB (checked in by davidb, 5 years ago)

Initial cut at scripts for generating a 'boosted' genre set of files

  • Property svn:executable set to *
Line 
#!/usr/bin/python
"""Download the MP3 audio for every track listed in a directory of JSON files.

Every regular file in the input directory is parsed as JSON and must contain
a top-level ``results`` list.  Each record in that list supplies an ``id``
and an ``audio`` download URL.  The audio is stored as
``download-audio/<id // 1000>/<id>.mp3``.  Files that already exist are
skipped, so the script can simply be re-run after an interruption.

Usage: get-audio-download-mp3.py [input_dir]
"""

import io
import os
import os.path

import json
import subprocess
import sys

# Directory read when no (single) command-line argument is given.
# The original script also carried "download-json" as a commented-out
# alternative default.
DEFAULT_INPUT_DIR = "download-json-musicinfo"

# Root directory under which the per-prefix audio directories are created.
OUTPUT_DIR = "download-audio"


def track_output_path(output_dir, track_id):
    """Return the MP3 path for ``track_id`` below ``output_dir``.

    Tracks are bucketed into sub-directories named ``track_id // 1000`` so
    that no single directory collects every file.
    """
    prefix = track_id // 1000
    return os.path.join(output_dir, str(prefix), str(track_id) + ".mp3")


def build_wget_command(output_filename, audio_download_url):
    """Return the wget argument list that saves the URL to ``output_filename``.

    An argument list (rather than a shell string) is used so that spaces or
    shell metacharacters in the path or URL are never interpreted by a shell.
    """
    return ["wget", "-O", output_filename, audio_download_url]


def download_audio(output_filename, audio_download_url):
    """Fetch ``audio_download_url`` into ``output_filename`` using wget.

    Returns True when the file was downloaded, False otherwise.  Failures are
    reported on stdout and never raised, so one bad track does not stop the
    whole run.
    """
    # wget -O creates the target file before any data arrives.  Downloading
    # to a temporary name keeps a failed or interrupted transfer from being
    # mistaken for a finished download on the next run.
    partial_filename = output_filename + ".part"
    cmd = build_wget_command(partial_filename, audio_download_url)

    print("Downloading:" + " ".join(cmd))
    try:
        status = subprocess.call(cmd)
    except OSError as err:
        # Raised when wget itself cannot be started (e.g. not installed).
        print("Unable to run wget: " + str(err))
        status = -1

    if status == 0 and os.path.isfile(partial_filename):
        os.rename(partial_filename, output_filename)
        return True

    if os.path.isfile(partial_filename):
        os.remove(partial_filename)
    print("Download failed: " + audio_download_url)
    return False


def download_tracks(input_dir, output_dir):
    """Download the audio of every track described by JSON files in ``input_dir``.

    Only regular files directly inside ``input_dir`` are read; sub-directories
    are ignored.  Returns the number of files newly downloaded.

    Raises whatever ``os.listdir``, ``io.open``, ``json.load`` or the record
    lookups raise for an unreadable directory or malformed JSON.
    """
    downloaded = 0

    # Sorted so that repeated runs process files in the same order.
    for name in sorted(os.listdir(input_dir)):
        json_filename = os.path.join(input_dir, name)
        if not os.path.isfile(json_filename):
            continue

        with io.open(json_filename, encoding="utf-8") as json_file:
            data = json.load(json_file)

        for rec in data["results"]:
            track_id = int(rec["id"])
            output_filename = track_output_path(output_dir, track_id)

            output_prefix_dir = os.path.dirname(output_filename)
            if not os.path.isdir(output_prefix_dir):
                print("Creating " + output_prefix_dir)
                # makedirs also creates output_dir itself on a first run.
                os.makedirs(output_prefix_dir)

            if os.path.isfile(output_filename):
                print("Already downloaded: " + output_filename + " => skipping")
                continue

            audio_download_url = rec.get("audio")
            if not audio_download_url:
                print("No audio URL for track " + str(track_id) + " => skipping")
                continue

            if download_audio(output_filename, audio_download_url):
                downloaded += 1

    return downloaded


def main(argv=None):
    """Run the downloader; ``argv`` defaults to ``sys.argv``.

    The input directory is taken from the command line only when exactly one
    argument is supplied; otherwise DEFAULT_INPUT_DIR is used.
    """
    if argv is None:
        argv = sys.argv
    input_dir = argv[1] if len(argv) == 2 else DEFAULT_INPUT_DIR
    download_tracks(input_dir, OUTPUT_DIR)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Note: See TracBrowser for help on using the browser.