source: other-projects/mirex/grand-challenge/generate-jamendo-dataset/scripts-2015/generate-csv-musicinfo-tags.py@ 30007

Last change on this file since 30007 was 30005, checked in by davidb, 9 years ago

Initial cut at scripts for generating a 'boosted' genre set of files

  • Property svn:executable set to *
File size: 2.3 KB
Line 
1#!/usr/bin/python
2
3from os import listdir
4from os.path import isfile, join
5
6import json
7import sys
8import signal
9
def tabulate_results():
    """Write the accumulated genre-tag statistics to two CSV files.

    Reads the module-level globals ``jid_count``, ``genre_table``,
    ``genre_table_tagcount`` and ``output_dir``; takes no arguments and
    returns nothing.

    Files written inside ``output_dir`` (created if it does not exist):

    * ``genre-tags-freq.csv`` -- one row per genre tag with its count.
      The tag is left-justified in a 20-character field, as before.
    * ``genre-tags-distribution-count.csv`` -- one row per "number of
      genre tags on a track" value with the number of tracks having it.
    """
    # Function-scope imports: the file's top-level imports only bring in
    # listdir / isfile / join.
    from os import makedirs
    from os.path import isdir

    print("====")
    print("Number of records processed " + str(jid_count))
    print("====")

    # Avoid losing a long run's results just because the output
    # directory was never created.
    if not isdir(output_dir):
        makedirs(output_dir)

    print("Generating ...")
    print(" Genre Tags Frequency Distribution")
    print(" (one track can be given assigned multiple genres)")

    # 'with' guarantees the file is closed even if a write fails.
    with open(join(output_dir, "genre-tags-freq.csv"), "w") as ofd_freq:
        ofd_freq.write("Genre tag,Frequency count\n")
        # Sorted so the output is reproducible between runs.
        for gk in sorted(genre_table):
            ofd_freq.write("{0:20}".format(gk) + "," + str(genre_table[gk]) + "\n")

    distrib_filename = join(output_dir, "genre-tags-distribution-count.csv")
    with open(distrib_filename, "w") as ofd_distrib:
        ofd_distrib.write("Number of Genre Tags Assigned per Track\n")
        ofd_distrib.write("Num of tags,Frequency count\n")
        # Keys are stringified integers (see processTagset); sort them
        # numerically so that "10" comes after "2" rather than before it.
        for gk in sorted(genre_table_tagcount, key=int):
            ofd_distrib.write(gk + "," + str(genre_table_tagcount[gk]) + "\n")
36
37
def trap_error(signal, frame):
    """SIGINT handler: report the interrupt, dump partial results, exit.

    Both arguments are supplied by the signal machinery and are not used.
    Note that the ``signal`` parameter hides the ``signal`` module inside
    this function body.
    """
    sys.stderr.write("Ctrl-C Caught!\n")
    # Write out whatever has been tallied so far before stopping.
    tabulate_results()
    raise SystemExit(0)
42
43
44
def processTagset(jid,tagrec):
    """Fold one track's genre tags into the module-level tallies.

    jid    -- track id; accepted for the caller's convenience but not used.
    tagrec -- mapping with a u'genres' entry holding the track's genre
              tags (it may hold other entries, e.g. instruments and
              vartags, which are ignored here).

    Updates two module-level dicts in place:
      genre_table_tagcount -- str(number of genres on the track) -> tracks
      genre_table          -- genre tag -> number of tracks carrying it
    """
    genres = tagrec[u'genres']

    # Keyed by the *string* form of the count, as tabulate_results expects.
    num_genres_str = str(len(genres))
    genre_table_tagcount[num_genres_str] = genre_table_tagcount.get(num_genres_str, 0) + 1

    # dict.get replaces the has_key test, which no longer exists in Python 3.
    for g in genres:
        genre_table[g] = genre_table.get(g, 0) + 1
61
# Usage: generate-csv-musicinfo-tags.py [input_dir [output_dir]]
# With any other number of arguments both directories fall back to
# their defaults.
argc = len(sys.argv)

input_dir = sys.argv[1] if (argc==2) or (argc==3) else "download-json-all"
output_dir = sys.argv[2] if argc==3 else "csv-data"

# Only regular files directly inside input_dir are considered.
json_files = [ jf for jf in listdir(input_dir) if isfile(join(input_dir,jf)) ]

# Tallies shared with processTagset() and tabulate_results().
genre_table = {}
genre_table_tagcount = {}

jid_count = 0

# On Ctrl-C, write out the partial results instead of discarding them.
signal.signal(signal.SIGINT, trap_error)

for jf in json_files:
    print("Processing " + jf)

    json_filename = join(input_dir,jf)
    # 'with' closes the file even when parsing fails.
    with open(json_filename) as ifd:
        data = json.load(ifd)

    # Debugging aid: uncomment to stop after roughly 100 records.
    # if jid_count>100:
    #     break

    for rec in data[u'results']:
        jid = rec[u'id']
        jid_count += 1

        musicinfo = rec[u'musicinfo']
        tags = musicinfo[u'tags']
        processTagset(jid,tags)

tabulate_results()
Note: See TracBrowser for help on using the repository browser.