1 | #!/usr/bin/python
|
---|
2 |
|
---|
3 | from os import listdir
|
---|
4 | from os.path import isfile, join
|
---|
5 |
|
---|
6 | import json
|
---|
7 | import sys
|
---|
8 | import signal
|
---|
9 |
|
---|
def tabulate_results():
    """Print a run summary and write the two genre-tag CSV reports.

    Reads the module-level ``jid_count``, ``genre_table``,
    ``genre_table_tagcount`` and ``output_dir``. Writes
    ``genre-tags-freq.csv`` and ``genre-tags-distribution-count.csv`` into
    ``output_dir``, creating that directory first if it does not exist.
    """
    # Local import: the file only imports selected names from os/os.path.
    import os

    print("====")
    print("Number of records processed " + str(jid_count))
    print("====")

    # Without this, a missing output directory would lose the whole run's
    # results at the very last step (or inside the Ctrl-C handler).
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    print("Generating ...")
    print(" Genre Tags Frequency Distribution")
    print(" (one track can be given assigned multiple genres)")

    # `with` guarantees the files are closed even if a write fails.
    with open(join(output_dir, "genre-tags-freq.csv"), "w") as ofd_freq:
        ofd_freq.write("Genre tag,Frequency count\n")
        for gk in genre_table:
            # Tag is left-aligned and padded to 20 columns, as before.
            ofd_freq.write(
                "{0:20}".format(gk) + "," + str(genre_table[gk]) + "\n")

    distrib_path = join(output_dir, "genre-tags-distribution-count.csv")
    with open(distrib_path, "w") as ofd_distrib:
        ofd_distrib.write("Number of Genre Tags Assigned per Track\n")
        ofd_distrib.write("Num of tags,Frequency count\n")
        # Keys are stringified integers; sort numerically so that "10"
        # comes after "2" instead of before it.
        for gk in sorted(genre_table_tagcount, key=int):
            ofd_distrib.write(
                gk + "," + str(genre_table_tagcount[gk]) + "\n")


def trap_error(signal, frame):
    """Signal handler: report the interrupt, write results so far, and exit.

    Args:
        signal: Signal number supplied by the interpreter (unused). The name
            shadows the ``signal`` module inside this function only.
        frame: Interrupted stack frame supplied by the interpreter (unused).

    Raises:
        SystemExit: Always, with exit status 0, after the reports are written.
    """
    # sys.stderr.write works on both Python 2 and 3, unlike `print >>`.
    sys.stderr.write("Ctrl-C Caught!\n")
    tabulate_results()
    sys.exit(0)


def processTagset(jid, tagrec):
    """Fold one track's genre tags into the module-level tallies.

    Updates ``genre_table`` (genre tag -> number of occurrences) and
    ``genre_table_tagcount`` (number of genres on a track, as a string ->
    number of tracks with that many genres).

    Args:
        jid: Identifier of the track record (currently unused).
        tagrec: Mapping with a ``u'genres'`` entry holding the track's genre
            tags. Other entries (instruments, vartags) are ignored.

    Raises:
        KeyError: If ``tagrec`` has no ``u'genres'`` entry.
    """
    genres = tagrec[u'genres']

    # Keyed by the stringified count, matching how the report writes it.
    num_genres_str = str(len(genres))
    genre_table_tagcount[num_genres_str] = (
        genre_table_tagcount.get(num_genres_str, 0) + 1)

    # dict.get replaces has_key(), which no longer exists in Python 3.
    for g in genres:
        genre_table[g] = genre_table.get(g, 0) + 1


# Usage: script [input_dir [output_dir]]
# Any other argument count falls back to the default directories.
argc = len(sys.argv)

input_dir = sys.argv[1] if argc in (2, 3) else "download-json-all"
output_dir = sys.argv[2] if argc == 3 else "csv-data"

# Only regular files directly inside input_dir are processed.
json_files = [jf for jf in listdir(input_dir) if isfile(join(input_dir, jf))]

# Tallies shared with processTagset() and tabulate_results().
genre_table = {}           # genre tag -> number of occurrences
genre_table_tagcount = {}  # str(number of genres on a track) -> track count

jid_count = 0              # total records processed so far

# On Ctrl-C, write out whatever has been tallied so far instead of losing it.
signal.signal(signal.SIGINT, trap_error)

for jf in json_files:
    print("Processing " + jf)

    json_filename = join(input_dir, jf)
    # `with` closes the file even if parsing raises.
    with open(json_filename) as ifd:
        data = json.load(ifd)

    # Debugging aid: uncomment to stop after roughly 100 records.
    # if jid_count > 100:
    #     break

    for rec in data[u'results']:
        jid = rec[u'id']
        jid_count += 1

        tags = rec[u'musicinfo'][u'tags']
        processTagset(jid, tags)

tabulate_results()