1 | #!/usr/bin/env python
|
---|
2 |
|
---|
3 | from __future__ import absolute_import
|
---|
4 | from __future__ import division
|
---|
5 | from __future__ import print_function
|
---|
6 | from __future__ import with_statement
|
---|
7 |
|
---|
8 |
|
---|
9 | import csv
|
---|
10 | import re
|
---|
11 | from collections import OrderedDict
|
---|
12 |
|
---|
def csv_load_features_to_dict(csv_filename):
    """Load the collated features CSV into a dict keyed by integer song id.

    The first non-blank row is treated as the header; its first cell (the
    label of the file-id column) is dropped.  Every later row must start with
    a file id of the form ``import/<digits>.json``: the digits become the
    integer key, and the remaining cells are paired with the header labels in
    column order.  Cells beyond the header width are ignored.

    Args:
        csv_filename: Path of the CSV file to read.

    Returns:
        dict mapping int song id -> OrderedDict of {header label: cell text}.
        Cell values stay as the strings read from the file.  A file with no
        data rows yields an empty dict.

    Raises:
        ValueError: if a file id does not look like ``import/<digits>.json``,
            or a data row has fewer cells than the header.
    """
    # Compiled once instead of once per row.  The dot is escaped so that only
    # a literal ".json" suffix is accepted.
    file_id_pattern = re.compile(r'^import/(\d+)\.json$')

    csv_features_dict = {}
    header_row = None

    # newline="" is what the csv module asks for: it lets line breaks inside
    # quoted fields reach the parser unmodified.  The 'with' block closes the
    # file even when a row fails to parse.
    with open(csv_filename, "r", newline="") as csv_features_file:
        csv_features_reader = csv.reader(
            csv_features_file, delimiter=',', quotechar='"')

        for row in csv_features_reader:
            if not row:
                # csv.reader yields [] for a blank line; nothing to load.
                continue

            if header_row is None:
                header_row = row[1:]
                continue

            full_file_id = row[0]
            values = row[1:]

            id_match = file_id_pattern.match(full_file_id)
            if id_match is None:
                raise ValueError(
                    "{0}, line {1}: unexpected file id {2!r} "
                    "(expected 'import/<digits>.json')".format(
                        csv_filename, csv_features_reader.line_num,
                        full_file_id))
            song_id = int(id_match.group(1))

            if len(values) < len(header_row):
                raise ValueError(
                    "{0}, line {1}: row has {2} value(s) but the header "
                    "has {3} label(s)".format(
                        csv_filename, csv_features_reader.line_num,
                        len(values), len(header_row)))

            # zip() stops at the header width, so surplus cells are dropped.
            csv_features_dict[song_id] = OrderedDict(zip(header_row, values))

    return csv_features_dict
|
---|
51 |
|
---|
def csv_load_groundtruth_to_dict(csv_filename):
    """Load a ground-truth CSV into a dict keyed by integer song id.

    The first non-blank row is treated as the header; its first cell (the
    label of the song-id column) is dropped.  In every later row the first
    cell is converted with ``int()`` to form the key, and the remaining cells
    are paired with the header labels in column order.  A row may carry fewer
    cells than the header; only the cells present are stored.

    Args:
        csv_filename: Path of the CSV file to read.

    Returns:
        dict mapping int song id -> OrderedDict of {header label: cell text}.
        Cell values stay as the strings read from the file.  A file with no
        data rows yields an empty dict.

    Raises:
        ValueError: if the first cell of a data row is not an integer, or a
            data row has more cells than the header has labels.
    """
    csv_gt_dict = {}
    header_row = None

    # newline="" is what the csv module asks for (and what the writer in this
    # file already uses): it lets line breaks inside quoted fields reach the
    # parser unmodified.
    with open(csv_filename, "r", newline="") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')

        for row in csv_reader:
            if not row:
                # csv.reader yields [] for a blank line; nothing to load.
                continue

            if header_row is None:
                header_row = row[1:]
                continue

            song_id = int(row[0])
            values = row[1:]

            if len(values) > len(header_row):
                # Without a label there is no key to store the cell under.
                raise ValueError(
                    "{0}, line {1}: row has {2} value(s) but the header "
                    "has only {3} label(s)".format(
                        csv_filename, csv_reader.line_num,
                        len(values), len(header_row)))

            # zip() stops at the row width, so short rows are accepted.
            csv_gt_dict[song_id] = OrderedDict(zip(header_row, values))

    return csv_gt_dict
|
---|
84 |
|
---|
85 |
|
---|
def print_songkey_dict(message, songkey_dict, limit=3):
    """Print a banner and then a sample of the dict's entries in key order.

    Args:
        message: Text shown in the banner.
        songkey_dict: Mapping of song id -> values to sample from.
        limit: Maximum number of entries to print.  Zero or a negative number
            prints the banner only.
    """
    print()
    print("****")
    print("* " + message + " (Sample limit = " + str(limit) + "):")
    print("****")

    # The limit is checked before printing so that limit <= 0 shows nothing.
    for shown, song_id_key in enumerate(sorted(songkey_dict)):
        if shown >= limit:
            break
        ordered_vals = songkey_dict[song_id_key]
        print("Song id " + str(song_id_key) + ": \n" + str(ordered_vals))
|
---|
98 |
|
---|
def add_groundtruth_to_features(songkey_features_dict, gt_label, songkey_gt_dict, gt_field):
    """Copy one ground-truth field into every song's feature mapping.

    For each song id in ``songkey_features_dict`` the value stored at
    ``songkey_gt_dict[song_id][gt_field]`` is written into that song's
    feature mapping under the key ``<gt_label>_<gt_field>``.  The feature
    mappings are modified in place and nothing is returned.

    Raises:
        KeyError: if a song id from the features is absent from the
            ground-truth dict, or its entry has no ``gt_field``.
    """
    target_column = "_".join([gt_label, gt_field])

    # Visiting ids in sorted order is not required, just tidier.
    for song_id in sorted(songkey_features_dict):
        gt_value = songkey_gt_dict[song_id][gt_field]
        songkey_features_dict[song_id][target_column] = gt_value
|
---|
105 |
|
---|
106 |
|
---|
def ordered_dict_features_to_list(songkey_combined_dict):
    """Flatten the per-song mappings into a list of rows with a header row.

    The header row holds the keys of the first entry of
    ``songkey_combined_dict`` (in that dict's iteration order).  It is
    followed by one row per song, visited in sorted song-id order, holding
    that song's values in the order of its own mapping.  The song id itself
    is not included in the rows.

    Args:
        songkey_combined_dict: Mapping of song id -> ordered mapping of
            {label: value}.

    Returns:
        list of lists: ``[header, row, row, ...]``, or an empty list when the
        input is empty (there is no entry to take header labels from).
    """
    if not songkey_combined_dict:
        # Previously next(iter(...)) leaked StopIteration on empty input.
        return []

    # Set up the header labels from the first entry.
    first_ordered_dict = next(iter(songkey_combined_dict.values()))
    features_list = [list(first_ordered_dict)]

    # Doesn't need to be sorted, but tidier this way.
    for song_id_key in sorted(songkey_combined_dict):
        ordered_combined_vals = songkey_combined_dict[song_id_key]
        features_list.append(list(ordered_combined_vals.values()))

    return features_list
|
---|
139 |
|
---|
140 |
|
---|
def csv_save_combined_features(csv_filename, combined_features_list):
    """Write the rows in ``combined_features_list`` to a CSV file.

    Args:
        csv_filename: Path of the file to create or overwrite.
        combined_features_list: Iterable of rows, each an iterable of cells.
    """
    # The 'with' block guarantees the file is flushed and closed, even if
    # writing a row fails; the original never closed the handle.
    with open(csv_filename, "w", newline="") as csv_features_file:
        csv_features_writer = csv.writer(
            csv_features_file, delimiter=',', quotechar='"')
        csv_features_writer.writerows(combined_features_list)
|
---|
146 |
|
---|
147 |
|
---|
148 |
|
---|
# Where the collated features are read from and the merged result is written
csv_input_filename = 'etc/deam-essentia-features-collated.csv'
csv_output_filename = 'etc/deam-essentia-features-arousal-valence.csv'

# Directory holding the ground-truth CSV files
groundtruth_dir = 'prepare/annotations/annotations averaged per song/dynamic (per second annotations)'
arousal_csv_filename = '/'.join([groundtruth_dir, 'arousal.csv'])
valence_csv_filename = '/'.join([groundtruth_dir, 'valence.csv'])

# Essentia features were extracted starting 21 seconds in, over a 6 second
# block.  That corresponds to the DEAM ground-truth data values in the range:
#   [sample_21000ms, sample_27000ms)
#
# => Take the ground-truth value at 'sample_26500ms' as the culmination of
#    the affect the music had on users participating in the experiment
gt_field = 'sample_26500ms'


# Load the features, then both ground-truth tables
csv_features_dict = csv_load_features_to_dict(csv_input_filename)

arousal_groundtruth_dict = csv_load_groundtruth_to_dict(arousal_csv_filename)
valence_groundtruth_dict = csv_load_groundtruth_to_dict(valence_csv_filename)

print_songkey_dict("Arousal Ground-truth", arousal_groundtruth_dict)
print_songkey_dict("Valence Ground-truth", valence_groundtruth_dict)

# Append the chosen arousal and valence sample to every song's features
add_groundtruth_to_features(csv_features_dict, "arousal", arousal_groundtruth_dict, gt_field)
add_groundtruth_to_features(csv_features_dict, "valence", valence_groundtruth_dict, gt_field)

print_songkey_dict("Essentia Features", csv_features_dict, 1)

combined_features_list = ordered_dict_features_to_list(csv_features_dict)


# Same four output lines as before (blank line, rule, message, rule),
# emitted with a single print call
print("\n****\n* Saving combined CSV data to: " + csv_output_filename + "\n****")
csv_save_combined_features(csv_output_filename, combined_features_list)
|
---|
187 |
|
---|