source: main/trunk/model-sites-dev/mars/collect/deam/MERGE-CSV-FEATURES-AND-AV-FILES.py@34440

Last change on this file since 34440 was 34440, checked in by davidb, 4 years ago

Version that outputs the merged CSV data into a new file

  • Property svn:executable set to *
File size: 5.1 KB
#!/usr/bin/env python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import csv
import re
from collections import OrderedDict
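# Merge the collated Essentia features CSV with the DEAM arousal/valence
# ground-truth annotations, and write the combined data out to a new CSV file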
def csv_features_to_dict(csv_filename):
    # Read the collated Essentia features CSV into a dict keyed by song id,
    # mapping each id to an OrderedDict of feature-name -> value
    csv_features_file = open(csv_filename, "r")
    csv_features_reader = csv.reader(csv_features_file, delimiter=',', quotechar='"')

    csv_features_dict = {}

    line_count = 0
    header_row = None

    for row in csv_features_reader:
        if line_count == 0:
            # Header row: drop the leading file-id column, keep the feature names
            row.pop(0)
            header_row = row
        else:
            full_file_id = row[0]

            # File ids take the form 'import/<song_id>.json'
            pat = re.compile('^import/(\\d+)\\.json$')
            mat = pat.match(full_file_id)
            song_id = int(mat.group(1))

            row.pop(0)
            row_ordered_dict = OrderedDict()

            for i in range(0, len(header_row)):
                field = header_row[i]
                value = row[i]

                row_ordered_dict[field] = value

            csv_features_dict[song_id] = row_ordered_dict

        line_count += 1

    csv_features_file.close()

    return csv_features_dict

def csv_groundtruth_to_dict(csv_filename):
    # Read a DEAM ground-truth CSV (arousal or valence) into a dict keyed by
    # song id, mapping each id to an OrderedDict of sample-time -> value
    csv_file = open(csv_filename)
    csv_reader = csv.reader(csv_file, delimiter=',')

    csv_gt_dict = {}

    line_count = 0
    header_row = None

    for row in csv_reader:
        if line_count == 0:
            # Header row: drop the leading song_id column, keep the sample labels
            row.pop(0)
            header_row = row
        else:
            song_id = int(row[0])
            row.pop(0)

            row_ordered_dict = OrderedDict()

            for i in range(0, len(row)):
                field = header_row[i]
                value = row[i]

                row_ordered_dict[field] = value

            csv_gt_dict[song_id] = row_ordered_dict

        line_count += 1

    csv_file.close()

    return csv_gt_dict


def print_songkey_dict(message, songkey_dict, limit=3):
    print()
    print("****")
    print("* " + message + " (Sample limit = " + str(limit) + "):")
    print("****")

    i = 1
    for song_id_key, ordered_vals in sorted(songkey_dict.items()):
        print("Song id " + str(song_id_key) + ": \n" + str(ordered_vals))
        if i >= limit:
            break
        i = i + 1

def add_groundtruth_to_features(songkey_features_dict, gt_label, songkey_gt_dict, gt_field):
    new_features_label = gt_label + "_" + gt_field

    # Doesn't need to be sorted, but tidier this way
    for song_id_key, ordered_feature_vals in sorted(songkey_features_dict.items()):
        songkey_features_dict[song_id_key][new_features_label] = songkey_gt_dict[song_id_key][gt_field]


def ordered_dict_features_to_array(songkey_combined_dict):

    features_array = []

    header = []

    # Set up the header labels
    first_key = next(iter(songkey_combined_dict))
    first_ordered_dict = songkey_combined_dict[first_key]

    for feature_key in first_ordered_dict:
        header.append(feature_key)

    features_array.append(header)

    # Now move on to processing each of the song_key_id entries
    #
    # Doesn't need to be sorted, but tidier this way

    for song_id_key, ordered_combined_vals in sorted(songkey_combined_dict.items()):

        row = []

        for feature_key in ordered_combined_vals:
            feature_val = ordered_combined_vals[feature_key]

            row.append(feature_val)

        features_array.append(row)

    return features_array


def csv_save_combined_features(csv_filename, combined_features_array):
    csv_features_file = open(csv_filename, "w", newline="")
    csv_features_writer = csv.writer(csv_features_file, delimiter=',', quotechar='"')

    csv_features_writer.writerows(combined_features_array)

    # Close explicitly so the output file is flushed to disk
    csv_features_file.close()


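# ---- Top-level script: merge the Essentia features with the DEAM
# ---- arousal/valence ground-truth and save the combined CSV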
csv_input_filename = 'etc/deam-essentia-features-collated.csv'
csv_output_filename = 'etc/deam-essentia-features-arousal-valence.csv'

groundtruth_dir = 'prepare/annotations/annotations averaged per song/dynamic (per second annotations)'
arousal_csv_filename = groundtruth_dir + '/arousal.csv'
valence_csv_filename = groundtruth_dir + '/valence.csv'

# Essentia features were extracted 21 seconds in, for a 6 second block.
# This equates to the DEAM ground-truth data values:
#   [sample_21000ms, sample_27000ms)
#
# => Take the ground-truth value at 'sample_26500ms' as the culmination of
#    the affect the music had on the users participating in the experiment

gt_field = 'sample_26500ms'
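# A minimal sanity check of the window arithmetic above (the helper below is
# illustrative, not part of the original workflow): the annotation columns
# appear to step in 500 ms increments, so the last sample inside the half-open
# [21000 ms, 27000 ms) feature window is 27000 - 500 = 26500 ms.
def last_sample_in_window(window_start_ms, window_len_ms, sample_step_ms=500):
    window_end_ms = window_start_ms + window_len_ms   # exclusive end of the block
    return "sample_%dms" % (window_end_ms - sample_step_ms)

assert last_sample_in_window(21000, 6000) == gt_field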

csv_features_dict = csv_features_to_dict(csv_input_filename)

arousal_groundtruth_dict = csv_groundtruth_to_dict(arousal_csv_filename)
valence_groundtruth_dict = csv_groundtruth_to_dict(valence_csv_filename)

print_songkey_dict("Arousal Ground-truth", arousal_groundtruth_dict)
print_songkey_dict("Valence Ground-truth", valence_groundtruth_dict)

add_groundtruth_to_features(csv_features_dict, "arousal", arousal_groundtruth_dict, gt_field)
add_groundtruth_to_features(csv_features_dict, "valence", valence_groundtruth_dict, gt_field)

print_songkey_dict("Essentia Features", csv_features_dict, 1)

combined_features_array = ordered_dict_features_to_array(csv_features_dict)


print()
print("****")
print("* Saving combined CSV data to: " + csv_output_filename)
print("****")
csv_save_combined_features(csv_output_filename, combined_features_array)