source: gs3-extensions/mars-src/trunk/bin/script/json_to_csv.py @ 34427

Last change on this file since 34427 was 34427, checked in by davidb, 4 years ago

Brought across from the Essentia source, and prepped for use from the command line as part of the preparation step for working with the DEAM dataset

  • Property svn:executable set to *
File size: 4.7 KB
#!/usr/bin/env python

# Taken from essentia-full-git/src/examples/python/json_to_csv.py
# /usr/bin/env line above added
# chmod a+x added

import sys, json, csv
from fnmatch import fnmatch
from argparse import ArgumentParser

JSON_FILENAME = 'json_file_name'

def isMatch(name, patterns):
    if not patterns:
        return False
    for pattern in patterns:
        if fnmatch(name, pattern):
            return True
    return False


def parse_descriptors(d, include=None, ignore=None):
    results = {}

    stack = [(k, k, v) for k, v in d.items()]
    while stack:
        name, k, v = stack.pop()
        if isinstance(v, dict):
            stack.extend([(name + '.' + k1, k1, v1) for k1, v1 in v.items()])
        elif isinstance(v, list):
            stack.extend([(name + '.' + str(i), i, v[i]) for i in range(len(v))])
        else:
            if include:
                # 'include' flag specified => apply both include and ignore
                if isMatch(name, include) and not isMatch(name, ignore):
                    results[name] = v
            else:
                # 'include' flag not specified => apply only ignore
                if not isMatch(name, ignore):
                    results[name] = v

    return results


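# Illustrative example of parse_descriptors' flattening (editorial note, not
# part of the original Essentia script): with no include/ignore patterns,
#
#   parse_descriptors({'group': {'name': [[1, 2], [3, 4]]}})
#
# returns
#
#   {'group.name.0.0': 1, 'group.name.0.1': 2,
#    'group.name.1.0': 3, 'group.name.1.1': 4}

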
def convert(json_file, include, ignore):
    print('Converting %s' % json_file)
    data = json.load(open(json_file, 'r'))

    return parse_descriptors(data, include, ignore)

def convert_all(json_files, csv_file, include=None, ignore=None, add_filename=True):

    with open(csv_file, 'w') as f_csv:
        print("Writing to %s" % csv_file)
        writer = csv.writer(f_csv,
                            delimiter=',',
                            quotechar='"',
                            quoting=csv.QUOTE_NONNUMERIC)
        header = None

        for f_json in json_files:
            d = convert(f_json, include, ignore)

            if add_filename:
                if JSON_FILENAME in d:
                    print("Error appending json filename to the CSV: `%s` name is already used." % JSON_FILENAME)
                    sys.exit()
                else:
                    d[JSON_FILENAME] = f_json

            if not header:
                header = sorted(d.keys())
                if not len(header):
                    print("Error: no descriptors found to be written.")
                    sys.exit()
                writer.writerow(header)

            try:
                if len(d.keys()) != len(header):
                    raise Exception()
                raw = [d[h] for h in header]
            except Exception:
                print("Error: Incompatible descriptor layouts")
                print("Layout difference:")
                print(list(set(header).symmetric_difference(set(d.keys()))))
                sys.exit()

            writer.writerow(raw)


    # TODO: Currently, the same descriptor layout is required for all
    # input files (after filtering).
    # Make an alternative version that:
    # - gathers a list of all descriptors found in the input files
    # - creates a CSV based on such a list, so that files with
    #   different descriptor layouts can be merged into the same CSV
    # (a hypothetical sketch of this approach is included after this function)

    return


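# The function below is a hypothetical sketch of the alternative approach
# described in the TODO above; it is an editorial illustration, not part of
# the original Essentia/MARS script. It gathers the union of all descriptor
# names first, then writes empty cells where a file has no value for a
# descriptor, so inputs with different layouts can share one CSV.
def convert_all_union(json_files, csv_file, include=None, ignore=None, add_filename=True):
    dicts = [convert(f_json, include, ignore) for f_json in json_files]
    if add_filename:
        for f_json, d in zip(json_files, dicts):
            d[JSON_FILENAME] = f_json

    # Union of every descriptor name seen across the input files
    header = sorted(set().union(*[set(d.keys()) for d in dicts]))

    with open(csv_file, 'w') as f_csv:
        writer = csv.writer(f_csv,
                            delimiter=',',
                            quotechar='"',
                            quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow(header)
        for d in dicts:
            # Missing descriptors are written as empty strings
            writer.writerow([d.get(h, '') for h in header])

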
if __name__ == '__main__':
    parser = ArgumentParser(description="""
Converts a set of descriptor files from JSON to CSV format.
Descriptor trees are flattened, with additional indices added to descriptor
names in the case of lists or nested lists
(for example: {'group': {'name': [[1,2,3], [4,5,6]]}} will be mapped to descriptor names
'group.name.0.0', 'group.name.0.1', 'group.name.0.2', 'group.name.1.0', 'group.name.1.1', 'group.name.1.2').
Descriptors can then be included/ignored by their flattened names using wildcards.
After flattening and filtering, all inputs are expected to have exactly the same set
of descriptor names to be able to merge them into one CSV.
""")

    parser.add_argument('-i', '--input', nargs='+', help='Input JSON files', required=True)
    parser.add_argument('-o', '--output', help='Output CSV file', required=True)

    parser.add_argument('--include', nargs='+', help='Descriptors to include (can use wildcards)', required=False)
    parser.add_argument('--ignore', nargs='+', help='Descriptors to ignore (can use wildcards)', required=False)

    parser.add_argument('--add-filename', help='Add input filenames to "%s" field in CSV' % JSON_FILENAME, action='store_true', required=False)

    args = parser.parse_args()

    if args.include and args.ignore and not set(args.include).isdisjoint(args.ignore):
        print('You cannot specify the same descriptor patterns in both --include and --ignore flags')
        sys.exit()

    convert_all(args.input, args.output, args.include, args.ignore, args.add_filename)
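
A minimal usage sketch, for orientation (the file names and the descriptor pattern below are hypothetical examples, not part of the repository): from the command line the script takes the input JSON files with -i, the output CSV with -o, and optional --include/--ignore wildcard patterns plus --add-filename. The same conversion can also be driven from Python by importing the module directly:

from json_to_csv import convert_all

# Hypothetical file names and Essentia-style descriptor pattern, for illustration only
convert_all(['track_01.json', 'track_02.json'], 'features.csv',
            include=['lowlevel.*.mean'], ignore=None, add_filename=True)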