source: gs3-extensions/mars-src/trunk/bin/script/json_to_csv.py @ 34427

Last change on this file since 34427 was 34427, checked in by davidb, 4 years ago

Brought across from the Essentia source, and prepped for use from the command line as part of the preparation step for working with the DEAM dataset

  • Property svn:executable set to *
File size: 4.7 KB
#!/usr/bin/env python

# Taken from essentia-full-git/src/examples/python/json_to_csv.py
# /usr/bin/env line above added
# chmod a+x added

import sys, json, csv
from fnmatch import fnmatch
from argparse import ArgumentParser

JSON_FILENAME = 'json_file_name'

def isMatch(name, patterns):
    if not patterns:
        return False
    for pattern in patterns:
        if fnmatch(name, pattern):
            return True
    return False


def parse_descriptors(d, include=None, ignore=None):
    results = {}

    stack = [(k, k, v) for k, v in d.items()]
    while stack:
        name, k, v = stack.pop()
        if isinstance(v, dict):
            stack.extend([(name + '.' + k1, k1, v1) for k1, v1 in v.items()])
        elif isinstance(v, list):
            stack.extend([(name + '.' + str(i), i, v[i]) for i in range(len(v))])
        else:
            if include:
                # 'include' flag specified => apply both include and ignore
                if isMatch(name, include) and not isMatch(name, ignore):
                    results[name] = v
            else:
                # 'include' flag not specified => apply only ignore
                if not isMatch(name, ignore):
                    results[name] = v

    return results


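# Illustrative example of parse_descriptors' flattening (editorial note, not
# part of the original Essentia script): with no include/ignore patterns,
#
#   parse_descriptors({'group': {'name': [[1, 2], [3, 4]]}})
#
# returns
#
#   {'group.name.0.0': 1, 'group.name.0.1': 2,
#    'group.name.1.0': 3, 'group.name.1.1': 4}

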
def convert(json_file, include, ignore):
    print('Converting %s' % json_file)
    data = json.load(open(json_file, 'r'))

    return parse_descriptors(data, include, ignore)

def convert_all(json_files, csv_file, include=None, ignore=None, add_filename=True):

    with open(csv_file, 'w') as f_csv:
        print("Writing to %s" % csv_file)
        writer = csv.writer(f_csv,
                            delimiter=',',
                            quotechar='"',
                            quoting=csv.QUOTE_NONNUMERIC)
        header = None

        for f_json in json_files:
            d = convert(f_json, include, ignore)

            if add_filename:
                if JSON_FILENAME in d:
                    print("Error appending json filename to the CSV: `%s` name is already used." % JSON_FILENAME)
                    sys.exit()
                else:
                    d[JSON_FILENAME] = f_json

            if not header:
                header = sorted(d.keys())
                if not len(header):
                    print("Error: no descriptors found to be written.")
                    sys.exit()
                writer.writerow(header)

            try:
                if len(d.keys()) != len(header):
                    raise Exception()
                raw = [d[h] for h in header]
            except Exception:
                print("Error: Incompatible descriptor layouts")
                print("Layout difference:")
                print(list(set(header).symmetric_difference(set(d.keys()))))
                sys.exit()

            writer.writerow(raw)


    # TODO: Currently, the same descriptor layout is required for all
    # input files (after filtering).
    # Make an alternative version that:
    # - gathers a list of all descriptors found in the input files
    # - creates a CSV based on such a list, so that files with
    #   different descriptor layouts can be merged into the same CSV
    # (a hypothetical sketch of this approach is included after this function)

    return


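# The function below is a hypothetical sketch of the alternative approach
# described in the TODO above; it is an editorial illustration, not part of
# the original Essentia/MARS script. It gathers the union of all descriptor
# names first, then writes empty cells where a file has no value for a
# descriptor, so inputs with different layouts can share one CSV.
def convert_all_union(json_files, csv_file, include=None, ignore=None, add_filename=True):
    dicts = [convert(f_json, include, ignore) for f_json in json_files]
    if add_filename:
        for f_json, d in zip(json_files, dicts):
            d[JSON_FILENAME] = f_json

    # Union of every descriptor name seen across the input files
    header = sorted(set().union(*[set(d.keys()) for d in dicts]))

    with open(csv_file, 'w') as f_csv:
        writer = csv.writer(f_csv,
                            delimiter=',',
                            quotechar='"',
                            quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow(header)
        for d in dicts:
            # Missing descriptors are written as empty strings
            writer.writerow([d.get(h, '') for h in header])

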
if __name__ == '__main__':
    parser = ArgumentParser(description="""
Converts a set of descriptor files from JSON to CSV format.
Descriptor trees are flattened, with additional indices added to descriptor
names in the case of lists or nested lists
(for example: {'group': {'name': [[1,2,3], [4,5,6]]}} will be mapped to descriptor names
'group.name.0.0', 'group.name.0.1', 'group.name.0.2', 'group.name.1.0', 'group.name.1.1', 'group.name.1.2').
Descriptors can then be included/ignored by their flattened names using wildcards.
After flattening and filtering, all inputs are expected to have exactly the same set
of descriptor names to be able to merge them into one CSV.
""")

    parser.add_argument('-i', '--input', nargs='+', help='Input JSON files', required=True)
    parser.add_argument('-o', '--output', help='Output CSV file', required=True)

    parser.add_argument('--include', nargs='+', help='Descriptors to include (can use wildcards)', required=False)
    parser.add_argument('--ignore', nargs='+', help='Descriptors to ignore (can use wildcards)', required=False)

    parser.add_argument('--add-filename', help='Add input filenames to "%s" field in CSV' % JSON_FILENAME, action='store_true', required=False)

    args = parser.parse_args()

    if args.include and args.ignore and not set(args.include).isdisjoint(args.ignore):
        print('You cannot specify the same descriptor patterns in both --include and --ignore flags')
        sys.exit()

    convert_all(args.input, args.output, args.include, args.ignore, args.add_filename)
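
A minimal usage sketch, for orientation (the file names and the descriptor pattern below are hypothetical examples, not part of the repository): from the command line the script takes the input JSON files with -i, the output CSV with -o, and optional --include/--ignore wildcard patterns plus --add-filename. The same conversion can also be driven from Python by importing the module directly:

from json_to_csv import convert_all

# Hypothetical file names and Essentia-style descriptor pattern, for illustration only
convert_all(['track_01.json', 'track_02.json'], 'features.csv',
            include=['lowlevel.*.mean'], ignore=None, add_filename=True)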