#!/usr/bin/env python

# Taken from essentia-full-git/src/examples/python/json_to_csv.py
# /usr/bin/env line above added
# chmod a+x added

import sys, json, csv
from fnmatch import fnmatch
from argparse import ArgumentParser

JSON_FILENAME = 'json_file_name'
def isMatch(name, patterns):
    """Return True if `name` matches at least one shell-style wildcard pattern.

    An empty or None `patterns` collection never matches anything.
    """
    if not patterns:
        return False
    return any(fnmatch(name, pattern) for pattern in patterns)


def parse_descriptors(d, include=None, ignore=None):
    """Flatten a nested dict/list descriptor tree into {dotted_name: value}.

    Nested dict keys and list indices are joined with '.' to build each
    flattened leaf name (e.g. {'a': {'b': [1]}} -> {'a.b.0': 1}).

    Filtering by fnmatch-style wildcard patterns:
    - if `include` is given, a leaf is kept only when it matches `include`
      and does not match `ignore`;
    - otherwise a leaf is kept unless it matches `ignore`.
    """
    results = {}

    # Iterative depth-first traversal; each stack entry is
    # (flattened_name, local_key, value). The local key is only needed
    # while building child names, so it is ignored at leaves.
    stack = [(k, k, v) for k, v in d.items()]
    while stack:
        name, _, v = stack.pop()
        if isinstance(v, dict):
            stack.extend((name + '.' + k1, k1, v1) for k1, v1 in v.items())
        elif isinstance(v, list):
            # enumerate instead of range(len(v)): same order, idiomatic.
            stack.extend((name + '.' + str(i), i, item) for i, item in enumerate(v))
        else:
            if include:
                # 'include' flag specified => apply both include and ignore
                if isMatch(name, include) and not isMatch(name, ignore):
                    results[name] = v
            else:
                # 'include' flag not specified => apply only ignore
                if not isMatch(name, ignore):
                    results[name] = v

    return results


def convert(json_file, include, ignore):
    """Load one JSON file and return its flattened, filtered descriptors.

    `include`/`ignore` are wildcard pattern lists forwarded to
    parse_descriptors.
    """
    print('Converting %s' % json_file)
    # Context manager so the file handle is closed deterministically
    # (the original `json.load(open(...))` leaked the handle until GC).
    with open(json_file, 'r') as f_json:
        data = json.load(f_json)

    return parse_descriptors(data, include, ignore)

def convert_all(json_files, csv_file, include=None, ignore=None, add_filename=True):
    """Merge flattened descriptors from many JSON files into one CSV file.

    The first input file defines the (sorted) CSV header; every subsequent
    file must produce exactly the same set of descriptor names after
    flattening and filtering, otherwise the program exits with an error.
    When `add_filename` is true, each row also carries the source JSON
    filename in the JSON_FILENAME column.
    """
    with open(csv_file, 'w') as f_csv:
        print("Writing to %s" % csv_file)
        writer = csv.writer(f_csv,
                            delimiter=',',
                            quotechar='"',
                            quoting=csv.QUOTE_NONNUMERIC)
        header = None

        for f_json in json_files:
            d = convert(f_json, include, ignore)

            if add_filename:
                if JSON_FILENAME in d:
                    print("Error appending json filename to the CSV: `%s` name is already used." % JSON_FILENAME)
                    sys.exit()
                d[JSON_FILENAME] = f_json

            if header is None:
                header = sorted(d.keys())
                if not header:
                    print("Error: no descriptors found to be written.")
                    sys.exit()
                writer.writerow(header)

            # All files must share the same descriptor layout. Compare key
            # sets directly instead of raising/catching a generic Exception
            # as the original did; this also detects the same-length,
            # different-keys case without relying on a KeyError.
            if set(d.keys()) != set(header):
                print("Error: Incompatible descriptor layouts")
                print("Layout difference:")
                print(list(set(header).symmetric_difference(set(d.keys()))))
                sys.exit()

            writer.writerow([d[h] for h in header])

    # TODO: Currently, the same descriptor layout is required for all
    # input files (after filtering)
    # Make alternative version that
    # - gathers a list of all descriptors found in input files
    # - creates a CSV based on such a list, so that files with
    #   different descriptor layouts can be merged into the same CSV

    return


if __name__ == '__main__':
    # Command-line entry point: parse arguments and run the conversion.
    parser = ArgumentParser(description = """
Converts a bunch of descriptor files from json to csv format.
Descriptor trees are flattened, with additional indices added to descriptor
names in the case of lists or nested lists
(for example: {'group': {'name': [[1,2,3], [4,5,6]]}} will be mapped to descriptor names
'group.name.0.0', 'group.name.0.1', 'group.name.0.2', 'group.name.1.0', 'group.name 1.1', 'group.name 1.2').
Descriptors can then be included/ignored by their flattened names using wildcards.
After flattening and filtering, all inputs are expected to have exactly the same set
of descriptor names to be able to merge them into one csv.
""")

    parser.add_argument('-i', '--input', nargs='+', help='Input JSON files', required=True)
    parser.add_argument('-o', '--output', help='Output CSV file', required=True)

    # Optional wildcard filters applied to the flattened descriptor names.
    parser.add_argument('--include', nargs='+', help='Descriptors to include (can use wildcards)', required=False)
    parser.add_argument('--ignore', nargs='+', help='Descriptors to ignore (can use wildcards)', required=False)

    parser.add_argument('--add-filename', help='Add input filenames to "%s" field in CSV' % JSON_FILENAME, action='store_true', required=False)

    args = parser.parse_args()

    # A pattern listed in both --include and --ignore would always be
    # filtered out (ignore wins in parse_descriptors), so reject the
    # ambiguous combination up front.
    if args.include and args.ignore and not set(args.include).isdisjoint(args.ignore):
        print('You cannot specify the same descriptor patterns in both --include and --ignore flags')
        sys.exit()

    convert_all(args.input, args.output, args.include, args.ignore, args.add_filename)