source: gs3-extensions/mars-src/trunk/lib/python/pessentia.py@ 34360

Last change on this file since 34360 was 34360, checked in by davidb, 4 years ago

Collating of Python Essentia custom scripts and Essentia Perl plugin code

  • Property svn:executable set to *
File size: 3.0 KB
Line 
#!/usr/bin/env python3

import sys
import os

import essentia
import essentia.standard
import essentia.streaming

# Extract per-frame MFCC features from an audio file with Essentia and write
# them to a JSON file.
#
# Usage: pessentia.py input_file [output_file]
#
# When output_file is omitted, the features are written next to the input as
# "<input without extension>-features.json".

FRAME_SIZE = 1024  # number of samples in each analysis frame
HOP_SIZE = 512     # number of samples between the starts of consecutive frames


def resolve_filenames(argv):
    """Work out the input and output filenames from a command line.

    Args:
        argv: Command-line argument list in ``sys.argv`` layout
            (program name first, then ``input_file`` and optionally
            ``output_file``).  Any arguments after the second are ignored.

    Returns:
        A ``(input_audio_filename, output_features_filename)`` tuple.  When
        no output file is given, it is derived from the input filename by
        replacing its extension with ``-features.json``.

    Raises:
        SystemExit: With exit status 1, after printing a usage message to
            stderr, when no input file was supplied.
    """
    if len(argv) <= 1:
        # Guard against an empty argv so the usage message can still be shown.
        prog = argv[0] if argv else "pessentia.py"
        print("Usage: " + prog + " input_file [output_file]\n", file=sys.stderr)
        sys.exit(1)

    input_audio_filename = argv[1]
    if len(argv) == 2:
        output_features_filename = (
            os.path.splitext(input_audio_filename)[0] + '-features.json'
        )
    else:
        output_features_filename = argv[2]

    return input_audio_filename, output_features_filename


def extract_features(audio):
    """Compute MFCC-related features for every frame of *audio*.

    The signal is cut into frames of ``FRAME_SIZE`` samples every
    ``HOP_SIZE`` samples.  Each frame is Hann-windowed, turned into a
    magnitude spectrum and passed to Essentia's ``MFCC`` algorithm.  Every
    frame is analysed exactly once.

    Args:
        audio: The audio samples, as returned by an Essentia loader.

    Returns:
        An ``essentia.Pool`` holding one entry per frame under the keys
        ``lowlevel.mfcc`` (MFCC coefficients), ``lowlevel.mfcc_bands``
        (mel band energies) and ``lowlevel.mfcc_bands_log`` (log of the
        mel band energies).
    """
    es = essentia.standard

    window = es.Windowing(type='hann')
    # FFT() would return the complex FFT, here we just want the magnitude spectrum
    spectrum = es.Spectrum()
    mfcc = es.MFCC()
    log_norm = es.UnaryOperator(type='log')

    pool = essentia.Pool()
    frames = es.FrameGenerator(
        audio, frameSize=FRAME_SIZE, hopSize=HOP_SIZE, startFromZero=True
    )
    for frame in frames:
        mfcc_bands, mfcc_coeffs = mfcc(spectrum(window(frame)))
        pool.add('lowlevel.mfcc', mfcc_coeffs)
        pool.add('lowlevel.mfcc_bands', mfcc_bands)
        pool.add('lowlevel.mfcc_bands_log', log_norm(mfcc_bands))

    return pool


def main(argv=None):
    """Run the script: load the audio, extract features, write them as JSON.

    Args:
        argv: Command-line argument list in ``sys.argv`` layout.  Defaults
            to ``sys.argv`` when omitted.
    """
    if argv is None:
        argv = sys.argv

    input_audio_filename, output_features_filename = resolve_filenames(argv)

    # Prepare to process the file
    loader = essentia.standard.MonoLoader(filename=input_audio_filename)
    audio = loader()

    print("Processing Frames")
    pool = extract_features(audio)

    # format='json' must be requested explicitly; otherwise defaults to YAML (.sig)
    output = essentia.standard.YamlOutput(
        filename=output_features_filename, format='json'
    )
    output(pool)


if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the repository browser.