#!/usr/bin/env python3

import sys
import os

import essentia
import essentia.standard
import essentia.streaming

# Default analysis framing: frames of 1024 samples, advanced by 512 samples.
FRAME_SIZE = 1024
HOP_SIZE = 512


def resolve_filenames(argv):
    """Return ``(input_audio_filename, output_features_filename)`` for *argv*.

    ``argv[1]`` is the input audio file. ``argv[2]``, when present, is the
    output file; otherwise the output name is the input name with its
    extension replaced by ``-features.json``. Arguments after ``argv[2]``
    are ignored.

    Raises:
        ValueError: if *argv* carries no input file.
    """
    if len(argv) <= 1:
        raise ValueError("missing input file")

    input_audio_filename = argv[1]
    if len(argv) == 2:
        stem = os.path.splitext(input_audio_filename)[0]
        output_features_filename = stem + '-features.json'
    else:
        output_features_filename = argv[2]
    return input_audio_filename, output_features_filename


def extract_features(audio, frame_size=FRAME_SIZE, hop_size=HOP_SIZE):
    """Run the per-frame MFCC pipeline over *audio* and return the Pool.

    Every frame goes through a Hann window, ``Spectrum`` and ``MFCC``. Three
    descriptors are appended to the pool per frame:

    * ``lowlevel.mfcc``           -- the MFCC coefficients
    * ``lowlevel.mfcc_bands``     -- the mel band energies returned by MFCC
    * ``lowlevel.mfcc_bands_log`` -- those band energies passed through
      ``UnaryOperator(type='log')``

    The signal is traversed exactly once; the pool is the only place the
    results are stored.
    """
    window = essentia.standard.Windowing(type='hann')
    # FFT() would return the complex FFT, here we just want the magnitude
    # spectrum.
    spectrum = essentia.standard.Spectrum()
    mfcc = essentia.standard.MFCC()
    log_norm = essentia.standard.UnaryOperator(type='log')

    pool = essentia.Pool()
    frames = essentia.standard.FrameGenerator(
        audio, frameSize=frame_size, hopSize=hop_size, startFromZero=True)
    for frame in frames:
        mfcc_bands, mfcc_coeffs = mfcc(spectrum(window(frame)))
        pool.add('lowlevel.mfcc', mfcc_coeffs)
        pool.add('lowlevel.mfcc_bands', mfcc_bands)
        pool.add('lowlevel.mfcc_bands_log', log_norm(mfcc_bands))
    return pool


def main(argv=None):
    """Extract MFCC features from an audio file and write them as JSON.

    *argv* defaults to ``sys.argv``. Returns the process exit status:
    1 when no input file was given (after printing usage to stderr),
    0 otherwise.
    """
    if argv is None:
        argv = sys.argv

    try:
        input_audio_filename, output_features_filename = resolve_filenames(argv)
    except ValueError:
        print("Usage: "+argv[0] +" input_file [output_file]\n",file=sys.stderr)
        return 1

    # Prepare to process the file
    loader = essentia.standard.MonoLoader(filename=input_audio_filename)
    audio = loader()

    # Optional: look at the waveform (needs matplotlib).
    #import matplotlib.pyplot as plt
    #plt.rcParams['figure.figsize'] = (15, 6) # set plot sizes to something larger than default
    #plt.plot(audio[1*44100:2*44100])
    #plt.title("This is what the 2nd second of this audio looks like:")
    #plt.show()

    print("Processing Frames")
    pool = extract_features(audio)

    # Optional: plot the extracted descriptors (needs matplotlib).
    #plt.imshow(pool['lowlevel.mfcc_bands'].T, aspect = 'auto', origin='lower', interpolation='none')
    #plt.title("Mel band spectral energies in frames")
    #plt.show()

    #plt.imshow(pool['lowlevel.mfcc_bands_log'].T, aspect = 'auto', origin='lower', interpolation='none')
    #plt.title("Log-normalized mel band spectral energies in frames")
    #plt.show()

    #plt.imshow(pool['lowlevel.mfcc'].T[1:,:], aspect='auto', origin='lower', interpolation='none')
    #plt.title("MFCCs in frames")
    #plt.show()

    # format='json' is explicit because YamlOutput otherwise defaults to
    # YAML (.sig).
    output = essentia.standard.YamlOutput(filename=output_features_filename,
                                          format='json')
    output(pool)
    return 0


if __name__ == "__main__":
    sys.exit(main())