"""
Extraction of spectral information.
"""
from decimal import Decimal
import numpy as np
from deprecated.sphinx import deprecated
from librosa.filters import mel
from librosa.core.convert import mel_frequencies
from librosa import cqt, hybrid_cqt, cqt_frequencies
from iracema.util.windowing import apply_sliding_window
from iracema.util import conversion
import iracema.core.timeseries
class STFT(iracema.core.timeseries.TimeSeries):
    """Short-Time Fourier Transform of a ``time_series``."""

    def __init__(self, time_series, window_size, hop_size, fft_len=None):
        """
        Args
        ----
        time_series : TimeSeries
            Time series to be transformed.
        window_size : int
            Size of the sliding window handed to ``apply_sliding_window``.
        hop_size : int
            Hop between consecutive windows. The sampling frequency of the
            resulting object is ``time_series.fs / hop_size``.
        fft_len : int
            Length of the FFT. The signal will be zero-padded if ``fft_len`` >
            ``window_size``. Falls back to ``window_size`` when omitted (or
            otherwise falsy).
        """
        fft_len = fft_len or window_size

        def windowed_rfft(frame):
            # One-sided FFT with orthonormal scaling, padded/cropped to
            # `fft_len` points.
            return np.fft.rfft(frame, n=fft_len, norm='ortho')

        frames = apply_sliding_window(
            time_series.data,
            window_size,
            hop_size,
            windowed_rfft,
            window_name='hann')

        # Decimal arithmetic is used for the frame rate, as in the rest of
        # this module.
        frame_rate = Decimal(time_series.fs) / Decimal(hop_size)

        super().__init__(
            frame_rate,
            data=frames,
            start_time=time_series.start_time,
            caption=time_series.caption)

        self.max_frequency = float(time_series.nyquist)
        # Centre frequency of each rfft bin for the source sampling rate.
        self.frequencies = np.fft.rfftfreq(
            fft_len, 1. / float(time_series.fs))
        self.label = 'STFT'
        self.unit = 'complex'

    def magnitude(self, power=2., db=False):
        """
        Return ``abs(data) ** power``, optionally converted to dB.

        Args
        ----
        power : float
            Exponent applied to the absolute value; only 1.0 and 2.0 are
            accepted.
        db : bool
            When true, the result is passed through
            ``conversion.amplitude_to_db`` (``power`` equal to 1.0) or
            ``conversion.energy_to_db`` (``power`` equal to 2.0).

        Raises
        ------
        ValueError
            If ``power`` is neither 1.0 nor 2.0.
        """
        if float(power) not in (1., 2.):
            raise ValueError(
                'The argument `power` must be equal to 1.0 or 2.0')

        result = np.abs(self.data)**power
        if not db:
            return result

        if power == 1.0:
            result = conversion.amplitude_to_db(result)
        elif power == 2.0:
            result = conversion.energy_to_db(result)
        return result

    def phase(self):
        """Return the phase angle of every STFT coefficient (``np.angle``)."""
        return np.angle(self.data)
class Spectrogram(iracema.core.timeseries.TimeSeries):
    """Spectrogram (STFT magnitude) of the given `time_series`."""

    def __init__(self,
                 time_series,
                 window_size,
                 hop_size,
                 fft_len=None,
                 power=2.,
                 db=False):
        """
        Args
        ----
        time_series : TimeSeries
            Time series from which the underlying STFT is computed.
        window_size : int
            Window size forwarded to ``STFT``.
        hop_size : int
            Hop size forwarded to ``STFT``.
        fft_len : int
            Length of the FFT. The signal will be zero-padded if ``fft_len`` >
            ``window_size``. The default value is equal to `window_size`.
        power : float
            Exponent for the spectrogram, forwarded to ``STFT.magnitude``.
        db : bool
            Whether or not to convert the output values to dB.
        """
        transform = STFT(time_series, window_size, hop_size, fft_len=fft_len)
        values = transform.magnitude(power=power, db=db)

        # Timing information and caption are inherited from the STFT object.
        super().__init__(
            transform.fs,
            data=values,
            start_time=transform.start_time,
            caption=transform.caption)

        self.max_frequency = transform.max_frequency
        self.frequencies = transform.frequencies
        self.label = 'Spectrogram'
        self.unit = 'Magnitude'
        # Remember how the values were scaled.
        self._power = power
        self._db = db
class MelSpectrogram(iracema.core.timeseries.TimeSeries):
    """Mel-scaled spectrogram of the given ``time_series``."""

    def __init__(self,
                 time_series,
                 window_size,
                 hop_size,
                 n_mels=256,
                 fft_len=None,
                 power=2.,
                 db=False,
                 fmin=0.,
                 fmax=None,
                 htk=False):
        """
        Compute a mel spectrogram for ``time_series``.

        Args
        ----
        time_series : TimeSeries
            Time series for applying the STFT.
        window_size : int
            Window size forwarded to ``Spectrogram``.
        hop_size : int
            Hop size forwarded to ``Spectrogram``.
        n_mels : int
            Number of mel-scaled filters/channels.
        fft_len : int
            Length of the FFT. The signal will be zero-padded if ``fft_len`` >
            ``window_size``. The default value is equal to `window_size`.
        power : float
            Exponent for the spectrogram.
        db : bool
            Whether or not to convert the output values to dB.
        fmin : float
            Frequency of the lowest filter.
        fmax : float
            Frequency of the highest filter. Defaults to the maximum
            frequency of the underlying spectrogram.
        htk : bool
            Forwarded to both ``librosa.filters.mel`` and
            ``librosa.core.convert.mel_frequencies`` so that the filterbank
            and the stored ``frequencies`` use the same mel formula.
        """
        if not fft_len:
            fft_len = window_size

        # The linear spectrogram is always requested without dB conversion;
        # the conversion (if any) is applied after the mel projection below.
        spec = Spectrogram(
            time_series,
            window_size,
            hop_size,
            fft_len=fft_len,
            power=power,
            db=False)

        fmax = fmax or spec.max_frequency

        # `sr` and `n_fft` are passed by keyword: recent librosa releases
        # made them keyword-only, and older releases accept keywords too.
        # `fs` is converted to float as done elsewhere in this module, and
        # `htk` is forwarded so the filterbank matches `self.frequencies`.
        mel_basis = mel(
            sr=float(time_series.fs),
            n_fft=fft_len,
            n_mels=n_mels,
            fmin=fmin,
            fmax=fmax,
            htk=htk)

        # Project the linear-frequency bins onto the mel filterbank.
        data = np.dot(mel_basis, spec.data)

        if db:
            if power == 1.0:
                data = conversion.amplitude_to_db(data)
            elif power == 2.0:
                data = conversion.energy_to_db(data)

        super().__init__(
            spec.fs,
            data=data,
            start_time=spec.start_time,
            caption=spec.caption)

        self.frequencies = mel_frequencies(
            n_mels=n_mels, fmin=fmin, fmax=fmax, htk=htk)
        # NOTE(review): this is the last bin of the *linear* spectrogram, not
        # `fmax`; kept as in the original -- confirm whether `fmax` is wanted.
        self.max_frequency = spec.frequencies[-1]
        self.label = 'Mel Spectrogram'
        self.unit = 'Magnitude'
        self._power = power
        self._db = db
class CQT(iracema.core.timeseries.TimeSeries):
    """Constant-Q Transform of the given ``time_series``."""

    def __init__(self,
                 time_series,
                 hop_size,
                 n_bins=84,
                 bins_per_octave=12,
                 hybrid=True,
                 power=2.,
                 fmin=None,
                 db=False,
                 **kwargs):
        """
        Compute the Constant Q Transform for ``time_series``.

        Args
        ----
        time_series : TimeSeries
            Time series for applying the CQT.
        hop_size : int
            Hop between consecutive frames. The sampling frequency of the
            resulting object is ``time_series.fs / hop_size``.
        n_bins : int
            Number of frequency bins.
        bins_per_octave : int
            Number of bins per octave.
        hybrid : bool
            If true ``librosa.hybrid_cqt`` is used, otherwise ``librosa.cqt``.
        power : float
            Exponent applied to the absolute value of the transform; must be
            1.0 or 2.0.
        fmin : float
            Frequency of the lowest bin. When ``None``, the frequency of the
            note C1 (``librosa.note_to_hz('C1')``) is used.
        db : bool
            Whether or not to convert the output values to dB.
        **kwargs
            Extra keyword arguments forwarded to the librosa CQT function.
            A ``tuning`` entry, if present, is also used to compute the bin
            frequencies.

        Raises
        ------
        ValueError
            If ``power`` is neither 1.0 nor 2.0.
        """
        # Local import: the module-level imports do not bring this name in.
        from librosa import note_to_hz

        if float(power) not in (1., 2.):
            raise ValueError(
                'The argument `power` must be equal to 1.0 or 2.0')

        # `cqt_frequencies` has no default for `fmin`, so `None` must be
        # resolved here before the bin frequencies can be computed.
        if fmin is None:
            fmin = float(note_to_hz('C1'))

        new_fs = Decimal(time_series.fs) / Decimal(hop_size)

        cqt_func = hybrid_cqt if hybrid else cqt

        # Apply the requested exponent so that the stored values agree with
        # `_power` and with the dB conversion chosen below.
        data = np.abs(
            cqt_func(time_series.data,
                     sr=float(time_series.fs),
                     hop_length=hop_size,
                     n_bins=n_bins,
                     bins_per_octave=bins_per_octave,
                     fmin=fmin,
                     **kwargs)
        ) ** power

        if db:
            if power == 1.0:
                data = conversion.amplitude_to_db(data)
            elif power == 2.0:
                data = conversion.energy_to_db(data)

        # `fmin` is passed by keyword, which is accepted by librosa releases
        # where it is positional as well as those where it is keyword-only.
        frequencies = cqt_frequencies(n_bins,
                                      fmin=fmin,
                                      bins_per_octave=bins_per_octave,
                                      tuning=kwargs.get('tuning') or 0)

        super().__init__(
            new_fs,
            data=data,
            start_time=time_series.start_time,
            caption=time_series.caption)

        self.frequencies = frequencies
        self.max_frequency = float(frequencies[-1])
        self.label = 'CQT'
        self.unit = 'Magnitude'
        self._power = power
        self._db = db
@deprecated(version='0.2.0', reason='Deprecated method. Use `STFT` instead.')
def fft(*args, **kwargs):
    """Deprecated alias: build and return an ``STFT`` from the arguments."""
    stft = STFT(*args, **kwargs)
    return stft