#%%
"""
Store audio record.
"""
from typing import AnyStr
import numpy as np
from wavesongs.utils.paths import ProjDirs
from wavesongs.utils.tools import envelope, is_notebook
from maad.sound import write, normalize
from IPython.display import Audio
from os.path import (
basename,
normpath
)
from librosa import (
stft,
reassigned_spectrogram,
amplitude_to_db,
fft_frequencies,
times_like,
yin,
load,
pyin
)
from maad.sound import normalize
from typing import (
Optional,
Tuple,
AnyStr,
Dict,
TypeVar
)
Syllable = TypeVar('Syllable')
Song = TypeVar('Song')
DataFrame = TypeVar('pandas.core.frame.DataFrame')
[docs]
class Song:
"""
Store a song and its properties in a class
Parameters
----------
proj_dirs : ProjDirs
file_id : str
Name or id of the audio sample
tlim : tuple
Time range
flim : tuple
Frequency range
sr : int
Sample rate
info : dict
Audio metadata.
id : str = "song"
Attributes
----------
Example
-------
>>>
"""
[docs]
def __init__(
self,
proj_dirs: ProjDirs,
file_id: AnyStr,
tlim: Tuple[float] = (0, 60),
flim: Tuple[float] = (1e3, 2e4),
sr: int = 44100,
info: Dict = {},
id: AnyStr = "song"
):
self.proj_dirs = proj_dirs
self.file_id = file_id
self.tlim = tlim
self.info = info
self.flim = flim
self.file_path = proj_dirs.find_audio(file_id)
self.file_name = basename(normpath(self.file_path))
if self.proj_dirs.CATALOG:
self.info = proj_dirs.data.iloc[0].to_dict()
s, sr = load(self.file_path, sr=sr, mono=True)
self.id = id
self.sr = sr
# croping the audio in the range tlim
s = s[int(self.tlim[0]*self.sr):int(self.tlim[1]*self.sr)]
self.t0 = self.tlim[0]
self.t0_bs = self.tlim[0]
self.s = normalize(s, max_amp=1.0)
[docs]
def acoustical_features(
self,
llambda: float = 1.,
NN: int = 1024,
overlap: float = 0.5,
center: bool = False,
umbral_FF: float = 1.05,
ff_method: AnyStr = 'yin',
Nt: int = 100,
n_mfcc: int = 4,
n_mels: int = 4,
stft_window: AnyStr = "hann",
tlim: Optional[Tuple[float]] = None,
flim: Optional[Tuple[float]] = None
) -> None:
"""
Coputing acoustical tempo-spectral variables
Parameters
----------
llambda : float
NN : int
overlap : float
center : bool = False
umbral_FF : int
ff_method : str
Nt : int
n_mfcc : int
stft_window : str
Return
------
None
Examples
--------
>>>
"""
if tlim is not None: self.tlim = tlim
if flim is not None: self.flim = flim
self.n_mfcc = n_mfcc
self.n_mels = n_mels
self.stft_window = stft_window
self.llambda = llambda
self.center = center
self.umbral = 0.05
self.umbral_FF = umbral_FF
self.ff_method = ff_method
self.NN = NN
self.Nt = Nt
self.center = center
self.overlap = overlap
self.win_length = self.NN//2
self.hop_length = self.NN//4
self.no_overlap = int(overlap*self.NN)
self.time_s = np.linspace(0, len(self.s)/self.sr, len(self.s))
self.envelope = envelope(self.s, self.sr, Nt=self.Nt)
# Short-time Fourier transform (STFT)
self.stft = stft(y=self.s,
n_fft=self.NN,
hop_length=self.hop_length,
win_length=self.NN,
window=self.stft_window,
center=self.center,
dtype=None,
pad_mode='constant')
# Time-frequency reassigned spectrogram
freqs, times, mags = reassigned_spectrogram(
self.s,
sr=self.sr,
S=self.stft,
n_fft=self.NN,
hop_length=self.hop_length,
win_length=self.win_length,
window=self.stft_window,
center=self.center,
reassign_frequencies=True,
reassign_times=True,
ref_power=1e-06,
fill_nan=True,
clip=True,
dtype=None,
pad_mode ='constant'
)
self.freqs = freqs
self.times = times
self.Sxx = mags
self.Sxx_dB = amplitude_to_db(mags, ref=np.max)
self.freq = fft_frequencies(sr=self.sr, n_fft=self.NN)
self.time = times_like(X=self.stft,
sr=self.sr,
hop_length=self.hop_length,
n_fft=self.NN) #, axis=-1
# put in origin the time
self.time -= self.time[0]
# method to calculate fundamental frequency
self.FF = yin(self.s,
fmin=self.flim[0],
fmax=self.flim[1],
sr=self.sr,
frame_length=self.NN,
win_length=self.win_length,
hop_length=self.hop_length,
trough_threshold=self.umbral_FF,
center=self.center,
pad_mode='constant')
[docs]
def play(self) -> Audio:
"""
Parameters
----------
Return
------
Example
-------
>>>
"""
if is_notebook():
return Audio(data=self.s, rate=self.sr)
else:
raise Exception("Not implemented yet!")
#%%
[docs]
def write_audio(self, bit_depth: int = 16) -> None:
"""
Parameters
----------
Return
------
Examples
--------
>>>
"""
audio_name = f'{self.file_name[:-4]}-{self.id}.wav'
path_name = self.proj_dirs.examples / audio_name
write(filename=path_name, fs=self.sr, data=self.s, bit_depth=bit_depth)
print(f"Audio saved at {path_name}.")
#%%
# def Syllable(self, no_syllable, NN=1024):
# self.no_syllable = no_syllable
# ss = self.syllables[self.no_syllable-1] # syllable indexes
# self.syll_complet = self.s[ss] # audios syllable
# self.time_syllable = self.time_s[ss]
# self.t0 = self.time_syllable[0]
# self.syllable = Syllable(self, tlim=(self.time_syllable[0], self.time_syllable[-1]), flim=self.flim, NN=NN, file_name=self.file_name+"synth")
# self.syllable.no_syllable = self.no_syllable
# self.syllable.file_name = self.file_name
# self.syllable.state = self.state
# self.syllable.country = self.country
# self.syllable.no_file = self.no_file
# self.syllable.proj_dirs = self.proj_dirs
# self.syllable.id = "syllable"
# self.SylInd.append([[no_syllable], [ss]])
# fraction = self.syll_complet.size/1024
# Nt_new = int(((fraction%1)/fraction+1)*1024)
# self.chuncks = wave2frames(self.syll_complet, Nt=Nt_new)
# self.times_chun = wave2frames(self.time_syllable, Nt=Nt_new)
# self.no_chuncks = len(self.chuncks)
# return self.syllable
# #%%
# def SyntheticSyllable(self):
# self.s_synth = np.empty_like(self.s)
# for i in range(self.syllables.size):
# self.s_synth[self.SylInd[i][1]] = self.syllables[i]
#%%
# #%%
# def Set(self, p_array):
# self.p["a0"].set(value=p_array[0])
# self.p["a1"].set(value=p_array[1])
# self.p["a2"].set(value=p_array[2])
# self.p["b0"].set(value=p_array[3])
# self.p["b1"].set(value=p_array[4])
# self.p["b2"].set(value=p_array[5])
# #%%
# def Syllables(self, method="freq"):
# if method=="amplitud":
# supra = np.where(self.envelope > self.umbral)[0]
# candidates = np.split(supra, np.where(np.diff(supra) != 1)[0]+1)
# return [x for x in candidates if len(x) > 2*self.NN]
# elif method=="freq":
# # ss = np.where((self.FF < self.flim[1]) & (self.FF>self.flim[0])) # filter frequency
# # ff_t = self.time[ss] # cleaning timeFF
# # FF_new = self.FF[ss] # cleaning FF
# # FF_dif = np.abs(np.diff(FF_new)) # find where is it cutted
# # # alternative form with pandas
# df = pd.DataFrame(data={"FF":self.FF, "time":self.time})
# q = df["FF"].quantile(0.99)
# df[df["FF"] < q]
# q_low, q_hi = df["FF"].quantile(0.1), df["FF"].quantile(0.99)
# df_filtered = df[(df["FF"] < q_hi) & (df["FF"] > q_low)]
# ff_t = self.time[df_filtered["FF"].index]
# FF_new = self.FF[df_filtered["FF"].index]
# FF_dif = np.abs(np.diff(FF_new))
# # plt.plot(self.FF, 'o'); plt.plot(df_filtered["FF"], 'o')
# peaks, _ = find_peaks(FF_dif, distance=10, height=500) # FF_dif
# syl = [np.arange(peaks[i]+1,peaks[i+1]) for i in range(len(peaks)-1)]
# syl = [np.arange(0,peaks[0])]+syl+[np.arange(peaks[-1]+1,len(ff_t))]
# syl_intervals = np.array([[ff_t[s][0], ff_t[s][-1]] for s in syl])
# indexes = np.int64(self.sr*syl_intervals)
# indexes = [np.arange(ind[0],ind[1],1) for ind in indexes]
# return [x for x in indexes if len(x) > 2*self.NN]
# elif "maad":
# im_bin = create_mask(self.Sxx_dB, bin_std=1.5, bin_per=0.5, mode='relative')