Source code for wavesongs.objs.syllable

"""
Store and compute acoustical features from syllables.
"""
import numpy as np
import pandas as pd

from wavesongs.utils.paths import ProjDirs
from wavesongs.utils.tools import envelope, is_notebook

from wavesongs.models.bird import (
    _PARAMS,
    _Z,
    alpha_beta,
    motor_gestures
)

from maad.sound import write, normalize
from IPython.display import Audio

from os.path import (
    basename,
    normpath
)

from librosa import (
    stft,
    reassigned_spectrogram,
    amplitude_to_db,
    fft_frequencies,
    times_like,
    yin,
    load,
    pyin
)

from librosa.feature import (
    spectral_centroid,
    mfcc,
    rms,
    melspectrogram
)

from maad.sound import normalize
from numpy.linalg import norm
from scipy.interpolate import interp1d

from typing import (
    Optional,
    Tuple,
    List,
    AnyStr,
    Dict,
    Any,
    Union,
    Literal,
    TypeVar
)

Syllable = TypeVar('Syllable')
Song = TypeVar('Song')
DataFrame = TypeVar('pandas.core.frame.DataFrame')
#%%
class Syllable:
    """
    Object class to store, characterize, and compare syllables.

    A syllable can be created three ways (see
    :func:`~wavesongs.objs.syllable.Syllable.__init__`):

    * from an audio file, with ``file_id`` and ``proj_dirs``;
    * as a time slice of an existing :class:`~wavesongs.objs.song.Song`
      or ``Syllable`` object, with ``obj``;
    * as a synthetic placeholder signal, with ``duration``.

    Parameters
    ----------
    file_id : str, optional
        Identifier used by ``proj_dirs.find_audio`` to locate the audio file.
    proj_dirs : ProjDirs, optional
        Object that manages the project directories.
    obj : Syllable | Song, optional
        Source object from which the syllable is sliced.
    tlim : tuple
        Time range ``(start, end)`` in seconds.
    sr : int
        Sample rate in Hz.
    no_syllable : int
        Syllable number within the song.
    id : str
        Type of the object, ``"syllable"`` or ``"synth-syllable"``.
    metadata : dict
        Audio metadata.
    type : str
        Short description ``"{part}-{behaviour}"`` where part is theme or
        trill and the fundamental-frequency behaviour is one of: plane, up,
        down, up-down, down-up, complex. Example: ``"theme-up"``.
    duration : int, optional
        Duration in seconds of a synthetic syllable.

    See Also
    --------
    :class:`~wavesongs.objs.song.Song`
    """
[docs] def __init__( self, file_id: Optional[AnyStr] = None, proj_dirs: Optional[ProjDirs] = None, obj: Any = None, tlim: Tuple[float] = (0, 60), sr: int = 44100, no_syllable: int = 0, id: AnyStr = "syllable", metadata: Dict = {}, type: AnyStr = "", duration: Optional[int] = None ): """_summary_ Args: file_id (Optional[AnyStr], optional): _description_. Defaults to None. proj_dirs (Optional[ProjDirs], optional): _description_. Defaults to None. obj (Any, optional): _description_. Defaults to None. tlim (Tuple[float], optional): _description_. Defaults to (0, 60). sr (int, optional): _description_. Defaults to 44100. no_syllable (int, optional): _description_. Defaults to 0. id (AnyStr, optional): _description_. Defaults to "syllable". metadata (Dict, optional): _description_. Defaults to {}. type (AnyStr, optional): _description_. Defaults to "". duration (Optional[int], optional): _description_. Defaults to None. Raises: Exception: _description_ """ self.no_syllable = no_syllable self.proj_dirs = proj_dirs self.metadata = metadata self.file_id = file_id self.type = type self.sr = sr self.tlim = tlim self.id = id # defining syllable by songs or file_id with proj_dirs object if (obj is not None) and (file_id is None) and (proj_dirs is None): # self.__dict__.update(obj.__dict__) self.proj_dirs = obj.proj_dirs self.file_name = obj.file_name self.t0_bs = obj.t0_bs + tlim[0] self.info = obj.info self.sr = obj.sr s = obj.s s = s[int(self.tlim[0]*self.sr):int(self.tlim[1]*self.sr)] self.t0 = self.tlim[0] self.s = normalize(s, max_amp=1.0) self.acoustical_features( NN=obj.NN, ff_method=obj.ff_method, umbral_FF=obj.umbral_FF, Nt=obj.Nt, center=obj.center, overlap=obj.overlap, flim=obj.flim, llambda=obj.llambda, n_mfcc=obj.n_mfcc, n_mels=obj.n_mels, stft_window=obj.stft_window ) elif (file_id is not None) and (proj_dirs is not None): self.file_path = proj_dirs.find_audio(file_id) self.file_name = basename(normpath(self.file_path)) s, sr = load(self.file_path, sr=self.sr, 
mono=True) self.t0_bs = 0 self.sr = sr s = s[int(self.tlim[0]*self.sr):int(self.tlim[1]*self.sr)] self.t0 = self.tlim[0] self.s = normalize(s, max_amp=1.0) elif (obj is None) and (file_id is None) and (duration is not None): self.file_name = "synthetic" self.id = "synth-" +id self.no_syllable = no_syllable self.proj_dirs = proj_dirs self.metadata = metadata self.sr = sr self.type = type self.T = duration self.s = np.ones(int(self.T*sr)) self.t0 = self.t0_bs = 0 else: raise Exception("You have to enter a file_id with a" + " project object or a song or syllable object")
#%%
[docs] def acoustical_features( self, NN: int = 512, ff_method: Literal["yin", "pyin"] = "yin", umbral_FF: int = 1, flim: Tuple[float] = (1e3, 2e4), Nt: int = 10, center: bool = False, overlap: float = 0.5, llambda: float = 1.5, n_mfcc: int = 4, n_mels: int = 4, stft_window: AnyStr = "hann" ) -> None: """ Coputing acoustical tempo-spectral variables Parameters ---------- NN : int llambda : float overlap : float center : bool = False umbral_FF : int ff_method : str Nt : int n_mfcc : int n_mels : int stft_window : str Return ------ None Examples -------- >>> """ self.stft_window = stft_window self.n_mfcc = n_mfcc self.n_mels = n_mels self.flim = flim self.Nt = Nt self.NN = NN self.ff_method = ff_method self.umbral_FF = umbral_FF self.llambda = llambda self.overlap = overlap self.center = center self.envelope = envelope(self.s, self.sr, self.Nt) self.time0 = np.linspace(0, len(self.s)/self.sr, len(self.s)) self.time_s = np.linspace(0, len(self.s)/self.sr, len(self.s)) self.T = self.s.size / self.sr self.t_interval = np.array([self.time_s[0], self.time_s[-1]]) self.t_interval += self.t0_bs self.win_length = self.NN//2 self.hop_length = self.NN//4 self.no_overlap = int(overlap*self.NN) # ------------- ACOUSTIC FEATURES ------------------------------- self.stft = stft(y=self.s, n_fft=self.NN, hop_length=self.hop_length, win_length=self.NN, window=self.stft_window, center=self.center, dtype=float, pad_mode='constant') freqs, times, mags = reassigned_spectrogram( self.s, sr=self.sr, S=self.stft, n_fft=self.NN, hop_length=self.hop_length, win_length=self.win_length, window=self.stft_window, center=self.center, reassign_frequencies=True, reassign_times=True, ref_power=1e-06, fill_nan=True, clip=True, dtype=float, pad_mode='constant' ) self.Sxx_dB = amplitude_to_db(mags, ref=np.max) self.freqs = freqs self.times = times self.Sxx = mags self.FF_coef = np.abs(self.stft) self.freq = fft_frequencies(sr=self.sr, n_fft=self.NN) self.time = times_like(X=self.stft, sr=self.sr, 
hop_length=self.hop_length, n_fft=self.NN) #, axis=-1 self.time -= self.time[0] self.f_msf = [norm(self.FF_coef[:,i]*self.freq, 1) / norm(self.FF_coef[:,i], 1) for i in range(self.FF_coef.shape[1])] self.f_msf = np.array(self.f_msf) self.centroid = spectral_centroid( y=self.s, sr=self.sr, S=np.abs(self.stft), n_fft=self.NN, hop_length=self.hop_length, freq=self.freqs, win_length=self.win_length, window=self.stft_window, center=self.center, pad_mode='constant' )[0] self.mfccs = mfcc( y=self.s, sr=self.sr, S=self.stft, n_mfcc=self.n_mfcc, dct_type=2, norm='ortho', lifter=0 ) self.rms = rms( y=self.s, S=self.stft, frame_length=self.NN, hop_length=self.hop_length, center=self.center, pad_mode='constant' )[0] self.s_mel = melspectrogram( y=self.sr, sr=self.sr, S=self.stft, n_fft=self.NN, hop_length=self.hop_length, win_length=self.win_length, window=self.stft_window, center=self.center, pad_mode='constant', power=2.0, n_mels=self.n_mels, fmin=self.flim[0], fmax=self.flim[1] ) # # ------------- Fundamental Frequency computing -------------- if self.ff_method=="pyin": self.FF,_,_ = pyin( self.s, fmin=self.flim[0], fmax=self.flim[1], sr=self.sr, frame_length=self.NN, win_length=self.win_length, hop_length=self.hop_length, n_thresholds=100, beta_parameters=(2, 18), boltzmann_parameter=2, resolution=0.1, max_transition_rate=35.92, switch_prob=0.01, no_trough_prob=0.01, fill_na=0, center=self.center, pad_mode='constant' ) elif self.ff_method=="yin": self.FF = yin( self.s, fmin=self.flim[0], fmax=self.flim[1], sr=self.sr, frame_length=self.NN, win_length=self.win_length, hop_length=self.hop_length, center=self.center, trough_threshold=self.umbral_FF, pad_mode='constant' ) elif self.ff_method=="both": self.FF2,_,_ = pyin( self.s, fmin=self.flim[0], fmax=self.flim[1], sr=self.sr, frame_length=self.NN, win_length=self.win_length, hop_length=self.hop_length, n_thresholds=100, beta_parameters=(2, 18), boltzmann_parameter=2, resolution=0.1, max_transition_rate=35.92, 
switch_prob=0.01, no_trough_prob=0.01, fill_na=0, center=self.center, pad_mode='constant' ) self.FF = yin( self.s, fmin=self.flim[0], fmax=self.flim[1], sr=self.sr, frame_length=self.NN, win_length=self.win_length, hop_length=self.hop_length, center=self.center, trough_threshold=self.umbral_FF, pad_mode='constant' ) elif self.ff_method=="manual": print("Not implemented yet.") self.timeFF = np.linspace(0,self.time[-1],self.FF.size) self.FF_fun = interp1d(self.timeFF, self.FF) self.SCI = self.f_msf / self.FF_fun(self.time)
#%%
[docs] def solve( self, z: List[float] = _Z, params: Dict = _PARAMS, order: int = 2, method: Literal["best", "fast"] = "best" ) -> Syllable : """ Parameters ---------- z: List[float] params : dict order : int Return ------ synth : Syllable Examples -------- >>> """ self.params = _PARAMS self.z = _Z # update parameters if given for k in params.keys(): self.params[k] = params[k] for k in z.keys(): self.z[k] = z[k] # define alpha and beta parameters curves = alpha_beta(self, self.z, method) # generate the synthetic syllable synth = motor_gestures(self, curves, self.params) synth = self.synth_scores(synth, order=order) return synth
#%%
[docs] def synth_scores( self, synth: Syllable, order: int = 2 ) -> Syllable: """ Parameters ---------- synth : Sylllable order : int Return ------ Example ------- synth : Syllable """ synth.envelope = envelope(synth.s, synth.sr, synth.Nt) synth.acoustical_features( stft_window = synth.stft_window, umbral_FF = synth.umbral_FF, ff_method = synth.ff_method, overlap = synth.overlap, llambda = synth.llambda, center = synth.center, n_mfcc = synth.n_mfcc, n_mels = synth.n_mels, NN = synth.NN, Nt = synth.Nt ) # residual difference between real and synthetic samples synth.deltaCentroid = np.abs(synth.centroid - self.centroid) synth.deltaMfccs = np.abs(synth.mfccs - self.mfccs) synth.deltaFmsf = np.abs(synth.f_msf - self.f_msf) synth.deltaEnv = np.abs(synth.envelope - self.envelope) synth.deltaSCI = np.abs(synth.SCI - self.SCI) synth.deltaRMS = np.abs(synth.rms - self.rms) synth.deltaSxx = np.abs(synth.Sxx_dB - self.Sxx_dB) synth.deltaMel = np.abs(synth.FF_coef - self.FF_coef) synth.deltaFF = np.abs(synth.FF - self.FF) ## --------- normalizing ---------------------- synth.deltaCentroid /= np.max(synth.centroid) synth.deltaMfccs /= np.max(synth.deltaMfccs) synth.deltaFmsf /= synth.f_msf synth.deltaSCI /= synth.SCI synth.deltaEnv /= synth.envelope synth.deltaRMS /= synth.rms synth.deltaSxx /= np.max(synth.deltaSxx) synth.deltaMel /= np.max(synth.deltaMel) synth.deltaFF /= synth.FF # --------------- scoring variables -------------------- synth.scoreCentroid = norm(synth.deltaCentroid, ord=order) synth.scoreFmsf = norm(synth.deltaFmsf, ord=order) synth.scoreMfccs = norm(synth.deltaMfccs, ord=np.inf) synth.scoreSCI = norm(synth.deltaSCI, ord=order) synth.scoreEnv = norm(synth.deltaEnv, ord=order) synth.scoreRMS = norm(synth.deltaRMS, ord=order) synth.scoreSxx = norm(synth.deltaSxx, ord=np.inf) synth.scoreMel = norm(synth.deltaMel, ord=np.inf) synth.scoreFF = norm(synth.deltaFF, ord=order) # ------------------- removing size dependency ------------------- synth.scoreCentroid /= 
synth.deltaCentroid.size synth.scoreMfccs /= synth.deltaMfccs.size synth.scoreFmsf /= synth.deltaFmsf.size synth.scoreSCI /= synth.deltaSCI.size synth.scoreEnv /= synth.deltaEnv.size synth.scoreRMS /= synth.deltaRMS.size synth.scoreSxx /= synth.deltaSxx.size synth.scoreMel /= synth.deltaSxx.size synth.scoreFF /= synth.deltaFF.size # -------------------- variables mean ------------------------- # synth.scoreNoHarm = deltaNOP*10**(deltaNOP-2) synth.scoreCentroid_mean = synth.scoreCentroid.mean() synth.scoreFmsf_mean = synth.deltaFmsf.mean() synth.deltaSCI_mean = synth.deltaSCI.mean() synth.scoreRMS_mean = synth.scoreRMS.mean() synth.deltaEnv_mean = synth.deltaEnv.mean() synth.deltaFF_mean = synth.deltaFF.mean() # ------------- acoustic dissimilarity indexes (adi) --------------- synth.correlation = np.zeros_like(synth.time) synth.SKL = np.zeros_like(synth.time) synth.Df = np.zeros_like(synth.time) for i in range(synth.mfccs.shape[1]): x = self.mfccs[:,i] y = synth.mfccs[:,i] r = norm(x*y,ord=1) / (norm(x,ord=2)*norm(y,ord=2)) Df = x*np.log2(np.abs(x/y)) + y*np.log2(np.abs(y/x)) synth.correlation[i] = np.sqrt(1-r) synth.SKL[i] = 0.5*norm(np.abs(x-y), ord=1) synth.Df[i] = 0.5*norm(Df, ord=1) #synth.Df[np.argwhere(np.isnan(synth.Df))]=-10 # ------------- normalizing adi ----------------- # synth.correlation /= synth.correlation.max() synth.SKL /= synth.SKL.max() synth.Df /= synth.Df.max() # computing adi scores synth.scoreCorrelation = norm(synth.correlation, ord=order) synth.scoreSKL = norm(synth.SKL, ord=order) synth.scoreDF = norm(synth.Df, ord=order) # normalizing synth.scoreCorrelation /= synth.correlation.size synth.scoreSKL /= synth.SKL.size synth.scoreDF /= synth.Df.size # mean scores mean_scores = np.mean(synth.correlation+synth.Df+synth.scoreSKL) synth.residualCorrelation = synth.scoreFF - mean_scores synth.SCIFF = synth.scoreSCI + synth.scoreFF return synth
#%%
[docs] def export_mg(self, dataframe: bool=False, export_curves: bool=True) -> DataFrame|None: """ Parameters ---------- Return ------ synth : Syllable Examples -------- >>> """ if "synth" not in self.id: raise Exception("You only can export motor gestures" + " parameters from synthetic objects") # ------------ export p values and alpha-beta arrays ------------ file_name = self.file_name.replace("synth-","") type = self.type if self.type!="" else "" info = { "t_ini": round(self.t_interval[0], 4), "t_end": round(self.t_interval[1], 4), "f_ini": self.flim[0], "f_end": self.flim[1], "id": self.id, "no_syllable": self.no_syllable, "sr": self.sr, "NN": self.NN, "umbral_FF": self.umbral_FF, "ff_method": self.ff_method, "type": type, "metadata": str(self.metadata), "file_name": file_name, "audios_folder": self.proj_dirs.AUDIOS, "z": str(self.z), "duration": self.T, "params": str(self.params), "Nt": self.Nt, "center": self.center, "overlap": self.overlap, "llambda": self.llambda, "n_mfcc": self.n_mfcc, "n_mels": self.n_mels, "stft_window": self.stft_window } if export_curves: path = self.export_curves() info = info | {"curves_csv": path} name = f"{file_name[:-4]}-{self.no_syllable}-mg.csv"\ if type!="" \ else f"{file_name[:-4]}-{self.no_syllable}-{self.type}-mg.csv" path = self.proj_dirs.mg_param / name.replace(" ", "") df_mg = pd.DataFrame.from_dict(info, orient="index", columns=["value"]) df_mg.to_csv(path, index=True) print(f"Motor gesture parameters saved at {path}.") if dataframe: return df_mg
#%%
[docs] def play(self) -> Audio: """ Parameters ---------- Return ------ Examples -------- >>> """ if is_notebook(): return Audio(data=self.s, rate=self.sr) else: raise Exception("Not implemented yet!")
#%%
[docs] def write_audio(self, bit_depth: int = 16) -> None: """ Parameters ---------- Return ------ Examples -------- >>> """ audio_name = f'{self.file_name[:-4]}-{self.id}-{self.no_syllable}.wav' path_name = self.proj_dirs.examples / audio_name.replace(" ", "") write(filename=path_name, fs=self.sr, data=self.s, bit_depth=bit_depth) print(f"Audio saved at {path_name}.")
# %%
[docs] def export_curves(self): """_summary_ Returns: _type_: _description_ """ curves_array = np.array([self.time_s, self.alpha, self.beta]).T curves_df = pd.DataFrame(curves_array, columns=["time","alpha","beta"]) name = f"{self.file_name[:-4]}-{self.no_syllable}-curves.csv"\ if type!="" \ else f"{self.file_name[:-4]}-{self.no_syllable}-{self.type}-curves.csv" path = self.proj_dirs.mg_param / name.replace(" ", "") curves_df.to_csv(path, index=True) print(f"Curves arrays saved at {path}") return path
# %%
[docs] def import_curves(self): """_summary_ Returns: _type_: _description_ """ curves_array = np.array([self.time_s, self.alpha, self.beta]) curves_df = pd.DataFrame(curves_array) name = f"{self.file_name[:-4]}-{self.no_syllable}-curves.csv"\ if type!="" \ else f"{self.file_name[:-4]}-{self.no_syllable}-{self.type}-curves.csv" path = self.proj_dirs.mg_param / name return pd.read_csv(path)