Source code for wavesongs.objs.song

#%%
"""
Store an audio record and compute its acoustical features.
"""
from typing import AnyStr

import numpy as np

from wavesongs.utils.paths import ProjDirs
from wavesongs.utils.tools import envelope, is_notebook

from maad.sound import write, normalize
from IPython.display import Audio

from os.path import (
    basename,
    normpath,
    splitext
)

from librosa import (
    stft,
    reassigned_spectrogram,
    amplitude_to_db,
    fft_frequencies,
    times_like,
    yin,
    load,
    pyin
)


from maad.sound import normalize


from typing import (
    Optional,
    Tuple,
    AnyStr,
    Dict,
    TypeVar
)

Syllable = TypeVar('Syllable')
Song = TypeVar('Song')
DataFrame = TypeVar('pandas.core.frame.DataFrame')


class Song:
    """
    Store a song and its properties in a class.

    The audio file is located through ``proj_dirs.find_audio(file_id)``,
    loaded as a mono signal, cropped to ``tlim`` and normalized.

    Parameters
    ----------
        proj_dirs : ProjDirs
            Project directories; used to find the audio file, to read the
            catalog and to choose where exported audio is written.
        file_id : str
            Name or id of the audio sample.
        tlim : tuple
            Time range ``(start, end)``, in seconds, kept from the audio.
        flim : tuple
            Frequency range ``(min, max)`` used as the search range of the
            fundamental frequency.
        sr : int
            Sample rate requested when loading the audio.
        info : dict, optional
            Audio metadata. A new empty dict is created when omitted.
        id : str = "song"
            Label appended to the file name when the audio is exported.

    Attributes
    ----------
        proj_dirs, file_id, tlim, flim, info, id
            Values received by the constructor (``info`` is replaced by
            the first catalog row when ``proj_dirs.CATALOG`` is truthy).
        file_path
            Path returned by ``proj_dirs.find_audio(file_id)``.
        file_name : str
            Last component of ``file_path``.
        sr : int
            Sample rate returned by the loader.
        s : array
            Cropped and normalized audio signal.
        t0, t0_bs : float
            Both initialized to ``tlim[0]``.

    Example
    -------
        >>>
    """

    def __init__(
        self,
        proj_dirs: "ProjDirs",
        file_id: AnyStr,
        tlim: Tuple[float, float] = (0, 60),
        flim: Tuple[float, float] = (1e3, 2e4),
        sr: int = 44100,
        info: Optional[Dict] = None,
        id: AnyStr = "song"
    ):
        self.proj_dirs = proj_dirs
        self.file_id = file_id
        self.tlim = tlim
        # A fresh dict per instance: a ``{}`` default would be shared by
        # every Song created without explicit metadata.
        self.info = {} if info is None else info
        self.flim = flim
        self.file_path = proj_dirs.find_audio(file_id)
        self.file_name = basename(normpath(self.file_path))

        if self.proj_dirs.CATALOG:
            # NOTE(review): always takes the first catalog row, whatever
            # ``file_id`` is -- confirm that ``data`` is already filtered.
            self.info = proj_dirs.data.iloc[0].to_dict()

        s, sr = load(self.file_path, sr=sr, mono=True)
        self.id = id
        self.sr = sr

        # cropping the audio to the range tlim (seconds -> sample indexes)
        first = int(self.tlim[0]*self.sr)
        last = int(self.tlim[1]*self.sr)
        s = s[first:last]

        self.t0 = self.tlim[0]
        self.t0_bs = self.tlim[0]
        self.s = normalize(s, max_amp=1.0)

    def acoustical_features(
        self,
        llambda: float = 1.,
        NN: int = 1024,
        overlap: float = 0.5,
        center: bool = False,
        umbral_FF: float = 1.05,
        ff_method: AnyStr = 'yin',
        Nt: int = 100,
        n_mfcc: int = 4,
        n_mels: int = 4,
        stft_window: AnyStr = "hann",
        tlim: Optional[Tuple[float, float]] = None,
        flim: Optional[Tuple[float, float]] = None
    ) -> None:
        """
        Compute acoustical tempo-spectral variables.

        Every result is stored as an attribute of the object: the envelope,
        the STFT, the time-frequency reassigned spectrogram (``freqs``,
        ``times``, ``Sxx``, ``Sxx_dB``), the frequency and time axes
        (``freq``, ``time``) and the fundamental frequency (``FF``).

        Parameters
        ----------
            llambda : float
                Stored as ``self.llambda``; not used by this method.
            NN : int
                FFT size. The hop length is ``NN//4`` and ``win_length``
                is ``NN//2``.
            overlap : float
                Stored together with ``no_overlap = int(overlap*NN)``.
            center : bool = False
                Forwarded to the STFT, the reassigned spectrogram and the
                fundamental frequency estimator.
            umbral_FF : float
                Trough threshold given to the fundamental frequency
                estimator.
            ff_method : str
                Stored as ``self.ff_method``.
            Nt : int
                Forwarded to ``envelope``.
            n_mfcc : int
                Stored as ``self.n_mfcc``; not used by this method.
            n_mels : int
                Stored as ``self.n_mels``; not used by this method.
            stft_window : str
                Window given to the STFT and the reassigned spectrogram.
            tlim : tuple, optional
                When given, replaces ``self.tlim`` (the audio is not
                cropped again).
            flim : tuple, optional
                When given, replaces ``self.flim``.

        Return
        ------
            None

        Examples
        --------
            >>>
        """
        if tlim is not None:
            self.tlim = tlim
        if flim is not None:
            self.flim = flim

        self.n_mfcc = n_mfcc
        self.n_mels = n_mels
        self.stft_window = stft_window
        self.llambda = llambda
        self.center = center
        self.umbral = 0.05
        self.umbral_FF = umbral_FF
        self.ff_method = ff_method
        self.NN = NN
        self.Nt = Nt
        self.overlap = overlap
        self.win_length = self.NN//2
        self.hop_length = self.NN//4
        self.no_overlap = int(overlap*self.NN)

        self.time_s = np.linspace(0, len(self.s)/self.sr, len(self.s))
        self.envelope = envelope(self.s, self.sr, Nt=self.Nt)

        # Short-time Fourier transform (STFT)
        self.stft = stft(
            y=self.s,
            n_fft=self.NN,
            hop_length=self.hop_length,
            win_length=self.NN,
            window=self.stft_window,
            center=self.center,
            dtype=None,
            pad_mode='constant'
        )

        # Time-frequency reassigned spectrogram
        # NOTE(review): the STFT above uses win_length=NN while this call
        # uses win_length=NN//2 -- confirm the mismatch is intended.
        freqs, times, mags = reassigned_spectrogram(
            self.s,
            sr=self.sr,
            S=self.stft,
            n_fft=self.NN,
            hop_length=self.hop_length,
            win_length=self.win_length,
            window=self.stft_window,
            center=self.center,
            reassign_frequencies=True,
            reassign_times=True,
            ref_power=1e-06,
            fill_nan=True,
            clip=True,
            dtype=None,
            pad_mode='constant'
        )

        self.freqs = freqs
        self.times = times
        self.Sxx = mags
        self.Sxx_dB = amplitude_to_db(mags, ref=np.max)

        self.freq = fft_frequencies(sr=self.sr, n_fft=self.NN)
        self.time = times_like(
            X=self.stft,
            sr=self.sr,
            hop_length=self.hop_length,
            n_fft=self.NN
        )
        # shift the time axis so that it starts at the origin
        self.time -= self.time[0]

        # Fundamental frequency
        # NOTE(review): ``ff_method`` is only stored; ``yin`` is always the
        # estimator used here.
        self.FF = yin(
            self.s,
            fmin=self.flim[0],
            fmax=self.flim[1],
            sr=self.sr,
            frame_length=self.NN,
            win_length=self.win_length,
            hop_length=self.hop_length,
            trough_threshold=self.umbral_FF,
            center=self.center,
            pad_mode='constant'
        )

    def play(self) -> "Audio":
        """
        Return an audio player for the stored signal.

        Parameters
        ----------

        Return
        ------
            Audio
                IPython audio widget built from ``self.s`` and ``self.sr``.

        Raises
        ------
            NotImplementedError
                When ``is_notebook()`` is false.

        Example
        -------
            >>>
        """
        if is_notebook():
            return Audio(data=self.s, rate=self.sr)
        # NotImplementedError is still caught by ``except Exception``.
        raise NotImplementedError("Not implemented yet!")

    #%%
    def write_audio(self, bit_depth: int = 16) -> None:
        """
        Save the stored signal as a wav file in ``proj_dirs.examples``.

        The output name is ``<file name without extension>-<id>.wav``.

        Parameters
        ----------
            bit_depth : int
                Bit depth forwarded to the audio writer.

        Return
        ------
            None

        Examples
        --------
            >>>
        """
        # splitext copes with extensions of any length (".flac", ".mp3");
        # slicing off the last four characters does not.
        stem = splitext(self.file_name)[0]
        audio_name = f'{stem}-{self.id}.wav'
        path_name = self.proj_dirs.examples / audio_name

        write(filename=path_name, fs=self.sr, data=self.s, bit_depth=bit_depth)
        print(f"Audio saved at {path_name}.")
#%% # def Syllable(self, no_syllable, NN=1024): # self.no_syllable = no_syllable # ss = self.syllables[self.no_syllable-1] # syllable indexes # self.syll_complet = self.s[ss] # audios syllable # self.time_syllable = self.time_s[ss] # self.t0 = self.time_syllable[0] # self.syllable = Syllable(self, tlim=(self.time_syllable[0], self.time_syllable[-1]), flim=self.flim, NN=NN, file_name=self.file_name+"synth") # self.syllable.no_syllable = self.no_syllable # self.syllable.file_name = self.file_name # self.syllable.state = self.state # self.syllable.country = self.country # self.syllable.no_file = self.no_file # self.syllable.proj_dirs = self.proj_dirs # self.syllable.id = "syllable" # self.SylInd.append([[no_syllable], [ss]]) # fraction = self.syll_complet.size/1024 # Nt_new = int(((fraction%1)/fraction+1)*1024) # self.chuncks = wave2frames(self.syll_complet, Nt=Nt_new) # self.times_chun = wave2frames(self.time_syllable, Nt=Nt_new) # self.no_chuncks = len(self.chuncks) # return self.syllable # #%% # def SyntheticSyllable(self): # self.s_synth = np.empty_like(self.s) # for i in range(self.syllables.size): # self.s_synth[self.SylInd[i][1]] = self.syllables[i] #%% # #%% # def Set(self, p_array): # self.p["a0"].set(value=p_array[0]) # self.p["a1"].set(value=p_array[1]) # self.p["a2"].set(value=p_array[2]) # self.p["b0"].set(value=p_array[3]) # self.p["b1"].set(value=p_array[4]) # self.p["b2"].set(value=p_array[5]) # #%% # def Syllables(self, method="freq"): # if method=="amplitud": # supra = np.where(self.envelope > self.umbral)[0] # candidates = np.split(supra, np.where(np.diff(supra) != 1)[0]+1) # return [x for x in candidates if len(x) > 2*self.NN] # elif method=="freq": # # ss = np.where((self.FF < self.flim[1]) & (self.FF>self.flim[0])) # filter frequency # # ff_t = self.time[ss] # cleaning timeFF # # FF_new = self.FF[ss] # cleaning FF # # FF_dif = np.abs(np.diff(FF_new)) # find where is it cutted # # # alternative form with pandas # df = 
pd.DataFrame(data={"FF":self.FF, "time":self.time}) # q = df["FF"].quantile(0.99) # df[df["FF"] < q] # q_low, q_hi = df["FF"].quantile(0.1), df["FF"].quantile(0.99) # df_filtered = df[(df["FF"] < q_hi) & (df["FF"] > q_low)] # ff_t = self.time[df_filtered["FF"].index] # FF_new = self.FF[df_filtered["FF"].index] # FF_dif = np.abs(np.diff(FF_new)) # # plt.plot(self.FF, 'o'); plt.plot(df_filtered["FF"], 'o') # peaks, _ = find_peaks(FF_dif, distance=10, height=500) # FF_dif # syl = [np.arange(peaks[i]+1,peaks[i+1]) for i in range(len(peaks)-1)] # syl = [np.arange(0,peaks[0])]+syl+[np.arange(peaks[-1]+1,len(ff_t))] # syl_intervals = np.array([[ff_t[s][0], ff_t[s][-1]] for s in syl]) # indexes = np.int64(self.sr*syl_intervals) # indexes = [np.arange(ind[0],ind[1],1) for ind in indexes] # return [x for x in indexes if len(x) > 2*self.NN] # elif "maad": # im_bin = create_mask(self.Sxx_dB, bin_std=1.5, bin_per=0.5, mode='relative')