Source code for wavesongs.utils.paths

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Create and manage project directory trees: input audios
and results (images, parameters files, and audios)
"""
import json
import numpy as np
import pandas as pd
from pathlib import Path


from os import makedirs
from librosa import load
from os.path import isdir, basename
from pathlib import Path, PosixPath
from typing import Union, List, AnyStr, TypeVar

from wavesongs.models.bird import alpha_beta, motor_gestures
import wavesongs as ws

# prefix components:
space =  '    '
branch = '│   '
# pointers:
tee =    '├── '
last =   '└── '

_CATALOG_LABEL = "ML Catalog Number"
_AUDIO_FORMATS = (".mp3", ".wav")

Syllable = TypeVar('Syllable')
#%%
[docs] class ProjDirs: """ Creates a ProjDirs class, which is used to store a project's file structure. This is required when constructing a :class:`~wavesongs.obj.Syllable` or a :class:`~wavesongs.obj.Song` objects and generally useful to keep paths tidy and in the same location. Parameters ---------- audios : str ='./assets/audio' Folder path where the audio records samples are stored. results: str = "./assets/results" Folder path to store the files and data generated. metadata: str = "spreadsheet.csv" Name of the csv file with the metadata of the audios. Usually given by the data provider. Attributes ---------- Example ------- >>> proj_dirs = ProjDirs( >>> "./assets/audio", "./assets/results", "spreadsheet.csv" >>> ) """ # %%
[docs] def __init__( self, audios: AnyStr = "./assets/audios", results: AnyStr = "./assets/results", metadata: AnyStr = "spreadsheet.csv", catalog: bool = False ): """Constructor""" self.AUDIOS = Path(audios) self.RESULTS = Path(results) self.mg_param = self.RESULTS / "mg_params" self.IMAGES = self.RESULTS / "figures" self.examples = self.RESULTS / "audios" self.SPREADSHEET = self.AUDIOS / metadata self.CATALOG = catalog # create folder in case they do not exist Path(self.RESULTS).mkdir(parents=True, exist_ok=True) Path(self.mg_param).mkdir(parents=True, exist_ok=True) Path(self.IMAGES).mkdir(parents=True, exist_ok=True) Path(self.examples).mkdir(parents=True, exist_ok=True) # Check if there is a metadata spreadsheet file inside audios folder spreadsheet_file = list(Path(self.AUDIOS).glob("*" + metadata)) if len(spreadsheet_file) > 0 and self.CATALOG==True: self.CATALOG = True self.CATALOG_LABEL = _CATALOG_LABEL self.find_audios()
# %%
[docs] def find_audios(self, pretty=False) -> Union[List, pd.DataFrame]: """ Search for all audios, mp3 and wav types, in the audios folder. Parameters ---------- None Return ------ files_names : list List with the audios files names Notes ----- If the audios folder contains a metadata file, spreadsheet.csv, the method will return a dataframe. However, the parameter `files_names` always is present. Example ------- >>> """ try: all_filles = Path(self.AUDIOS).glob("**/*") self.files = [a for a in all_filles if a.suffix in _AUDIO_FORMATS] self.files_names = [basename(f) for f in self.files] self.no_files = len(self.files_names) if self.CATALOG is True: self.data = pd.read_csv(self.SPREADSHEET, encoding_errors="ignore") self.data.dropna(axis=0, how="all", inplace=True) # self.data = data.convert_dtypes() self.data = self.data.astype({self.CATALOG_LABEL: "str"}) found_files = [ ( str(self.AUDIOS) + f"/{file}.mp3" if file + ".mp3" in self.files_names else str(self.AUDIOS) + f"/{file}.wav" ) for file in self.data[self.CATALOG_LABEL] ] self.data["File Path"] = found_files self.no_files = len(self.data) if pretty: print(self.data) return self.data if pretty: print("Audios found:") for i in range(len(self.files_names)): print(f"\t- {self.files_names[i]}") print(self.data) return self.files_names except: raise Exception(f"The path {self.AUDIOS} does not contain any "+ "samples. Change the audios path and try again.")
# %%
[docs] def audios_info(self) -> None: """ Get information about the audios folder: audios paths and number of audios. Parameters ---------- None Return ------ None Example ------- >>> """ print(f"Audios path: {self.AUDIOS}\n") print("The folder has {} audio samples:".format(self.no_files)) if self.CATALOG: print(self.data) else: for file in self.files_names: print(" - " + file)
[docs] def find_audio(self, id: AnyStr) -> PosixPath: """ Find an audio in the audios folder by the id or filename Parameters ---------- id : str Whole filename of a part of it. Usually, the catalog number. Return ------ path : PosixPath Aduio path location. Example ------- >>> """ if self.CATALOG: id_df = self.data.loc[self.data[self.CATALOG_LABEL] == id] path = PosixPath(id_df["File Path"].values[0]) else: path = [ self.files[i] for i in range(len(self.files)) if id in self.files_names[i] ][0] return path
# %%
[docs] def import_mg(self, id, no_syllable=0): all_filles = Path(self.mg_param).glob("**/*") path_mg = [a for a in all_filles if f"-{no_syllable}-" in str(a) and id in str(a) and "mg." in str(a)][0] mg_df = pd.read_csv(path_mg, index_col=0) mg_df = mg_df.to_dict()["value"] t0 = float(mg_df["t_ini"]) sr = int(mg_df["sr"]) duration = float(mg_df["duration"]) self.AUDIOS = mg_df["audios_folder"] params = eval(mg_df["params"]) #self = ProjDirs(audios=audios_folder, results=self.RESULTS) synth = ws.objs.syllable.Syllable(proj_dirs=self, duration=duration, sr=sr) synth.id = mg_df["id"] synth.type = mg_df["type"] synth.no_syllable = int(mg_df["no_syllable"]) synth.metadata = mg_df["metadata"] synth.file_name = mg_df["file_name"] synth.z = eval(mg_df["z"]) synth.t0_bs = t0 if "curves_csv" in mg_df.keys(): curves_df = pd.read_csv(mg_df["curves_csv"], index_col=0) time_s = curves_df["time"].array alpha = curves_df["alpha"].array beta = curves_df["beta"].array duration = time_s[-1] curves = [alpha, beta] synth.alpha = alpha synth.beta = beta else: curves = alpha_beta(synth, synth.z, "fast") synth = motor_gestures(synth, curves, params) synth.acoustical_features( NN = int(mg_df["NN"]), ff_method = mg_df["ff_method"], umbral_FF = float(mg_df["umbral_FF"]), flim = [float(mg_df["f_ini"]), float(mg_df["f_end"])], Nt = int(mg_df["Nt"]), center = mg_df["center"], overlap = float(mg_df["overlap"]), llambda = float(mg_df["llambda"]), n_mfcc = int(mg_df["n_mfcc"]), n_mels = int(mg_df["n_mels"]), stft_window = mg_df["stft_window"] ) return synth
[docs] def read_mg( self, file_name: AnyStr, no_syllable: Union[int, AnyStr], type: AnyStr = "" ) -> Syllable: """ Read motor gesture parameters from csv file Parameters ---------- proj_dirs : ProjDirs file_name: AnyStr no_syllable: Union[int, AnyStr] type: AnyStr = "" Return ------- synth: Syllable Example ------- >>> """ folder = self.mg_param # f"{results}/mg_param" file_name = f"{folder}/{file_name}-{no_syllable}-mg.csv" \ if type=="" \ else f"{folder}/{file_name}-{no_syllable}-{type}-mg.csv" df = pd.read_csv(file_name, index_col=0) data = df.to_dict()["value"] tlim = [float(data["t_ini"]), float(data["t_end"])] flim = [float(data["f_ini"]), float(data["f_end"])] z_json = data["z"].replace("'", "\"") z = json.loads(z_json) metadata_json = data["metadata"].replace("'", "\"") metadata = json.loads(metadata_json) root_folder = data["root_folder"] \ if data["root_folder"]!=".." \ else data["root_folder"]+"/" audios_folder = data["audios_folder"].replace(root_folder, "") proj_dirs = ProjDirs(root=root_folder, audios=audios_folder) syllable = Syllable( file_id=data["file_name"][:-4], proj_dirs=proj_dirs, tlim=tlim, no_syllable=int(data["no_syllable"]), id=data["id"], sr=int(data["sr"]), metadata=metadata, type=data["type"] ) syllable.acoustical_features( flim=flim, umbral_FF=float(data["umbral_FF"]), NN=int(data["NN"]), ff_method=data["ff_method"] ) syllable.z = z return syllable
[docs] def tree_list(self, prefix: str=''): """A recursive generator, given a directory Path object will yield a visual tree structure line by line with each line prefixed by the same characters """ dir_path = Path("./") contents = list(dir_path.iterdir()) # contents each get pointers that are ├── with a final └── : pointers = [tee] * (len(contents) - 1) + [last] for pointer, path in zip(pointers, contents): yield prefix + pointer + path.name if path.is_dir(): # extend the prefix and recurse: extension = branch if pointer == tee else space # i.e. space because last, └── , above so no more | yield from self.tree(prefix=prefix+extension)
[docs] def tree(self, prefix: str='') -> str: tree_str = "" possibles = ["assets", "results", "audios", "figures", "mg_params"] for line in self.tree_list(prefix): count = 0 for pos in possibles: if pos in line: count += 1 if count>=1: tree_str += line+"\n" print(tree_str) return tree_str
def __str__(self): return f""" Audios: {self.AUDIOS} Results: {self.RESULTS} """