Source code for denote.util

"""Audio loading and utility functions for denote."""

from typing import Union, Optional, Tuple
from pathlib import Path

import numpy as np



[docs]
def load_audio(
    audio: Union[str, Path, np.ndarray],
    *,
    sr: Optional[int] = None,
    mono: bool = True,
    target_sr: Optional[int] = None,
) -> Tuple[np.ndarray, int]:
    """Load audio from a file path or validate an existing array.

    Besides paths and numpy arrays, any object exposing a ``numpy()`` method
    (for example a ``torch.Tensor``) is accepted: it is converted to an
    array, squeezed if it has more than one dimension, and then handled
    like an array.

    Args:
        audio: File path (str/Path), numpy array of audio samples, or a
            tensor-like object with a ``numpy()`` method.
        sr: Sample rate. Required when audio is an array or tensor. Ignored
            for file paths (detected automatically).
        mono: If True, convert to mono.
        target_sr: If set, resample to this rate.

    Returns:
        Tuple of (audio_array, sample_rate).

    Raises:
        ValueError: If audio is an array or tensor and sr is not provided.
        FileNotFoundError: If audio is a path that doesn't exist.
        TypeError: If audio is none of the supported types.
    """
    # librosa is imported only in the branches that actually call it, so
    # validation errors and pass-through array calls do not pay for (or
    # fail on) the import.
    if isinstance(audio, (str, Path)):
        path = Path(audio)
        if not path.exists():
            raise FileNotFoundError(f"Audio file not found: {path}")
        import librosa

        y, file_sr = librosa.load(str(path), sr=target_sr, mono=mono)
        return y, target_sr or file_sr

    if isinstance(audio, np.ndarray):
        if sr is None:
            raise ValueError(
                "Sample rate (sr) is required when audio is a numpy array."
            )
        y = audio
        if mono and y.ndim > 1:
            import librosa

            y = librosa.to_mono(y)
        if target_sr is not None and target_sr != sr:
            import librosa

            y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
            return y, target_sr
        return y, sr

    # Tensor-like fallback (e.g. torch.Tensor). Only the conversion itself is
    # guarded: an AttributeError raised later (say, inside librosa) must
    # surface as-is instead of being reported as an unsupported input type.
    try:
        converted = audio.numpy()
    except AttributeError:
        raise TypeError(
            f"Unsupported audio type: {type(audio)}. "
            "Expected str, Path, np.ndarray, or torch.Tensor."
        ) from None

    # Guarantee an ndarray so the recursive call takes the array branch.
    y = np.asarray(converted)
    if y.ndim > 1:
        y = y.squeeze()
    if sr is None:
        raise ValueError(
            "Sample rate (sr) is required when audio is a tensor."
        )
    return load_audio(y, sr=sr, mono=mono, target_sr=target_sr)




[docs]
def ensure_file_path(
    audio: Union[str, Path, np.ndarray],
    *,
    sr: Optional[int] = None,
) -> str:
    """Ensure audio is available as a file path.

    If audio is already a path, return it as a string.
    If it's an array, write to a temporary WAV file and return the path.
    The temporary file is not deleted automatically; the caller is
    responsible for removing it when done.

    Args:
        audio: File path (str/Path) or array of audio samples.
        sr: Sample rate. Required when audio is an array.

    Returns:
        Path to an audio file on disk, as a string.

    Raises:
        FileNotFoundError: If audio is a path that doesn't exist.
        ValueError: If audio is an array and sr is not provided.
    """
    if isinstance(audio, (str, Path)):
        path = Path(audio)
        if not path.exists():
            raise FileNotFoundError(f"Audio file not found: {path}")
        return str(path)

    # Validate before importing anything so a missing sr is always reported
    # as such, regardless of which optional packages are installed.
    if sr is None:
        raise ValueError(
            "Sample rate (sr) is required when audio is an array."
        )

    # It's an array — write to temp file
    import os
    import tempfile
    import soundfile as sf

    y, actual_sr = load_audio(audio, sr=sr, mono=True)

    # mkstemp hands back an open descriptor; close it straight away so no
    # handle leaks and the writer can reopen the path itself.
    fd, tmp_path = tempfile.mkstemp(suffix='.wav')
    os.close(fd)
    try:
        sf.write(tmp_path, y, actual_sr)
    except Exception:
        # Do not leave an empty/partial file behind when the write fails.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
    return tmp_path