Audio loading error

Hi there,
I am using the following prodigy version

============================== :sparkles: Prodigy Stats ==============================

Version 1.17.5
License Type Prodigy Personal
Location /home/gio/.local/lib/python3.10/site-packages/prodigy
Prodigy Home /home/gio/.prodigy
Platform Linux-6.8.0-52-generic-x86_64-with-glibc2.35
Python Version 3.10.12
spaCy Version 3.7.5
Database Name SQLite
Database Id sqlite
Total Datasets 1
Total Sessions 13

I am having an issue loading audio. I already have segmented clips and just want to load the clips from a folder but my current method is not displaying any audio. Any help would be greatly appreciated!


def load_audio_files(folder_path: str, data_type: str) -> List[Dict]:
    """
    Load audio files from the specified folder path.
    The data_type is used for metadata but not for path construction.
    """
    root_path = Path(folder_path)
    streams = []

    if not root_path.exists():
        raise ValueError(f"Folder path {folder_path} not found")

    for session_folder in root_path.iterdir():
        if session_folder.is_dir():
            session_code = session_folder.name
            for audio_file in session_folder.glob("*.wav"):
                task = {
                    "audio": str(audio_file),
                    "path": str(audio_file),
                    "session_code": session_code,
                    "meta": {
                        "session": session_code,
                        "data_type": data_type
                    }
                }
                streams.append(task)

    if not streams:
        print(f"Warning: No .wav files found in {root_path}")

    return streams


@prodigy.recipe(
    "audio-affect-intensity",
    dataset=("Dataset to save annotations to", "positional", None, str),
    source=("Path to audio files", "positional", None, str),
    inf=("Select infant (ALL_CHI) recordings", "flag", "inf", bool),
    mum=("Select mother (FEM) recordings", "flag", "mum", bool)
)
def audio_annotation_recipe(dataset: str, source: str, inf: bool = False, mum: bool = False):
    """Recipe for annotating audio with affect and intensity ratings"""
    # Determine data type from flags
    if inf and not mum:
        data_type = 'ALL_CHI'
    elif mum and not inf:
        data_type = 'FEM'
    else:
        raise ValueError("Must specify exactly one of --inf or --mum")

    print(f"Using data_type: {data_type}")

    # Create path to specific data type folder
    data_path = Path(source) / data_type

    # Load audio files using the load_audio_files function
    stream = load_audio_files(str(data_path), data_type)

    def add_options(stream):
        """Helper function to add rating options to every task"""
        for task in stream:
            # Add affect and intensity options
            task["options"] = [
                # Affect options
                {"id": "affect1", "text": "Affect 1 - Very Negative"},
                {"id": "affect2", "text": "Affect 2 - Negative"},
                {"id": "affect3", "text": "Affect 3 - Neutral"},
                {"id": "affect4", "text": "Affect 4 - Positive"},
                {"id": "affect5", "text": "Affect 5 - Very Positive"},
                # Intensity options
                {"id": "intensity1", "text": "Intensity 1 - Very Low"},
                {"id": "intensity2", "text": "Intensity 2 - Low"},
                {"id": "intensity3", "text": "Intensity 3 - Medium"},
                {"id": "intensity4", "text": "Intensity 4 - High"},
                {"id": "intensity5", "text": "Intensity 5 - Very High"}
            ]
            yield task

    # Add the options to each task
    stream = add_options(stream)

    return {
        "dataset": dataset,
        "stream": stream,
        "view_id": "choice",
        "config": {
            "choice_style": "multiple",
            "choice_auto_accept": False,
            "audio_loop": False,
            "show_audio_minimap": True
        }
    }

Hi @Gio,

It most likely is the browser preventing loading media files from a local path.
To circumvent it you could represent your audios as base64 data URIs with the help of fech_media stream wrapper. So in your recipe it would be one more step after adding options:

from prodigy.components.preprocess import fetch_media
stream = add_options(stream)
stream = fetch_media(stream, ["audio"], skip=False)

You also might want to remove the encoded data before saving to the DB to prevent database bloat.
The easiest way to do it would be to the following. before_db callback:

def remove_base64(examples: List[Dict]) -> List[Dict]:
    """Remove base64-encoded string if "path" is preserved in example."""
    for eg in examples:
        if "audio" in eg and eg["audio"].startswith("data:") and "path" in eg:
            eg["audio"] = eg["path"]
    return examples

Fantastic - worked a treat! Thank you.