Is it possible to annotate multiple audio files in the same session (one after another)? For example I have 10 jobs I want to annotate, each with its respective audio.mp3 and speaker_data.txt (contained in different local folders that are named as their respective job_id). My recipe.py looks like the following:
import os
import prodigy
from prodigy.components.loaders import Audio
@prodigy.recipe("speaker-audio-manual")
def speaker_audio_manual(dataset: str, jobs_folder: str = "Jobs"):
"""
A custom Prodigy recipe for annotating speaker data across multiple job folders.
Args:
dataset (str): The name of the Prodigy dataset.
jobs_folder (str): The path to the directory containing job folders.
Returns:
dict: A Prodigy configuration dictionary.
"""
def get_audio_examples(jobs_folder):
# Iterate over job folders and prepare the examples
for job_id in os.listdir(jobs_folder):
job_folder_path = os.path.join(jobs_folder, job_id)
# Paths for speaker data and recording
speaker_data_path = os.path.join(job_folder_path, "speaker_data.txt")
recording_path = os.path.join(job_folder_path, "audio.mp3")
# Ensure both files exist
if os.path.exists(speaker_data_path) and os.path.exists(recording_path):
# Create an example for Prodigy
yield {
"audio": recording_path,
"meta": {"job_id": job_id},
"options": [
{"id": "SPEAKER_A", "text": "Speaker A"},
{"id": "SPEAKER_B", "text": "Speaker B"},
{"id": "SPEAKER_C", "text": "Speaker C"},
{"id": "SPEAKER_D", "text": "Speaker D"},
{"id": "SPEAKER_E", "text": "Speaker E"}
]
}
else:
print(f"Missing files in {job_folder_path}. Skipping this folder.")
# Load the examples from the job folders
examples = get_audio_examples(jobs_folder)
return {
"dataset": dataset,
"stream": examples,
"view_id": "audio_manual",
"config": {
"audio_loop": True,
"labels": ["SPEAKER_A", "SPEAKER_B", "SPEAKER_C", "SPEAKER_D", "SPEAKER_E"],
}
}
However when I run the following line in terminal, it allows me to click the check button to move onto the next job, but none of them actually display the audio file (i.e. there are no audio waves that I can drag speaker labels on). Furthermore, the bottom correctly displays the job_id and filepath of speaker_data.txt, however there is no mention of the audio.mp3.
prodigy speaker-audio-manual dataset Jobs -F recipe.py