Hello, new user here. I am wanting to use prodigy for audio file classification..
Am following the custom recipe example provided:
import prodigy
from prodigy.components.loaders import Audio
@prodigy.recipe("classify-audio")
def classify_audio(dataset, source):
    def get_stream():
        # Load the directory of audio files and add options to each task
        stream = Audio(source)
        for eg in stream:
            eg["options"] = [
                {"id": "CAR", "text": "๐ Car"},
                {"id": "PLANE", "text": "โ๏ธ Plane"},
                {"id": "OTHER", "text": "Other / Unclear"}
            ]
            yield eg
    return {
        "dataset": dataset,
        "stream": get_stream(),
        "view_id": "choice",
        "config": {
            "choice_style": "single",  # or "multiple"
            "choice_auto_accept": True,
            "audio_loop": True,
            "show_audio_minimap": False
        }
    }
When exporting the database using db-out I get 1000s of random characters in each row of data like so:
('......'  here represents an unfathomable amount of characters)
{"audio":"data:audio/x-wav;base64,UklGRiQ6IAB.........../r/+f/4//j/9//2//b/9//3//j/+f8=","text":"EM2010-00504-2021-08-10T07-46-23-058dB","meta":{"file":"EM2010-00504-2021-08-10T07-46-23-058dB.wav"},"path":"recordings/EM2010-00504-2021-08-10T07-46-23-058dB.wav","options":[{"id":"CAR","text":"\ud83d\ude97 Car"},{"id":"PLANE","text":"\u2708\ufe0f Plane"},{"id":"OTHER","text":"Other / Unclear"}],"_input_hash":928286171,"_task_hash":-1137344558,"_session_id":null,"_view_id":"choice","config":{"choice_style":"single"},"accept":["OTHER"],"audio_spans":[],"answer":"accept"}
Is there a way to avoid this??
Thanks in advance for your help