Hello, new user here. I am wanting to use prodigy for audio file classification..
Am following the custom recipe example provided:
import prodigy
from prodigy.components.loaders import Audio
@prodigy.recipe("classify-audio")
def classify_audio(dataset, source):
def get_stream():
# Load the directory of audio files and add options to each task
stream = Audio(source)
for eg in stream:
eg["options"] = [
{"id": "CAR", "text": "๐ Car"},
{"id": "PLANE", "text": "โ๏ธ Plane"},
{"id": "OTHER", "text": "Other / Unclear"}
]
yield eg
return {
"dataset": dataset,
"stream": get_stream(),
"view_id": "choice",
"config": {
"choice_style": "single", # or "multiple"
"choice_auto_accept": True,
"audio_loop": True,
"show_audio_minimap": False
}
}
When exporting the database using db-out I get 1000s of random characters in each row of data like so:
('......' here represents an unfathomable amount of characters)
{"audio":"data:audio/x-wav;base64,UklGRiQ6IAB.........../r/+f/4//j/9//2//b/9//3//j/+f8=","text":"EM2010-00504-2021-08-10T07-46-23-058dB","meta":{"file":"EM2010-00504-2021-08-10T07-46-23-058dB.wav"},"path":"recordings/EM2010-00504-2021-08-10T07-46-23-058dB.wav","options":[{"id":"CAR","text":"\ud83d\ude97 Car"},{"id":"PLANE","text":"\u2708\ufe0f Plane"},{"id":"OTHER","text":"Other / Unclear"}],"_input_hash":928286171,"_task_hash":-1137344558,"_session_id":null,"_view_id":"choice","config":{"choice_style":"single"},"accept":["OTHER"],"audio_spans":[],"answer":"accept"}
Is there a way to avoid this??
Thanks in advance for your help