Hi!
I'm running version 1.10.6.
The recipe is used to annotate chunks of audio with a voice activity detection model in the loop.
My JSON config file looks like this:
{
"theme": "basic",
"custom_theme": {},
"buttons": ["accept", "reject", "ignore", "undo"],
"batch_size": 3,
"history_size": 3,
"port": 8080,
"host": "localhost",
"cors": true,
"db": "sqlite",
"db_settings": {},
"validate": true,
"auto_exclude_current": true,
"instant_submit": false,
"feed_overlap": false,
"auto_count_stream": false,
"total_examples_target": 0,
"ui_lang": "en",
"project_info": ["dataset", "session", "lang", "recipe_name", "view_id", "label"],
"show_stats": false,
"hide_meta": false,
"show_flag": false,
"instructions": false,
"swipe": false,
"swipe_gestures": { "left": "accept", "right": "reject" },
"split_sents_threshold": false,
"html_template": false,
"global_css": null,
"javascript": null,
"writing_dir": "ltr",
"show_whitespace": false,
"exclude_by": "task"
}
The config for the recipe:
from typing import Any, Dict, Iterable, List, Optional, Union

from prodigy import recipe
from prodigy.util import split_string

# pipeline (the pyannote VAD pipeline), sad_manual_stream, remove_base64 and
# update are defined elsewhere in the recipe file (not shown here).


@recipe(
    "audio.test",
    dataset=("Dataset to save annotations to", "positional", None, str),
    source=("Data to annotate (file path or '-' to read from standard input)", "positional", None, str),
    chunk=("Split long audio files into shorter chunks of that many seconds each", "option", None, float),
    loader=("Loader to use", "option", "lo", str),
    keep_base64=("If 'audio' loader is used: don't remove base64-encoded data from the data on save", "flag", "B", bool),
    autoplay=("Autoplay audio when a new task loads", "flag", "A", bool),
    fetch_media=("Convert URLs and local paths to data URIs", "flag", "FM", bool),
    exclude=("Comma-separated list of dataset IDs whose annotations to exclude", "option", "e", split_string),
)
def test(
    dataset: str,
    source: Union[str, Iterable[dict]],
    loader: Optional[str] = "audio",
    chunk: float = 10.0,
    autoplay: bool = False,
    keep_base64: bool = False,
    fetch_media: bool = False,
    exclude: Optional[List[str]] = None,
) -> Dict[str, Any]:
    label = ["Speech"]
    return {
        "view_id": "audio_manual",
        "dataset": dataset,
        "stream": sad_manual_stream(pipeline, source, chunk=chunk),
        "before_db": remove_base64 if not keep_base64 else None,
        "exclude": exclude,
        "update": update,
        "config": {
            "labels": label,
            "audio_autoplay": autoplay,
            "force_stream_order": False,
            "show_audio_minimap": False,
        },
    }
And I'm trying it with this update function:
def update(answers):
    prodigy.log('---- UPDATE ----')
    time.sleep(10)  # stand-in for the real (slow) model update
    prodigy.log('---- END ----')
The sad_manual_stream generator takes a voice activity detection pipeline and a path to audio files: it loops over all the audio files in that path, cuts each one into chunks, and yields the task data for every chunk (built with pyannote.audio, http://pyannote.github.io/ ); I sketch it below, after the logs. It works fine without the update callback. But with this one (or the real one), it stops with "No tasks available." after the update ends. Here are the last logs:
INFO: 127.0.0.1:51017 - "POST /get_session_questions HTTP/1.1" 200 OK
14:02:25: POST: /give_answers (received 3)
14:02:25: CONTROLLER: Receiving 3 answers
14:02:25: ---- UPDATE ----
14:02:35: ---- END ----
14:02:35: DB: Getting dataset '2021-09-22_13-58-16'
14:02:35: DB: Getting dataset 'test'
14:02:35: DB: Getting dataset '2021-09-22_13-58-16'
14:02:35: DB: Added 3 examples to 2 datasets
14:02:35: CONTROLLER: Added 3 answers to dataset 'test' in database SQLite
14:02:35: RESPONSE: /give_answers
INFO: 127.0.0.1:51017 - "POST /give_answers HTTP/1.1" 200 OK
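For context, here is roughly what sad_manual_stream does (a simplified sketch, not my exact code; the pyannote call, the soundfile-based chunking and the task fields are approximations of the real thing):

import base64
import io
from pathlib import Path

import soundfile as sf
from prodigy import set_hashes


def sad_manual_stream(pipeline, source, chunk=10.0):
    # `pipeline` is the pyannote.audio voice activity detection pipeline,
    # `source` is a directory of audio files. One task is yielded per
    # `chunk` seconds of audio, pre-filled with the detected speech regions.
    for path in sorted(Path(source).glob("*.wav")):
        info = sf.info(str(path))
        duration, sr = info.duration, info.samplerate
        # Run VAD once per file; the timeline holds the speech segments.
        speech = pipeline({"audio": str(path)}).get_timeline()
        chunk_start = 0.0
        while chunk_start < duration:
            chunk_end = min(chunk_start + chunk, duration)
            # Cut the chunk out of the file and embed it as a data URI.
            frames, _ = sf.read(str(path), start=int(chunk_start * sr), stop=int(chunk_end * sr))
            buf = io.BytesIO()
            sf.write(buf, frames, sr, format="WAV")
            audio = "data:audio/wav;base64," + base64.b64encode(buf.getvalue()).decode("utf-8")
            # Speech segments overlapping the chunk, relative to the chunk start.
            spans = [
                {
                    "start": max(seg.start, chunk_start) - chunk_start,
                    "end": min(seg.end, chunk_end) - chunk_start,
                    "label": "Speech",
                }
                for seg in speech
                if seg.start < chunk_end and seg.end > chunk_start
            ]
            yield set_hashes({
                "audio": audio,
                "text": f"{path.stem} [{chunk_start:.1f}s-{chunk_end:.1f}s]",
                "audio_spans": spans,
                "meta": {"file": path.name, "start": chunk_start, "end": chunk_end},
            })
            chunk_start += chunk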
I tried replacing the stream with get_stream from prodigy.components.loaders (stream = get_stream(source, loader=loader, dedup=True, rehash=True)), and I get "No tasks available." after the same kind of log (CONTROLLER: Receiving, ...), so I don't really know what's causing this; maybe it's the configuration...
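For clarity, that swap looked roughly like this (the make_stream wrapper below is only for illustration and isn't in my actual code; in the recipe I simply returned "stream": get_stream(...) instead of the sad_manual_stream call):

from typing import Iterable, Optional, Union

from prodigy.components.loaders import get_stream


def make_stream(source: Union[str, Iterable[dict]], loader: Optional[str] = "audio"):
    # Stock Prodigy loader instead of my custom generator, with the same
    # dedup/rehash options I used in the test.
    return get_stream(source, loader=loader, dedup=True, rehash=True)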
Jim