Hi, @ines!
We have the same issue with `instant_submit`. We are loading text samples from a .jsonl file located in an S3 bucket as a stream via `dvc.api.open()`. When `instant_submit` is on, a new batch of tasks loads only after a page refresh. Without `instant_submit`, tasks load as expected until all of the samples have been labeled. However, each new batch loads slowly, which might be causing the problem.
def get_dvc_stream(repo, path, rev, github_token):
    """Lazily yield parsed JSON records from a JSONL file opened through DVC.

    Args:
        repo: Repository path appended to ``https://<token>@github.com/``.
        path: Path of the tracked file, forwarded to ``dvc.api.open``.
        rev: Revision forwarded to ``dvc.api.open``.
        github_token: Token embedded in the repository URL.

    Yields:
        The object decoded by ``json.loads`` for each non-blank line.
    """
    # NOTE: the token is part of the URL, so avoid logging ``repo_url``.
    repo_url = f"https://{github_token}@github.com/{repo}"
    with dvc.api.open(path=path, repo=repo_url, rev=rev, mode="rb") as fsource:
        # Iterate the file object directly rather than calling readlines():
        # readlines() has to read the whole file before the first record can
        # be yielded, which delays the first batch and holds every line in
        # memory at once.
        for line in fsource:
            # Blank lines (e.g. a trailing newline) are not valid JSON.
            if not line.strip():
                continue
            yield json.loads(line)
@recipe(
    "clausescat.manual",
    dataset=("Dataset to save answers to", "positional", None, str),
    label=(
        "Comma-separated label(s) to annotate or text file with one label per line",
        "option",
        "l",
        get_labels,
    ),
)
def clauses_categorisation(dataset, label):
    """Build the components of the ``clausescat.manual`` recipe.

    Each task combines three blocks: the text, a choice block with one option
    per entry of ``label``, and a free-text "Comment" input.

    Args:
        dataset: Name of the dataset the annotations are saved to.
        label: Iterable of label texts; each becomes a choice option whose
            ``id`` is its position in the iterable.

    Returns:
        A dict with the ``dataset``, ``view_id``, ``stream`` and ``config``
        keys describing the annotation session.
    """
    options = [
        {"id": index, "text": name} for index, name in enumerate(label)
    ]

    blocks = [
        {"view_id": "text"},
        # NOTE(review): "text": None presumably keeps the choice block from
        # rendering the task text a second time -- confirm against Prodigy docs.
        {"view_id": "choice", "text": None},
        {"view_id": "text_input", "field_rows": 3, "field_label": "Comment"},
    ]

    def get_stream():
        # Wrap every record from the DVC-backed source into a task dict; the
        # same ``options`` list object is attached to every task.
        examples = get_dvc_stream(
            settings.dvc_repo,
            settings.dvc_data_path,
            settings.dvc_rev,
            settings.github_token,
        )
        for example in examples:
            yield {
                "text": example["text"],
                "options": options,
                "meta": example["meta"],
            }

    return {
        "dataset": dataset,  # the dataset to save annotations to
        "view_id": "blocks",  # combine several interfaces in one card
        "stream": get_stream(),  # the stream of incoming examples
        "config": {
            "blocks": blocks,  # the blocks rendered for each task
            "card_css": {"text-align": "left"},
        },
    }
Here is the config for the annotation server:
from settings import settings
import prodigy
import recipes
# Settings forwarded to prodigy.serve() as keyword arguments. Database
# credentials and the instant_submit flag are read from the project settings.
prodigy_config = dict(
    db="postgresql",
    db_settings=dict(
        postgresql=dict(
            dbname="prodigy",
            user=settings.database_username,
            password=settings.database_password,
            host=settings.database_host,
            port=settings.database_port,
        ),
    ),
    host="0.0.0.0",
    show_flag=True,
    custom_theme=dict(cardMaxWidth="95%", smallText=16),
    feed_overlap=True,
    hide_meta=False,
    instant_submit=settings.instant_submit,
)

# Echo the recipe command before starting the server with it.
print(settings.prodigy_recipe_cmd)
prodigy.serve(settings.prodigy_recipe_cmd, **prodigy_config)