Hi,
from typing import List, Optional
import prodigy
from prodigy.components.loaders import get_stream
from prodigy.util import split_string
def add_options(stream):
    """Attach the fixed multiple-choice options to every task in *stream*.

    Yields each task after setting its ``"options"`` key; all tasks share
    the same options list.
    """
    choices = [{"id": letter, "text": letter} for letter in ("a", "b", "c")]
    for example in stream:
        example["options"] = choices
        yield example
def progress(ctrl, update_return_value, total=50):
    """Report annotation progress as a fraction of ``total`` examples.

    Called by Prodigy after each update; ``ctrl.session_annotated`` is the
    number of examples annotated in this session.  The original hard-coded
    target of 50 is kept as a backward-compatible default parameter, and
    the result is clamped to 1.0 so the UI never shows more than 100%.
    """
    return min(ctrl.session_annotated / total, 1.0)
# Recipe decorator with argument annotations: (description, argument type,
# shortcut, type / converter function called on value before it's passed to
# the function). Descriptions are also shown when typing --help.
@prodigy.recipe(
    "memt.manual",
    dataset=("The dataset to use", "positional", None, str),
    source=("The source data as a JSONL file", "positional", None, str),
    label=("One or more comma-separated labels", "option", "l", split_string),
    exclusive=("Treat classes as mutually exclusive", "flag", "E", bool),
    exclude=("Names of datasets to exclude", "option", "e", split_string),
)
def memt_manual(
    dataset: str,
    source: str,
    label: Optional[List[str]] = None,
    exclusive: bool = False,
    exclude: Optional[List[str]] = None,
):
    """
    Manually annotate categories that apply to a text. If more than one label
    is specified, categories are added as multiple choice options. If the
    --exclusive flag is set, categories become mutually exclusive, meaning that
    only one can be selected during annotation.
    """
    # NOTE(review): `label` is accepted but never used — the choice options
    # are hard-coded in add_options. Confirm whether they should be built
    # from `label` instead.
    # Load the stream from a JSONL file and return a generator that yields a
    # dictionary for each example in the data.
    stream = get_stream(source, rehash=True, dedup=False, input_key="text")
    # Add the multiple-choice options to each task in the stream.
    stream = add_options(stream)
    return {
        "view_id": "blocks",  # Annotation interface to use
        "dataset": dataset,  # Name of dataset to save annotations
        "stream": stream,  # Incoming stream of examples
        "progress": progress,
        # Fix: `exclude` was accepted but ignored — pass it through so
        # examples already annotated in those datasets are skipped.
        "exclude": exclude,
        "config": {  # Additional config settings, mostly for app UI
            "batch_size": 10,
            # Fix: honor the --exclusive flag (previously unused). With
            # "single", only one option can be selected per task.
            "choice_style": "single" if exclusive else "multiple",
            "blocks": [
                {"view_id": "html",
                 "html_template": "{{id}}. SOURCE:<h5>{{ text }}</h5>PROPOSED TRANSLATION:<h5>{{ translation }}</h5></strong><p style='font-size: 15px'>Client: {{ client }}</p>",
                 },
                {"view_id": "choice", "text": None},
            ],
        },
    }
prodigy.json:
{
"theme": "basic",
"custom_theme": {},
"buttons": ["undo"],
"history_size": 30,
"port": 8880,
"host": "xxx",
"cors": true,
"db": "postgresql",
"db_settings": {
"postgresql": {
"dbname": "xxx",
"user": "xxx",
"password": "xxx"
}
},
"validate": true,
"auto_exclude_current": false,
"choice_auto_accept": true,
"feed_overlap": false,
"auto_exclude_current": false,
"force_stream_order": true,
"instant_submit": false,
"feed_overlap": false,
"auto_count_stream": true,
"total_examples_target": 0,
"instructions": false,
"ui_lang": "en",
"project_info": ["dataset", "session", "lang", "recipe_name", "view_id", "label"],
"show_stats": true,
"hide_meta": true,
"show_flag": false,
"javascript": null,
"swipe": false,
"swipe_gestures": { "left": "accept", "right": "reject" },
"split_sents_threshold": false,
"writing_dir": "ltr",
"show_whitespace": true,
"exclude_by": "task",
"global_css": ".prodigy-content { text-align: left} .prodigy-content p{ text-align: right; } .prodigy-container {width: 500px} .prodigy-content, .c01185 {width: 100%} .c01133 { max-width: 2000px; width: 1300px}"
Data example:
{"id":0,"client":"xxx","text":aaa.","translation":"bbb.","engine":"ccc"}
output:
As you can see on the image, the records are repeated.
To perform the annotation exercise, I used the url + session:
http://host:XXXX/?session=user1
But honestly, I think it will be difficult to reproduce the errors. I repeated the same experiment another time and everything went ok.
In addition, I see that there are records with no answer 'accept': []
Regards