I have 3 labels in my task and currently users can annotate multiple words with the same/different labels. I want to set a restriction such that only one label can ever be selected per annotation. Is this possible?
Hi! It sounds like this might be a good use case for the validate_answer
callback, which is a custom function you can provide in your recipe to define what's considered a "valid" submission: https://prodi.gy/docs/custom-recipes#validate_answer
It's called on every example and receives the annotated JSON, so you can add a condition to raise an error if multiple labels are used in the "spans"
, or whatever other logic you want to implement. Any error raised in the function is shown to the annotator in the UI as an alert and they'll only be able to submit the answer once the problem is resolved.
Thanks Ines. I've included the below code to my existing recipe (which is basically just ner.manual with an added text_input), however it doesn't seem to trigger a response.
def validate_answer(eg):
spans = eg.get("spans", [])
assert 1 <= len(spans) <= 1, "Select only 1 label, otherwise ignore and add comment."
Full recipe below:
from typing import List, Optional
import spacy
import prodigy
from prodigy.components.loaders import JSONL
from prodigy.components.preprocess import add_tokens
from prodigy.models.matcher import PatternMatcher
from prodigy.util import split_string
def remove_tokens(answers):
for eg in answers:
del eg["tokens"]
if "spans" in eg:
for span in eg["spans"]:
del span["token_start"]
del span["token_end"]
return answers
@prodigy.recipe(
"ner.manual",
dataset=("The dataset to use", "positional", None, str),
spacy_model=("The base model", "positional", None, str),
source=("The source data as a JSONL file", "positional", None, str),
label=("One or more comma-separated labels", "option", "l", split_string),
patterns=("The match patterns file","option","p",str),
exclude=("Names of datasets to exclude", "option", "e", split_string),
highlight_chars=("Allow for highlighting individual characters instead of tokens", "flag", "C", bool),
)
def ner_manual(
dataset: str,
spacy_model: str,
source: str,
label: Optional[List[str]] = None,
patterns: Optional[str] = None,
exclude: Optional[List[str]] = None,
highlight_chars: bool = False,
):
blocks = [
{"view_id": "ner_manual"},
{"view_id":
"text_input",
"field_id":"user_input",
"field_rows": 1,
"field_placeholder": "Optional comment",
"field_autofocus": "false",
"field_suggestions": ["Ambiguous—could be construct or absolute","Note intervening particle"]},
]
nlp = spacy.load(spacy_model)
stream = JSONL(source)
if patterns is not None:
pattern_matcher = PatternMatcher(nlp, combine_matches=True, all_examples=True)
pattern_matcher = pattern_matcher.from_disk(patterns)
stream = (eg for _,eg in pattern_matcher(stream))
stream = add_tokens(nlp, stream, use_chars=highlight_chars)
def validate_answer(eg):
spans = eg.get("spans", [])
assert 1 <= len(spans) <= 1, "Select only 1 label, otherwise ignore and add comment."
return {
"view_id": "ner_manual", # Annotation interface to use
"dataset": dataset, # Name of dataset to save annotations
"view_id":"blocks",
"stream": stream, # Incoming stream of examples
"exclude": exclude, # List of dataset names to exclude
"before_db": remove_tokens if highlight_chars else None,
"config": { # Additional config settings, mostly for app UI
"lang": nlp.lang,
"labels": label, # Selectable label options
"blocks": blocks,
},
}
I think you just forgot to actually return the validate_answer
function by your recipe – if you add the following to the dictionary returned by the function, it should be included:
"validate_answer": validate_answer
Got it, thank you Ines! =D