Hi, I'm trying to build an interface with two blocks, one displaying some html and the other displaying the actual ner.manual component.
However, the custom recipe always fails with the error blocks: extra fields are not permitted
Here are the details:
ner_checks.py -> I started with ner.manual as a base and modified it
import prodigy
from prodigy.components.loaders import JSONL
from prodigy.components.preprocess import add_tokens
from prodigy.util import split_string
import spacy
from typing import List, Optional
# Recipe decorator with argument annotations: (description, argument type,
# shortcut, type / converter function called on value before it's passed to
# the function). Descriptions are also shown when typing --help.
@prodigy.recipe(
"ner.checks",
dataset=("The dataset to use", "positional", None, str),
spacy_model=("The base model", "positional", None, str),
source=("The source data as a JSONL file", "positional", None, str),
label=("One or more comma-separated labels", "option", "l", split_string),
exclude=("Names of datasets to exclude", "option", "e", split_string),
)
def ner_checks(
dataset: str,
spacy_model: str,
source: str,
label: Optional[List[str]] = None,
exclude: Optional[List[str]] = None,
):
# Load the spaCy model for tokenization
nlp = spacy.load(spacy_model)
# Load the stream from a JSONL file and return a generator that yields a
# dictionary for each example in the data.
stream = JSONL(source)
# Tokenize the incoming examples and add a "tokens" property to each
# example. Also handles pre-defined selected spans. Tokenization allows
# faster highlighting, because the selection can "snap" to token boundaries.
stream = add_tokens(nlp, stream)
return {
"view_id": "blocks", # Annotation interface to use
"dataset": dataset, # Name of dataset to save annotations
"stream": stream, # Incoming stream of examples
"exclude": exclude, # List of dataset names to exclude
"config": { # Additional config settings, mostly for app UI
"lang": nlp.lang,
"labels": label, # Selectable label options
},
"blocks": [
{"view_id":"ner_manual"},
{"view_id":"html"}
]
}
data.jsonl
{"text": "some text","html": "some html"} ...
command
prodigy ner.checks test_dataset2 blank:en .\data.jsonl --label "A,B,C" -F .\ner_checks.py
Could you help me figure out why this isn't working?