OKAY, I figured out how to accomplish this!
First I had to check out the built-in recipe, which can be located by running: python -c "import prodigy;print(prodigy.__file__)"
and can be found here
I literally took the textcat.manual part of the textcat.py file and modified it to
- define a list of 2 blocks (the actual text to annotate and the text input box)
- include that list of blocks in the recipe's "config"
- set "blocks" as the view id
Here is what my code looks like:
from typing import List, Optional, Union, Iterable
from typing import List, Optional

import prodigy
import spacy
from prodigy.components.db import connect
from prodigy.components.loaders import JSONL
from prodigy.components.loaders import get_stream
from prodigy.components.preprocess import add_label_options, add_labels_to_stream
from prodigy.components.sorters import prefer_uncertain
from prodigy.models.matcher import PatternMatcher
from prodigy.models.textcat import TextClassifier
from prodigy.types import TaskType, StreamType, RecipeSettingsType
from prodigy.util import combine_models, split_string, get_labels, log
from prodigy.util import msg
@prodigy.recipe(
    "textcat.manual.BOX",
    # fmt: off
    dataset=("Dataset to save annotations to", "positional", None, str),
    source=("Data to annotate (file path or '-' to read from standard input)", "positional", None, str),
    loader=("Loader (guessed from file extension if not set)", "option", "lo", str),
    label=("Comma-separated label(s) to annotate or text file with one label per line", "option", "l", get_labels),
    exclusive=("Treat classes as mutually exclusive (if not set, an example can have multiple correct classes)", "flag", "E", bool),
    exclude=("Comma-separated list of dataset IDs whose annotations to exclude", "option", "e", split_string),
    # fmt: on
)
def manual(
    dataset: str,
    source: Union[str, Iterable[dict]],
    loader: Optional[str] = None,
    label: Optional[List[str]] = None,
    exclusive: bool = False,
    exclude: Optional[List[str]] = None,
) -> RecipeSettingsType:
    """
    Manually annotate categories that apply to a text, with a free-text box
    underneath for the annotator to explain the decision.

    If more than one label is specified, categories are added as multiple
    choice options. If the --exclusive flag is set, categories become mutually
    exclusive, meaning that only one can be selected during annotation.

    The UI is rendered with the "blocks" interface using two blocks: the
    category block ("choice" for several labels, "classification" for a single
    label) followed by a "text_input" block.

    dataset: Name of the dataset annotations are saved to.
    source: Data to annotate, passed through to get_stream.
    loader: Optional loader name, passed through to get_stream.
    label: Labels to annotate; at least one is required.
    exclusive: Whether only one label can be selected per example.
    exclude: Dataset IDs whose annotations should be excluded.
    RETURNS: The recipe settings dict (view id, dataset, stream, config).
    """
    log("RECIPE: Starting recipe textcat.manual.BOX", locals())
    labels = label
    if not labels:
        # exits=1 terminates the process, so nothing below runs without labels.
        msg.fail("textcat.manual.BOX requires at least one --label", exits=1)
    has_options = len(labels) > 1
    log(f"RECIPE: Annotating with {len(labels)} labels", labels)
    stream = get_stream(
        source, loader=loader, rehash=True, dedup=True, input_key="text"
    )
    # Two stacked UI components: the label selection and a free-text field.
    blocks = [
        {"view_id": "choice" if has_options else "classification"},
        {
            "view_id": "text_input",
            "field_rows": 3,
            "field_label": "Explain your decision",
        },
    ]
    if has_options:
        stream = add_label_options(stream, labels)
    else:
        stream = add_labels_to_stream(stream, labels)
    if exclusive:
        # Use the dataset to decide what's left to annotate
        db = connect()
        if dataset in db:
            # NOTE(review): filter_accepted_inputs is not imported in the
            # visible part of this file -- confirm where it is defined for
            # your Prodigy version and bring it into scope, otherwise this
            # line raises NameError when --exclusive is used on an existing
            # dataset.
            stream = filter_accepted_inputs(db.get_dataset(dataset), stream)
    return {
        # "blocks" tells Prodigy to render the components listed in
        # config["blocks"] instead of a single built-in interface.
        "view_id": "blocks",
        "dataset": dataset,
        "stream": stream,
        "exclude": exclude,
        "config": {
            "labels": labels,
            "choice_style": "single" if exclusive else "multiple",
            "choice_auto_accept": exclusive,
            "exclude_by": "input" if has_options else "task",
            "auto_count_stream": True,
            "blocks": blocks,
        },
    }