Adding a text box to a recipe

OKAY, I figured out how to accomplish this!
First I had to check out the built-in recipe. Its location can be found by running python -c "import prodigy;print(prodigy.__file__)", and it can also be found here

I literally took the textcat.manual part of the textcat.py file and modified it to

  1. add 2 components to the blocks list (the actual text to annotate and the input box)
  2. include the blocks list in the recipe's config
  3. set "blocks" as the view id

Here is what my code looks like:

import prodigy
from prodigy.components.loaders import JSONL
from prodigy.models.textcat import TextClassifier
from prodigy.models.matcher import PatternMatcher
from prodigy.components.sorters import prefer_uncertain
from prodigy.util import combine_models, split_string, get_labels, log
from prodigy.components.loaders import get_stream
from prodigy.components.preprocess import add_label_options, add_labels_to_stream
from prodigy.types import TaskType, StreamType, RecipeSettingsType
from typing import List, Optional, Union, Iterable
import spacy
from typing import List, Optional
def _filter_accepted_inputs(examples, stream):
    """Yield the tasks from *stream* whose input has not been accepted yet.

    Defined locally because this file does not import a helper of this name.

    examples: already-annotated examples (here: the contents of the dataset).
    stream: iterable of incoming task dicts.

    NOTE(review): assumes Prodigy's standard task format, i.e. each example
    carries an "_input_hash" and annotated examples carry an "answer" of
    "accept" / "reject" / "ignore" -- confirm against the Prodigy version used.
    """
    accepted_hashes = {
        eg["_input_hash"]
        for eg in examples
        if eg.get("answer") == "accept" and "_input_hash" in eg
    }
    for eg in stream:
        if eg.get("_input_hash") not in accepted_hashes:
            yield eg


@prodigy.recipe(
    "textcat.manual.BOX",
    # fmt: off
    dataset=("Dataset to save annotations to", "positional", None, str),
    source=("Data to annotate (file path or '-' to read from standard input)", "positional", None, str),
    loader=("Loader (guessed from file extension if not set)", "option", "lo", str),
    label=("Comma-separated label(s) to annotate or text file with one label per line", "option", "l", get_labels),
    exclusive=("Treat classes as mutually exclusive (if not set, an example can have multiple correct classes)", "flag", "E", bool),
    exclude=("Comma-separated list of dataset IDs whose annotations to exclude", "option", "e", split_string),
    # fmt: on
)
def manual(
    dataset: str,
    source: Union[str, Iterable[dict]],
    loader: Optional[str] = None,
    label: Optional[List[str]] = None,
    exclusive: bool = False,
    exclude: Optional[List[str]] = None,
) -> RecipeSettingsType:
    """
    Manually annotate categories that apply to a text and collect a free-text
    explanation alongside the decision. If more than one label is specified,
    categories are added as multiple choice options. If the --exclusive flag
    is set, categories become mutually exclusive, meaning that only one can be
    selected during annotation.

    dataset: name of the dataset the annotations are saved to.
    source: data to annotate, passed through to get_stream.
    loader: optional loader name, passed through to get_stream.
    label: the label(s) to annotate; at least one is required.
    exclusive: use single-choice options and auto-accept on selection.
    exclude: dataset IDs whose annotations should be excluded.

    Returns the recipe settings dict ("view_id", "dataset", "stream",
    "exclude" and "config"). The interface is "blocks": the first block is the
    choice/classification UI, the second is a text input field.

    Exits with status 1 if no label is given.
    """
    log("RECIPE: Starting recipe textcat.manual", locals())
    labels = label
    if not labels:
        # sys.exit with a string prints it to stderr and exits with status 1.
        import sys

        sys.exit("textcat.manual requires at least one --label")
    has_options = len(labels) > 1
    log(f"RECIPE: Annotating with {len(labels)} labels", labels)
    stream = get_stream(
        source, loader=loader, rehash=True, dedup=True, input_key="text"
    )
    # Two stacked components: the text with its label UI, then the input box.
    blocks = [
        {"view_id": "choice" if has_options else "classification"},
        {"view_id": "text_input", "field_rows": 3, "field_label": "Explain your decision"},
    ]
    if has_options:
        stream = add_label_options(stream, labels)
    else:
        stream = add_labels_to_stream(stream, labels)
        if exclusive:
            # Imported here so the recipe does not depend on a module-level
            # import that this file does not have.
            from prodigy.components.db import connect

            # Use the dataset to decide what's left to annotate
            db = connect()
            if dataset in db:
                stream = _filter_accepted_inputs(db.get_dataset(dataset), stream)

    return {
        # "blocks" replaces the single choice/classification view so that the
        # text input can be rendered below it; the per-block views are listed
        # in config["blocks"].
        "view_id": "blocks",
        "dataset": dataset,
        "stream": stream,
        "exclude": exclude,
        "config": {
            "labels": labels,
            "choice_style": "single" if exclusive else "multiple",
            "choice_auto_accept": exclusive,
            "exclude_by": "input" if has_options else "task",
            "auto_count_stream": True,
            "blocks": blocks,
        },
    }
1 Like