Offering a bit more info about what is working, and what is not working, with v1.14.4. I was able to get various span recipes working with the fix Vincent mentioned above (3 out of 17, meaning I am still not able to run 14 of my custom recipes with v1.14.4). I have not tried v1.14.5 yet.
Here's an example of a custom recipe that I am able to run with the latest update: `stt_spans.py`. In this recipe, I edited the code (see "Edits made" below), and it now runs:
CLI:
PRODIGY_ALLOWED_SESSIONS=cheyanne prodigy stt-spans stt_spans_install_test blank:en /Users/cheyannebaird/posh/stt_error_validation/final_annotated_output/final/joel_updated_output/diff_with_raw/stt_error_validation_joel.jsonl -F /Users/cheyannebaird/posh/annotation-service/src/annotator/recipes/stt_spans.py --label STT_ERROR
Recipe:
Edits made:
nlp = spacy.load(spacy_model)
result = prodigy_spans_manual(dataset, nlp, source, label=label)
import json
import prodigy
from typing import Dict, Generator, Iterable, List, Optional, Union
import spacy
from prodigy.components.loaders import get_stream
from prodigy.components.loaders import JSONL
from prodigy.components.preprocess import add_tokens
from prodigy.core import recipe
from prodigy.recipes.spans import manual as prodigy_spans_manual
from prodigy.types import RecipeSettingsType
from prodigy.util import get_labels
@prodigy.recipe(
    "stt-spans",
    dataset=("dataset to save annotations to", "positional", None, str),
    spacy_model=(
        "spaCy pipeline for tokenization (e.g. blank:en)", "positional", None,
        str
    ),
    source=("file path with data to annotate", "positional", None, str),
    loader=(
        "loader (guessed from file extension if not set)", "option", "lo",
        str
    ),
    label=("comma-separated label(s) to annotate", "option", "l", get_labels)
)
def manual(
    dataset: str,
    spacy_model: str,
    source: Union[str, Iterable[dict]],
    loader: Optional[str] = None,
    label: Optional[List[str]] = None
) -> RecipeSettingsType:
    """
    Overrides the built-in Prodigy recipe to be able to present the data to the
    annotators with our custom view_id and stream configuration.

    :param dataset: name of the dataset annotations are saved to
    :param spacy_model: name of the spaCy pipeline to load for tokenization
    :param source: path of the data to annotate
    :param loader: optional loader name forwarded to ``get_stream``
    :param label: label(s) offered in the spans interface
    :return: the settings returned by the built-in ``spans.manual`` recipe,
        with ``stream``, ``view_id`` and ``config`` replaced by the custom
        blocks layout defined here
    """
    # Load the pipeline exactly once; the same object tokenizes the stream and
    # is handed to the built-in recipe.
    nlp = spacy.load(spacy_model)

    stream = add_tokens(
        nlp,
        get_stream(
            source, loader=loader, rehash=True, dedup=True, input_key="text"
        ),
        use_chars=False
    )

    # The built-in recipe receives the loaded pipeline object, not the model
    # name string.
    result = prodigy_spans_manual(dataset, nlp, source, label=label)

    # Read-only panel showing the task's ``deployment`` value.
    deployment_html = (
        "<div style=\"padding: 0 10px; border: 1px solid #ddd; border-radius: 4px; text-align: left;\">"
        "<label style=\"font-size: 12px; font-weight: bold; opacity: 0.75; margin-bottom: 10px;\">deployment</label>"
        "<div style=\"max-height: 300px; overflow-y: scroll; margin-bottom: 10px; margin: 0; padding: 0;\">{{deployment}}</div>"
        "</div>"
    )
    # Link to the full chat log, keyed by the task's ``chat_id`` value.
    chat_link_html = (
        "<div style=\"padding: 0 10px; border: 1px solid #ddd; border-radius: 4px; text-align: left;\">"
        "<a href=\"https://app.poshdevelopment.com/chatlogs?chatId={{chat_id}}\" target=\"_blank\" style=\"margin-bottom: 10px;\">View Full Chat</a>"
        "</div>"
    )

    # Replace the built-in stream/view/config with the custom blocks layout.
    result.update(
        # The token-annotated stream is fully materialized into a list here.
        stream=list(stream),
        view_id="blocks",
        config={
            "lang": nlp.lang,
            "blocks": [
                {"view_id": "spans_manual"},
                {"view_id": "text_input", "field_label": "1st STT Error", "field_id": "1st_stt_error"},
                {"view_id": "text_input", "field_label": "2nd STT Error", "field_id": "2nd_stt_error"},
                {"view_id": "text_input", "field_label": "3rd STT Error", "field_id": "3rd_stt_error"},
                {"view_id": "html", "html_template": deployment_html},
                {"view_id": "html", "html_template": chat_link_html},
                {"view_id": "text_input", "field_label": "notes"}
            ],
            "labels": label,
            "exclude_by": "input",
            "ner_manual_highlight_chars": False,
            "auto_count_stream": True
        }
    )
    return result
This is a custom recipe that I am not able to get working: `eup_corpus_validation_with_options.py`
CLI:
PRODIGY_ALLOWED_SESSIONS=cheyanne prodigy eup-corpus-validation-with-options eup_val_options_install_test /Users/cheyannebaird/posh/eup_corpus/eup_task_april2023/final_final/eup_low_scoring_data_v1.jsonl -F /Users/cheyannebaird/posh/annotation-service/src/annotator/recipes/eup_corpus_validation_with_options.py
Error:
the following arguments are required: --label-field, --choice-field
Recipe:
import json
from typing import Dict, Generator
import prodigy
from prodigy.components.loaders import JSONL
from annotator.recipes.utils import DIFFERENT_EUP
# Block layout for the "blocks" interface used by the ``custom_labels`` recipe:
# the message text, the choice options, a link to the EUP tree, a row of flag
# checkboxes, and two free-text inputs.
UNLIMITED_ROWS = [
    # Message being annotated, rendered from the task's ``message`` field.
    # Both the outer and the inner <div> are closed explicitly.
    {
        "view_id": "html",
        "html_template": (
            "<div style=\"padding: 0 10px; border: 1px solid #ddd; border-radius: 4px; text-align: center;\">"
            "<div style=\"max-height: 300px; font-size: 30px; overflow-y: scroll; margin-bottom: 10px;\">{{message}}</div>"
            "</div>"
        ),
    },
    # Choice options come from each task's ``options`` list.
    {"view_id": "choice"},
    # Reference link to the EUP tree.
    {
        "view_id": "html",
        "html_template": (
            "<div style=\"padding: 0 10px; border: 1px solid #ddd; border-radius: 4px; text-align: left;\">"
            "<label style=\"font-size: 12px; font-weight: bold; opacity: 0.75; margin-bottom: 10px;\"></label>"
            "<a href=\"https://coda.io/d/Research-Notes_dbuGQfaDy5r/EUP-Tree_suVEG#_lu8Uk\" target=\"_blank\" style=\"margin-bottom: 10px;\">EUP Tree</a>"
            "</div>"
        ),
    },
    # Flag checkboxes. Each ``onchange`` handler (updateAmbig, updateContextual,
    # ...) is defined in the JavaScript supplied by the ``custom_labels`` recipe.
    {
        "view_id": "html",
        "html_template": (
            "<style>"
            ".checkbox-container {"
            "float: left;"
            "margin-right: 25px;"
            "}"
            "</style>"
            "<div class='checkbox-container'>"
            "<input type='checkbox' id='ambig' name='ambig' value='Ambig' data-id='{{chat_id}}' onchange='updateAmbig()'>"
            "<label for='ambig' style='margin-left: 5px;'>Ambig</label>"
            "</div>"
            "<div class='checkbox-container'>"
            "<input type='checkbox' id='contextual' name='contextual' value='Contextual' data-id='{{chat_id}}' onchange='updateContextual()'>"
            "<label for='contextual' style='margin-left: 5px;'>Contextual</label>"
            "</div>"
            "<div class='checkbox-container'>"
            "<input type='checkbox' id='multi' name='multi' value='Multi-EUP' data-id='{{chat_id}}' onchange='updateMulti()'>"
            "<label for='multi' style='margin-left: 5px;'>Multi-EUP</label>"
            "</div>"
            "<div class='checkbox-container'>"
            "<input type='checkbox' id='partial' name='partial' value='Partial' data-id='{{chat_id}}' onchange='updatePartial()'>"
            "<label for='partial' style='margin-left: 5px;'>Partial</label>"
            "</div>"
            "<div class='checkbox-container'>"
            "<input type='checkbox' id='side_conversation' name='side_conversation' value='Side Conversation' data-id='{{chat_id}}' onchange='updateSideConversation()'>"
            "<label for='side_conversation' style='margin-left: 5px;'>Side Conversation</label>"
            "</div>"
            "<div class='checkbox-container'>"
            "<input type='checkbox' id='stt_error' name='stt_error' value='STT Error' data-id='{{chat_id}}' onchange='updateSttError()'>"
            "<label for='stt_error' style='margin-left: 5px;'>STT Error</label>"
            "</div>"
        ),
    },
    {"view_id": "text_input", "field_label": "Proposed EUP(s)", "field_id": "proposedEUP"},
    {"view_id": "text_input", "field_label": "notes"},
]
def add_options(
    stream, label_field="eup", choices=None
) -> Generator[Dict, None, None]:
    """
    Convert each line in the ``stream`` to a ``task`` with a message, flag
    fields and an ``options`` list for the choice interface.

    :param stream: the input stream; each line must provide the ``"eup"`` and
        ``"message"`` keys
    :param label_field: key whose value is a JSON-encoded list of options;
        defaults to "eup"
    :param choices: extra choices appended after the line's own options,
        skipping any already present; defaults to ``DIFFERENT_EUP``
    :yield: a task Dict with the message, the flag fields (all ``False``)
        and the options
    """
    # Resolve the default lazily rather than binding a shared list as a
    # default argument at import time.
    if choices is None:
        choices = DIFFERENT_EUP

    for line in stream:
        # Lines without the label field start from an empty option list.
        options = json.loads(line[label_field]) if label_field in line else []

        # Append each choice not already offered, keeping first-seen order.
        for word in choices:
            if word not in options:
                options.append(word)

        # NOTE(review): "eup" is read from the fixed key "eup", not from
        # ``label_field`` -- confirm that this is intended when a different
        # ``label_field`` is passed.
        yield {
            "eup": line["eup"],
            "message": line["message"],
            "ambig": False,
            "contextual": False,
            "multi": False,
            "partial": False,
            "side_conversation": False,
            "stt_error": False,
            "options": [
                {"id": o, "deployment": o, "prompt": o, "text": o}
                for o in options
            ],
        }
@prodigy.recipe(
    "eup-corpus-validation-with-options",
    dataset=("The dataset to save to", "positional", None, str),
    file_path=("Path to texts", "positional", None, str),
    label_field=("Label to use for the accept task", "option", "f", str),
    choice_field=("Choices to add to the input", "option", "c", str)
)
def custom_labels(dataset, file_path, label_field=None, choice_field=None):
    """
    Annotate the text with labels from the list from the ``label_field`` in
    the input file. Augmented with choices from ``choice_field``.

    :param dataset: the dataset to save annotations to
    :param file_path: path of the JSONL file with the texts
    :param label_field: optional; accepted on the command line but not
        currently forwarded to ``add_options``
    :param choice_field: optional; accepted on the command line but not
        currently forwarded to ``add_options``
    :return: the recipe settings (dataset, "blocks" view, stream and config)

    ``label_field`` and ``choice_field`` default to ``None`` so that the
    recipe can be launched without ``--label-field`` / ``--choice-field``.
    """
    # NOTE(review): neither option is used below; ``add_options`` runs with its
    # own defaults. Confirm whether they should be wired through.
    stream = add_options(JSONL(file_path))  # add options to each task

    # One updater plus one reset-on-answer listener per checkbox rendered by
    # the html block in UNLIMITED_ROWS.
    javascript = """
// Set ambig to false by default
prodigy.update({ ambig: false });
function updateAmbig() {
prodigy.update({ ambig: document.getElementById('ambig').checked });
}
document.addEventListener('prodigyanswer', (event) => {
// Reset ambig to false
prodigy.update({ ambig: false });
document.getElementById('ambig').checked = false;
});
// Set contextual to false by default
prodigy.update({ contextual: false });
function updateContextual() {
prodigy.update({ contextual: document.getElementById('contextual').checked });
}
document.addEventListener('prodigyanswer', (event) => {
// Reset contextual to false
prodigy.update({ contextual: false });
document.getElementById('contextual').checked = false;
});
// Set multi to false by default
prodigy.update({ multi: false });
function updateMulti() {
prodigy.update({ multi: document.getElementById('multi').checked });
}
document.addEventListener('prodigyanswer', (event) => {
// Reset multi to false
prodigy.update({ multi: false });
document.getElementById('multi').checked = false;
});
// Set partial to false by default
prodigy.update({ partial: false });
function updatePartial() {
prodigy.update({ partial: document.getElementById('partial').checked });
}
document.addEventListener('prodigyanswer', (event) => {
// Reset partial to false
prodigy.update({ partial: false });
document.getElementById('partial').checked = false;
});
// Set side_conversation to false by default
prodigy.update({ side_conversation: false });
function updateSideConversation() {
prodigy.update({ side_conversation: document.getElementById('side_conversation').checked });
}
document.addEventListener('prodigyanswer', (event) => {
// Reset side_conversation to false
prodigy.update({ side_conversation: false });
document.getElementById('side_conversation').checked = false;
});
// Set stt_error to false by default
prodigy.update({ stt_error: false });
function updateSttError() {
prodigy.update({ stt_error: document.getElementById('stt_error').checked });
}
document.addEventListener('prodigyanswer', (event) => {
// Reset stt_error to false
prodigy.update({ stt_error: false });
document.getElementById('stt_error').checked = false;
});
"""
    return {
        "dataset": dataset,
        "view_id": "blocks",
        # The whole stream is materialized into a list here.
        "stream": list(stream),
        "config": {
            "blocks": UNLIMITED_ROWS,
            "javascript": javascript
        }
    }
With the recipe that is not working with the latest update, I added dummy label and choice values to the command line (see below), and then I was able to launch the recipe... but why is this required? If these fields are now required, we will need to update our internal annotation-service, so I'm trying to understand this requirement.
PRODIGY_ALLOWED_SESSIONS=cheyanne prodigy eup-corpus-validation-with-options eup_val_options_install_test /Users/cheyannebaird/posh/eup_corpus/eup_task_april2023/final_final/eup_low_scoring_data_v1.jsonl -F /Users/cheyannebaird/posh/annotation-service/src/annotator/recipes/eup_corpus_validation_with_options.py --label foo,bar --choice-field foo,bar