Hi folks. Really excited to try out the new IAA metrics recipes, but unfortunately I'm banging my head up against some issues.
I tried to chase the issue down in the source, but the error is being thrown from within pre-compiled source that I can't access.
# Excerpt from the (pre-compiled) Prodigy IAA metrics recipe where the error originates.
metric = IaaDoc(annotation_type=annotation_type, labels=labels, annotators=annotators)
examples = get_stream(source, loader=loader, rehash=False)
try:
    metric.measure(examples)
except MetricError as err:
    # Print the metric failure message and exit the process.
    msg.fail(err.msg, exits=True)
I'm using customized recipes to store the annotations for which I'd like the metrics, and unfortunately the dataset is sensitive. I could possibly try to give you a tiny subset with just a couple of examples annotated by just a pair of annotators, if that's necessary?
I'm using a bunch of different LLMs (local and APIs) as annotators to help me rapidly iterate on a labelling scheme. I updated my custom recipes to ensure the "view_id" was being set, and the local workflow is in a happy place for using my slightly-customized "review" recipe.
Is it helpful for me to share the annotation recipe? It's as follows -
from typing import Iterable, Union
from prodigy.cli import serve
from prodigy.components.filters import filter_seen_before
from prodigy.components.preprocess import make_textcat_suggestions, add_tokens, add_annot_name
from prodigy.components.stream import get_stream, Stream
from prodigy.core import recipe, Arg, connect
from prodigy.util import ANNOTATOR_ID_ATTR
from spacy import Language
from spacy.lang.en import English
from lib.complaints import get_complaint_labels
from lib.explicit_langchain_model import AvailableModels, component_name
from lib.utils import chunk_stream, datafile
def add_view_info(stream: "Stream"):
    """Ensure every example renders with the multiple-choice view.

    Sets ``_view_id`` to ``"choice"`` and forces ``choice_style`` to
    ``"multiple"`` in each example's per-task config, preserving any
    other config keys already present on the example.

    Args:
        stream: Iterable of Prodigy task dicts.

    Yields:
        The same task dicts, mutated in place with view information.
    """
    for example in stream:
        # Idiomatic example.get(...) instead of the unbound-method call
        # dict.get(example, ...); reuse any existing per-example config.
        config = example.get('config', {})
        config['choice_style'] = "multiple"
        example['_view_id'] = "choice"
        example['config'] = config
        yield example
@recipe(
    "textcat.explicit_langchain_model_annotate",
    dataset=Arg(help="Dataset to save annotations to"),
    source=Arg(help="Data to annotate (file path or '-' to read from standard input)"),
    cpp_filename=Arg(help="GGUF format model filename saved to LLM directory"),
    model_alias=Arg(help="Annotator alias on behalf of the model"),
)
def textcat_explicit_langchain_model_annotate(
    dataset: str,
    source: Union[str, Iterable[dict]],
    cpp_filename: str,
    model_alias: str,
) -> None:
    """Annotate *source* with a local GGUF-backed model and save the results.

    Streams the input, attaches textcat suggestions from an
    "explicit_langchain_model" pipe, adds tokens, annotator name and
    choice-view config, then writes each chunk of 50 examples to the
    database so progress is persisted incrementally.
    """
    stream = get_stream(source, api=None, loader=None, rehash=True, input_key="text")
    # NOTE(review): `component` ("llm") is passed to make_textcat_suggestions
    # below, but the pipe is registered under `component_name` — confirm these
    # are the same string, otherwise the suggestion step targets a missing pipe.
    component = "llm"
    nlp: Language = English()
    nlp.add_pipe(factory_name="explicit_langchain_model",
                 name=component_name,
                 config={'cpp_filename': cpp_filename})
    labels = get_complaint_labels()
    db = connect()
    if dataset in db.datasets:
        # Skip examples this model alias has already annotated so a re-run
        # resumes where it left off instead of duplicating work.
        already_annotated = (
            ex
            for ex in db.iter_dataset_examples(dataset)
            if ex[ANNOTATOR_ID_ATTR] == model_alias
        )
        stream.apply(filter_seen_before, stream=stream, cache_stream=already_annotated)
    # Process and persist in small chunks so progress is saved frequently
    # (more often than the bundled equivalent recipe would).
    chunk_size = 50
    for chunk in chunk_stream(stream, chunk_size):
        # Re-wrap the chunk as a Stream so .apply() is available on it.
        chunked_stream = get_stream(chunk, api=None, loader=None, rehash=True, input_key="text")
        chunked_stream.apply(
            make_textcat_suggestions,
            stream=chunked_stream,
            nlp=nlp,
            component=component,
            threshold=0.5,
            labels=labels,
            show_progress_bar=True,
            progress_bar_total=chunk_size,
        )
        chunked_stream.apply(add_tokens, nlp=nlp, stream=chunked_stream)
        chunked_stream.apply(add_annot_name, name=model_alias)
        chunked_stream.apply(add_view_info)
        # NOTE(review): add_dataset is invoked on every chunk — presumably
        # idempotent, but it could likely be hoisted above the loop.
        db.add_dataset(model_alias, session=True)
        # Save each example to both the target dataset and a per-annotator one.
        db.add_examples(chunked_stream, [dataset, model_alias])
# Driver: launch the annotation recipe through Prodigy's CLI server.
jsonl_source = datafile('complaints.jsonl')
target_dataset = 'complaints'
llm_file = AvailableModels.speechless_13b()
# The model file path doubles as the annotator alias for the saved examples.
command = (
    'prodigy textcat.explicit_langchain_model_annotate '
    f'{target_dataset} {jsonl_source} {llm_file} {llm_file}'
)
serve(command)
It's really only customized to make it friendlier to local models / output parsers, and to ensure it saves its progress more frequently than the bundled equivalent recipe.
The error was raised with a brand new Prodigy database containing a couple of hundred model-annotated examples from 2 different local LLMs, so it's pretty fast for me to test tweaks - it's more that I'm not sure how much I can do on my own without a clearer idea of what the recipe is trying to achieve when it throws this error.
Thanks for the kickass product, by the way. Prodigy is awesome, and I'm not really blocked by this - I just wanted to see if the metrics can help me drive the labelling scheme and model improvements.