Hello,
I have written a custom recipe that is essentially rel.manual. I want to extend the example from "Dependencies and Relations · Prodigy · An annotation tool for AI, Machine Learning & NLP" so that it also identifies "span_labels", but I can't find any examples or documentation on how to do so.
This is my recipe code so far:
@prodigy.recipe(
    "prelabel_rel",
    dataset=("The dataset to use", "positional", None, str),
    spacy_model=("The base model", "positional", None, str),
    source=("The source data as a JSONL file", "positional", None, str),
    label=("One or more comma-separated labels", "option", "l", split_string),
    span_label=("One or more comma-separated labels", "option", "sl", split_string),
)
def prelabel_rel_recipe(dataset, spacy_model, source, label, span_label):
    """Serve pre-labelled relation tasks in the ``relations`` interface.

    Args:
        dataset: Name of the dataset the annotations are saved to.
        spacy_model: Name or path of the spaCy pipeline passed to
            ``spacy.load``; it is used by ``add_tokens`` to tokenize the text.
        source: Path to the JSONL file holding the input examples.
        label: Relation labels offered in the UI.
        span_label: Span labels offered in the UI.

    Returns:
        The recipe components dict (dataset, stream, view_id, config).
    """
    nlp = spacy.load(spacy_model)
    stream = JSONL(source)  # load the data
    stream = prelabel_stream(stream)  # add custom relations
    stream = add_tokens(nlp, stream)  # add "tokens" to stream
    return {
        "dataset": dataset,  # dataset to save annotations to
        "stream": stream,  # the incoming stream of examples
        "view_id": "relations",  # annotation interface to use
        "config": {
            "labels": label,  # relation labels to annotate
            # The relations interface takes its span labels from the
            # "relations_span_labels" setting; a "span_labels" key is not
            # picked up, so the -sl labels would never show in the UI.
            "relations_span_labels": span_label,
        },
    }
This is my code that will add the span_labels:
def prelabel_stream(stream):
    """Attach pre-computed relations to each example of the stream.

    For every incoming example, ``label_stream`` is called on its ``"text"``
    and returns two parallel sequences (relation labels and head indices).
    Entry ``i`` of those sequences becomes one relation whose ``"child"`` is
    ``i``.

    Args:
        stream: Iterable of task dicts, each with a ``"text"`` key.

    Yields:
        The same task dicts, each exactly once, with ``"relations"`` set.
    """
    pattern_dict = build_prelabel_dict('./root/pattern_matching_rules.csv')
    for eg in stream:
        deps, heads = label_stream(eg["text"], pattern_dict)
        # NOTE(review): span pre-annotations are presumably supplied in a
        # separate eg["spans"] list (entries with "token_start", "token_end"
        # and "label"), not inside "relations" -- confirm against the Prodigy
        # relations interface docs.
        eg["relations"] = [
            {"child": i, "head": head, "label": label}
            for i, (label, head) in enumerate(zip(deps, heads))
        ]
        # Yield only after the relations are attached; yielding earlier as
        # well would emit every example twice.
        yield eg
This is my prodigy command:
prodigy prelabel_rel prelabel_test_deploy_one en_core_web_md ./root/data.jsonl -l Concept_Member,Contributes_To -sl base,type_of,change_direction,aspect_changing,to_whom,effect_size,confidence,where,when,predicate -F ./root/prelabel_rel.py
Thanks!