Sometimes I get this rendering with the latest Prodigy.
Then, when I try to scroll to the right, it does not render.
I am attaching my recipe verbatim:
# https://prodi.gy/docs/custom-recipes
# autopep8: off
import prodigy
import pathlib
from typing import Generator, TypedDict
import spacy
from smashingalpha.match_patterns import match_all_in_order
from smashingalpha import bottom_up_patterns as bup
# Shared spaCy pipeline, loaded once at import time; load_my_custom_stream
# uses it both to parse the transcript and to re-parse each sentence.
sm = spacy.load("en_core_web_sm")
# autopep8: on
# (name, pattern) pairs passed to match_all_in_order. Every name is also the
# attribute of ``bup`` holding its pattern, so the pairs are derived from the
# names; the sequence below fixes the order of the resulting list.
equals_pats = [
    (pat_name, getattr(bup, pat_name))
    for pat_name in (
        'OF_ENT',
        'OF_ENT_ACL',
        'GERUND',
        'QTY_VERB_UNITS',
        'GERUND_2',
        'QTY_AUX_ATTR',
        'QTY_VERB',
        'QTY_AUX',
        'QTY_NUMMOD',
        'ADP_QTY',
    )
]
def print_quote(QTY_chunk):
    """Print ``QTY_chunk`` as one CSV field followed by a comma, no newline.

    The text is wrapped in double quotes when it contains a comma, a double
    quote, or a line break, and embedded double quotes are doubled so the
    field can still be parsed as CSV. Text containing none of those
    characters is printed unchanged.

    Args:
        QTY_chunk: The text of the field to emit.
    """
    needs_quoting = any(ch in QTY_chunk for ch in ',"\r\n')
    if needs_quoting:
        # Standard CSV escaping: a literal quote inside a quoted field is
        # written as two quotes.
        escaped = QTY_chunk.replace('"', '""')
        print(f'"{escaped}",', end='')
    else:
        print(f"{QTY_chunk},", end='')
def print_subtree_or_chunk(AMOUNT):
    """Print the text of an AMOUNT match through ``print_quote``.

    The text is taken from the first entry present in ``AMOUNT``, in this
    order: the ``'chunk'`` text, the space-joined texts of the ``'subtree'``
    tokens, or the text of the single ``'tok'``.

    Args:
        AMOUNT: Mapping holding a ``'chunk'``, ``'subtree'`` or ``'tok'``
            entry whose items expose a ``.text`` attribute.
    """
    if 'chunk' in AMOUNT:
        amount_text = AMOUNT['chunk'].text
    elif 'subtree' in AMOUNT:
        amount_text = ' '.join(tok.text for tok in AMOUNT['subtree'])
    else:
        amount_text = AMOUNT['tok'].text
    print_quote(amount_text)
# https://docs.python.org/3/library/typing.html#typing.TypedDict
class Example(TypedDict):
    """Typed dict describing an example with ``html`` and ``text`` strings."""

    html: str
    text: str
def _span_from_tokens(tokens, label):
    """Build a span dict reaching from ``tokens[0]`` to ``tokens[-1]``."""
    first, last = tokens[0], tokens[-1]
    return {
        "start": first.idx,
        "end": last.idx + len(last.text),
        "token_start": first.i,
        "token_end": last.i,
        "label": label,
    }


def load_my_custom_stream(source: str) -> Generator:
    """Yield one relations task per sentence that contains a numeric entity.

    The file at ``source`` is read as UTF-8 text and parsed with ``sm``.
    Every sentence holding at least one token whose entity type is CARDINAL,
    MONEY, ORDINAL, PERCENT or QUANTITY is re-parsed on its own, so all token
    indices and character offsets in the task are relative to that sentence.
    ``match_all_in_order`` is then run with ``equals_pats``; each match adds
    a QTY span, an AMOUNT span and an EQUALS relation between them.

    Args:
        source: Path of the text file to load.

    Yields:
        Dicts with ``text``, ``tokens``, ``spans`` and ``relations`` plus the
        ``_input_hash`` / ``_task_hash`` values set by ``prodigy.set_hashes``.
    """
    transcript = pathlib.Path(source).read_text(encoding='utf-8')
    doc = sm(transcript)
    labels = {'CARDINAL', 'MONEY', 'ORDINAL', 'PERCENT', 'QUANTITY'}
    for sent in doc.sents:
        if not any(tok.ent_type_ in labels for tok in sent):
            continue
        text = str(sent)
        s = sm(text)
        pat_matches = match_all_in_order(sm, equals_pats, s, s)
        spans = []
        relations = []
        for _pat_name, ms in pat_matches:
            for m in ms:
                # NOTE(review): each match is assumed to be a mapping with
                # 'QTY' and 'AMOUNT' entries, as the lookups below require.
                qty = m['QTY']
                if 'chunk' in qty:
                    qty_tokens = qty['chunk']
                else:
                    qty_tokens = [qty['tok']]
                head_span = _span_from_tokens(qty_tokens, "QTY")

                amount = m['AMOUNT']
                if 'chunk' in amount:
                    amount_tokens = amount['chunk']
                elif 'subtree' in amount:
                    # Materialise so first/last indexing also works when the
                    # subtree is handed over as a generator.
                    amount_tokens = list(amount['subtree'])
                else:
                    amount_tokens = [amount['tok']]
                child_span = _span_from_tokens(amount_tokens, "AMOUNT")

                spans.append(head_span)
                spans.append(child_span)
                relations.append({
                    # The relation is anchored on the last token of each span.
                    "head": head_span['token_end'],
                    "child": child_span['token_end'],
                    "head_span": head_span,
                    "child_span": child_span,
                    "label": "EQUALS",
                })
        tokens = [
            {
                "text": tok.text,
                "start": tok.idx,
                "end": tok.idx + len(tok.text),
                "id": tok.i,
                # "ws" says whether the token is FOLLOWED by whitespace, which
                # is tok.whitespace_; tok.is_space would instead say whether
                # the token itself consists of whitespace.
                "ws": bool(tok.whitespace_),
            }
            for tok in s
        ]
        ex = {
            "text": text,
            "tokens": tokens,
            "spans": spans,
            "relations": relations,
        }
        # https://support.prodi.gy/t/difference-between-input-hash-and-task-hash/3220
        # Built-in hash() of a str is salted per process, so hashes derived
        # from it change between sessions; let Prodigy compute stable ones.
        yield prodigy.set_hashes(ex)
# Layout for the "blocks" interface: a single block showing the relations view.
blocks = [{"view_id": "relations"}]
@prodigy.recipe(
    "equals-pats-recipe",
    dataset=("Dataset to save answers to", "positional", None, str),
    # The loader reads this file as plain UTF-8 text, not as JSONL.
    source=("Path to the UTF-8 plain-text source file", "option", "s", str),
)
def my_custom_recipe(dataset, source):  # ="./et/ABG/2021-12-31.txt"):
    """Recipe that serves pre-annotated QTY/AMOUNT spans and EQUALS relations.

    Args:
        dataset: Name of the dataset the annotations are saved to.
        source: Path of the text file passed to ``load_my_custom_stream``.

    Returns:
        The recipe components dict: the ``blocks`` interface, the dataset
        name, the example stream and the UI config.
    """
    # TODO remove view_id
    # Load your own streams from anywhere you want
    stream = load_my_custom_stream(source)
    # https://support.prodi.gy/t/enabling-both-assign-relations-and-select-spans-in-custom-relations-recipe/3647/5?u=ysz
    config = {
        "blocks": blocks,
        "labels": ["EQUALS"],
        "relations_span_labels": ["QTY", "AMOUNT"],
        # Lift the container's max-width limit.
        "global_css": ".prodigy-container{max-width: unset;}",
    }
    return {
        "view_id": "blocks",
        "dataset": dataset,  # Name of dataset to save annotations
        "stream": stream,  # Incoming stream of examples
        "config": config,
    }