Hello everyone! I have built my custom recipe for prodigy. It took a list of sentences and a list of questions, and prodigy render them combined. However, something strange happen time by time. After a couple of annotations system says "No tasks avaible", but if I reload the page it lets me annotate again. Furthermore, they are sentences different from the previous ones. I can't figure the problem out. What am I misunderstanding?
This is the code of my recipe:
import prodigy
from prodigy.components.preprocess import add_tokens
from prodigy.components.db import connect
from prodigy import set_hashes
from utils import constant
import spacy
import json
@prodigy.recipe("liwi")
def fak_facts_ner(dataset, lang="de", input_file=("File to input", "positional", None, str)):
input_file = "".join(input_file)
def get_stream():
db = connect()
with open(input_file, 'r') as json_file:
while True:
json_list = list(json_file)
for fact in json_list:
result = json.loads(fact)
for question, options in constant.options.items():
hashes_in_dataset = db.get_task_hashes(dataset)
next_value = set_hashes(
{
"text": result["text"],
"meta": result["meta"],
"label": question,
"options": options
}
)
if next_value["_task_hash"] not in hashes_in_dataset:
yield next_value
nlp = spacy.blank(lang) # blank spaCy pipeline for tokenization
stream = get_stream() # set up the stream
stream = add_tokens(nlp, stream) # tokenize the stream for ner_manual
#stream = list(stream)
return {
"dataset": dataset, # the dataset to save annotations to
"view_id": "blocks", # set the view_id to "blocks"
"stream": stream, # the stream of incoming examples,
"config": {
"labels": [],
"blocks": constant.blocks, # add the blocks to the config
"choice_style": "multiple"
}
}