I noticed that `validate_answer` doesn't return the session ID in multi-user mode. Is there a way to return it?
Hi Adrian,
It's a bit unclear to me what exactly you were trying to achieve and what went wrong. Could you share the recipe that you ran, what you expected to see, and what happened instead? If you could also share any relevant configuration from your `prodigy.json`, that would be helpful too.
# Does not receive session id
# Answers accepted so far; shared by every call to countAnswer.
answers = []


def countAnswer(answer, limit=5):
    """Record *answer*, refusing it once *limit* answers have been accepted.

    The function receives only the answer itself; no session identifier is
    passed in, so the count is global rather than per user.

    Args:
        answer: The submitted answer to record.
        limit: Maximum number of answers to accept. Defaults to 5.

    Raises:
        ValueError: If *limit* answers have already been recorded. The
            rejected answer is not appended.
    """
    if len(answers) >= limit:
        raise ValueError("Limit Reached")
    answers.append(answer)
dataset = dataset
view_id = "ner_manual"
stream = stream
update = None
db = None
progress = None
on_load = lambda i: print("On Load!")
on_exit = lambda i: print("On Exit!")
before_db = remove_tokens
validate_answer = countAnswer
get_session_id = None
exclude = None
config = {
"labels": label
}
ctrl = Controller(dataset, view_id, stream, update, db,
progress, on_load, on_exit, before_db,
validate_answer, get_session_id, exclude,
config, None)
return ctrl
Is it possible to return a session ID so that I can look up the user's dataset info? The value passed to the callback doesn't include it. I was thinking of changing the controller, but that doesn't seem like the best idea.
Could you share the full recipe? It seems like you're only sending a part of the Python script. Also, could you share the command that you used to run the recipe?
from typing import List, Optional
from requests import session
import spacy
import prodigy
from prodigy.components.loaders import JSONL
from prodigy.components.preprocess import add_tokens
from prodigy.models.matcher import PatternMatcher
from prodigy.util import split_string
from prodigy.core import Controller
from prodigy.components.db import connect
def remove_tokens(answers):
    """Strip token-level data from a batch of annotated examples.

    Removes the ``"tokens"`` entry from every example and the
    ``"token_start"`` / ``"token_end"`` entries from each of its spans.
    The examples are modified in place.

    Keys that are already absent are ignored, and an example whose
    ``"spans"`` value is missing or empty is left otherwise untouched, so
    the function is safe to run on examples that carry no token data.

    Args:
        answers: A list of example dicts.

    Returns:
        The same ``answers`` list that was passed in, after modification.
    """
    for eg in answers:
        eg.pop("tokens", None)
        # `or ()` also covers an explicit `"spans": None`.
        for span in eg.get("spans") or ():
            span.pop("token_start", None)
            span.pop("token_end", None)
    return answers
@prodigy.recipe(
    "entity_curation",
    dataset=("The dataset to use", "positional", None, str),
    spacy_model=("The base model", "positional", None, str),
    source=("The source data as a JSONL file", "positional", None, str),
    label=("One or more comma-separated labels", "option", "l", split_string),
    patterns=("The match patterns file", "option", "p", str),
    n_examples=(
        "Number of examples to randomly review, -1 for all",
        "option",
        "n",
        int,
    ),
    exclude=("Names of datasets to exclude", "option", "e", split_string),
    highlight_chars=(
        "Allow for highlighting individual characters instead of tokens",
        "flag",
        "C",
        bool,
    ),
)
def entity_curation(
    dataset: str,
    spacy_model: str,
    n_examples: int,
    source: str,
    label: Optional[List[str]] = None,
    patterns: Optional[str] = None,
    exclude: Optional[List[str]] = None,
    highlight_chars: bool = False,
):
    """Build a manual NER annotation controller with a cap on answers.

    Loads examples from a JSONL file, optionally runs them through a
    pattern matcher, adds tokens, and returns a ``Controller`` that uses
    the ``"ner_manual"`` view.

    Args:
        dataset: Dataset name handed to the controller.
        spacy_model: Name or path passed to ``spacy.load``.
        n_examples: Maximum number of distinct examples (by
            ``"_input_hash"``) that may be answered. Zero or a negative
            value (e.g. ``-1``) disables the limit.
        source: Path given to the ``JSONL`` loader.
        label: Labels placed in the controller config under ``"labels"``.
        patterns: Optional patterns file. When given, the stream is passed
            through a ``PatternMatcher`` loaded from it.
        exclude: Dataset names forwarded to the controller's ``exclude``
            argument.
        highlight_chars: Forwarded to ``add_tokens`` as ``use_chars``.

    Returns:
        The configured ``Controller`` instance.
    """
    # NOTE(review): the decorator declares `n_examples` as an option, but the
    # signature makes it a required parameter placed before `source`. The
    # signature is left as-is to keep the interface unchanged; confirm this
    # matches how the recipe is invoked.
    answered = set()
    nlp = spacy.load(spacy_model)
    stream = JSONL(source)

    def validate_answer(answer):
        """Reject answers for new examples once `n_examples` is reached."""
        input_hash = answer["_input_hash"]
        limit_reached = n_examples > 0 and len(answered) >= n_examples
        # Re-submitting an example that was already counted is always
        # allowed; only examples that would grow the set are rejected.
        if limit_reached and input_hash not in answered:
            raise ValueError(
                f"You have reached {n_examples} examples, please save and exit"
            )
        # Record the hash only after the check so that a rejected answer
        # does not count towards the limit.
        answered.add(input_hash)

    if patterns is not None:
        pattern_matcher = PatternMatcher(
            nlp, combine_matches=True, all_examples=True
        )
        pattern_matcher = pattern_matcher.from_disk(patterns)
        # The matcher yields pairs; only the example (second item) is kept.
        stream = (eg for _, eg in pattern_matcher(stream))

    stream = add_tokens(nlp, stream, use_chars=highlight_chars)

    view_id = "ner_manual"
    update = None
    # No database object is passed to the controller, so no connection is
    # opened here. NOTE(review): presumably the controller falls back to
    # its own default when given None; confirm.
    db = None
    progress = None
    on_load = lambda i: print("On Load!")
    on_exit = lambda i: print("On Exit!")
    before_db = remove_tokens
    get_session_id = None
    config = {
        "labels": label,
    }

    # `exclude` is forwarded as received so the recipe's exclude option
    # actually takes effect.
    ctrl = Controller(
        dataset,
        view_id,
        stream,
        update,
        db,
        progress,
        on_load,
        on_exit,
        before_db,
        validate_answer,
        get_session_id,
        exclude,
        config,
        None,
    )
    return ctrl
@ines is this possible?
hi @a.arranz!
Have you seen this post?
Unfortunately since the session ID isn't assigned until execution, there's no way to access it within the recipe. However, the post outlines how to get it from separate instances of Prodigy.
Hope this helps!