I’d like to use a model with a custom component that applies a spacy Matcher
, but I’m running into an error that I can’t make sense of. Is there a correct way to do this? Thanks in advance.
Starting from the recipe in the NER workflow, I import split_sentences
from prodigy.components.preprocess
instead of prodigy.preprocess
, and then add the lines marked by comments.
import spacy
from prodigy import recipe, get_stream
from prodigy.components.sorters import prefer_uncertain
from prodigy.models.ner import EntityRecognizer
from prodigy.components.preprocess import split_sentences
from spacy.matcher import Matcher # Added
@recipe('ner.teach',
dataset=("Dataset ID", "positional"),
spacy_model=("Loadable spaCy model (for tokenization)"),
source=("Source data (file path or API query)"),
api=("Optional API loader to use", "option", "a", str),
loader=("Optional file loader to use", "option", "lo", str),
label=("Label to annotate", "option", "l", str),
# Works when False; breaks when True
match=("Try to include the custom component", "flag", "m", bool)
)
def teach(dataset, spacy_model, source, api=None, loader=None, label='', match=False):
"""Annotate texts to train a NER model"""
nlp = spacy.load(spacy_model)
if match: # Added
# Try to add a custom component that instantiates a Matcher
do_nothing = DoNothing(nlp.vocab)
nlp.add_pipe(do_nothing, last=True)
model = EntityRecognizer(nlp, label=label)
stream = get_stream(source, api, loader)
stream = split_sentences(model.orig_nlp, stream)
return {
'dataset': dataset,
'view_id': 'ner',
'stream': prefer_uncertain(model(stream), bias=0.8),
'update': model.update,
'config': {'lang': nlp.lang, 'label': label}
}
class DoNothing(object):
"""A custom component that uses a Matcher."""
name = 'do_nothing'
def __init__(self, vocab):
self.matcher = Matcher(vocab)
def __call__(self, doc):
return doc
Invoking this recipe with the -m
flag to put the custom component in the processing pipeline gives:
Traceback (most recent call last):
File "/usr/lib64/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib64/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home/james/.virtualenvs/pipeline/lib/python3.6/site-packages/prodigy/__main__.py", line 248, in <module>
controller = recipe(*args, use_plac=True)
File "cython_src/prodigy/core.pyx", line 150, in prodigy.core.recipe.recipe_decorator.recipe_proxy
File "/home/james/.virtualenvs/pipeline/lib/python3.6/site-packages/plac_core.py", line 328, in call
cmd, result = parser.consume(arglist)
File "/home/james/.virtualenvs/pipeline/lib/python3.6/site-packages/plac_core.py", line 207, in consume
return cmd, self.func(*(args + varargs + extraopts), **kwargs)
File "mre.py", line 25, in teach
model = EntityRecognizer(nlp, label=label)
File "cython_src/prodigy/models/ner.pyx", line 143, in prodigy.models.ner.EntityRecognizer.__init__
File "/home/james/.virtualenvs/pipeline/lib64/python3.6/copy.py", line 180, in deepcopy
y = _reconstruct(x, memo, *rv)
File "/home/james/.virtualenvs/pipeline/lib64/python3.6/copy.py", line 280, in _reconstruct
state = deepcopy(state, memo)
File "/home/james/.virtualenvs/pipeline/lib64/python3.6/copy.py", line 150, in deepcopy
y = copier(x, memo)
File "/home/james/.virtualenvs/pipeline/lib64/python3.6/copy.py", line 240, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/home/james/.virtualenvs/pipeline/lib64/python3.6/copy.py", line 150, in deepcopy
y = copier(x, memo)
File "/home/james/.virtualenvs/pipeline/lib64/python3.6/copy.py", line 215, in _deepcopy_list
append(deepcopy(a, memo))
File "/home/james/.virtualenvs/pipeline/lib64/python3.6/copy.py", line 150, in deepcopy
y = copier(x, memo)
File "/home/james/.virtualenvs/pipeline/lib64/python3.6/copy.py", line 220, in _deepcopy_tuple
y = [deepcopy(a, memo) for a in x]
File "/home/james/.virtualenvs/pipeline/lib64/python3.6/copy.py", line 220, in <listcomp>
y = [deepcopy(a, memo) for a in x]
File "/home/james/.virtualenvs/pipeline/lib64/python3.6/copy.py", line 180, in deepcopy
y = _reconstruct(x, memo, *rv)
File "/home/james/.virtualenvs/pipeline/lib64/python3.6/copy.py", line 280, in _reconstruct
state = deepcopy(state, memo)
File "/home/james/.virtualenvs/pipeline/lib64/python3.6/copy.py", line 150, in deepcopy
y = copier(x, memo)
File "/home/james/.virtualenvs/pipeline/lib64/python3.6/copy.py", line 240, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "/home/james/.virtualenvs/pipeline/lib64/python3.6/copy.py", line 180, in deepcopy
y = _reconstruct(x, memo, *rv)
File "/home/james/.virtualenvs/pipeline/lib64/python3.6/copy.py", line 274, in _reconstruct
y = func(*args)
File "matcher.pyx", line 195, in spacy.matcher.Matcher.__init__
TypeError: __init__() takes exactly 1 positional argument (2 given)
Prodigy info:
Version 1.2.0
Location /home/james/.virtualenvs/pipeline/lib/python3.6/site-packages/prodigy
Prodigy Home /home/james/.prodigy
Platform Linux-4.14.13-300.fc27.x86_64-x86_64-with-fedora-27-Twenty_Seven
Python Version 3.6.3
Database Name PostgreSQL
Database Id postgresql
Total Datasets 6
Total Sessions 67
Spacy info:
spaCy version 2.0.5
Location /home/james/.virtualenvs/pipeline/lib/python3.6/site-packages/spacy
Platform Linux-4.14.13-300.fc27.x86_64-x86_64-with-fedora-27-Twenty_Seven
Python version 3.6.3
Models en