Hi, I tried creating a blank NER model (with the FOOD label).
from prodigy.util import export_model_data
def _get_blank_ner_model():
    """Build a blank English pipeline containing an untrained FOOD entity recognizer.

    The pipeline is assembled by appending, in order, a ``TokenVectorEncoder``,
    a ``NeuralEntityRecognizer`` (with the single label ``'FOOD'``) and a
    ``SentenceSegmenter`` to ``nlp.pipeline``.

    Returns:
        The configured ``nlp`` object created by ``spacy.blank('en')``.
    """
    # Imported locally so the snippet is self-contained.
    import spacy
    from spacy.pipeline import TokenVectorEncoder
    from spacy.pipeline import NeuralEntityRecognizer
    from spacy.pipeline import SentenceSegmenter
    # Not used below; kept because it is the drop-in alternative to
    # SentenceSegmenter that was also tried.
    from spacy.pipeline import NeuralDependencyParser  # noqa: F401

    nlp = spacy.blank('en')
    nlp.pipeline.append(TokenVectorEncoder(nlp.vocab))

    ner = NeuralEntityRecognizer(nlp.vocab)
    nlp.pipeline.append(ner)
    ner.add_label('FOOD')

    nlp.pipeline.append(SentenceSegmenter(nlp.vocab))

    # The API here is admittedly a bit inconvenient: begin_training expects a
    # callable, so an empty-list getter is passed. The returned optimizer is
    # not needed here. (Presumably this call initializes the component
    # models -- confirm against the spaCy docs for the installed version.)
    nlp.begin_training(lambda: [])

    nlp.meta['name'] = 'some arbitrary name'
    # Hand the pipeline back to the caller; without this the caller gets None.
    return nlp
# Build the blank pipeline, then export it under the name 'out'.
# NOTE(review): presumably export_model_data writes the model to the 'out'
# directory that is later passed to `prodigy ner.batch-train`; the meaning of
# the two empty-list arguments is not visible here -- confirm against Prodigy.
nlp = _get_blank_ner_model()
export_model_data('out', nlp, [], [])
Then I run the ner.batch-train command on the blank NER model:
prodigy ner.batch-train DATASET_NAME out -o model_out -l FOOD
I get an error from the sentence segmentation step (the same error happens when I replace SentenceSegmenter with NeuralDependencyParser).
Loaded model here
Using 20% of examples (355) for evaluation
Traceback (most recent call last):
File "/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/Users/apewu/writelab/prodigy/lib/python3.6/site-packages/prodigy/__main__.py", line 235, in <module>
controller = recipe(*args, use_plac=True)
File "cython_src/prodigy/core.pyx", line 129, in prodigy.core.recipe.recipe_decorator.recipe_proxy
File "/Users/apewu/writelab/prodigy/lib/python3.6/site-packages/plac_core.py", line 328, in call
cmd, result = parser.consume(arglist)
File "/Users/apewu/writelab/prodigy/lib/python3.6/site-packages/plac_core.py", line 207, in consume
return cmd, self.func(*(args + varargs + extraopts), **kwargs)
File "/Users/apewu/writelab/prodigy/lib/python3.6/site-packages/prodigy/recipes/ner.py", line 233, in batch_train
examples = list(split_sentences(model.orig_nlp, examples))
File "cython_src/prodigy/components/preprocess.pyx", line 7, in split_sentences
File "/Users/apewu/writelab/prodigy/lib/python3.6/site-packages/spacy/language.py", line 466, in pipe
for doc, context in izip(docs, contexts):
File "/Users/apewu/writelab/prodigy/lib/python3.6/site-packages/spacy/language.py", line 479, in pipe
for doc in docs:
File "/Users/apewu/writelab/prodigy/lib/python3.6/site-packages/spacy/language.py", line 588, in _pipe
func(doc)
TypeError: 'str' object is not callable
Exception ignored in: <generator object at 0x1141527b8>
Traceback (most recent call last):
File "cython_src/prodigy/components/preprocess.pyx", line 6, in genexpr
AttributeError: 'weakref' object has no attribute 'cline_in_traceback'
When I comment out the split_sentences calls in the recipe, the command seems to work:
# examples = list(split_sentences(model.orig_nlp, examples))
# evals = list(split_sentences(model.orig_nlp, evals))
Thanks