I just trained some new models (from en_core_web_lg) and I am having trouble with the Language.evaluate method on them.
import spacy
from spacy.gold import GoldParse
import en_core_web_lg
def calculate_ent_offsets(test_ed):
    """Return the entities of *test_ed* as character-offset tuples.

    Args:
        test_ed: An object with an iterable ``ents`` attribute whose items
            expose ``start_char``, ``end_char`` and ``label_`` (the script
            passes in a spaCy doc).

    Returns:
        A list of ``(start_char, end_char, label_)`` tuples, one per entity,
        in the order ``test_ed.ents`` yields them. The list is empty when
        there are no entities.
    """
    return [(ent.start_char, ent.end_char, ent.label_) for ent in test_ed.ents]
# Reproduction script: run Language.evaluate on the stock model and on the
# newly trained model using the same single-sentence gold example.

# Load the packaged core model and the newly trained model ("CU_new").
core_model = en_core_web_lg.load()
new_model = spacy.load("CU_new")
# Tokenize the sentence and run only the core model's `entity` component over
# it; `next` takes the single doc the pipe yields.
doc = next(core_model.entity.pipe([core_model.tokenizer("Where is Mary?")]))

# Case 1: evaluate the core model.
model = core_model
# Pair a freshly tokenized copy of the text with a GoldParse whose entities are
# the character offsets of the entities already found on `doc`.
doc_golds = [(model.tokenizer(str(doc)), GoldParse(doc, entities=calculate_ent_offsets(doc)))]
score = model.evaluate(doc_golds)
# Sanity check: both models give the same JSON output for this sentence.
assert(new_model("Where is Mary?").to_json() == core_model("Where is Mary?").to_json())
print("OK")

# Case 2: evaluate the new model with an identically built gold example.
model = new_model
doc_golds = [(model.tokenizer(str(doc)), GoldParse(doc, entities=calculate_ent_offsets(doc)))]
# This is the call that raises the TypeError in the traceback below
# ("unexpected keyword argument 'batch_size'").
# NOTE(review): the traceback shows `batch_size` being forwarded through
# `pipe(doc, **kwargs)` on the branch for components without a `.pipe` method;
# removing the sentencizer from the pipeline is reported to avoid the error.
score = model.evaluate(doc_golds)
In the second case (the newly trained model) I get a TypeError for an unexpected keyword argument, 'batch_size'. This doesn’t make sense to me, because both models should have the same class definition.
TypeError Traceback (most recent call last)
<ipython-input-1-0d3e786850a7> in <module>
26
27 doc_golds = [(model.tokenizer(str(doc)), GoldParse(doc, entities=calculate_ent_offsets(doc)))]
---> 28 score = model.evaluate(doc_golds)
29
30
/venv/lib64/python3.6/site-packages/spacy/language.py in evaluate(self, docs_golds, verbose, batch_size, scorer, component_cfg)
608 else:
609 docs = pipe.pipe(docs, **kwargs)
--> 610 for doc, gold in zip(docs, golds):
611 if verbose:
612 print(doc)
nn_parser.pyx in pipe()
/venv/lib64/python3.6/site-packages/spacy/util.py in minibatch(items, size)
455 while True:
456 batch_size = next(size_)
--> 457 batch = list(itertools.islice(items, int(batch_size)))
458 if len(batch) == 0:
459 break
nn_parser.pyx in pipe()
/venv/lib64/python3.6/site-packages/spacy/util.py in minibatch(items, size)
455 while True:
456 batch_size = next(size_)
--> 457 batch = list(itertools.islice(items, int(batch_size)))
458 if len(batch) == 0:
459 break
pipes.pyx in pipe()
/venv/lib64/python3.6/site-packages/spacy/util.py in minibatch(items, size)
455 while True:
456 batch_size = next(size_)
--> 457 batch = list(itertools.islice(items, int(batch_size)))
458 if len(batch) == 0:
459 break
/venv/lib64/python3.6/site-packages/spacy/language.py in <genexpr>(.0)
605 kwargs.setdefault("batch_size", batch_size)
606 if not hasattr(pipe, "pipe"):
--> 607 docs = (pipe(doc, **kwargs) for doc in docs)
608 else:
609 docs = pipe.pipe(docs, **kwargs)
nn_parser.pyx in spacy.syntax.nn_parser.Parser.__call__()
TypeError: __call__() got an unexpected keyword argument 'batch_size'
If I remove the sentencizer pipe from the language object, evaluate runs fine.