Hello, I am trying to train an NER model and I get the following error:
python -m prodigy train --ner ner_dataset --base-model en_core_web_trf model_output_trf
⚠ Aborting and saving the final best model. Encountered exception:
TypeError("'FullTransformerBatch' object is not iterable")
Traceback (most recent call last):
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/prodigy/__main__.py", line 54, in <module>
controller = recipe(*args, use_plac=True)
File "cython_src/prodigy/core.pyx", line 329, in prodigy.core.recipe.recipe_decorator.recipe_proxy
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/plac_core.py", line 367, in call
cmd, result = parser.consume(arglist)
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/plac_core.py", line 232, in consume
return cmd, self.func(*(args + varargs + extraopts), **kwargs)
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/prodigy/recipes/train.py", line 244, in train
return _train(
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/prodigy/recipes/train.py", line 172, in _train
spacy_train(nlp, output_path, use_gpu=gpu_id, stdout=stdout)
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/spacy/training/loop.py", line 122, in train
raise e
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/spacy/training/loop.py", line 105, in train
for batch, info, is_best_checkpoint in training_step_iterator:
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/spacy/training/loop.py", line 224, in train_while_improving
score, other_scores = evaluate()
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/spacy/training/loop.py", line 281, in evaluate
scores = nlp.evaluate(dev_corpus(nlp))
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/spacy/language.py", line 1377, in evaluate
for doc, eg in zip(
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/spacy/util.py", line 1488, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/trainable_pipe.pyx", line 79, in pipe
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/spacy/util.py", line 1507, in raise_error
raise e
File "spacy/pipeline/trainable_pipe.pyx", line 75, in spacy.pipeline.trainable_pipe.TrainablePipe.pipe
File "spacy/pipeline/tagger.pyx", line 111, in spacy.pipeline.tagger.Tagger.predict
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/thinc/model.py", line 315, in predict
return self._func(self, X, is_train=False)[0]
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/thinc/layers/chain.py", line 54, in forward
Y, inc_layer_grad = layer(X, is_train=is_train)
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/thinc/layers/chain.py", line 54, in forward
Y, inc_layer_grad = layer(X, is_train=is_train)
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/home/borlogh/anaconda3/envs/fb_spacy31/lib/python3.8/site-packages/spacy_transformers/layers/trfs2arrays.py", line 23, in forward
for trf_data in trf_datas:
TypeError: 'FullTransformerBatch' object is not iterable
I was able to work around the problem by modifying the file spacy_transformers/layers/trfs2arrays.py and adding the following code:
def forward(model: Model, trf_datas: List[TransformerData], is_train: bool):
    pooling: Model[Ragged, Floats2d] = model.layers[0]
    grad_factor = model.attrs["grad_factor"]
    outputs = []
    backprops = []
    # NEW CODE - BEGIN
    if not isinstance(trf_datas, list):
        trf_datas = trf_datas.doc_data  # FullTransformerBatch -> List[TransformerData]
    # NEW CODE - END
    for trf_data in trf_datas:
        if len(trf_data.tensors) > 0:
            t_i = find_last_hidden(trf_data.tensors)
            tensor_t_i = trf_data.tensors[t_i]
            if tensor_t_i.size == 0:
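
To double-check why this patch helps, I put together a small sketch of the two types involved (assuming spaCy 3.1 with spacy-transformers and en_core_web_trf installed, and the default component name "transformer"; if I understand the API correctly, the component's predict() returns a whole-batch object):

import spacy
from spacy_transformers.data_classes import FullTransformerBatch, TransformerData

nlp = spacy.load("en_core_web_trf")
trf = nlp.get_pipe("transformer")
docs = [nlp.make_doc("San Francisco considers banning sidewalk delivery robots.")]

# predict() should give one FullTransformerBatch for the whole batch ...
batch = trf.predict(docs)
print(isinstance(batch, FullTransformerBatch))          # expected: True

# ... while batch.doc_data holds the per-doc TransformerData objects,
# which is what the loop in trfs2arrays iterates over.
print(isinstance(batch.doc_data[0], TransformerData))   # expected: True

So during nlp.evaluate the listener layer apparently receives the whole batch object instead of the per-doc list, which is why the isinstance check in my patch makes the loop work again.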
What do you think the real problem might be?
I am using Prodigy 1.11.0a8 and Python 3.8.10.
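
In case the versions of the related packages matter, this is roughly how I would print them from the same environment (package names assumed to match the pip distributions; output omitted):

import importlib.metadata as md

for pkg in ("spacy", "spacy-transformers", "thinc", "torch", "prodigy"):
    print(pkg, md.version(pkg))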