(Let me know if I should switch this discussion to GitHub issues.) I’m following your advice, @honnibal, to set the dependency and POS tags to `""` or `0` so I can use the full spaCy JSON format to train an NER system. When I run `spacy train` with `--no-tagger --no-parser`, I get this error:
Traceback (most recent call last):
File "/Users/ahalterman/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/Users/ahalterman/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/Users/ahalterman/anaconda3/lib/python3.6/site-packages/spacy/__main__.py", line 31, in <module>
plac.call(commands[command])
File "/Users/ahalterman/anaconda3/lib/python3.6/site-packages/plac_core.py", line 328, in call
cmd, result = parser.consume(arglist)
File "/Users/ahalterman/anaconda3/lib/python3.6/site-packages/plac_core.py", line 207, in consume
return cmd, self.func(*(args + varargs + extraopts), **kwargs)
File "/Users/ahalterman/anaconda3/lib/python3.6/site-packages/spacy/cli/train.py", line 130, in train
scorer = nlp_loaded.evaluate(dev_docs)
File "/Users/ahalterman/anaconda3/lib/python3.6/site-packages/spacy/language.py", line 472, in evaluate
scorer.score(doc, gold, verbose=verbose)
File "/Users/ahalterman/anaconda3/lib/python3.6/site-packages/spacy/scorer.py", line 91, in score
for annot in gold.orig_annot]))
File "gold.pyx", line 31, in spacy.gold.tags_to_entities
AssertionError: ['B-ORG', 'I-ORG', 'L-ORG', 'O', 'U-GPE', 'O', 'B-DATE', 'L-DATE', 'O', 'O', 'O', 'B-ORG', 'I-ORG', 'L-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'U-GPE', 'O', 'O', 'O', 'O', 'O', 'U-ORDINAL', 'O', 'O', 'O', 'U-ORDINAL', 'B-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'L-ORG', 'O', 'U-GPE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'U-GPE', 'O', 'O', 'O', 'O', 'O', 'B-DATE', 'I-DATE', 'I-DATE', 'L-DATE', 'O', 'O', 'U-GPE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'I-LOC', 'L-LOC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'L-DATE', 'O', 'U-GPE', 'O', 'O', 'O', 'O', 'B-GPE', 'I-GPE', 'L-GPE', 'O', 'O', 'B-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'L-DATE', 'O', 'O', 'B-GPE', 'L-GPE', 'O', 'O', 'B-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'L-DATE', 'O', 'O', 'B-GPE', 'I-GPE', 'L-GPE', 'O', 'O', 'O', 'B-DATE', 'I-DATE', 'L-DATE', 'O', 'B-GPE', 'L-GPE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-DATE', 'I-DATE', 'I-DATE', 'L-DATE', 'O', 'O', 'U-GPE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'U-DATE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'U-DATE', 'O', 'O', 'B-EVENT', 'I-EVENT', 'I-EVENT', 'L-EVENT', 'O', 'O', 'O', 'O', 'U-NORP', 'O', 'O', 'O', 'O', 'U-GPE', 'O', 'U-DATE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'U-NORP', 'O', 'O', 'U-DATE', 'O', 'O', 'O', 'O', 'O', 'U-GPE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'I-DATE', 'L-DATE', 'O', 'U-GPE', 'O', 'O', 'O', 'O', 'B-ORG', 'I-ORG', 'L-ORG', 'O', 'O', 'B-ORG', 'L-ORG', 'O', 'B-ORG', 'I-ORG', 'I-ORG', 'B-ORG', 'I-ORG', 'L-ORG']