Problems when saving model with blank NER

I’ve built my own model using a gensim word2vec and successfully loaded it using

 nlp = spacy.load('/Users/mos/Dropbox/spacy/build_swedish_spacy_model/w2v_model_1M')

Now, when I try to load it again in the same way it throws an error:

     ---------------------------------------------------------------------------
OverflowError                             Traceback (most recent call last)
<ipython-input-3-12adfa70fede> in <module>()
  1 import spacy
----> 2 nlp = spacy.load('/Users/mos/Dropbox/spacy/build_swedish_spacy_model/w2v_model_1M')
  3 #nlp.add_pipe(nlp.create_pipe('ner'))
  4 #nlp.to_disk('/Users/mos/Dropbox/spacy/build_swedish_spacy_model/w2v_model_1M')

/Users/mos/anaconda3/lib/python3.5/site-packages/spacy/__init__.py in load(name, **overrides)
 40     overrides['meta'] = meta
 41     overrides['path'] = model_path
---> 42     return cls(**overrides)

/Users/mos/anaconda3/lib/python3.5/site-packages/spacy/language.py in __init__(self, **overrides)
263 
264         self.vocab     = self.Defaults.create_vocab(self) \
--> 265                          if 'vocab' not in overrides \
266                          else overrides['vocab']
267         add_vectors    = self.Defaults.add_vectors(self) \

/Users/mos/anaconda3/lib/python3.5/site-packages/spacy/language.py in create_vocab(cls, nlp)
 40         else:
 41             vocab = Vocab.load(nlp.path, lex_attr_getters=cls.lex_attr_getters,
---> 42                              tag_map=cls.tag_map, lemmatizer=lemmatizer)
 43         for tag_str, exc in cls.morph_rules.items():
 44             for orth_str, attrs in exc.items():

/Users/mos/anaconda3/lib/python3.5/site-packages/spacy/vocab.pyx in spacy.vocab.Vocab.load (spacy/vocab.cpp:4974)()

/Users/mos/anaconda3/lib/python3.5/site-packages/spacy/vocab.pyx in 
spacy.vocab.Vocab.load_lexemes (spacy/vocab.cpp:9653)()

/Users/mos/anaconda3/lib/python3.5/site-packages/spacy/strings.pyx in 
spacy.strings.StringStore.__getitem__ (spacy/strings.cpp:2470)()

OverflowError: can't convert negative value to uint64_t

I have the code in Dropbox, which might be a bad idea in general. Might it be something resulting from that?

1 Like

Sorry, my bad. I used the wrong Conda environment.

But now I get the following error when I try to add “ner” to my model’s pipeline in order to create a new entity type:

nlp.add_pipe(nlp.create_pipe('ner'))
nlp.to_disk('/Users/mos/Dropbox/spacy/build_swedish_spacy_model/w2v_model_1M')

The error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-3-6b8812b838f4> in <module>()
  2 nlp = spacy.load('/Users/mos/Dropbox/spacy/build_swedish_spacy_model/w2v_model_1M')
  3 nlp.add_pipe(nlp.create_pipe('ner'))
----> 4 nlp.to_disk('/Users/mos/Dropbox/spacy/build_swedish_spacy_model/w2v_model_1M')

~/anaconda3/envs/memeticscience/lib/python3.5/site-packages/spacy/language.py in to_disk(self, 
 path, disable)
619             serializers[name] = lambda p, proc=proc: proc.to_disk(p, vocab=False)
620         serializers['vocab'] = lambda p: self.vocab.to_disk(p)
--> 621         util.to_disk(path, serializers, {p: False for p in disable})
622 
623     def from_disk(self, path, disable=tuple()):

~/anaconda3/envs/memeticscience/lib/python3.5/site-packages/spacy/util.py in to_disk(path, writers, exclude)
501     for key, writer in writers.items():
502         if key not in exclude:

--> 503             writer(path / key)
    504     return path
    505 

~/anaconda3/envs/memeticscience/lib/python3.5/site-packages/spacy/language.py in <lambda>(p, proc)
617             if not hasattr(proc, 'to_disk'):
618                 continue
--> 619             serializers[name] = lambda p, proc=proc: proc.to_disk(p, vocab=False)
620         serializers['vocab'] = lambda p: self.vocab.to_disk(p)
621         util.to_disk(path, serializers, {p: False for p in disable})

 nn_parser.pyx in spacy.syntax.nn_parser.Parser.to_disk()

~/anaconda3/envs/memeticscience/lib/python3.5/site-packages/spacy/util.py in to_disk(path, writers, exclude)
501     for key, writer in writers.items():
502         if key not in exclude:
--> 503             writer(path / key)
504     return path
505 

nn_parser.pyx in spacy.syntax.nn_parser.Parser.to_disk.lambda3()

TypeError: 'bool' object is not subscriptable

Ah, sorry about that, I think this might be related to a quirk in the current version of spaCy (which will be fixed in the upcoming v2.1.0). Basically, the weights of the blank NER component aren’t initialised by default, which causes the error when you try to save them to a directory. Could you try the following?

nlp.add_pipe(nlp.create_pipe('ner'))
nlp.begin_training()
nlp.to_disk('/Users/mos/Dropbox/spacy/build_swedish_spacy_model/w2v_model_1M')

I added the line and ran it again. Now it throws an error already on the first line where the model is loaded:

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-7-0cc05d7a66ed> in <module>()
  1 import spacy
----> 2 nlp = spacy.load('/Users/mos/Dropbox/spacy/build_swedish_spacy_model/w2v_model_1M')
  3 nlp.add_pipe(nlp.create_pipe('ner'))
  4 nlp.begin_training()
  5 nlp.to_disk('/Users/mos/Dropbox/spacy/build_swedish_spacy_model/w2v_model_1M')

~/anaconda3/envs/memeticscience/lib/python3.5/site-packages/spacy/__init__.py in load(name, **overrides)
 13     if depr_path not in (True, False, None):
 14         deprecation_warning(Warnings.W001.format(path=depr_path))
 ---> 15     return util.load_model(name, **overrides)
 16 
 17 

~/anaconda3/envs/memeticscience/lib/python3.5/site-packages/spacy/util.py in load_model(name, **overrides)
114             return load_model_from_package(name, **overrides)
115         if Path(name).exists():  # path to model data directory
 --> 116             return load_model_from_path(Path(name), **overrides)
117     elif hasattr(name, 'exists'):  # Path or Path-like to model data
118         return load_model_from_path(name, **overrides)

~/anaconda3/envs/memeticscience/lib/python3.5/site-packages/spacy/util.py in load_model_from_path(model_path, meta, **overrides)
154             component = nlp.create_pipe(name, config=config)
155             nlp.add_pipe(component, name=name)
 --> 156     return nlp.from_disk(model_path)
157 
158 

 ~/anaconda3/envs/memeticscience/lib/python3.5/site-packages/spacy/language.py in from_disk(self, path, disable)
651         if not (path / 'vocab').exists():
652             exclude['vocab'] = True
 --> 653         util.from_disk(path, deserializers, exclude)
654         self._path = path
655         return self

    ~/anaconda3/envs/memeticscience/lib/python3.5/site-packages/spacy/util.py in from_disk(path, 
 readers, exclude)
  509     for key, reader in readers.items():
  510         if key not in exclude:
   --> 511             reader(path / key)
   512     return path
513 

~/anaconda3/envs/memeticscience/lib/python3.5/site-packages/spacy/language.py in <lambda>(p, proc)
647             if not hasattr(proc, 'to_disk'):
648                 continue
--> 649             deserializers[name] = lambda p, proc=proc: proc.from_disk(p, vocab=False)
650         exclude = {p: False for p in disable}
651         if not (path / 'vocab').exists():

 nn_parser.pyx in spacy.syntax.nn_parser.Parser.from_disk()

 ~/anaconda3/envs/memeticscience/lib/python3.5/site-packages/spacy/util.py in from_disk(path, readers, exclude)
509     for key, reader in readers.items():
510         if key not in exclude:
--> 511             reader(path / key)
512     return path
513 

 nn_parser.pyx in spacy.syntax.nn_parser.Parser.from_disk.lambda11()

 ~/anaconda3/envs/memeticscience/lib/python3.5/site-packages/spacy/util.py in read_json(location)
462     """
463     location = ensure_path(location)
 --> 464     with location.open('r', encoding='utf8') as f:
465         return ujson.load(f)
466 

 ~/anaconda3/envs/memeticscience/lib/python3.5/pathlib.py in open(self, mode, buffering, encoding, errors, newline)
 1149             self._raise_closed()
 1150         return io.open(str(self), mode, buffering, encoding, errors, newline,
 -> 1151                        opener=self._opener)
 1152 
 1153     def read_bytes(self):

 ~/anaconda3/envs/memeticscience/lib/python3.5/pathlib.py in _opener(self, name, flags, mode)
 1003     def _opener(self, name, flags, mode=0o666):
 1004         # A stub for the opener argument to built-in open()
 -> 1005         return self._accessor.open(self, flags, mode)
 1006 
 1007     def _raw_open(self, flags, mode=0o777):

 ~/anaconda3/envs/memeticscience/lib/python3.5/pathlib.py in wrapped(pathobj, *args)
 369         @functools.wraps(strfunc)
370         def wrapped(pathobj, *args):
  --> 371             return strfunc(str(pathobj), *args)
372         return staticmethod(wrapped)
373 

FileNotFoundError: [Errno 2] No such file or directory: 
 '/Users/mos/Dropbox/spacy/build_swedish_spacy_model/w2v_model_1M/ner/cfg'

What does that tell you? Is that a file/folder I should have created earlier in the process?

That’s weird – I can’t reproduce this :thinking: This might be a stupid suggestion, but could you just try removing the w2v_model_1M directory or saving to a different directory? Maybe there’s stale state somewhere that causes the model to not be overwritten correctly.

Actually, your suggestion was what was needed. I tried loading another model I had not tampered with and ner.teach worked all the way through to the server starting and me being able to correct annotations. I’ll init a new model and run add_pipe(nlp.create_pipe('ner')) plus call begin_training() straight away on it and everything will probably be fine and dandy.

2 Likes

Thanks for updating – that’s interesting and good to know! And glad to hear it all worked in the end :+1:

1 Like