Mismatched structure when using transformers model to train textcat (en_core_web_trf)

Hello,

I am experiencing the same issue with spaCy 3.5.1 and transformers. The training config was generated by Prodigy 1.11.11. I get the error both when I try to use this model in Prodigy with ner.correct and when I simply load it into spaCy with spacy.load(). I am loading the model on the same machine it was trained on.
The model was trained with Python 3.7.12.
I also tried to load the model with Python 3.10.10.

pip freeze:

blis==0.7.9
catalogue==2.0.8
certifi==2022.12.7
charset-normalizer==3.1.0
click==8.1.3
confection==0.0.4
cupy-cuda113==10.6.0
cupy-cuda116==10.6.0
cymem==2.0.7
fastrlock==0.8.1
filelock==3.9.0
fr-core-news-sm @ https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.5.0/fr_core_news_sm-3.5.0-py3-none-any.whl
fr-dep-news-trf @ https://github.com/explosion/spacy-models/releases/download/fr_dep_news_trf-3.5.0/fr_dep_news_trf-3.5.0-py3-none-any.whl
huggingface-hub==0.13.1
idna==3.4
importlib-metadata==6.0.0
Jinja2==3.1.2
langcodes==3.3.0
MarkupSafe==2.1.2
murmurhash==1.0.9
numpy==1.21.6
nvidia-cublas-cu11==11.10.3.66
nvidia-cuda-nvrtc-cu11==11.7.99
nvidia-cuda-runtime-cu11==11.7.99
nvidia-cudnn-cu11==8.5.0.96
packaging==23.0
pathy==0.10.1
preshed==3.0.8
protobuf==3.20.3
pydantic==1.10.6
PyYAML==6.0
regex==2022.10.31
requests==2.28.2
sentencepiece==0.1.97
smart-open==6.3.0
spacy==3.5.1
spacy-alignments==0.9.0
spacy-legacy==3.0.12
spacy-loggers==1.0.4
spacy-transformers==1.2.2
srsly==2.4.6
thinc==8.1.9
tokenizers==0.13.2
torch==1.13.1
tqdm==4.65.0
transformers==4.26.1
typer==0.7.0
typing_extensions==4.4.0
urllib3==1.26.15
wasabi==1.1.1
zipp==3.15.0

config:

[paths]
train = null
dev = null
vectors = null
init_tok2vec = null

[system]
gpu_allocator = null
seed = 0

[nlp]
lang = "fr"
pipeline = ["tok2vec","transformer","morphologizer","parser","attribute_ruler","lemmatizer","ner"]
disabled = []
before_creation = null
after_creation = null
after_pipeline_creation = null
batch_size = 64
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}

[components]

[components.attribute_ruler]
source = "fr_dep_news_trf"

[components.lemmatizer]
source = "fr_dep_news_trf"

[components.morphologizer]
source = "fr_dep_news_trf"
replace_listeners = ["model.tok2vec"]

[components.ner]
factory = "ner"
incorrect_spans_key = "incorrect_spans"
moves = null
scorer = {"@scorers":"spacy.ner_scorer.v1"}
update_with_oracle_cut_size = 100

[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "ner"
extra_state_tokens = false
hidden_width = 64
maxout_pieces = 2
use_upper = true
nO = null

[components.ner.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
width = ${components.tok2vec.model.encode.width}
upstream = "*"

[components.parser]
source = "fr_dep_news_trf"
replace_listeners = ["model.tok2vec"]

[components.tok2vec]
factory = "tok2vec"

[components.tok2vec.model]
@architectures = "spacy.Tok2Vec.v2"

[components.tok2vec.model.embed]
@architectures = "spacy.MultiHashEmbed.v2"
width = ${components.tok2vec.model.encode.width}
attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
rows = [5000,1000,2500,2500]
include_static_vectors = false

[components.tok2vec.model.encode]
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = 96
depth = 4
window_size = 1
maxout_pieces = 3

[components.transformer]
source = "fr_dep_news_trf"

[corpora]

[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null

[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null

[training]
train_corpus = "corpora.train"
dev_corpus = "corpora.dev"
seed = ${system:seed}
gpu_allocator = ${system:gpu_allocator}
dropout = 0.1
accumulate_gradient = 3
patience = 5000
max_epochs = 0
max_steps = 20000
eval_frequency = 1000
frozen_components = ["morphologizer","parser","attribute_ruler","lemmatizer"]
before_to_disk = null
annotating_components = []
before_update = null

[training.batcher]
@batchers = "spacy.batch_by_padded.v1"
discard_oversize = true
get_length = null
size = 2000
buffer = 256

[training.logger]
@loggers = "spacy.ConsoleLogger.v1"
progress_bar = false

[training.optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = true
eps = 0.00000001

[training.optimizer.learn_rate]
@schedules = "warmup_linear.v1"
warmup_steps = 250
total_steps = 20000
initial_rate = 0.00005

[training.score_weights]
pos_acc = null
morph_acc = null
morph_per_feat = null
dep_uas = null
dep_las = null
dep_las_per_type = null
sents_p = null
sents_r = null
sents_f = null
lemma_acc = null
ents_f = 1.0
ents_p = 0.0
ents_r = 0.0
ents_per_type = null
speed = 0.0

[pretraining]

[initialize]
vectors = ${paths.vectors}
init_tok2vec = ${paths.init_tok2vec}
vocab_data = null
lookups = null
before_init = null
after_init = null

[initialize.components]

[initialize.components.morphologizer]

[initialize.components.morphologizer.labels]
@readers = "spacy.read_labels.v1"
path = "spacy_training/labels/morphologizer.json"
require = false

[initialize.components.ner]

[initialize.components.ner.labels]
@readers = "spacy.read_labels.v1"
path = "spacy_training/labels/ner.json"
require = false

[initialize.components.parser]

[initialize.components.parser.labels]
@readers = "spacy.read_labels.v1"
path = "spacy_training/labels/parser.json"
require = false

[initialize.tokenizer]

error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-6-ddc3bbe33688> in <module>
----> 1 nlp = spacy.load('./')

~/trainings/env/lib/python3.7/site-packages/spacy/__init__.py in load(name, vocab, disable, enable, exclude, config)
     58         enable=enable,
     59         exclude=exclude,
---> 60         config=config,
     61     )
     62 

~/trainings/env/lib/python3.7/site-packages/spacy/util.py in load_model(name, vocab, disable, enable, exclude, config)
    442             return load_model_from_package(name, **kwargs)  # type: ignore[arg-type]
    443         if Path(name).exists():  # path to model data directory
--> 444             return load_model_from_path(Path(name), **kwargs)  # type: ignore[arg-type]
    445     elif hasattr(name, "exists"):  # Path or Path-like to model data
    446         return load_model_from_path(name, **kwargs)  # type: ignore[arg-type]

~/trainings/env/lib/python3.7/site-packages/spacy/util.py in load_model_from_path(model_path, meta, vocab, disable, enable, exclude, config)
    522         meta=meta,
    523     )
--> 524     return nlp.from_disk(model_path, exclude=exclude, overrides=overrides)
    525 
    526 

~/trainings/env/lib/python3.7/site-packages/spacy/language.py in from_disk(self, path, exclude, overrides)
   2123             # Convert to list here in case exclude is (default) tuple
   2124             exclude = list(exclude) + ["vocab"]
-> 2125         util.from_disk(path, deserializers, exclude)  # type: ignore[arg-type]
   2126         self._path = path  # type: ignore[assignment]
   2127         self._link_components()

~/trainings/env/lib/python3.7/site-packages/spacy/util.py in from_disk(path, readers, exclude)
   1367         # Split to support file names like meta.json
   1368         if key.split(".")[0] not in exclude:
-> 1369             reader(path / key)
   1370     return path
   1371 

~/trainings/env/lib/python3.7/site-packages/spacy/language.py in <lambda>(p, proc)
   2118                 continue
   2119             deserializers[name] = lambda p, proc=proc: proc.from_disk(  # type: ignore[misc]
-> 2120                 p, exclude=["vocab"]
   2121             )
   2122         if not (path / "vocab").exists() and "vocab" not in exclude:  # type: ignore[operator]

~/trainings/env/lib/python3.7/site-packages/spacy/pipeline/transition_parser.pyx in spacy.pipeline.transition_parser.Parser.from_disk()

~/trainings/env/lib/python3.7/site-packages/thinc/model.py in from_bytes(self, bytes_data)
    617         msg = srsly.msgpack_loads(bytes_data)
    618         msg = convert_recursive(is_xp_array, self.ops.asarray, msg)
--> 619         return self.from_dict(msg)
    620 
    621     def from_disk(self, path: Union[Path, str]) -> "Model":

~/trainings/env/lib/python3.7/site-packages/thinc/model.py in from_dict(self, msg)
    634         nodes = list(self.walk())
    635         if len(msg["nodes"]) != len(nodes):
--> 636             raise ValueError("Cannot deserialize model: mismatched structure")
    637         for i, node in enumerate(nodes):
    638             info = msg["nodes"][i]

ValueError: Cannot deserialize model: mismatched structure