Hi @ljvmiranda921
The training code works well with the FUNSD data, but when I annotate a different dataset with Prodigy and transform the data for training, I get the following error:
RuntimeError Traceback (most recent call last)
/tmp/ipykernel_107850/4032920361.py in <module>
----> 1 trainer.train()
/layoutlmv3-conda/lib/python3.7/site-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1447 resume_from_checkpoint=resume_from_checkpoint,
1448 trial=trial,
-> 1449 ignore_keys_for_eval=ignore_keys_for_eval,
1450 )
1451
/layoutlmv3-conda/lib/python3.7/site-packages/transformers/trainer.py in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1685 tr_loss_step = self.training_step(model, inputs)
1686 else:
-> 1687 tr_loss_step = self.training_step(model, inputs)
1688
1689 if (
/layoutlmv3-conda/lib/python3.7/site-packages/transformers/trainer.py in training_step(self, model, inputs)
2369
2370 with self.compute_loss_context_manager():
-> 2371 loss = self.compute_loss(model, inputs)
2372
2373 if self.args.n_gpu > 1:
/layoutlmv3-conda/lib/python3.7/site-packages/transformers/trainer.py in compute_loss(self, model, inputs, return_outputs)
2401 else:
2402 labels = None
-> 2403 outputs = model(**inputs)
2404 # Save past state if it exists
2405 # TODO: this needs to be fixed and made cleaner later.
/layoutlmv3-conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
/layoutlmv3-conda/lib/python3.7/site-packages/transformers/models/layoutlmv3/modeling_layoutlmv3.py in forward(self, input_ids, bbox, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, pixel_values)
1034 output_hidden_states=output_hidden_states,
1035 return_dict=return_dict,
-> 1036 pixel_values=pixel_values,
1037 )
1038 if input_ids is not None:
/layoutlmv3-conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
/layoutlmv3-conda/lib/python3.7/site-packages/transformers/models/layoutlmv3/modeling_layoutlmv3.py in forward(self, input_ids, bbox, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, pixel_values, output_attentions, output_hidden_states, return_dict)
845 pixel_values.shape[3] / self.config.patch_size
846 )
--> 847 visual_embeddings = self.forward_image(pixel_values)
848 visual_attention_mask = torch.ones(
849 (batch_size, visual_embeddings.shape[1]), dtype=torch.long, device=device
/layoutlmv3-conda/lib/python3.7/site-packages/transformers/models/layoutlmv3/modeling_layoutlmv3.py in forward_image(self, pixel_values)
748
749 def forward_image(self, pixel_values):
--> 750 embeddings = self.patch_embed(pixel_values)
751
752 # add [CLS] token
/layoutlmv3-conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
/layoutlmv3-conda/lib/python3.7/site-packages/transformers/models/layoutlmv3/modeling_layoutlmv3.py in forward(self, pixel_values, position_embedding)
141
142 def forward(self, pixel_values, position_embedding=None):
--> 143 embeddings = self.proj(pixel_values)
144
145 if position_embedding is not None:
/layoutlmv3-conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
/layoutlmv3-conda/lib/python3.7/site-packages/torch/nn/modules/conv.py in forward(self, input)
397
398 def forward(self, input: Tensor) -> Tensor:
--> 399 return self._conv_forward(input, self.weight, self.bias)
400
401 class Conv3d(_ConvNd):
/layoutlmv3-conda/lib/python3.7/site-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight, bias)
394 _pair(0), self.dilation, self.groups)
395 return F.conv2d(input, weight, bias, self.stride,
--> 396 self.padding, self.dilation, self.groups)
397
398 def forward(self, input: Tensor) -> Tensor:
RuntimeError: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR
PyTorch version (with CUDA build tag):
1.8.0+cu111
nvidia-smi:
Thu Jul 21 09:03:05 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01 Driver Version: 465.19.01 CUDA Version: 11.3 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA Tesla K80 On | 00000001:00:00.0 Off | 0 |
| N/A 38C P8 27W / 149W | 3MiB / 11441MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+