I'm using ner.manual to fix some automatic annotations and have noticed that Prodigy sometimes highlights the wrong token. This appears to happen because Prodigy tokenizes the sentence itself, even when the tokens are already provided in the task. It's not a major hassle, but I'm curious whether I'm doing anything to cause this bug.
{
"text": "we have been living in italy for the past few years , where mexican food is non-existent , so we were looking forward to eating here . ",
"meta": {
"food": "non-existent"
},
"tokens": [
{
"text": "we",
"start": 0,
"end": 2,
"id": 0
},
{
"text": "have",
"start": 3,
"end": 7,
"id": 1
},
{
"text": "been",
"start": 8,
"end": 12,
"id": 2
},
{
"text": "living",
"start": 13,
"end": 19,
"id": 3
},
{
"text": "in",
"start": 20,
"end": 22,
"id": 4
},
{
"text": "italy",
"start": 23,
"end": 28,
"id": 5
},
{
"text": "for",
"start": 29,
"end": 32,
"id": 6
},
{
"text": "the",
"start": 33,
"end": 36,
"id": 7
},
{
"text": "past",
"start": 37,
"end": 41,
"id": 8
},
{
"text": "few",
"start": 42,
"end": 45,
"id": 9
},
{
"text": "years",
"start": 46,
"end": 51,
"id": 10
},
{
"text": ",",
"start": 52,
"end": 53,
"id": 11
},
{
"text": "where",
"start": 54,
"end": 59,
"id": 12
},
{
"text": "mexican",
"start": 60,
"end": 67,
"id": 13
},
{
"text": "food",
"start": 68,
"end": 72,
"id": 14
},
{
"text": "is",
"start": 73,
"end": 75,
"id": 15
},
{
"text": "non-existent",
"start": 76,
"end": 88,
"id": 16
},
{
"text": ",",
"start": 89,
"end": 90,
"id": 17
},
{
"text": "so",
"start": 91,
"end": 93,
"id": 18
},
{
"text": "we",
"start": 94,
"end": 96,
"id": 19
},
{
"text": "were",
"start": 97,
"end": 101,
"id": 20
},
{
"text": "looking",
"start": 102,
"end": 109,
"id": 21
},
{
"text": "forward",
"start": 110,
"end": 117,
"id": 22
},
{
"text": "to",
"start": 118,
"end": 120,
"id": 23
},
{
"text": "eating",
"start": 121,
"end": 127,
"id": 24
},
{
"text": "here",
"start": 128,
"end": 132,
"id": 25
},
{
"text": ".",
"start": 133,
"end": 134,
"id": 26
}
],
"spans": [
{
"start": 76,
"end": 88,
"token_start": 16,
"token_end": 16,
"label": "food"
}
]
}