Prodigy error "No Task Available"; specific error: "ERROR: Can't fetch tasks. Make sure the server is running correctly."
Can someone please help me figure this out? This is a big data set, but I was only able to get to 284 before I got this error message.
This is the code being run:
"# Prodigy using jsonl data that was converted from json in convert_data.ipynb
!python -m prodigy ner.manual test_data_5K blank:en ./test_data_5K.jsonl --label PER,ORG,MISC,LOC"
These are the output error messages:
"Using 4 label(s): PER, ORG, MISC, LOC
Starting the web server at http://0.0.0.0:8088 ...
Open the app in your browser and start annotating!
Task exception was never retrieved
future: <Task finished name='Task-11' coro=<RequestResponseCycle.run_asgi() done, defined at /usr/local/anaconda3/lib/python3.8/site-packages/uvicorn/protocols/http/httptools_impl.py:388> exception=ValueError("Mismatched tokenization. Can't resolve span to token index 1. This can happen if your data contains pre-set spans. Make sure that the spans match spaCy's tokenization or add a 'tokens' property to your task.\n\n{'start': 1, 'end': 4, 'label': 'ORG'}")>
Traceback (most recent call last):
File "/usr/local/anaconda3/lib/python3.8/site-packages/uvicorn/protocols/http/httptools_impl.py", line 393, in run_asgi
self.logger.error(msg, exc_info=exc)
File "/usr/local/anaconda3/lib/python3.8/logging/__init__.py", line 1463, in error
self._log(ERROR, msg, args, **kwargs)
File "/usr/local/anaconda3/lib/python3.8/logging/__init__.py", line 1577, in _log
self.handle(record)
File "/usr/local/anaconda3/lib/python3.8/logging/__init__.py", line 1586, in handle
if (not self.disabled) and self.filter(record):
File "/usr/local/anaconda3/lib/python3.8/logging/__init__.py", line 807, in filter
result = f.filter(record)
File "cython_src/prodigy/util.pyx", line 121, in prodigy.util.ServerErrorFilter.filter
File "/usr/local/anaconda3/lib/python3.8/site-packages/uvicorn/protocols/http/httptools_impl.py", line 390, in run_asgi
result = await app(self.scope, self.receive, self.send)
File "/usr/local/anaconda3/lib/python3.8/site-packages/uvicorn/middleware/proxy_headers.py", line 45, in __call__
return await self.app(scope, receive, send)
File "/usr/local/anaconda3/lib/python3.8/site-packages/fastapi/applications.py", line 140, in __call__
await super().__call__(scope, receive, send)
File "/usr/local/anaconda3/lib/python3.8/site-packages/starlette/applications.py", line 134, in __call__
await self.error_middleware(scope, receive, send)
File "/usr/local/anaconda3/lib/python3.8/site-packages/starlette/middleware/errors.py", line 178, in __call__
raise exc from None
File "/usr/local/anaconda3/lib/python3.8/site-packages/starlette/middleware/errors.py", line 156, in __call__
await self.app(scope, receive, _send)
File "/usr/local/anaconda3/lib/python3.8/site-packages/starlette/middleware/cors.py", line 84, in __call__
await self.simple_response(scope, receive, send, request_headers=headers)
File "/usr/local/anaconda3/lib/python3.8/site-packages/starlette/middleware/cors.py", line 140, in simple_response
await self.app(scope, receive, send)
File "/usr/local/anaconda3/lib/python3.8/site-packages/starlette/middleware/base.py", line 25, in __call__
response = await self.dispatch_func(request, self.call_next)
File "/usr/local/anaconda3/lib/python3.8/site-packages/prodigy/app.py", line 198, in reset_db_middleware
response = await call_next(request)
File "/usr/local/anaconda3/lib/python3.8/site-packages/starlette/middleware/base.py", line 45, in call_next
task.result()
File "/usr/local/anaconda3/lib/python3.8/site-packages/starlette/middleware/base.py", line 38, in coro
await self.app(scope, receive, send)
File "/usr/local/anaconda3/lib/python3.8/site-packages/starlette/exceptions.py", line 73, in __call__
raise exc from None
File "/usr/local/anaconda3/lib/python3.8/site-packages/starlette/exceptions.py", line 62, in __call__
await self.app(scope, receive, sender)
File "/usr/local/anaconda3/lib/python3.8/site-packages/starlette/routing.py", line 590, in __call__
await route(scope, receive, send)
File "/usr/local/anaconda3/lib/python3.8/site-packages/starlette/routing.py", line 208, in __call__
await self.app(scope, receive, send)
File "/usr/local/anaconda3/lib/python3.8/site-packages/starlette/routing.py", line 41, in app
response = await func(request)
File "/usr/local/anaconda3/lib/python3.8/site-packages/fastapi/routing.py", line 129, in app
raw_response = await run_in_threadpool(dependant.call, **values)
File "/usr/local/anaconda3/lib/python3.8/site-packages/starlette/concurrency.py", line 25, in run_in_threadpool
return await loop.run_in_executor(None, func, *args)
File "/usr/local/anaconda3/lib/python3.8/concurrent/futures/thread.py", line 57, in run
result = self.fn(*self.args, **self.kwargs)
File "/usr/local/anaconda3/lib/python3.8/site-packages/prodigy/app.py", line 420, in get_session_questions
return _shared_get_questions(req.session_id, excludes=req.excludes)
File "/usr/local/anaconda3/lib/python3.8/site-packages/prodigy/app.py", line 391, in _shared_get_questions
tasks = controller.get_questions(session_id=session_id, excludes=excludes)
File "cython_src/prodigy/core.pyx", line 223, in prodigy.core.Controller.get_questions
File "cython_src/prodigy/core.pyx", line 227, in prodigy.core.Controller.get_questions
File "cython_src/prodigy/components/feeds.pyx", line 99, in prodigy.components.feeds.SharedFeed.get_questions
File "cython_src/prodigy/components/feeds.pyx", line 106, in prodigy.components.feeds.SharedFeed.get_next_batch
File "cython_src/prodigy/components/feeds.pyx", line 245, in prodigy.components.feeds.RepeatingFeed.get_session_stream
File "/usr/local/anaconda3/lib/python3.8/site-packages/toolz/itertoolz.py", line 376, in first
return next(iter(seq))
File "cython_src/prodigy/components/preprocess.pyx", line 130, in add_tokens
File "cython_src/prodigy/components/preprocess.pyx", line 222, in prodigy.components.preprocess._add_tokens
File "cython_src/prodigy/components/preprocess.pyx", line 199, in prodigy.components.preprocess.sync_spans_to_tokens
ValueError: Mismatched tokenization. Can't resolve span to token index 1. This can happen if your data contains pre-set spans. Make sure that the spans match spaCy's tokenization or add a 'tokens' property to your task.
{'start': 1, 'end': 4, 'label': 'ORG'}"