I've said it before but I'm saying it again.
The new docs are sooooo good compared to the prior - I love it !!
However I've stumpled upon an error that I can't exactly determine why it occurs. I have my own model to score the stream
def score_stream(nlp, classifier, stream):
for task in stream:
span = task["spans"][
0
] # grab the single span from the list (since split_spans were applied prior)
features = np.array( # create features based on preceding and following tokens
[
np.concatenate(
[
nlp(task["text"][: span["start"]]).vector,
nlp(task["text"][span["end"] :]).vector,
]
)
]
)
score = classifier.predict_proba(features)[0][1] # score the task
task["meta"]["score"] = score
yield (score, task)
The recipe looks like this (calling a few custom stream modifiers)
@prodigy.recipe(
"alba.teach",
dataset=("Dataset to save annotations to", "positional", None, str),
stream=("Input stream", "positional", None, str),
)
def teach(dataset, stream):
log("RECIPE: Starting recipe alba.teach", locals())
nlp = get_nlp()
log("RECIPE: Loaded alba")
vectors_nlp = spacy.load("en_vectors_web_lg")
clf = joblib.load("xgboost.joblib")
stream = get_stream(stream, rehash=True, dedup=True, input_key="text",)
stream = add_financial_entities(nlp, stream)
stream = split_sentences(nlp, stream, min_length=300)
stream = split_spans(
stream,
[
Entity.Amount.label,
Entity.AmountRange.label,
Entity.AmountPercent.label,
Entity.AmountRangePercent.label,
],
)
stream = add_url_meta(stream)
stream = prefer_uncertain(score_stream(vectors_nlp, clf, stream))
return {
"view_id": "classification",
"dataset": dataset,
"stream": stream,
}
Having prefer_uncertain(score_stream(vectors_nlp, clf, stream))
causes the error
future: <Task finished name='Task-6' coro=<RequestResponseCycle.run_asgi() done, defined at /home/nixd/plx/alba/venv/lib/python3.8/site-packages/uvicorn/protocols/http/httptools_impl.py:383> exception=ValueError([KeyError(<class 'numpy.float32'>), TypeError("'numpy.float32' object is not iterable"), TypeError('vars() argument must have __dict__ attribute')])>
Traceback (most recent call last):
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/uvicorn/protocols/http/httptools_impl.py", line 388, in run_asgi
self.logger.error(msg, exc_info=exc)
File "/usr/local/lib/python3.8/logging/__init__.py", line 1463, in error
self._log(ERROR, msg, args, **kwargs)
File "/usr/local/lib/python3.8/logging/__init__.py", line 1577, in _log
self.handle(record)
File "/usr/local/lib/python3.8/logging/__init__.py", line 1586, in handle
if (not self.disabled) and self.filter(record):
File "/usr/local/lib/python3.8/logging/__init__.py", line 807, in filter
result = f.filter(record)
File "cython_src/prodigy/util.pyx", line 120, in prodigy.util.ServerErrorFilter.filter
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/uvicorn/protocols/http/httptools_impl.py", line 385, in run_asgi
result = await app(self.scope, self.receive, self.send)
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/uvicorn/middleware/proxy_headers.py", line 45, in __call__
return await self.app(scope, receive, send)
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/fastapi/applications.py", line 140, in __call__
await super().__call__(scope, receive, send)
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/starlette/applications.py", line 134, in __call__
await self.error_middleware(scope, receive, send)
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/starlette/middleware/errors.py", line 178, in __call__
raise exc from None
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/starlette/middleware/errors.py", line 156, in __call__
await self.app(scope, receive, _send)
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/starlette/middleware/cors.py", line 84, in __call__
await self.simple_response(scope, receive, send, request_headers=headers)
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/starlette/middleware/cors.py", line 140, in simple_response
await self.app(scope, receive, send)
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/starlette/exceptions.py", line 73, in __call__
raise exc from None
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/starlette/exceptions.py", line 62, in __call__
await self.app(scope, receive, sender)
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/starlette/routing.py", line 590, in __call__
await route(scope, receive, send)
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/starlette/routing.py", line 208, in __call__
await self.app(scope, receive, send)
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/starlette/routing.py", line 41, in app
response = await func(request)
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/fastapi/routing.py", line 134, in app
response_data = serialize_response(
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/fastapi/routing.py", line 73, in serialize_response
return jsonable_encoder(
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/fastapi/encoders.py", line 52, in jsonable_encoder
return jsonable_encoder(
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/fastapi/encoders.py", line 82, in jsonable_encoder
encoded_value = jsonable_encoder(
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/fastapi/encoders.py", line 96, in jsonable_encoder
jsonable_encoder(
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/fastapi/encoders.py", line 82, in jsonable_encoder
encoded_value = jsonable_encoder(
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/fastapi/encoders.py", line 82, in jsonable_encoder
encoded_value = jsonable_encoder(
File "/home/nixd/plx/alba/venv/lib/python3.8/site-packages/fastapi/encoders.py", line 125, in jsonable_encoder
raise ValueError(errors)
ValueError: [KeyError(<class 'numpy.float32'>), TypeError("'numpy.float32' object is not iterable"), TypeError('vars() argument must have __dict__ attribute')]