I am having trouble getting tasks into my queue using JSONL input and a custom recipe. This method was working until recently, so I am not sure what changed to cause the problem.
I have a JSONL file, something like:
{"image": "https://example.org/items/5c0dff34845f43369a4008653691f35c/preview/preview.jpg?s=640x"}
{"image": "https://example.org/items/5e8e79f8b0fd435388d536cee9b30f54/preview/preview.jpg?s=640x"}
...
And I have a custom recipe for image classification:
import prodigy
from prodigy.util import split_string
from prodigy.components.loaders import Images
from prodigy.components.loaders import JSONL, CSV
from prodigy.components.preprocess import fetch_images
def add_options(stream, options):
    """Lazily attach the choice options to every task in the stream.

    Each entry of ``options`` becomes a ``{'id': ..., 'text': ...}`` dict
    where both values are the option itself. The resulting list is built
    once, on first iteration, and the very same list object is assigned to
    the ``'options'`` key of every task. Tasks are modified in place and
    yielded in their original order.
    """
    choices = [dict(id=label, text=label) for label in options]

    def attach(task):
        # Mutates the incoming task rather than copying it.
        task['options'] = choices
        return task

    yield from map(attach, stream)
@prodigy.recipe('imagecat.manual',
    dataset=("The dataset to use", "positional", None, str),
    source=("Path to a JSONL file", "positional", None, str),
    options=("One or more comma-separated options", "option", "o", split_string),
    multiple=("Allow multiple choice", "flag", "M", bool),
    exclude=("Names of datasets to exclude", "option", "e", split_string)
)
def imagecat_manual(dataset, source, options=None, multiple=False, exclude=None):
    """Manually classify images from a JSONL file with the choice interface.

    dataset: name of the dataset the annotations are saved to.
    source: path to a JSONL file that is loaded as the task stream.
    options: list of option labels added to every task; ``None`` is treated
        as an empty list so the stream and the exit summary do not fail
        when no options are passed.
    multiple: if true, use the multiple-choice style and disable
        auto-accept; otherwise single choice with auto-accept.
    exclude: list of dataset names to exclude, passed through unchanged.

    Returns the dict of recipe components (view, dataset, stream, config,
    exit callback).
    """
    # The default is None, but the options are iterated both by the stream
    # and by on_exit, so normalize once up front.
    options = options or []

    stream = JSONL(source)
    stream = add_options(stream, options)

    def get_count_by_option(examples, option):
        # Records without an 'accept' list simply count as not selecting
        # the option instead of raising KeyError.
        return sum(1 for eg in examples if option in eg.get('accept', []))

    def on_exit(controller):
        # NOTE(review): get_dataset presumably returns None when nothing was
        # saved under this session id - confirm against the Database API.
        # Falling back to an empty list keeps shutdown from crashing.
        examples = controller.db.get_dataset(controller.session_id) or []
        accepted = [eg for eg in examples if eg.get('answer') == 'accept']
        for option in options:
            count = get_count_by_option(accepted, option)
            print('Annotated {} {} examples'.format(count, option))

    return {
        'view_id': 'choice',    # Annotation interface to use
        'dataset': dataset,     # Name of dataset to save annotations
        'stream': stream,       # Incoming stream of examples
        'options': options,
        'exclude': exclude,     # List of dataset names to exclude
        'config': {             # Additional config settings, mostly for app UI
            'choice_style': 'multiple' if multiple else 'single',
            'choice_auto_accept': not multiple
        },
        'on_exit': on_exit      # Called when Prodigy server is stopped
    }
And I invoke the recipe like so:
prodigy imagecat.manual test ./test.jsonl -F ./scripts/imagecat_manual.py --option "landscape,not_landscape"
I open up my browser and see:
No tasks available.
The verbose logging indicates that, when I made the request from my browser, the service returned an empty tasks array:
✨ Starting the web server at http://0.0.0.0:8080 ...
Open the app in your browser and start annotating!
15:28:15 - GET: /project
{'choice_style': 'multiple', 'choice_auto_accept': True, 'dataset': 'landscape', 'recipe_name':
'imagecat.manual', 'port': 8080, 'host': '0.0.0.0', 'feed_overlap': False, 'global_css': '.pro
digy-title { font-size: 60px;}', 'view_id': 'choice', 'batch_size': 10, 'version': '1.8.3'}
15:28:15 - POST: /get_session_questions
15:28:15 - FEED: Finding next batch of questions in stream
15:28:15 - RESPONSE: /get_session_questions (0 examples)
{'tasks': [], 'total': 0, 'progress': None, 'session_id': 'landscape-default'}
I've been using Prodigy, and this recipe, for a while now without issue. So I must have changed something to cause the problem, but I can't see it. I'm hoping another set of eyes can help.