Hey I am attempting to combine image/image_manual with choice and have a few questions.
My ultimate goal with this recipe is to stream in images and make a single selection for each one. The "options" for the choice interface are not listed in an existing JSONL file, so I tried to add them through the recipe.
Here is the terminal recipe command
prodigy image2.manual 1KRandom -F /Downloads/image2_manual.py ./images --label Person --remove-base64
Here is the full recipe. You can see where I defined the options, thinking that I would make a selection and it would be saved. But I think I'm missing some logic here.
import prodigy
from prodigy.components.loaders import Images
from prodigy.util import split_string
from typing import List, Optional
@prodigy.recipe(
    "image2.manual",
    dataset=("The dataset to use", "positional", None, str),
    source=("Path to a directory of images", "positional", None, str),
    loader=("Loader if source is not directory of images", "option", "lo", str),
    label=("One or more comma-separated labels", "option", "l", split_string),
    exclude=("Names of datasets to exclude", "option", "e", split_string),
    darken=("Darken image to make boxes stand out more", "flag", "D", bool),
    # The CLI annotation for no_fetch belongs here, not as the parameter's
    # default value in the function signature.
    no_fetch=("Don't fetch images as base64", "flag", "NF", bool),
    remove_base64=(
        "Remove base64-encoded image data before storing example in the DB. "
        "(Caution: if enabled, make sure to keep original files!)",
        "flag",
        "R",
        bool,
    ),
)
def image2_manual(
    dataset: str,
    source: str,
    loader: str = "jsonl",
    label: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
    darken: bool = False,
    no_fetch: bool = False,
    remove_base64: bool = False,
):
    """Annotate images with bounding boxes and a single-choice question.

    Builds a "blocks" interface that stacks an ``image_manual`` block on top
    of a ``choice`` block. Every streamed image task gets the same ten
    options ("1" .. "9", "10+") attached under the ``"options"`` key.

    Args:
        dataset: Name of the dataset to save annotations to.
        source: Path passed to the ``Images`` loader.
        loader: Accepted for CLI compatibility; not used by this recipe,
            which always loads ``source`` with ``Images``.
        label: Labels selectable in the ``image_manual`` block.
        exclude: Names of datasets whose examples should be excluded.
        darken: If true, ``darken_image`` is set to 0.2 instead of 0.
        no_fetch: Accepted for CLI compatibility; not used by this recipe.
        remove_base64: If true, a ``before_db`` callback replaces ``data:``
            URIs in ``"image"`` with the example's ``"path"`` before saving.

    Returns:
        The recipe components dict (view id, dataset, stream, callbacks and
        config).

    Note:
        The full ``"options"`` list is part of each task, so it is stored
        with every saved example. Going by the sample output produced with
        this recipe, the id of the option that was picked appears in the
        separate ``"accept"`` list (e.g. ``"accept": [0]``).
    """

    def get_stream():
        """Yield image tasks from ``source`` with the choice options added."""
        stream = Images(source)
        options = [
            {"id": 0, "text": "1"},
            {"id": 1, "text": "2"},
            {"id": 2, "text": "3"},
            {"id": 3, "text": "4"},
            {"id": 4, "text": "5"},
            {"id": 5, "text": "6"},
            {"id": 6, "text": "7"},
            {"id": 7, "text": "8"},
            {"id": 8, "text": "9"},
            {"id": 9, "text": "10+"},
        ]
        for eg in stream:
            # Give each task its own copy so that mutating one task's
            # options can never leak into the others.
            eg["options"] = [dict(option) for option in options]
            yield eg

    blocks = [
        {"view_id": "image_manual"},
        # The image is already rendered by the image_manual block, so blank
        # out "image" and "text" for the choice block.
        {"view_id": "choice", "image": None, "text": None},
    ]

    def before_db(examples):
        """Replace data URIs in ``"image"`` with the example's ``"path"``."""
        for eg in examples:
            image = eg.get("image")
            # Tolerate examples without an "image" key or with a non-string
            # value instead of raising KeyError / AttributeError.
            if isinstance(image, str) and image.startswith("data:"):
                eg["image"] = eg.get("path")
        return examples

    return {
        "view_id": "blocks",
        "dataset": dataset,
        "stream": get_stream(),
        "before_db": before_db if remove_base64 else None,
        "exclude": exclude,
        "config": {
            "blocks": blocks,
            "label": ", ".join(label) if label is not None else "all",
            "labels": label,  # Selectable label options
            "darken_image": 0.2 if darken else 0,
            "show_bounding_box_center": True,
            "show_bounding_box_size": True,
            "choice_style": "single",
            "show_stats": True,
        },
    }
However, after annotating 1,000 images, the output JSONL file still includes the base64-encoded image data (I have a script to remove this), and ALL of the choices are listed there. So I'm not sure how to actually combine image_manual and choice so that the output only includes the selection made during the session.
Here is an example output
{"image": "super long base64 hash",
"text": "mon276056",
"meta": { "file": "mon276056.jpg" },
"path": "images/mon276056.jpg",
"options": [
{ "id": 0, "text": "1" },
{ "id": 1, "text": "2" },
{ "id": 2, "text": "3" },
{ "id": 3, "text": "4" },
{ "id": 4, "text": "5" },
{ "id": 5, "text": "6" },
{ "id": 6, "text": "7" },
{ "id": 7, "text": "8" },
{ "id": 8, "text": "9" },
{ "id": 9, "text": "10+" }
],
"_input_hash": -393245504,
"_task_hash": 591501872,
"_view_id": "blocks",
"accept": [0],
"config": { "choice_style": "single" },
"width": 667,
"height": 1000,
"answer": "accept",
"_timestamp": 1659899815
}
Any guidance is appreciated.
thx