The image below shows the result of applying what you recommended above: we get empty examples.
Below is the Python script for our custom recipe:
import csv
import logging
import prodigy
from tools.config import PAGE_CROPPING_BUCKET, page_cropping_s3_client
# S3 config: module-level aliases for the client and bucket imported from
# tools.config; stream_from_s3 reads these when presigning URLs.
s3_client = page_cropping_s3_client
bucket_name = PAGE_CROPPING_BUCKET
# Log config: send root-logger records at INFO and above to a dedicated file,
# formatted as "<LEVEL>: <message>".
logging.basicConfig(
filename="/usr/local/prodigy/logs/bdrc_crop_images.log",
format="%(levelname)s: %(message)s",
level=logging.INFO,
)
# Prodigy has a logger named "prodigy" according to
# https://support.prodi.gy/t/how-to-write-log-data-to-file/1427/10
# Set it to INFO as well.
prodigy_logger = logging.getLogger('prodigy')
prodigy_logger.setLevel(logging.INFO)
def _read_s3_keys(csv_file):
    """Return the first column of every non-blank row in ``csv_file``.

    Args:
        csv_file: Path to a comma-delimited file whose first column holds
            an S3 object key.

    Returns:
        A list of S3 keys in file order.
    """
    obj_keys = []
    # The csv module expects files opened with newline="" so that it can
    # handle embedded line breaks and platform line endings itself.
    with open(csv_file, newline="") as _file:
        for csv_line in csv.reader(_file, delimiter=","):
            # csv.reader yields [] for blank lines (e.g. a trailing newline);
            # indexing those would raise IndexError, and an empty first cell
            # is not a usable key either.
            if not csv_line or not csv_line[0].strip():
                continue
            # TODO: filter non-image files
            obj_keys.append(csv_line[0])
    return obj_keys


@prodigy.recipe("bdrc-crop-images-recipe")
def bdrc_crop_images_recipe(dataset, csv_file):
    """Build the recipe components for the page-cropping annotation task.

    Args:
        dataset: Name of the dataset, passed through under the "dataset" key.
        csv_file: Path to a CSV file whose first column lists S3 object keys.

    Returns:
        A dict with the dataset name, a stream generated from the S3 keys,
        the "image_manual" view id and a config carrying the single
        "PAGE" label.
    """
    logging.info("dataset:%s, csv_file_path:%s", dataset, csv_file)
    obj_keys = _read_s3_keys(csv_file)
    return {
        "dataset": dataset,
        "stream": stream_from_s3(obj_keys),
        "view_id": "image_manual",
        "config": {
            "labels": ["PAGE"],
        },
    }
def stream_from_s3(obj_keys, expires_in=31536000):
    """Yield one image task per S3 object key.

    Args:
        obj_keys: Iterable of S3 object keys located in ``bucket_name``.
        expires_in: Lifetime of each presigned URL in seconds. Defaults to
            31536000 (one year), the value that was previously hard-coded.

    Yields:
        Dicts of the form ``{"id": <last path segment of the key>,
        "image": <presigned GET URL>}``.
    """
    # NOTE(review): S3 limits Signature Version 4 presigned URLs to 7 days
    # (604800 seconds). If the client signs with SigV4, a one-year lifetime
    # may produce URLs that S3 rejects, so the images would fail to load.
    # Confirm the client's signature version and lower `expires_in` if needed.
    for obj_key in obj_keys:
        image_url = s3_client.generate_presigned_url(
            ClientMethod="get_object",
            Params={"Bucket": bucket_name, "Key": obj_key},
            ExpiresIn=expires_in,
        )
        # The file name (text after the last "/") serves as the task id.
        image_id = obj_key.rsplit("/", 1)[-1]
        yield {"id": image_id, "image": image_url}
Below is our configuration.json file:
{
"theme": "basic",
"custom_theme": { "cardMaxWidth": 2000 },
"buttons": ["accept", "reject", "ignore", "undo"],
"batch_size": 10,
"history_size": 10,
"port": 8090,
"host": "localhost",
"cors": true,
"db": "sqlite",
"db_settings": {
"sqlite": {
"name": "bdrc_crop_images.sqlite",
"path": "/usr/local/prodigy"
}
},
"validate": true,
"auto_exclude_current": true,
"instant_submit": true,
"feed_overlap": false,
"auto_count_stream": false,
"total_examples_target": 0,
"ui_lang": "en",
"project_info": ["dataset", "session", "lang", "recipe_name", "view_id", "label"],
"show_stats": false,
"hide_meta": false,
"show_flag": false,
"instructions": false,
"swipe": false,
"swipe_gestures": { "left": "accept", "right": "reject" },
"split_sents_threshold": false,
"html_template": false,
"global_css": null,
"javascript": null,
"writing_dir": "ltr",
"show_whitespace": false,
"exclude_by": "task"
}
Below is the systemd .service unit, located at /etc/systemd/system/prodigy_bdrc_crop_images.service:
# systemd unit that runs Prodigy with the bdrc-crop-images-recipe recipe.
[Unit]
Description=Prodigy for images
After=syslog.target network.target
[Service]
Type=simple
SyslogIdentifier=prodigy_img
# Environment for the Prodigy process: home directory, verbose logging,
# and the path of the JSON configuration file.
Environment=PRODIGY_HOME="/usr/local/prodigy"
Environment=PRODIGY_LOGGING=verbose
Environment=PRODIGY_CONFIG="/usr/local/prodigy/prodigy-tools/configuration/bdrc_crop_images.json"
WorkingDirectory=/usr/local/prodigy
# Arguments: recipe name, dataset name (bdrc_crop), CSV file of S3 keys,
# then -F with the Python file that defines the recipe.
ExecStart=/usr/bin/python3.9 -m prodigy bdrc-crop-images-recipe bdrc_crop '/usr/local/prodigy/prodigy-tools/data/page_cropping.csv' -F /usr/local/prodigy/prodigy-tools/recipes/bdrc_crop_images.py
User=prodigy
Group=prodigy
UMask=0007
# Always restart the service, waiting 10 seconds before each restart.
RestartSec=10
Restart=always
[Install]
WantedBy=multi-user.target