Hello,
I'm trying to build a PDF annotation tool that works page by page, and I want to add some HTML
to show the progress. I save the page number and some other information on each task in the input stream. I want to access that data from the stream, but it doesn't work.
Here is the code:
@prodigy.recipe("classify-images.manual")
def classify_images_manual(dataset, source):
    """Annotate PDFs page by page with a progress heading and a choice block.

    Args:
        dataset: Value passed through as the recipe's "dataset" component.
        source: Directory whose entries are each opened with
            ``convert_from_path``.

    Returns:
        The recipe components dict ("dataset", "stream", "view_id",
        "config") consumed by Prodigy.
    """

    def get_stream():
        """Yield one task dict per PDF page, saving the page as a JPEG first."""
        # p.save() below fails if the target directory is missing, so make
        # sure it exists before the first page is written.
        os.makedirs(f"{os.getcwd()}/images", exist_ok=True)
        files = os.listdir(source)
        for f in files:
            # Load each PDF and save images
            pages = convert_from_path(f"{source}/{f}")
            # Same for every page of this file, so compute it once.
            file_name = f.split(".")[0]
            for i, p in enumerate(pages):
                image_file_name = f"{os.getcwd()}/images/{file_name}_{i}.jpg"
                p.save(image_file_name, "JPEG")
                im = {"image": image_file_name}
                im["options"] = OPTIONS
                # Extra task properties read by the HTML template below.
                im["file_name"] = file_name
                im["page"] = i + 1  # 1-based page number for display
                im["total"] = len(pages)
                yield im

    return {
        "dataset": dataset,
        "stream": fetch_images(get_stream()),
        "view_id": "blocks",
        "config": {
            "blocks": [
                {
                    "view_id": "html",
                    # "html" is rendered as static markup; a Mustache
                    # template that reads task properties has to be passed
                    # as "html_template" for {{...}} to be substituted.
                    "html_template": "<h1>File {{file_name}} ({{page}}/{{total}})</h1>",
                },
                {"view_id": "choice", "choice_style": "single",},  # or "multiple"
            ]
        },
    }
I don't know whether this is related to the fetch_images
function.