Custom recipe for `fast.ai` integration in Prodigy for labels correction

Hi Support Team,

I'm trying to integrate fast.ai model for predicting the default labels for the input image. I have tried running the code from python script and it works perfectly. But when I try to run it inside the Prodigy's recipe function, it returns the exception.

The recipe code is as following:

__all__ = ['l', 'splitter', 'get_x', 'get_y', 'b64_uri_to_bytes', 'tool_labels_correct']

import prodigy
from prodigy.components.preprocess import fetch_media
from prodigy.components.loaders import JSONL
from fastai.vision.all import *
import base64
import io

def splitter(df): pass
    
def get_x(r): pass

def get_y(r): pass

def b64_uri_to_bytes(data_uri):
    """Convert a base64-encoded data URI to bytes."""
    data = data_uri.split("base64,", 1)[1]
    return base64.decodebytes(bytes(data, "ascii"))

l = load_learner(Path('models/teacher_convnext.pkl'), cpu=True)

@prodigy.recipe(("tool_labels.correct"))
def tool_labels_correct(dataset, images_file):     

    def predict(image):
        im_bytes = b64_uri_to_bytes(image)        
        im = PILImage.create(io.BytesIO(im_bytes))
        _, _, pred = l.get_preds(dl=l.dls.test_dl([im]), with_input=False,with_decoded=True)
        labels = str(list(l.dls.vocab[np.where(pred==True)[1]])).replace('[','').replace(']','').replace("'","").replace('"','').replace(' ','')
        return labels
    
    blocks = [{"view_id":"image"},{"view_id":"text_input","field_id":"labels"}]
    
    def get_stream():
        stream=JSONL(images_file)
        stream=fetch_media(stream, ["image"], skip=True)
        for eg in stream:
            labels=predict(eg["image"])
            eg["labels"]=labels
            yield eg
            
    return {
        "dataset": dataset,
        "view_id": "blocks",
        "stream": get_stream(),
        "config": {
            "blocks": blocks
        }
    }

Following is the error message:

I have the get_x function defined inside the file but it is not getting accessible when I run the code.

Any suggestion on how to fix this issue?

Thanks in advance

Kind regards,
Bilal

When I moved all the functions inside the recipe function, I got the following error which says the same thing:

Traceback (most recent call last):
  File "/home/big-deal/mambaforge/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/big-deal/mambaforge/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/big-deal/mambaforge/lib/python3.10/site-packages/prodigy/__main__.py", line 61, in <module>
    controller = recipe(*args, use_plac=True)
  File "cython_src/prodigy/core.pyx", line 364, in prodigy.core.recipe.recipe_decorator.recipe_proxy
  File "/home/big-deal/mambaforge/lib/python3.10/site-packages/plac_core.py", line 367, in call
    cmd, result = parser.consume(arglist)
  File "/home/big-deal/mambaforge/lib/python3.10/site-packages/plac_core.py", line 232, in consume
    return cmd, self.func(*(args + varargs + extraopts), **kwargs)
  File "/home/big-deal/mlworks/surgical-ds/surg-tmi/02a_fix_labels_prodigy.py", line 29, in tool_labels_correct
    l = load_learner(Path('models/teacher_convnext.pkl'), cpu=True)
  File "/home/big-deal/mambaforge/lib/python3.10/site-packages/fastai/learner.py", line 438, in load_learner
    try: res = torch.load(fname, map_location=map_loc, pickle_module=pickle_module)
  File "/home/big-deal/mambaforge/lib/python3.10/site-packages/torch/serialization.py", line 789, in load
    return _load(opened_zipfile, map_location, pickle_module, **pickle_load_args)
  File "/home/big-deal/mambaforge/lib/python3.10/site-packages/torch/serialization.py", line 1131, in _load
    result = unpickler.load()
  File "/home/big-deal/mambaforge/lib/python3.10/site-packages/torch/serialization.py", line 1124, in find_class
    return super().find_class(mod_name, name)
AttributeError: Custom classes or functions exported with your `Learner` not available in namespace.\Re-declare/import before loading:
	Can't get attribute 'get_x' on <module 'prodigy.__main__' from '/home/big-deal/mambaforge/lib/python3.10/site-packages/prodigy/__main__.py'>

Strange. I'm not very familiar with the fast.ai implementation but I suppose a "quickfix": can you attach the label from a Jupyter notebook instead? If you generate a examples.json upfront, there's no need to worry about what happens inside Prodigy.