Labeling with both text and audio data

Hi @ninackjeong ,

Do I understand correctly that the layout you're after is something like this?

If that's the case, the easiest way (I think) to get there would be to define an HTML template for the sentences and choices. This HTML block would then be combined with the audio block in Prodigy's blocks annotation interface.
You'd also need to define a couple of JavaScript functions to handle resetting and storing the checkbox information.

So assuming the input data has the following format:

{"audio":"/audios/cat_1.wav", "sentences":[{"id": "1","text":"This is sentence 1"},{"id": "2","text":"This is sentence 2"},{"id": "3","text":"This is sentence 3"}]}
{"audio":"/audios/cat_2.wav", "sentences":[{"id": "1","text":"This is sentence 1"},{"id": "2","text":"This is sentence 2"},{"id": "3","text":"This is sentence 3"}]}
{"audio":"/audios/cat_3.wav", "sentences":[{"id": "1","text":"This is sentence 1"},{"id": "2","text":"This is sentence 2"},{"id": "3","text":"This is sentence 3"}]}    

You could define the following recipe to programmatically build tasks consisting of the audio segment and the rendered Jinja HTML template with options (note that the options are hardcoded in the recipe below, but if you have audio-specific options you could define them in the input file and read them from there):

# the recipe audio_multiple_options.py
from pathlib import Path
from typing import Union

import jinja2
import prodigy
from prodigy import set_hashes
from prodigy.components.preprocess import \
    fetch_media as fetch_media_preprocessor
from prodigy.components.stream import get_stream
from prodigy.util import msg


def load_template(path: Union[str, Path]) -> jinja2.Template:
    """Read a ``.jinja2`` file and compile it into a Jinja2 template.

    path: Location of the template file, as a string or ``Path``.
    RETURNS: A ``jinja2.Template`` built with ``jinja2.DebugUndefined`` as
        its undefined type.

    If the file suffix is not ``.jinja2`` the problem is reported through
    ``msg.fail(..., exits=1)``.
    """
    template_path = path if isinstance(path, Path) else Path(path)
    if template_path.suffix != ".jinja2":
        msg.fail(
            "Must supply jinja2 file.",
            exits=1,
        )
    with template_path.open("r", encoding="utf8") as handle:
        template_source = handle.read()
    return jinja2.Template(template_source, undefined=jinja2.DebugUndefined)


@prodigy.recipe(
    "multiple.choice",
    dataset=("The dataset to use", "positional", None, str),
    source=("The source data as a JSONL file", "positional", None, str),
    fetch_media=("Fetch media data", "flag", "FM", bool),
)
def multiple_choice(
    dataset: str,
    source: str,
    fetch_media: bool = False,
):
    """Annotate audio clips together with per-sentence label checkboxes.

    Every task read from ``source`` gets an ``html`` field rendered from
    ``template.jinja2`` and is displayed in a ``blocks`` interface made of
    an audio block followed by an HTML block. The contents of ``custom.js``
    are handed to the app as custom JavaScript. Both files are opened via
    relative paths.

    dataset: Name of the dataset the annotations are saved to.
    source: Path to the JSONL input; each record must have a ``sentences``
        key, which is passed to the template.
    fetch_media: When set, apply the fetch-media preprocessor to the
        stream for the ``audio`` and ``video`` keys.
    RETURNS: The recipe components dictionary.
    """
    task_stream = get_stream(source, loader="jsonl", input_key="audio")
    if fetch_media:
        task_stream.apply(fetch_media_preprocessor, input_keys=["audio", "video"])
    # The label set is fixed here; every sentence is offered the same labels.
    choice_labels = ["Label1", "Label2", "Label3"]
    html_template = load_template("template.jinja2")

    def add_template(stream, labels):
        """Yield each task with its rendered ``html`` field and fresh hashes."""
        for task in stream:
            task["html"] = html_template.render(
                sentences=task["sentences"], labels=labels
            )
            yield set_hashes(task)

    custom_js = Path("custom.js").read_text()

    def before_db(examples):
        """Drop the rendered HTML and restore the audio path before saving."""
        for task in examples:
            # The markup is only needed for display, not in the stored record.
            del task["html"]
            has_inline_audio = "audio" in task and task["audio"].startswith("data:")
            if has_inline_audio and "path" in task:
                # Store the value of "path" instead of the inline "data:" payload.
                task["audio"] = task["path"]
        return examples

    interface_blocks = [
        {"view_id": "audio"},
        {"view_id": "html"},
    ]
    return {
        "view_id": "blocks",
        "dataset": dataset,  # Dataset that receives the annotations
        "stream": add_template(task_stream, choice_labels),  # Tasks to annotate
        "config": {
            "blocks": interface_blocks,
            "javascript": custom_js,
        },
        "before_db": before_db,
    }

Now the missing bits, the jinja template template.jinja2:

{#- Renders every sentence followed by one checkbox per label.
    Expects `sentences` (items with "id" and "text") and `labels` (strings).
    Each checkbox id is "<sentence id>_<label>". -#}
<form style="display: block;">
    {% for sentence in sentences %}
        {#- Escape the text so characters such as "<" or "&" are shown literally. -#}
        <p>{{ sentence["text"] | e }}</p>
            {%- for label in labels -%}
                {#- `~` converts both operands to strings before joining, so non-string sentence ids work too. -#}
                {%- set checkbox_id = sentence["id"] ~ "_" ~ label -%}
                {#- The label's `for` must equal the checkbox id, otherwise clicking the label text does not toggle the box. -#}
                <input type="checkbox" class="checkbox" id="{{ checkbox_id | e }}" name="{{ label | e }}" onchange="update()" style="margin: 0.4rem;"><label for="{{ checkbox_id | e }}">{{ label | e }}</label><br>
            {%- endfor -%}
    {%- endfor -%}
</form>

Note that we are building the id of each checkbox from the id of the sentence (defined in the input file) and the label. This is necessary for the `update` JavaScript function to collect the annotations correctly (this JS file is loaded by the recipe as custom.js):

// custom.js

/**
 * Uncheck every element carrying the "checkbox" class.
 */
function reset() {
    const checkboxes = document.getElementsByClassName("checkbox");
    // Convert the collection to an array so forEach is available.
    Array.from(checkboxes).forEach((checkbox) => {
        checkbox.checked = false;
    });
}

/**
 * Store the ids of all currently checked checkboxes on the current task
 * under the `selected_labels` key.
 */
function update() {
    const checkboxes = document.getElementsByClassName("checkbox");

    // Iterate the elements themselves: `for...in` on an HTMLCollection also
    // visits non-element keys such as `length`, `item` and `namedItem`.
    const results = Array.from(checkboxes)
        .filter((checkbox) => checkbox.checked)
        .map((checkbox) => checkbox.id);

    prodigy.update({
        selected_labels: results
    });
}

// Clear all checkboxes each time a `prodigyanswer` event reaches the document.
document.addEventListener('prodigyanswer', function () {
  reset();
});

This UI should result in the following records in the DB

{
  "audio": "/Users/magdalenaaniol/Projects/support/multiple_options/cat_1.wav",
  "sentences": [
    {
      "id": "1",
      "text": "This is sentence 1"
    },
    {
      "id": "2",
      "text": "This is sentence 2"
    },
    {
      "id": "3",
      "text": "This is sentence 3"
    }
  ],
  "path": "/Users/magdalenaaniol/Projects/support/multiple_options/cat_1.wav",
  "_input_hash": 905128045,
  "_task_hash": 1354758391,
  "_view_id": "blocks",
  "audio_spans": [],
  "selected_labels": [
    "1_Label1",
    "2_Label2",
    "3_Label3"
  ],
  "answer": "accept",
  "_timestamp": 1715331984,
  "_annotator_id": "2024-05-10_11-06-15",
  "_session_id": "2024-05-10_11-06-15"
}

As you can see, the annotations are stored under the selected_labels key, using the checkbox ids we defined in the template, so you should be able to retrieve all the information when postprocessing this dataset.

The command to run the example would be:

python -m prodigy multiple.choice test input.jsonl -FM -F audio_multiple_options.py