Hi @ines , thanks for your timely reply!
Given your code, I got two follow-up questions:
- If the HTML checkbox is checked in one task, it will also be checked in the next tasks, so users will have to manually click or unclick it every time. I tried to unset it using some simple javascript (see recipe below), but this does not work. Perhaps because the javascript is executed only once when the webapp is started up? What would be a suitable spot do unset the checkbox? EDIT: It seems the js-code is never executed at all, I tried adding an
alert("test");
as the first line, but the popup never shows up.
- The html's checkbox state is not stored in the results using the code you posted (see my full recipe below). The output is as follows:
{"gid":"allsides_3871_1604_25_Brown_64_69","text":"Asked whether the encounter would have unfolded the same way if Brown had been white, Wilson said yes.","html":"Asked whether the encounter would have unfolded the same way if <span style=\"background-color:#beaed4;font-weight:bold;text-decoration:underline\">Brown had been white, Wilson said yes.","target":"Brown","options":[{"id":"q1_1","text":"strongly negative attitude (dislike)"},{"id":"q1_2","text":"2"},{"id":"q1_3","text":"3"},{"id":"q1_4","text":"neutral"},{"id":"q1_5","text":"5"},{"id":"q1_6","text":"6"},{"id":"q1_7","text":"strongly positive attitude (like)"}],"_input_hash":659129653,"_task_hash":-1447545197,"_session_id":"bias1-fabienne","_view_id":"blocks","accept":["q1_5"],"remarks":"remarki","answer":"accept"}
This is the recipe:
import prodigy
from prodigy.components.db import connect
from prodigy.components.loaders import JSONL
@prodigy.recipe(
"newstsc",
dataset=prodigy.recipe_args["dataset"],
file_path=("Path to texts", "positional", None, str),
)
def sentiment(dataset, file_path):
"""Annotate the sentiment of texts."""
stream = get_stream_loop(file_path, dataset)
HTML_TEMPLATE = '<input type="checkbox" id="is_wcl_checkbox" /><label for="is_wcl_checkbox">Is bias by word choice and labeling regading highlighted subject (how it is described, not what)?</label>'
JAVASCRIPT = """document.querySelector('#is_wcl_checkbox').checked = false;
document.addEventListener('prodigymount', () => {
const checkbox = document.querySelector('#is_wcl_checkbox')
checkbox.addEventListener('change', event => {
window.prodigy.update({ checked: event.target.checked })
})
})
"""
blocks = [
{"view_id": "choice"},
{"view_id": "html", "html_template": HTML_TEMPLATE, "javascript": JAVASCRIPT},
{"view_id": "text_input", "field_id": "remarks", "field_label": "Remarks"},
]
return {
"dataset": dataset, # save annotations in this dataset
"stream": stream,
"view_id": "blocks", # use the choice interface
"config": {
"choice_auto_accept": False,
"feed_overlap": True, # Whether to send out each example once so itβs annotated by someone (false) or whether to send out each example to every session (true, default). Should be used with custom user sessions set via the app (via /?session=user_name).
"force_stream_order": True, # Always send out tasks in the same order and re-send them until theyβre answered, even if the app is refreshed in the browser
"instructions": "/prodigy/manual.html",
"blocks": blocks,
},
}
def get_stream_loop(file_path, dataset):
# to prevent that no tasks are shown even though there are still unlabeled tasks
# left, cf.
# https://support.prodi.gy/t/struggling-to-create-a-multiple-choice-image-classification/1345/2
db = connect()
while True:
stream = get_stream(file_path)
hashes_in_dataset = db.get_task_hashes(dataset)
yielded = False
for eg in stream:
# Only send out task if its hash isn't in the dataset yet, which should mean
# that we will not have duplicates
if eg["_task_hash"] not in hashes_in_dataset:
yield eg
yielded = True
if not yielded:
break
def get_stream(file_path):
stream = JSONL(file_path) # load in the JSONL file
stream = add_options(stream) # add options to each task
for eg in stream:
eg = prodigy.set_hashes(eg)
yield eg
def add_options(stream):
"""Helper function to add options to every task in a stream."""
options = [
{"id": "q1_1", "text": "strongly negative attitude (dislike)"},
{"id": "q1_2", "text": "2"},
{"id": "q1_3", "text": "3"},
{"id": "q1_4", "text": "neutral"},
{"id": "q1_5", "text": "5"},
{"id": "q1_6", "text": "6"},
{"id": "q1_7", "text": "strongly positive attitude (like)"},
]
for task in stream:
task["options"] = options
yield task
Thanks a lot for your help!
PS: I removed some parts of an earlier version of this post as I was able to fix the issue by one modification of my recipe, see this message's history, in case someone is interested - to fix the described issue, I added one line of code that I commented out earlier, which added the options to the stream.