Spaces:
Runtime error
Runtime error
update app
Browse files- .gitignore +1 -0
- app.py +125 -25
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
app_ref.py
|
app.py
CHANGED
|
@@ -1,8 +1,11 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import random
|
| 3 |
from datasets import load_dataset
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
| 6 |
# # Sample dataset with unique 10-digit IDs
|
| 7 |
# qa_dataset = {
|
| 8 |
# "1234567890": {
|
|
@@ -18,20 +21,52 @@ from datasets import load_dataset
|
|
| 18 |
# # Add more questions with unique IDs as needed
|
| 19 |
# }
|
| 20 |
|
| 21 |
-
truth_data = load_dataset("commonsense-index-dev/commonsense-candidates", "
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
qa_dataset = {}
|
| 24 |
for item in truth_data:
|
| 25 |
qa_dataset[item["id"]] = {
|
| 26 |
"question": item["task"],
|
| 27 |
"choices": item["choices"],
|
| 28 |
-
"answer": item["answer"]
|
| 29 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
def get_random_question():
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
question_data = qa_dataset[question_id]
|
| 34 |
-
|
|
|
|
| 35 |
|
| 36 |
def get_question_by_id(question_id):
|
| 37 |
if question_id in qa_dataset:
|
|
@@ -40,44 +75,109 @@ def get_question_by_id(question_id):
|
|
| 40 |
else:
|
| 41 |
return None, "Invalid question ID", []
|
| 42 |
|
| 43 |
-
def check_answer(question_id, choice):
|
| 44 |
correct_answer = qa_dataset[question_id]["answer"]
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
question_id, question, choices = get_question_by_id(question_id)
|
| 50 |
else:
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
| 54 |
choices_markdown = "\n".join(choices)
|
| 55 |
-
return question_id, question, choices_markdown,
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
def show_buttons(choices_markdown):
|
| 58 |
choices = choices_markdown.split("\n")
|
| 59 |
visibility = [gr.update(visible=False)] * 10
|
| 60 |
for i in range(len(choices)):
|
|
|
|
|
|
|
| 61 |
visibility[i] = gr.update(visible=True, value=choices[i])
|
|
|
|
| 62 |
return visibility
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
with gr.Blocks() as app:
|
| 65 |
-
gr.Markdown("#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
-
question_id_input = gr.Textbox(label="Enter Question ID", placeholder="leave empty for random sampling")
|
| 68 |
-
random_button = gr.Button("Retrieve or Random Sample")
|
| 69 |
question_display = gr.Markdown(visible=True)
|
| 70 |
choices_markdown = gr.Markdown(visible=False)
|
| 71 |
choice_buttons = [gr.Button(visible=False) for _ in range(10)]
|
| 72 |
result_display = gr.Markdown(visible=True)
|
|
|
|
| 73 |
|
| 74 |
-
question_id = gr.
|
| 75 |
|
| 76 |
-
question_id_input.submit(fn=load_question, inputs=question_id_input, outputs=[question_id, question_display, choices_markdown, result_display])
|
| 77 |
-
random_button.click(fn=load_question, outputs=[question_id, question_display, choices_markdown, result_display])
|
| 78 |
-
choices_markdown.change(fn=show_buttons, inputs=choices_markdown, outputs=choice_buttons)
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
for i, button in enumerate(choice_buttons):
|
| 81 |
-
button.click(fn=check_answer, inputs=[question_id, button], outputs=result_display)
|
| 82 |
|
| 83 |
app.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import random
|
| 3 |
from datasets import load_dataset
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
import uuid
|
| 7 |
+
from huggingface_hub import HfApi
|
| 8 |
+
import time
|
| 9 |
# # Sample dataset with unique 10-digit IDs
|
| 10 |
# qa_dataset = {
|
| 11 |
# "1234567890": {
|
|
|
|
| 21 |
# # Add more questions with unique IDs as needed
|
| 22 |
# }
|
| 23 |
|
| 24 |
+
# Ground-truth questions and the feedback collected so far, both loaded from the Hub.
truth_data = load_dataset("commonsense-index-dev/commonsense-candidates", "iter7-0520", split="train")

logs = load_dataset("commonsense-index-dev/DemoFeedback", split="train")

# Wall-clock time of the most recent feedback-log download; read by update_logs().
LAST_LOG_UPDATE = time.time()

# Index the questions by id. Every entry carries a "reason" string, because
# get_random_question() reads it unconditionally: rows without metadata, or
# with a missing/None "reasoning" field, fall back to "N/A".
qa_dataset = {}
for item in truth_data:
    metadata = item["metadata"] if "metadata" in item else None
    qa_dataset[item["id"]] = {
        "question": item["task"],
        "choices": item["choices"],
        "answer": item["answer"],
        "reason": (metadata or {}).get("reasoning") or "N/A",
    }
|
| 39 |
+
|
| 40 |
+
def update_logs():
    """Re-download the feedback log when the cached copy is stale.

    Once more than 1800 seconds (30 minutes) have passed since
    ``LAST_LOG_UPDATE``, the module-level ``logs`` is replaced with a fresh
    load of the ``commonsense-index-dev/DemoFeedback`` train split and the
    timestamp is reset. Otherwise the function does nothing.
    """
    global LAST_LOG_UPDATE
    global logs
    seconds_since_refresh = time.time() - LAST_LOG_UPDATE
    if seconds_since_refresh <= 1800:
        # Cached copy is still recent enough; keep it.
        return
    logs = load_dataset("commonsense-index-dev/DemoFeedback", split="train")
    LAST_LOG_UPDATE = time.time()
|
| 47 |
|
| 48 |
+
def get_random_question(user_name="Anonymous"):
    """Pick a question for ``user_name``, favouring the least-reviewed ones.

    The feedback log is refreshed (if stale) and scanned to count how many
    feedback entries each known question has and which questions this user
    has already reviewed. A question is then drawn uniformly at random from
    the least-reviewed questions the user has not seen yet. If the user has
    already reviewed every question, the draw falls back to the least-reviewed
    questions overall instead of failing.

    Args:
        user_name: Name used to look up the user's earlier feedback. An empty
            value is treated as "Anonymous", the name submit_feedback() stores
            for users who leave the field blank.

    Returns:
        A ``(question_id, question, choices, reasoning)`` tuple; ``reasoning``
        is "N/A" when the question has no stored reasoning.

    Raises:
        ValueError: If ``qa_dataset`` is empty.
    """
    update_logs()
    if not user_name:
        user_name = "Anonymous"

    feedback_counts = dict.fromkeys(qa_dataset, 0)
    user_seen_data = set()
    for item in logs:
        logged_id = item["question_id"]
        if logged_id not in feedback_counts:
            # Feedback for a question that is not part of the loaded split;
            # it cannot be sampled, so it must not affect the counts.
            continue
        feedback_counts[logged_id] += 1
        if item["user_name"] == user_name:
            user_seen_data.add(logged_id)

    # Prefer questions this user has not reviewed; if none are left, reuse all.
    pool = [qid for qid in feedback_counts if qid not in user_seen_data]
    if not pool:
        pool = list(feedback_counts)

    # Among the pool, keep the questions with the fewest feedback entries and
    # break ties randomly.
    min_feedback = min(feedback_counts[qid] for qid in pool)
    question_ids = [qid for qid in pool if feedback_counts[qid] == min_feedback]
    question_id = random.choice(question_ids)

    question_data = qa_dataset[question_id]
    reasoning = question_data.get("reason", "N/A")
    return question_id, question_data["question"], question_data["choices"], reasoning
|
| 70 |
|
| 71 |
def get_question_by_id(question_id):
|
| 72 |
if question_id in qa_dataset:
|
|
|
|
| 75 |
else:
|
| 76 |
return None, "Invalid question ID", []
|
| 77 |
|
| 78 |
+
def check_answer(question_id, choice, reasoning):
    """Return the Markdown verdict for a clicked choice button.

    Args:
        question_id: Id of the question currently shown; must be a key of
            ``qa_dataset``.
        choice: Label of the clicked button, in the "A) <choice text>" form
            produced by show_buttons().
        reasoning: Explanation text appended to the verdict when the answer
            is correct.

    Returns:
        A Markdown string: a "Correct!" heading followed by the reasoning, or
        an "Incorrect" heading asking the user to try again.
    """
    correct_answer = qa_dataset[question_id]["answer"]
    # Drop the three-character "A) " style label before comparing.
    if choice[3:] == correct_answer:
        return "### ✅ Correct!" + "\n### Reasoning: " + reasoning
    return "### ❌ Incorrect. Try again!"
|
| 87 |
+
|
| 88 |
+
def load_question(question_id=None, user_name="Anonymous"):
    """Sample a question and build the UI updates needed to display it.

    Args:
        question_id: Accepted for backward compatibility but ignored; the
            question is always chosen by get_random_question().
        user_name: Forwarded to get_random_question() so questions this user
            already reviewed can be avoided.

    Returns:
        A 7-tuple, in this order: the sampled question id, the question
        formatted as Markdown, the choices joined with newlines, an update
        that clears the result display, the reasoning text, an update that
        clears the quality selection, and an update that resets and
        re-enables the submit button.
    """
    sampled_id, question, choices, reasoning = get_random_question(user_name)
    question_markdown = f"---\n#### QID: {sampled_id}\n## {question} \n---"
    choices_markdown = "\n".join(choices)
    return (
        sampled_id,
        question_markdown,
        choices_markdown,
        gr.update(value="", visible=True),
        reasoning,
        gr.update(value="", visible=True),
        gr.update(value="Submit your feedback! 🙏", interactive=True),
    )
|
| 96 |
|
| 97 |
def show_buttons(choices_markdown, max_buttons=10):
    """Build the visibility/label updates for the choice buttons.

    Args:
        choices_markdown: The choices joined with newlines, one per line. An
            empty value means no choices, so every button stays hidden.
        max_buttons: Number of choice buttons in the UI; the returned list
            always has exactly this many entries. Choices beyond this count
            are dropped rather than raising.

    Returns:
        A list of ``max_buttons`` updates. The first ``len(choices)`` make a
        button visible with an "A) ", "B) ", ... label followed by the choice
        text; the rest hide their button.
    """
    choices = choices_markdown.split("\n") if choices_markdown else []
    updates = [gr.update(visible=False) for _ in range(max_buttons)]
    for index, choice in enumerate(choices[:max_buttons]):
        # chr(65) is "A", so labels run A, B, C, ...
        label = chr(65 + index) + ") " + choice
        updates[index] = gr.update(visible=True, value=label)
    return updates
|
| 106 |
|
| 107 |
+
|
| 108 |
+
def submit_feedback(question_id, user_reason, example_quality, user_name_text):
    """Upload one feedback record to the feedback dataset repository.

    Args:
        question_id: Id of the question being reviewed.
        user_reason: Free-text explanation entered by the user.
        example_quality: Quality rating selected by the user.
        user_name_text: Name entered by the user; an empty value is stored as
            "Anonymous".

    Returns:
        A dict mapping ``submit_button`` to its update. When no question has
        been sampled or no quality has been selected, nothing is uploaded and
        the button stays enabled with a reminder. After a successful upload
        the button is disabled and shows a confirmation.

    Raises:
        ValueError: If the ``HF_TOKEN`` environment variable is not set.
    """
    # Both fields are empty (or None) until the user samples an example and
    # picks a rating; the "N/A" checks are kept from the original validation.
    if (
        not question_id
        or not example_quality
        or "N/A" in question_id
        or "N/A" in example_quality
    ):
        return {
            submit_button: {
                "interactive": True,
                "__type__": "update",
                "value": "Submit your feedback! 🙏 Please sample an example and select a choice!",
            },
        }

    if not user_name_text:
        user_name_text = "Anonymous"

    # Fail before doing any work if the upload cannot be authenticated.
    token = os.getenv("HF_TOKEN")
    if token is None:
        raise ValueError("Hugging Face token not found. Ensure the HF_TOKEN environment variable is set.")

    feedback_item = {
        "question_id": question_id,
        "user_name": user_name_text,
        "user_reason": user_reason,
        "example_quality": example_quality,
    }
    jsonl_str = json.dumps(feedback_item)

    # Each record is its own file; a random UUID name avoids collisions.
    filename = f"{uuid.uuid4()}.json"
    repo_id = "commonsense-index-dev/DemoFeedback"

    api = HfApi()
    api.upload_file(
        token=token,
        repo_id=repo_id,
        repo_type="dataset",
        path_or_fileobj=jsonl_str.encode("utf-8"),  # upload_file takes bytes, not str
        path_in_repo=filename,
        commit_message=f"{user_name_text}'s feedback on {question_id}",
    )
    return {
        submit_button: {
            "interactive": False,
            "__type__": "update",
            "value": "Submitted! ✅ \n Please sample the next one.",
        },
    }
|
| 147 |
+
|
| 148 |
+
def refresh_feedback(question_id):
    """Clear the feedback inputs when a new question is shown.

    Args:
        question_id: Id of the newly displayed question. Unused; present
            because the handler is triggered by changes to that component.

    Returns:
        Two updates, one per wired output (the reason textbox and the quality
        selector), each resetting the value to "" and keeping it visible.
    """
    # Exactly one update per output component of the event this is wired to.
    return gr.update(value="", visible=True), gr.update(value="", visible=True)
|
| 150 |
+
|
| 151 |
# UI layout and event wiring.
# NOTE(review): the nesting of components inside the rows/columns below was
# reconstructed; confirm it matches the intended layout.
with gr.Blocks() as app:
    gr.Markdown("# Commonsense Index Data Viewer")

    with gr.Row():
        random_button = gr.Button("🎲 Click here to randomly sample an example")

    question_display = gr.Markdown(visible=True)
    # Hidden carrier for the newline-joined choices; its change event drives
    # the choice buttons.
    choices_markdown = gr.Markdown(visible=False)
    choice_buttons = [gr.Button(visible=False) for _ in range(10)]
    result_display = gr.Markdown(visible=True)
    # Hidden carrier for the reasoning text passed to check_answer.
    reasoning_display = gr.Markdown(visible=False)

    question_id = gr.Textbox(label="Question ID:", interactive=False, visible=False)

    with gr.Row():
        with gr.Column(scale=2):
            reason_textbox = gr.Textbox(label="Reason", placeholder="Please talk why the correct answer is correct and why the others are wrong. If you think this is a bad example, please explain too.", type="text", elem_classes="", max_lines=5, lines=5, show_copy_button=False, visible=True, scale=4, interactive=True)
        with gr.Column():
            example_quality = gr.Radio(label="Quality", choices=["Good", "Bad"], interactive=True, visible=True)
            user_name = gr.Textbox(label="Your username", placeholder="Your username", type="text", elem_classes="", max_lines=1, show_copy_button=False, visible=True, interactive=True, show_label=False)
            submit_button = gr.Button("Submit your feedback! 🙏", elem_classes="btn_boderline", visible=True, interactive=True)

    # Inputs are passed positionally, and load_question's first parameter is
    # question_id, so the user name must be forwarded by keyword.
    random_button.click(
        fn=lambda name: load_question(user_name=name),
        inputs=[user_name],
        outputs=[question_id, question_display, choices_markdown, result_display, reasoning_display, example_quality, submit_button],
    )
    choices_markdown.change(fn=show_buttons, inputs=choices_markdown, outputs=choice_buttons)
    question_id.change(fn=refresh_feedback, inputs=[question_id], outputs=[reason_textbox, example_quality])
    submit_button.click(fn=submit_feedback, inputs=[question_id, reason_textbox, example_quality, user_name], outputs=[submit_button])
    for button in choice_buttons:
        # The button is its own input, so check_answer receives its label.
        button.click(fn=check_answer, inputs=[question_id, button, reasoning_display], outputs=result_display)

app.launch()
|