File size: 12,865 Bytes
ba99c06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
"""
Voting module for BigCodeArena
Handles vote submission, data management, and UI components
"""

import gradio as gr
import pandas as pd
import datetime
import os
import threading
from datasets import Dataset, load_dataset


# HuggingFace dataset configuration
# Both values are read from the environment once, at import time, and are None
# when the variable is unset; save_vote_to_hf reports that case as an error
# instead of attempting an upload.
HF_DATASET_NAME = os.getenv("HF_DATASET_NAME")  # dataset repo passed to load_dataset / push_to_hub
HF_TOKEN = os.getenv("HF_TOKEN")  # fallback token used when the caller passes no hf_token


def _serialize_value(value):
    """Return *value* with every datetime, at any nesting depth, as an ISO string."""
    if isinstance(value, datetime.datetime):
        return value.isoformat()
    if isinstance(value, dict):
        return {key: _serialize_value(item) for key, item in value.items()}
    if isinstance(value, list):
        return [_serialize_value(item) for item in value]
    # Anything else (str, int, None, ...) is passed through untouched.
    return value


def serialize_interactions(interactions):
    """Convert datetime objects in interactions to ISO format strings.

    Args:
        interactions: A list whose items are dicts, nested lists, or plain
            values. Falsy input (``None``, ``[]``) is returned unchanged.

    Returns:
        A new list with the same structure in which every
        ``datetime.datetime`` has been replaced by ``value.isoformat()``.
        Datetimes are converted wherever they occur: as direct dict values,
        inside nested dicts or lists, or as bare items of the list. The input
        containers are not mutated.
    """
    if not interactions:
        return interactions

    return [_serialize_value(item) for item in interactions]


# Serializes the load -> append -> push cycle below. Votes are saved from
# background threads, and without this two overlapping saves could both load
# the same snapshot and the later push would drop the other vote. This only
# protects threads of the current process, not other processes or replicas.
_HF_SAVE_LOCK = threading.Lock()


def save_vote_to_hf(
    model_a, model_b, prompt, response_a, response_b, vote_result, interactions_a=None, interactions_b=None, conversation_a=None, conversation_b=None, hf_token=None
):
    """Save vote result to HuggingFace dataset with full conversation history.

    The existing ``train`` split of ``HF_DATASET_NAME`` is downloaded, the new
    vote is appended as one row, and the whole dataset is pushed back.

    Args:
        model_a: Name of the model shown on the left.
        model_b: Name of the model shown on the right.
        prompt: Stored as the ``initial_prompt`` column.
        response_a: Accepted for interface compatibility; not stored as its
            own column (the responses are part of ``conversation_a``).
        response_b: Same as ``response_a``, for model B.
        vote_result: Stored as ``vote`` ("left", "right", "tie", "both_bad").
        interactions_a: Interaction records for model A, stored as ``action_a``.
        interactions_b: Interaction records for model B, stored as ``action_b``.
        conversation_a: Message list for model A.
        conversation_b: Message list for model B.
        hf_token: Access token; falls back to the ``HF_TOKEN`` module constant.

    Returns:
        ``(success, message)``. The function never raises: every failure is
        reported as ``(False, <description>)``.
    """
    try:
        # Use global token if not provided
        token = hf_token or HF_TOKEN
        if not token:
            return False, "HuggingFace token not found in environment (HF_TOKEN)"

        if not HF_DATASET_NAME:
            return False, "HuggingFace dataset name not found in environment (HF_DATASET_NAME)"

        # datetime values are turned into ISO strings so the row can be
        # converted to a Dataset. Interactions are currently stored as one
        # flat list; they are not yet grouped per conversation turn.
        vote_data = {
            "timestamp": datetime.datetime.now().isoformat(),
            "model_a": model_a,
            "model_b": model_b,
            "initial_prompt": prompt,
            "action_a": serialize_interactions(interactions_a or []),
            "action_b": serialize_interactions(interactions_b or []),
            "conversation_a": serialize_interactions(conversation_a or []),
            "conversation_b": serialize_interactions(conversation_b or []),
            "vote": vote_result,  # "left", "right", "tie", "both_bad"
        }
        new_row = pd.DataFrame([vote_data])

        with _HF_SAVE_LOCK:
            try:
                dataset = load_dataset(HF_DATASET_NAME, split="train", token=token)
            except FileNotFoundError:
                # Repository or data files do not exist yet: start a new
                # dataset containing only this vote.
                new_df = new_row
            except Exception as load_error:
                # Any other failure (network, auth, ...) must NOT fall through
                # to "create new dataset": pushing a one-row dataset would
                # overwrite every vote already stored on the Hub.
                return False, f"Error loading existing votes from HuggingFace: {str(load_error)}"
            else:
                if hasattr(dataset, "to_pandas"):
                    existing_df = dataset.to_pandas()
                else:
                    existing_df = pd.DataFrame(dataset)
                new_df = pd.concat([existing_df, new_row], ignore_index=True)

            # Convert back to dataset and push
            new_dataset = Dataset.from_pandas(new_df)
            try:
                new_dataset.push_to_hub(HF_DATASET_NAME, token=token)
            except Exception as upload_error:
                return False, f"Error uploading to HuggingFace: {str(upload_error)}"

        return True, "Vote saved successfully!"
    except Exception as e:
        return False, f"Error saving vote: {str(e)}"


def _last_assistant_content(messages):
    """Return the content of the most recent assistant message, or "" if none."""
    for msg in reversed(messages):
        if msg.get("role") == "assistant":
            return msg.get("content", "")
    return ""


def handle_vote(state0, state1, vote_type):
    """Handle vote submission.

    Validates that both sides have output, then hands the vote to
    ``save_vote_to_hf`` on a daemon thread so the UI is not blocked by the
    upload.

    Args:
        state0: State dict for model A. Keys read here: "has_output",
            "messages", "interactions", "model_name".
        state1: State dict for model B, same keys.
        vote_type: Vote label forwarded unchanged to ``save_vote_to_hf``.

    Returns:
        A 3-tuple ``(status_message, ranking_table_update, last_updated_text)``.
        The ranking table update is always a no-op ``gr.update()``. The status
        is returned before the upload finishes; upload failures are only
        printed, not reported to the caller.
    """
    if (
        not state0
        or not state1
        or not state0.get("has_output")
        or not state1.get("has_output")
    ):
        return (
            "No output to vote on!",
            gr.update(),
            "**Last Updated:** No data available",
        )

    # Snapshot everything the worker needs before returning: the worker runs
    # after this handler has returned, so it should not read state that the
    # UI may reset or reuse in the meantime. The copies are shallow.
    model_a = state0.get("model_name")
    model_b = state1.get("model_name")
    conversation_a = list(state0.get("messages") or [])
    conversation_b = list(state1.get("messages") or [])
    interactions_a = list(state0.get("interactions") or [])
    interactions_b = list(state1.get("interactions") or [])

    # The first user message of conversation A is stored as the initial
    # prompt; an empty string is used when there is no user message.
    initial_prompt = next(
        (msg.get("content", "") for msg in conversation_a if msg.get("role") == "user"),
        "",
    )
    response_a = _last_assistant_content(conversation_a)
    response_b = _last_assistant_content(conversation_b)

    # Save vote with full conversation history to remote dataset in background
    def save_vote_background():
        if model_a is None or model_b is None:
            print("Error saving vote: model name missing from state")
            return
        try:
            success, message = save_vote_to_hf(
                model_a,
                model_b,
                initial_prompt,
                response_a,
                response_b,
                vote_type,
                interactions_a,
                interactions_b,
                conversation_a,
                conversation_b,
            )
        except Exception as e:
            print(f"Error saving vote: {str(e)}")
        else:
            # save_vote_to_hf reports failures through its return value, so
            # they have to be logged here or they would go unnoticed.
            if not success:
                print(f"Error saving vote: {message}")

    print("Saving vote in background...")
    threading.Thread(target=save_vote_background, daemon=True).start()

    # Return immediately without waiting for the upload or a ranking refresh
    return (
        "Vote recorded! Uploading data in background... Clearing conversation...",
        gr.update(),  # Keep existing ranking table
        "**Last Updated:** Processing in background...",
    )


def create_vote_ui():
    """Create vote UI components.

    Must be called inside an active Gradio layout context. Builds a heading
    row, a row with the four vote buttons, and a status Markdown element. All
    of them start hidden.

    Returns:
        Dict mapping component names ('vote_section', 'vote_buttons_row',
        'vote_left_btn', 'vote_right_btn', 'vote_tie_btn',
        'vote_both_bad_btn', 'vote_status') to the created components.
    """
    # Heading row - only made visible once there is output to vote on
    with gr.Row(visible=False) as vote_section:
        gr.Markdown("### 🗳️ Which response is better?")

    # Buttons are laid out left to right: A, tie, both bad, B
    with gr.Row(visible=False) as vote_buttons_row:
        vote_left_btn = gr.Button(
            "👍 A is Better", variant="primary", size="lg"
        )
        vote_tie_btn = gr.Button(
            "🤝 It's a Tie", variant="secondary", size="lg"
        )
        vote_both_bad_btn = gr.Button(
            "👎 Both are Bad", variant="secondary", size="lg"
        )
        vote_right_btn = gr.Button(
            "👍 B is Better", variant="primary", size="lg"
        )

    # Vote status message
    vote_status = gr.Markdown("", visible=False)

    return {
        'vote_section': vote_section,
        'vote_buttons_row': vote_buttons_row,
        'vote_left_btn': vote_left_btn,
        'vote_right_btn': vote_right_btn,
        'vote_tie_btn': vote_tie_btn,
        'vote_both_bad_btn': vote_both_bad_btn,
        'vote_status': vote_status
    }


def should_show_vote_buttons(state0, state1):
    """Check if vote buttons should be shown.

    Args:
        state0: State dict for model A, or a falsy value if there is none.
        state1: State dict for model B, or a falsy value if there is none.

    Returns:
        ``True`` only when both states are non-empty, have "has_output" set,
        and are not "generating"; ``False`` otherwise. The result is always a
        real ``bool`` (never ``None`` or an empty dict), so it can be passed
        on safely as a visibility/interactivity flag.
    """
    return all(
        bool(state)
        and bool(state.get("has_output", False))
        and not state.get("generating", False)
        for state in (state0, state1)
    )


def get_vote_ui_updates(show_buttons=False):
    """Build the per-component updates that show or hide the voting UI.

    The heading and button rows take ``show_buttons`` as their visibility,
    the status message is always hidden, and each of the four vote buttons
    takes ``show_buttons`` as its ``interactive`` flag.

    Returns:
        Dict keyed by the same component names that ``create_vote_ui`` uses.
    """
    visibility = (
        ('vote_section', show_buttons),
        ('vote_buttons_row', show_buttons),
        ('vote_status', False),
    )
    button_names = (
        'vote_left_btn',
        'vote_right_btn',
        'vote_tie_btn',
        'vote_both_bad_btn',
    )

    updates = {name: gr.update(visible=flag) for name, flag in visibility}
    for button_name in button_names:
        updates[button_name] = gr.update(interactive=show_buttons)
    return updates


def setup_vote_handlers(vote_components, state0_var, state1_var, text_input, ranking_table, ranking_last_update):
    """Setup vote button event handlers.

    Registers the same click handler on each of the four vote buttons, passing
    the button's vote label ("left", "right", "tie", "both_bad") through a
    ``gr.State`` input.

    Args:
        vote_components: Dict as returned by ``create_vote_ui``.
        state0_var: Gradio state component for model A.
        state1_var: Gradio state component for model B.
        text_input: Prompt text component; passed as an input to the handler.
        ranking_table: Not referenced in this function.
        ranking_last_update: Not referenced in this function.

    Returns:
        The ``vote_components`` dict that was passed in.

    NOTE(review): ``process_vote`` returns 28 values, but only three output
    components are wired below. The remaining outputs depend on components
    owned by the main app and must be supplied there for the extra values to
    take effect.
    """

    def process_vote(state0, state1, vote_type, current_text):
        # current_text is received because text_input is wired as an input,
        # but it is not used here.
        # Save the vote and get updates
        message, ranking_update, last_update = handle_vote(
            state0, state1, vote_type
        )

        # Show thank you message
        gr.Info(
            "Thank you for your vote! 🎉 Your feedback has been recorded.",
            duration=5,
        )

        # Return only vote status, ranking updates and hide voting interface
        return (
            message,  # vote status message
            gr.update(),  # Keep state0 unchanged
            gr.update(),  # Keep state1 unchanged
            gr.update(),  # Keep chatbot_a unchanged
            gr.update(),  # Keep chatbot_b unchanged
            gr.update(),  # Keep response_a unchanged
            gr.update(),  # Keep response_b unchanged
            gr.update(),  # Keep code_a unchanged
            gr.update(),  # Keep code_b unchanged
            gr.update(),  # Keep sandbox_view_a unchanged
            gr.update(),  # Keep sandbox_view_b unchanged
            gr.update(),  # Keep sandbox_component_a unchanged
            gr.update(),  # Keep sandbox_component_b unchanged
            gr.update(),  # Keep chat_stats_a unchanged
            gr.update(),  # Keep chat_stats_b unchanged
            gr.update(),  # Keep model_display_a unchanged
            gr.update(),  # Keep model_display_b unchanged
            gr.update(visible=False),  # Hide vote_section
            gr.update(visible=False),  # Hide vote_buttons_row
            gr.update(),  # Keep state0_var unchanged
            gr.update(),  # Keep state1_var unchanged
            ranking_update,  # Update ranking_table
            last_update,  # Update ranking_last_update
            gr.update(),  # Keep vote_left_btn unchanged
            gr.update(),  # Keep vote_right_btn unchanged
            gr.update(),  # Keep vote_tie_btn unchanged
            gr.update(),  # Keep vote_both_bad_btn unchanged
            gr.update(),  # Keep text_input unchanged
        )

    # Vote button click handlers. The vote label travels as a gr.State input
    # rather than being captured by a closure, so each button sends its own.
    for vote_btn, vote_type in [
        (vote_components['vote_left_btn'], "left"),
        (vote_components['vote_right_btn'], "right"),
        (vote_components['vote_tie_btn'], "tie"),
        (vote_components['vote_both_bad_btn'], "both_bad"),
    ]:
        vote_btn.click(
            fn=process_vote,
            inputs=[state0_var, state1_var, gr.State(vote_type), text_input],
            outputs=[
                vote_components['vote_status'],  # vote status message
                state0_var,  # state0
                state1_var,  # state1
                # Note: The actual outputs list will need to be filled in by the calling code
                # as it depends on the specific UI components in the main app
            ],
        )

    return vote_components