rdune71 committed · verified
Commit e2a70cb · 1 Parent(s): 0053bb3

Update app.py
Files changed (1): app.py +94 -26
app.py CHANGED
@@ -97,6 +97,39 @@ def tavily_search(query):
     except Exception as e:
         return f"Tavily search error: {str(e)}"
 
+def truncate_history(messages, max_tokens=4000):
+    """Truncate conversation history to prevent context overflow"""
+    if not messages:
+        return []
+
+    # Simplified token estimation (4 chars ~ 1 token)
+    estimated_tokens = sum(len(msg.get("content", "")) for msg in messages) // 4
+
+    if estimated_tokens <= max_tokens:
+        return messages
+
+    # Truncate older messages
+    truncated = []
+    current_tokens = 0
+
+    # Keep system message if present
+    if messages and messages[0].get("role") == "system":
+        truncated.append(messages[0])
+        messages = messages[1:]
+
+    # Add recent messages up to token limit
+    for message in reversed(messages):
+        content = message.get("content", "")
+        message_tokens = len(content) // 4
+
+        if current_tokens + message_tokens > max_tokens:
+            break
+
+        truncated.insert(0, message)
+        current_tokens += message_tokens
+
+    return truncated
+
 def perform_search(query):
     """Perform search using Tavily"""
     if TAVILY_AVAILABLE and tavily_client:
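The new truncate_history helper keeps the system message and drops the oldest turns once a rough character-based budget is exceeded. A minimal standalone sketch of that 4-characters-per-token heuristic (estimate_tokens is a hypothetical stand-in, not part of app.py):

    # Standalone sketch of the budget heuristic used by truncate_history.
    def estimate_tokens(messages):
        # 4 chars ~ 1 token, matching the estimate in truncate_history
        return sum(len(m.get("content", "")) for m in messages) // 4

    history = [{"role": "system", "content": "You are a helpful assistant."}]
    history += [
        {"role": "user", "content": "x" * 8000},       # ~2000 tokens
        {"role": "assistant", "content": "y" * 8000},  # ~2000 tokens
    ] * 3

    print(estimate_tokens(history))  # ~12000, well over a 4000-token budget
    # truncate_history(history, max_tokens=4000) keeps the system message plus
    # only the most recent messages whose combined estimate fits the budget.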
@@ -151,7 +184,7 @@ def validate_history(chat_history):
 
     return validated
 
-def generate_with_streaming(messages, max_tokens=8192, temperature=0.7, top_p=0.9):
+def generate_with_streaming(messages, model, max_tokens=8192, temperature=0.7, top_p=0.9):
     """Generate text with streaming"""
     headers = {
         "Authorization": f"Bearer {HF_TOKEN}",
@@ -162,7 +195,7 @@ def generate_with_streaming(messages, max_tokens=8192, temperature=0.7, top_p=0.9):
     validated_messages = validate_history(messages)
 
     payload = {
-        "model": "DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf",
+        "model": model,
         "messages": validated_messages,
         "max_tokens": max_tokens,
         "temperature": temperature,
@@ -203,16 +236,19 @@ def generate_with_streaming(messages, max_tokens=8192, temperature=0.7, top_p=0.9):
     except Exception as e:
         yield f"Connection error: {str(e)}"
 
-def respond(message, chat_history, max_tokens, temperature, top_p, use_web_search):
+def respond(message, chat_history, model_choice, max_tokens, temperature, top_p, creativity, precision, system_prompt, use_web_search):
     """Main response handler with conversation history"""
     if not message:
-        yield "", chat_history
+        yield "", chat_history, ""
        return
 
-    # Add preloaded context to the first message if history is empty
+    # Add custom system prompt or preloaded context
     if not chat_history:
-        preloaded_context = get_preloaded_context()
-        system_message = {"role": "system", "content": preloaded_context}
+        if system_prompt:
+            system_message = {"role": "system", "content": system_prompt}
+        else:
+            preloaded_context = get_preloaded_context()
+            system_message = {"role": "system", "content": preloaded_context}
         chat_history = [system_message] + chat_history
 
     # Check if the message contains search results that need analysis
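On the first turn the handler now prefers a user-supplied system prompt and falls back to the preloaded context only when the System Prompt textbox is empty. A tiny illustration of that selection (the literal below is a stand-in for get_preloaded_context(), which is defined elsewhere in app.py):

    system_prompt = ""  # value of the new System Prompt textbox
    preloaded_context = "Preloaded context goes here"  # stand-in for get_preloaded_context()
    system_message = {"role": "system", "content": system_prompt or preloaded_context}
    chat_history = [system_message]  # prepended only when the history is empty
    print(system_message["content"])  # empty prompt -> falls back to the preloaded context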
@@ -236,10 +272,10 @@ def respond(message, chat_history, max_tokens, temperature, top_p, use_web_search):
 
         # Generate analyzed response
         full_response = ""
-        for chunk in generate_with_streaming(analysis_history, max_tokens, temperature, top_p):
+        for chunk in generate_with_streaming(analysis_history, model_choice, max_tokens, temperature * creativity, top_p * precision):
             if isinstance(chunk, str):
                 full_response = chunk
-                yield "", chat_history + [{"role": "user", "content": message}, {"role": "assistant", "content": full_response}]
+                yield "", chat_history + [{"role": "user", "content": message}, {"role": "assistant", "content": full_response}], message
         return
 
     # Check if we should perform a search
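With the model id no longer hard-coded and the new Creativity and Precision sliders applied as multipliers on the base sampling parameters, the request body assembled by generate_with_streaming ends up looking roughly like the sketch below. Values are the UI defaults; the "stream" flag is an assumption about the surrounding streaming code, which this diff does not show:

    model_choice = "DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf"
    payload = {
        "model": model_choice,                  # taken from the Model dropdown
        "messages": [{"role": "user", "content": "Hello"}],
        "max_tokens": 8192,
        "temperature": 0.7 * 0.7,               # Base Temperature x Creativity, ~0.49
        "top_p": 0.9 * 0.9,                     # Top P x Precision, ~0.81
        "stream": True,                         # assumption: OpenAI-style streaming flag
    }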
@@ -248,65 +284,97 @@ def respond(message, chat_history, max_tokens, temperature, top_p, use_web_search):
     # Always perform search if web search is enabled
     if use_web_search:
         search_result = perform_search(message)
-        yield "", chat_history + [user_message, {"role": "assistant", "content": search_result}]
+        yield "", chat_history + [user_message, {"role": "assistant", "content": search_result}], search_result
         return
 
     # Normal flow - generate response
     current_history = chat_history + [user_message]
     full_response = ""
 
-    for chunk in generate_with_streaming(current_history, max_tokens, temperature, top_p):
+    for chunk in generate_with_streaming(current_history, model_choice, max_tokens, temperature * creativity, top_p * precision):
         if isinstance(chunk, str):
             full_response = chunk
             # Break infinite loops
             if is_looping_content(full_response):
                 # Force search instead of looping
                 search_result = perform_search(message)
-                yield "", chat_history + [user_message, {"role": "assistant", "content": f"[LOOP DETECTED - PERFORMING SEARCH]\n{search_result}"}]
+                yield "", chat_history + [user_message, {"role": "assistant", "content": f"[LOOP DETECTED - PERFORMING SEARCH]\n{search_result}"}], search_result
                 return
             # Stream the response
-            yield "", chat_history + [user_message, {"role": "assistant", "content": full_response}]
+            yield "", chat_history + [user_message, {"role": "assistant", "content": full_response}], ""
 
     # Check for tool calls after completion or break loops
     if is_looping_content(full_response):
         # Force search for looping content
         search_result = perform_search(message)
-        yield "", chat_history + [user_message, {"role": "assistant", "content": f"[LOOP DETECTED - PERFORMING SEARCH]\n{search_result}"}]
+        yield "", chat_history + [user_message, {"role": "assistant", "content": f"[LOOP DETECTED - PERFORMING SEARCH]\n{search_result}"}], search_result
         return
 
     # Normal completion
-    yield "", chat_history + [user_message, {"role": "assistant", "content": full_response}]
+    yield "", chat_history + [user_message, {"role": "assistant", "content": full_response}], ""
 
 # Gradio Interface
 with gr.Blocks(title="GPT-OSS Chat") as demo:
     gr.Markdown("# 🤖 GPT-OSS 20B Chat")
     gr.Markdown(f"Chat with automatic web search capabilities\n\n**Current Date/Time**: {FORMATTED_DATE_TIME}")
 
-    chatbot = gr.Chatbot(height=500, type="messages")
-    msg = gr.Textbox(label="Message", placeholder="Ask anything...")
+    with gr.Row():
+        chatbot = gr.Chatbot(height=500, type="messages", label="Conversation")
+
+    with gr.Row():
+        msg = gr.Textbox(label="Message", placeholder="Ask anything...", scale=9)
+        submit = gr.Button("Send", scale=1)
 
     with gr.Row():
-        submit = gr.Button("Send")
         clear = gr.Button("Clear")
 
+    with gr.Accordion("Search Results", open=False):
+        search_results = gr.Textbox(label="Raw Search Data", interactive=False, max_lines=10)
+
     with gr.Accordion("Settings", open=False):
-        max_tokens = gr.Slider(50, 8192, value=8192, label="Max Tokens")
-        temperature = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
-        top_p = gr.Slider(0.1, 1.0, value=0.9, label="Top P")
+        with gr.Row():
+            model_choice = gr.Dropdown(
+                choices=[
+                    "DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf",
+                    "other-model-variants"
+                ],
+                value="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf",
+                label="Model"
+            )
+
+        with gr.Row():
+            max_tokens = gr.Slider(50, 8192, value=8192, label="Max Tokens")
+            temperature = gr.Slider(0.1, 1.0, value=0.7, label="Base Temperature")
+            top_p = gr.Slider(0.1, 1.0, value=0.9, label="Top P")
+
+        with gr.Row():
+            creativity = gr.Slider(0.1, 1.0, value=0.7, label="Creativity")
+            precision = gr.Slider(0.1, 1.0, value=0.9, label="Precision")
+
+        system_prompt = gr.Textbox(
+            label="System Prompt",
+            value="",
+            placeholder="Enter custom system prompt...",
+            max_lines=3
+        )
+
         use_web_search = gr.Checkbox(label="Enable Web Search", value=True)
-
+
+    # Event handling
     submit.click(
         respond,
-        [msg, chatbot, max_tokens, temperature, top_p, use_web_search],
-        [msg, chatbot],
+        [msg, chatbot, model_choice, max_tokens, temperature, top_p, creativity, precision, system_prompt, use_web_search],
+        [msg, chatbot, search_results],
         queue=True
     )
+
     msg.submit(
         respond,
-        [msg, chatbot, max_tokens, temperature, top_p, use_web_search],
-        [msg, chatbot],
+        [msg, chatbot, model_choice, max_tokens, temperature, top_p, creativity, precision, system_prompt, use_web_search],
+        [msg, chatbot, search_results],
         queue=True
     )
+
     clear.click(lambda: None, None, chatbot, queue=False)
 
 if __name__ == "__main__":
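Since respond now yields three values (the cleared textbox, the updated chat history, and the raw search text shown in the new Search Results accordion), both event bindings list three output components. A minimal self-contained sketch of that wiring with a toy handler (respond_stub is hypothetical; the real respond streams and takes many more inputs):

    import gradio as gr

    def respond_stub(message, history):
        # Toy handler: echo the message and expose a "raw search" string.
        history = (history or []) + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": f"echo: {message}"},
        ]
        return "", history, f"raw search data for: {message}"

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(type="messages")
        msg = gr.Textbox(label="Message")
        search_results = gr.Textbox(label="Raw Search Data", interactive=False)
        msg.submit(respond_stub, [msg, chatbot], [msg, chatbot, search_results])

    if __name__ == "__main__":
        demo.launch()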