rdune71 committed on
Commit 064a3e4 · verified · 1 Parent(s): 6dabfc1

Update app.py

Files changed (1):
  1. app.py +231 -23

app.py CHANGED
@@ -3,7 +3,9 @@ import requests
 import json
 import os
 import re
+import time
 from datetime import datetime
+from functools import lru_cache
 from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
 
@@ -43,6 +45,26 @@ except ImportError:
     TAVILY_AVAILABLE = False
     print("Tavily not available: Please install tavily-python")
 
+# Rate limiter class
+class RateLimiter:
+    def __init__(self, max_calls=10, time_window=60):
+        self.max_calls = max_calls
+        self.time_window = time_window
+        self.calls = []
+
+    def is_allowed(self):
+        now = time.time()
+        self.calls = [call for call in self.calls if now - call < self.time_window]
+        if len(self.calls) < self.max_calls:
+            self.calls.append(now)
+            return True
+        return False
+
+rate_limiter = RateLimiter(max_calls=20, time_window=60)
+
+# Feedback storage
+feedback_data = []
+
 def get_preloaded_context():
     """Get preloaded context information"""
     context = f"""{FORMATTED_DATE_TIME}
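The limiter added here is a simple sliding window: each `is_allowed()` call first prunes timestamps older than `time_window`, then admits the call only if fewer than `max_calls` remain. A quick standalone check (class copied from the hunk, shrunk to a one-second window for the demo):

```python
import time

class RateLimiter:  # copied from the hunk above
    def __init__(self, max_calls=10, time_window=60):
        self.max_calls = max_calls
        self.time_window = time_window
        self.calls = []

    def is_allowed(self):
        now = time.time()
        # Drop timestamps that have aged out of the window
        self.calls = [call for call in self.calls if now - call < self.time_window]
        if len(self.calls) < self.max_calls:
            self.calls.append(now)
            return True
        return False

limiter = RateLimiter(max_calls=2, time_window=1)
print(limiter.is_allowed())  # True  (first call recorded)
print(limiter.is_allowed())  # True  (second call recorded)
print(limiter.is_allowed())  # False (window full)
time.sleep(1.1)              # let the window expire
print(limiter.is_allowed())  # True  (stale timestamps pruned)
```

Note that the single module-level `rate_limiter` is shared by every visitor to the Space, so the 20-calls-per-minute budget is global rather than per user.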
@@ -130,6 +152,15 @@ def truncate_history(messages, max_tokens=4000):
 
     return truncated
 
+def manage_conversation_memory(messages, max_turns=10):
+    """Keep conversation focused and prevent context overflow"""
+    if len(messages) > max_turns * 2:  # *2 for user/assistant pairs
+        # Keep system message + last N turns
+        system_msg = [msg for msg in messages if msg.get("role") == "system"]
+        recent_messages = messages[-(max_turns * 2):]
+        return system_msg + recent_messages if system_msg else recent_messages
+    return messages
+
 def perform_search(query):
     """Perform search using Tavily"""
     if TAVILY_AVAILABLE and tavily_client:
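The new trimmer complements `truncate_history`: instead of counting tokens it counts messages, keeping any system message plus the last `max_turns` user/assistant pairs. A small illustration (function copied from the hunk; the history is made up):

```python
def manage_conversation_memory(messages, max_turns=10):  # copied from the hunk above
    if len(messages) > max_turns * 2:  # *2 for user/assistant pairs
        system_msg = [msg for msg in messages if msg.get("role") == "system"]
        recent_messages = messages[-(max_turns * 2):]
        return system_msg + recent_messages if system_msg else recent_messages
    return messages

history = [  # made-up history, purely for illustration
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello!"},
    {"role": "user", "content": "What is Tavily?"},
    {"role": "assistant", "content": "A web search API."},
]

print(manage_conversation_memory(history, max_turns=1))
# Keeps the system prompt plus the last user/assistant pair:
# [system, "What is Tavily?", "A web search API."]
```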
@@ -184,6 +215,105 @@ def validate_history(chat_history):
 
     return validated
 
+def generate_follow_up_questions(last_response):
+    """Generate 3-5 relevant follow-up questions"""
+    if not last_response:
+        return []
+
+    # Simple heuristic-based questions
+    question_words = ["What", "How", "Why", "When", "Where", "Who"]
+    topics = ["related", "similar", "detailed", "practical"]
+
+    # Extract key topics from response (simplified)
+    words = last_response.split()[:20]  # First 20 words
+    key_topics = [word for word in words if len(word) > 4][:3]  # Simple filtering
+
+    questions = []
+    for word in question_words[:3]:  # Limit to 3
+        if key_topics:
+            topic = key_topics[0] if key_topics else "this"
+            questions.append(f"{word} about {topic}?")
+
+    return questions[:3]  # Return max 3 questions
+
+def format_code_blocks(text):
+    """Detect and format code blocks with syntax highlighting"""
+    import re
+    # Simple pattern to detect code blocks
+    pattern = r'```(\w+)?\n(.*?)```'
+    # Replace with HTML formatted code (simplified)
+    formatted = re.sub(pattern, r'<pre><code class="language-\1">\2</code></pre>', text, flags=re.DOTALL)
+    return formatted
+
+def extract_and_format_citations(search_results):
+    """Extract sources and create clickable citations"""
+    # Simple citation extraction (can be enhanced)
+    citations = []
+    if "Source:" in search_results:
+        lines = search_results.split('\n')
+        for line in lines:
+            if "http" in line:
+                citations.append(line.strip())
+    return citations
+
+def track_usage(user_id, query, response_time, tokens_used):
+    """Track usage metrics for improvement"""
+    metrics = {
+        "timestamp": datetime.now().isoformat(),
+        "user_id": user_id or "anonymous",
+        "query_length": len(query),
+        "response_time": response_time,
+        "tokens_used": tokens_used
+    }
+    # In a real app, you'd store this in a database
+    print(f"Usage tracked: {metrics}")
+    return metrics
+
+def collect_feedback(feedback, query, response):
+    """Collect user feedback for model improvement"""
+    feedback_entry = {
+        "timestamp": datetime.now().isoformat(),
+        "feedback": feedback,
+        "query": query,
+        "response": response[:100] + "..." if len(response) > 100 else response
+    }
+    feedback_data.append(feedback_entry)
+    print(f"Feedback collected: {feedback_entry}")
+    return f"Thank you for your feedback: {feedback}"
+
+@lru_cache(maxsize=100)
+def cached_search(query):
+    """Cache frequent searches"""
+    return perform_search(query)
+
+def handle_api_failure(error_type, fallback_strategy="retry"):
+    """Handle different types of API failures gracefully"""
+    # Simplified error handling
+    return f"API Error: {error_type}. Strategy: {fallback_strategy}"
+
+def export_conversation(chat_history, export_format):
+    """Export conversation in various formats"""
+    if not chat_history:
+        return "No conversation to export"
+
+    if export_format == "JSON":
+        # Filter out system messages for export
+        exportable_history = [msg for msg in chat_history if msg.get("role") != "system"]
+        return json.dumps(exportable_history, indent=2, ensure_ascii=False)
+    elif export_format == "Text":
+        lines = []
+        for msg in chat_history:
+            if msg.get("role") != "system":  # Skip system messages
+                lines.append(f"{msg.get('role', 'unknown').upper()}: {msg.get('content', '')}")
+        return "\n".join(lines)
+    return "Invalid format"
+
+def is_news_related_query(query):
+    """Check if query is related to news"""
+    news_keywords = ['news', 'headline', 'breaking', 'latest', 'today', 'current event', 'update', 'report']
+    query_lower = query.lower()
+    return any(word in query_lower for word in news_keywords)
+
 def generate_with_streaming(messages, model, max_tokens=8192, temperature=0.7, top_p=0.9):
     """Generate text with streaming"""
     headers = {
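Of the helpers added here, two behaviors are worth seeing concretely. `generate_follow_up_questions` anchors every question on the first extracted topic, so a response beginning "Tavily provides search results..." yields `['What about Tavily?', 'How about Tavily?', 'Why about Tavily?']` (the `topics` list is defined but never used). And `format_code_blocks` keeps the captured body's trailing newline and, for a fence with no language tag, emits an empty class because group `\1` goes unmatched (substituted as an empty string on Python 3.5+). A standalone check, with the helper copied verbatim:

```python
import re

def format_code_blocks(text):  # copied from the hunk above
    pattern = r'```(\w+)?\n(.*?)```'
    return re.sub(pattern, r'<pre><code class="language-\1">\2</code></pre>',
                  text, flags=re.DOTALL)

print(format_code_blocks("```python\nprint('hi')\n```"))
# -> <pre><code class="language-python">print('hi')
#    </code></pre>

print(format_code_blocks("```\nls -la\n```"))
# -> <pre><code class="language-">ls -la          (no tag: empty class)
#    </code></pre>
```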
@@ -203,6 +333,7 @@ def generate_with_streaming(messages, model, max_tokens=8192, temperature=0.7, top_p=0.9):
         "stream": True
     }
 
+    start_time = time.time()
     try:
         response = session.post(
             f"{BASE_URL}chat/completions",
@@ -235,17 +366,22 @@ def generate_with_streaming(messages, model, max_tokens=8192, temperature=0.7, top_p=0.9):
 
     except Exception as e:
         yield f"Connection error: {str(e)}"
+    finally:
+        end_time = time.time()
+        # Track usage (simplified)
+        track_usage("user123", str(messages[-1]) if messages else "",
+                    end_time - start_time, len(str(messages)))
 
-def is_news_related_query(query):
-    """Check if query is related to news"""
-    news_keywords = ['news', 'headline', 'breaking', 'latest', 'today', 'current event', 'update', 'report']
-    query_lower = query.lower()
-    return any(word in query_lower for word in news_keywords)
-
-def respond(message, chat_history, model_choice, max_tokens, temperature, top_p, creativity, precision, system_prompt, use_web_search):
+def respond(message, chat_history, model_choice, max_tokens, temperature, top_p,
+            creativity, precision, system_prompt, use_web_search, theme):
     """Main response handler with conversation history"""
     if not message:
-        yield "", chat_history, ""
+        yield "", chat_history, "", gr.update(choices=[], visible=False)
+        return
+
+    # Rate limiting check
+    if not rate_limiter.is_allowed():
+        yield "", chat_history + [{"role": "assistant", "content": "Rate limit exceeded. Please wait a moment before sending another message."}], "", ""
        return
 
     # Add custom system prompt or preloaded context
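Two details in this hunk are easy to miss. First, because `generate_with_streaming` is a generator, its `finally:` block (and thus `track_usage`, called with a hard-coded `"user123"` and `len(str(messages))` as a rough token proxy) runs when the caller finishes iterating or closes the generator, so `response_time` spans the whole stream rather than time to first token. Second, most `respond` branches now yield a 4-tuple ending in a `gr.update(...)` for the follow-up Radio, while the rate-limit branch ends in a bare `""`, which Gradio will treat as the Radio's selected value. A hedged sketch of a consistent rate-limit branch (illustrative rewrite, not part of the commit):

```python
# Hypothetical rewrite of the rate-limit branch, for a consistent 4th output:
if not rate_limiter.is_allowed():
    warning = {"role": "assistant",
               "content": "Rate limit exceeded. Please wait a moment "
                          "before sending another message."}
    # gr.update(...) keeps the follow-up Radio hidden instead of
    # assigning the empty string as its selected value
    yield "", chat_history + [warning], "", gr.update(choices=[], visible=False)
    return
```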
@@ -258,7 +394,7 @@ def respond(message, chat_history, model_choice, max_tokens, temperature, top_p,
         chat_history = [system_message] + chat_history
 
     # Check if the message contains search results that need analysis
-    if "SEARCH RESULTS" in message or "SEARCH RESULTS" in message:
+    if "SEARCH RESULTS" in message or "[SEARCH RESULTS" in message:
         # This is search results that need analysis
         # Extract the original query and search results
         lines = message.split('\n')
@@ -270,7 +406,7 @@ def respond(message, chat_history, model_choice, max_tokens, temperature, top_p,
             else:
                 query = message[:100]  # Fallback
         else:
-            query = "news summary"
+            query = "summary request"
 
         # Perform analysis
         analysis_prompt = analyze_search_results(query, message)
@@ -283,7 +419,9 @@ def respond(message, chat_history, model_choice, max_tokens, temperature, top_p,
         for chunk in generate_with_streaming(analysis_history, model_choice, max_tokens, temperature * creativity, top_p * precision):
             if isinstance(chunk, str):
                 full_response = chunk
-                yield "", chat_history + [{"role": "user", "content": message}, {"role": "assistant", "content": full_response}], message
+                # Generate follow-up questions
+                follow_ups = generate_follow_up_questions(full_response)
+                yield "", chat_history + [{"role": "user", "content": message}, {"role": "assistant", "content": full_response}], message, gr.update(choices=follow_ups, visible=True if follow_ups else False)
         return
 
     # Check if we should perform a search
@@ -319,12 +457,16 @@ def respond(message, chat_history, model_choice, max_tokens, temperature, top_p,
             for chunk in generate_with_streaming(analysis_history, model_choice, max_tokens, temperature * creativity, top_p * precision):
                 if isinstance(chunk, str):
                     full_response = chunk
+                    # Generate follow-up questions
+                    follow_ups = generate_follow_up_questions(full_response)
                     # Stream both the analysis and raw search results
-                    yield "", chat_history + [user_message, {"role": "assistant", "content": search_result}, {"role": "assistant", "content": full_response}], search_results_output
+                    yield "", chat_history + [user_message, {"role": "assistant", "content": search_result}, {"role": "assistant", "content": full_response}], search_results_output, gr.update(choices=follow_ups, visible=True if follow_ups else False)
             return
         else:
             # Non-news search, just return the search results
-            yield "", chat_history + [user_message, {"role": "assistant", "content": search_result}], search_result
+            # Generate follow-up questions
+            follow_ups = generate_follow_up_questions(search_result)
+            yield "", chat_history + [user_message, {"role": "assistant", "content": search_result}], search_result, gr.update(choices=follow_ups, visible=True if follow_ups else False)
             return
 
     # Normal flow - generate response
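Worth noting: the `@lru_cache`-wrapped `cached_search` defined earlier is never called in these handler paths; every search still goes through `perform_search` directly. If caching were wanted, the swap is a one-liner, with the caveat that `lru_cache` keys on the exact query string and would also memoize an error message from a failed first attempt (hypothetical change, not part of this commit):

```python
# Hypothetical: route searches through the cached wrapper instead.
search_result = cached_search(message)   # was: perform_search(message)
```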
@@ -338,26 +480,50 @@ def respond(message, chat_history, model_choice, max_tokens, temperature, top_p,
             if is_looping_content(full_response):
                 # Force search instead of looping
                 search_result = perform_search(message)
-                yield "", chat_history + [user_message, {"role": "assistant", "content": f"[LOOP DETECTED - PERFORMING SEARCH]\n{search_result}"}], search_result
+                follow_ups = generate_follow_up_questions(search_result)
+                yield "", chat_history + [user_message, {"role": "assistant", "content": f"[LOOP DETECTED - PERFORMING SEARCH]\n{search_result}"}], search_result, gr.update(choices=follow_ups, visible=True if follow_ups else False)
                 return
             # Stream the response
-            yield "", chat_history + [user_message, {"role": "assistant", "content": full_response}], ""
+            follow_ups = generate_follow_up_questions(full_response)
+            yield "", chat_history + [user_message, {"role": "assistant", "content": full_response}], "", gr.update(choices=follow_ups, visible=True if follow_ups else False)
 
     # Check for tool calls after completion or break loops
     if is_looping_content(full_response):
         # Force search for looping content
         search_result = perform_search(message)
-        yield "", chat_history + [user_message, {"role": "assistant", "content": f"[LOOP DETECTED - PERFORMING SEARCH]\n{search_result}"}], search_result
+        follow_ups = generate_follow_up_questions(search_result)
+        yield "", chat_history + [user_message, {"role": "assistant", "content": f"[LOOP DETECTED - PERFORMING SEARCH]\n{search_result}"}], search_result, gr.update(choices=follow_ups, visible=True if follow_ups else False)
         return
 
     # Normal completion
-    yield "", chat_history + [user_message, {"role": "assistant", "content": full_response}], ""
+    follow_ups = generate_follow_up_questions(full_response)
+    yield "", chat_history + [user_message, {"role": "assistant", "content": full_response}], "", gr.update(choices=follow_ups, visible=True if follow_ups else False)
+
+def apply_theme(theme):
+    """Apply theme-specific CSS"""
+    if theme == "Dark":
+        return """
+        <style>
+        body { background-color: #1a1a1a; color: #ffffff; }
+        .message { background-color: #2d2d2d; }
+        </style>
+        """
+    else:
+        return """
+        <style>
+        body { background-color: #ffffff; color: #000000; }
+        .message { background-color: #f0f0f0; }
+        </style>
+        """
 
 # Gradio Interface
 with gr.Blocks(title="GPT-OSS Chat") as demo:
     gr.Markdown("# 🤖 GPT-OSS 20B Chat")
     gr.Markdown(f"Chat with automatic web search capabilities\n\n**Current Date/Time**: {FORMATTED_DATE_TIME}")
 
+    # Theme CSS
+    theme_css = gr.HTML()
+
     with gr.Row():
         chatbot = gr.Chatbot(height=500, type="messages", label="Conversation")
 
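`apply_theme` simply returns a raw `<style>` block, which the `theme_css = gr.HTML()` placeholder renders whenever the toggle changes (wired up in the last hunk below). For example (whitespace trimmed):

```python
print(apply_theme("Dark"))   # apply_theme as defined in the hunk above
# <style>
# body { background-color: #1a1a1a; color: #ffffff; }
# .message { background-color: #2d2d2d; }
# </style>
```

Whether CSS injected this way restyles every Gradio widget depends on the Gradio version and its DOM structure; passing styles via `gr.Blocks(css=...)` is the more conventional hook, mentioned here only as a hedged alternative.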
@@ -367,6 +533,24 @@ with gr.Blocks(title="GPT-OSS Chat") as demo:
 
     with gr.Row():
         clear = gr.Button("Clear")
+        theme_toggle = gr.Radio(choices=["Light", "Dark"], value="Light", label="Theme")
+        feedback_radio = gr.Radio(
+            choices=["👍 Helpful", "👎 Not Helpful", "🔄 Needs Improvement"],
+            label="Rate Last Response"
+        )
+
+    with gr.Row():
+        with gr.Column():
+            follow_up_questions = gr.Radio(
+                choices=[],
+                label="Suggested Follow-up Questions",
+                visible=False
+            )
+        with gr.Column():
+            with gr.Row():
+                export_format = gr.Radio(choices=["JSON", "Text"], value="JSON", label="Export Format")
+                export_btn = gr.Button("Export Conversation")
+                export_output = gr.File(label="Download")
 
     with gr.Accordion("Search Results", open=False):
         search_results = gr.Textbox(label="Raw Search Data", interactive=False, max_lines=10)
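The `follow_up_questions` Radio starts empty and hidden; each `yield` from `respond` repopulates it via `gr.update(choices=..., visible=...)` as its fourth output. A minimal self-contained demo of that pattern (component and handler names here are generic, not from the commit):

```python
import gradio as gr

with gr.Blocks() as demo:
    suggestions = gr.Radio(choices=[], label="Suggestions", visible=False)
    ask = gr.Button("Suggest")

    def make_suggestions():
        opts = ["What about Tavily?", "How about caching?"]
        # gr.update swaps the choices in and reveals the component
        return gr.update(choices=opts, visible=bool(opts))

    ask.click(make_suggestions, None, suggestions)

demo.launch()
```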
@@ -401,21 +585,45 @@ with gr.Blocks(title="GPT-OSS Chat") as demo:
     use_web_search = gr.Checkbox(label="Enable Web Search", value=True)
 
     # Event handling
-    submit.click(
+    submit_event = submit.click(
         respond,
-        [msg, chatbot, model_choice, max_tokens, temperature, top_p, creativity, precision, system_prompt, use_web_search],
-        [msg, chatbot, search_results],
+        [msg, chatbot, model_choice, max_tokens, temperature, top_p, creativity, precision, system_prompt, use_web_search, theme_toggle],
+        [msg, chatbot, search_results, follow_up_questions],
         queue=True
     )
 
-    msg.submit(
+    msg_event = msg.submit(
         respond,
-        [msg, chatbot, model_choice, max_tokens, temperature, top_p, creativity, precision, system_prompt, use_web_search],
-        [msg, chatbot, search_results],
+        [msg, chatbot, model_choice, max_tokens, temperature, top_p, creativity, precision, system_prompt, use_web_search, theme_toggle],
+        [msg, chatbot, search_results, follow_up_questions],
         queue=True
     )
 
     clear.click(lambda: None, None, chatbot, queue=False)
+
+    theme_toggle.change(
+        apply_theme,
+        [theme_toggle],
+        [theme_css]
+    )
+
+    feedback_radio.change(
+        collect_feedback,
+        [feedback_radio, msg, chatbot],
+        []
+    )
+
+    follow_up_questions.change(
+        lambda x: x,
+        [follow_up_questions],
+        [msg]
+    )
+
+    export_btn.click(
+        export_conversation,
+        [chatbot, export_format],
+        [export_output]
+    )
 
 if __name__ == "__main__":
     demo.launch()
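Two wiring details in this last hunk are worth flagging. `export_btn.click` sends `export_conversation`'s return value, a string of JSON or plain text, to a `gr.File` output, which expects a file path; a hedged adapter that writes to a temporary file first might look like the sketch below (names are illustrative, not from the commit). And `feedback_radio.change` passes the whole `chatbot` message list as `response`, so `collect_feedback`'s `response[:100]` slices messages rather than characters, and the `+ "..."` concatenation would raise a `TypeError` once a history exceeds 100 messages.

```python
import tempfile

def export_conversation_to_file(chat_history, export_format):
    """Hypothetical adapter: write the export text to a temp file
    and return its path, which is what gr.File expects."""
    text = export_conversation(chat_history, export_format)  # helper from this commit
    suffix = ".json" if export_format == "JSON" else ".txt"
    with tempfile.NamedTemporaryFile("w", suffix=suffix, delete=False,
                                     encoding="utf-8") as f:
        f.write(text)
        return f.name

# export_btn.click(export_conversation_to_file, [chatbot, export_format], [export_output])
```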