File size: 9,436 Bytes
24a5f5e 5c265a9 24a5f5e 5c265a9 24a5f5e 5c265a9 3dde460 24a5f5e 5c265a9 3dde460 5c265a9 3dde460 5c265a9 3dde460 5c265a9 3dde460 5c265a9 3dde460 5c265a9 3dde460 5c265a9 3dde460 5c265a9 24a5f5e 5c265a9 24a5f5e 3dde460 24a5f5e 5c265a9 3dde460 5c265a9 3dde460 5c265a9 24a5f5e 3dde460 5c265a9 3dde460 5c265a9 24a5f5e 3dde460 24a5f5e 3dde460 24a5f5e 3dde460 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 |
import asyncio
import base64
import html
import re

import gradio as gr
import numpy as np
from fastrtc import (
    AdditionalOutputs,
    AsyncStreamHandler,
    Stream,
    get_cloudflare_turn_credentials_async,  # uses Cloudflare's free TURN service
    wait_for_item,
)
from gradio.utils import get_space
from groq import Groq
class VoiceCodingHandler(AsyncStreamHandler):
    """FastRTC handler that turns an incoming audio request into generated HTML.

    Each accepted audio frame starts one background job that "transcribes" the
    request (currently a mock that returns a fixed sentence), asks the Groq
    chat-completions API for a single-file HTML application and publishes the
    result on ``output_queue`` as ``AdditionalOutputs`` payloads. ``emit``
    hands those payloads back to the stream.

    Only one job runs at a time: frames that arrive while a job is still in
    flight are dropped, so the conversation history is never mutated by two
    jobs concurrently and the API is not called once per audio frame.
    """

    # Matches a complete HTML document. Case-insensitive because both
    # ``<!DOCTYPE html>`` and ``<!doctype html>`` are valid spellings.
    _HTML_DOCUMENT_RE = re.compile(
        r"<!DOCTYPE html>.*?</html>", re.DOTALL | re.IGNORECASE
    )

    def __init__(self, groq_client: "Groq"):
        """Create a handler bound to ``groq_client``.

        Args:
            groq_client: Client whose ``chat.completions.create`` is used to
                generate code.
        """
        super().__init__(
            expected_layout="mono",
            output_sample_rate=24000,
            input_sample_rate=16000,
        )
        self.groq_client = groq_client
        self.input_queue = asyncio.Queue()
        self.output_queue = asyncio.Queue()
        self.is_active = False
        # The single in-flight background job (an asyncio.Task) or None.
        # Holding the reference keeps the task from being garbage-collected
        # before it finishes.
        self._processing_task = None
        # Prompts
        self.system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response. Respond in Vietnamese when appropriate."
        self.user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"
        self.current_history = [{"role": "system", "content": self.system_prompt}]
        self.current_code = ""

    def copy(self):
        """Return a fresh handler that shares only the Groq client."""
        return VoiceCodingHandler(self.groq_client)

    def extract_html_content(self, text):
        """Return the first complete HTML document found in ``text``.

        Args:
            text: Raw model output.

        Returns:
            The ``<!DOCTYPE html> ... </html>`` span (doctype matched
            case-insensitively) if one exists, the unchanged ``text`` if no
            document is found, or ``None`` when ``text`` is empty.
        """
        if not text:
            return None
        match = self._HTML_DOCUMENT_RE.search(text)
        # Fall back to the full text when no HTML document is present.
        return match.group(0) if match else text

    async def start_up(self):
        """Mark the handler as active so ``receive`` starts accepting frames."""
        self.is_active = True
        print("✅ Voice Coding Handler started")

    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
        """Accept one audio frame and start background processing for it.

        The frame is ignored when the handler is inactive or while a previous
        job is still running.

        Args:
            frame: ``(sample_rate, samples)`` pair delivered by the stream.
        """
        if not self.is_active:
            return
        in_flight = self._processing_task
        if in_flight is not None and not in_flight.done():
            return
        sample_rate, array = frame
        array = array.squeeze()
        # Process in the background so receiving frames is never blocked.
        self._processing_task = asyncio.create_task(
            self._process_audio(array, sample_rate)
        )

    async def _process_audio(self, audio_data: np.ndarray, sample_rate: int):
        """Transcribe the request and generate code for it.

        ``audio_data`` and ``sample_rate`` are currently unused because the
        transcription step is mocked. Failures are reported on stdout and do
        not propagate.
        """
        try:
            print("🎤 Processing audio for voice coding...")
            # Temporary: a fixed text request stands in for real audio
            # transcription (e.g. VOSK/Whisper) until ASR is integrated.
            transcription = await self._mock_transcribe_audio()
            if transcription:
                print(f"🎯 Received request: {transcription}")
                # Publish a loading state before the (slow) generation step.
                await self.output_queue.put(AdditionalOutputs({
                    "type": "loading",
                    "message": "🦙 Llama đang code...",
                    "history": list(self.current_history),
                    "code": self.current_code
                }))
                await self._generate_code(transcription)
        except Exception as e:
            print(f"❌ Lỗi xử lý audio: {e}")

    async def _mock_transcribe_audio(self) -> str:
        """Return a fixed test request; placeholder for a real ASR backend."""
        return "Tạo trang web hello world với màu nền xanh và chữ màu trắng"

    async def _generate_code(self, user_message: str):
        """Ask the model for code and publish the result on ``output_queue``.

        On success the user/assistant turn is appended to ``current_history``
        and ``current_code`` is replaced by the extracted HTML (it is left
        unchanged when the model returns nothing). On failure the history is
        left untouched and an ``"error"`` payload is published instead.

        Args:
            user_message: Plain-text request to fulfil.
        """
        try:
            user_entry = {
                "role": "user",
                "content": self.user_prompt.format(
                    user_message=user_message,
                    code=self.current_code
                ),
            }
            # Build the request from a copy so a failed call leaves no
            # dangling user message in the stored history.
            messages = [*self.current_history, user_entry]
            print("🦙 Generating code with Llama...")
            # The Groq client call is synchronous; run it in a worker thread
            # so the event loop keeps servicing the stream meanwhile.
            response = await asyncio.to_thread(
                self.groq_client.chat.completions.create,
                model="llama-3.1-8b-instant",
                messages=messages,
                temperature=0.7,
                max_tokens=1024,
                top_p=0.9,
                stream=False,
            )
            output = response.choices[0].message.content or ""
            print("✅ Code generated successfully")
            html_code = self.extract_html_content(output)
            # Commit the completed turn only now that the call succeeded.
            self.current_history.extend(
                (user_entry, {"role": "assistant", "content": output})
            )
            if html_code:
                self.current_code = html_code
            await self.output_queue.put(AdditionalOutputs({
                "type": "code_generated",
                "history": list(self.current_history),
                "code": self.current_code,
                "message": "✅ Code đã được generate!"
            }))
        except Exception as e:
            print(f"❌ Lỗi generate code: {e}")
            await self.output_queue.put(AdditionalOutputs({
                "type": "error",
                "message": f"❌ Lỗi: {str(e)}",
                "history": list(self.current_history),
                "code": self.current_code
            }))

    async def emit(self):
        """Return the next queued output, or ``None`` if waiting for it fails."""
        try:
            return await wait_for_item(self.output_queue)
        except Exception as e:
            print(f"❌ Lỗi emit: {e}")
            return None

    async def shutdown(self):
        """Stop accepting frames and cancel any job that is still running."""
        self.is_active = False
        in_flight = self._processing_task
        if in_flight is not None and not in_flight.done():
            in_flight.cancel()
        print("🛑 Voice Coding Handler stopped")
class VoiceCodingService:
    """Voice-coding service built on FastRTC.

    Holds the Groq client, the RTC configuration obtained at construction
    time, and the HTML snippets used for the preview area, and offers helpers
    to build the audio stream and to render generated code.
    """

    # Matches a complete HTML document. Case-insensitive because both
    # ``<!DOCTYPE html>`` and ``<!doctype html>`` are valid spellings.
    _HTML_DOCUMENT_RE = re.compile(
        r"<!DOCTYPE html>.*?</html>", re.DOTALL | re.IGNORECASE
    )
    # Sentinel returned by ``extract_html_content`` for empty input and
    # recognised by ``display_in_sandbox`` as "nothing to show".
    _NO_CODE_PLACEHOLDER = "<!-- No code generated -->"
    # Substrings (lower-case) that mark text as an HTML document.
    _HTML_MARKERS = ("<html", "<!doctype", "<body", "<head")

    def __init__(self, groq_client: "Groq"):
        """Store the client, fetch TURN credentials and set up HTML templates.

        Args:
            groq_client: Client passed on to every ``VoiceCodingHandler``.
        """
        self.groq_client = groq_client
        # Use the free Cloudflare TURN servers; fall back to None (suitable
        # for local development) when credentials cannot be obtained.
        try:
            self.rtc_configuration = asyncio.run(get_cloudflare_turn_credentials_async())
            print("✅ Using Cloudflare TURN servers")
        except Exception as e:
            print(f"⚠️ Cannot get TURN credentials, using None: {e}")
            self.rtc_configuration = None  # still works on a local network
        # HTML templates
        self.sandbox_html = """
<div style="text-align: center; padding: 20px; border: 2px dashed #ccc; border-radius: 10px;">
<h3>🎮 Sandbox Preview</h3>
<p>Code sẽ được hiển thị ở đây sau khi generate</p>
<p><small>Chức năng voice đang được phát triển. Vui lòng sử dụng text input.</small></p>
</div>
"""
        self.loading_html = """
<div style="text-align: center; padding: 20px;">
<div class="spinner"></div>
<p>🦙 Llama đang code...</p>
</div>
<style>
.spinner {
border: 4px solid #f3f3f3;
border-top: 4px solid #3498db;
border-radius: 50%;
width: 40px;
height: 40px;
animation: spin 2s linear infinite;
margin: 0 auto;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
</style>
"""

    def extract_html_content(self, text):
        """Return the first complete HTML document found in ``text``.

        Args:
            text: Raw model output.

        Returns:
            The ``<!DOCTYPE html> ... </html>`` span (doctype matched
            case-insensitively) if one exists; the text wrapped in a
            ``<pre>`` element if no document is found; or the
            ``<!-- No code generated -->`` sentinel when ``text`` is empty.
        """
        if not text:
            return self._NO_CODE_PLACEHOLDER
        match = self._HTML_DOCUMENT_RE.search(text)
        return match.group(0) if match else f"<!-- Generated Code -->\n<pre>{text}</pre>"

    def create_stream(self):
        """Build the FastRTC send-receive audio stream for voice coding."""
        return Stream(
            VoiceCodingHandler(self.groq_client),
            modality="audio",
            mode="send-receive",
            rtc_configuration=self.rtc_configuration,
            concurrency_limit=3,
            time_limit=120,
        )

    def display_in_sandbox(self, code):
        """Render ``code`` as HTML for the preview area.

        Args:
            code: Generated code or text; may be empty or ``None``.

        Returns:
            ``self.sandbox_html`` when there is nothing to show (empty input
            or exactly the no-code sentinel); an ``<iframe>`` whose source is
            a base64 ``data:`` URI when ``code`` looks like an HTML document;
            otherwise a ``<div>`` showing ``code`` as HTML-escaped text. If
            rendering fails, a red error ``<div>`` is returned instead.
        """
        # Compare against the sentinel exactly: a substring test would also
        # hide real pages that merely contain the words "No code".
        if not code or (
            isinstance(code, str) and code.strip() == self._NO_CODE_PLACEHOLDER
        ):
            return self.sandbox_html
        try:
            lowered = code.lower()
            # Check whether the code is an HTML document.
            if any(marker in lowered for marker in self._HTML_MARKERS):
                encoded_html = base64.b64encode(code.encode("utf-8")).decode("utf-8")
                data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
                return f'<iframe src="{data_uri}" width="100%" height="600px" style="border: 1px solid #ccc; border-radius: 5px;"></iframe>'
            # Not HTML: show it as text. The content is model output, so it
            # is escaped before being embedded in the surrounding markup.
            escaped = html.escape(code)
            return f'<div style="padding: 20px; background: #f5f5f5; border-radius: 5px;"><h4>Generated Content:</h4><pre style="white-space: pre-wrap;">{escaped}</pre></div>'
        except Exception as e:
            print(f"❌ Lỗi display sandbox: {e}")
            return f'<div style="color: red; padding: 20px;">Lỗi hiển thị sandbox: {html.escape(str(e))}</div>'