File size: 9,436 Bytes
24a5f5e
 
 
 
5c265a9
24a5f5e
5c265a9
 
 
24a5f5e
5c265a9
3dde460
24a5f5e
5c265a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3dde460
 
5c265a9
3dde460
5c265a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3dde460
 
 
5c265a9
3dde460
 
5c265a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3dde460
 
 
 
 
5c265a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3dde460
5c265a9
3dde460
 
 
5c265a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24a5f5e
 
5c265a9
24a5f5e
 
 
3dde460
 
 
 
 
 
 
 
24a5f5e
 
5c265a9
3dde460
5c265a9
 
3dde460
5c265a9
 
 
 
24a5f5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3dde460
 
 
 
 
 
 
5c265a9
 
 
 
 
 
 
3dde460
 
5c265a9
24a5f5e
 
 
3dde460
24a5f5e
 
 
3dde460
 
 
 
 
 
 
 
24a5f5e
 
3dde460
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import asyncio
import base64
import html
import re

import gradio as gr
import numpy as np
from fastrtc import (
    Stream,
    AsyncStreamHandler,
    AdditionalOutputs,
    wait_for_item,
    get_cloudflare_turn_credentials_async,  # Use Cloudflare's free TURN service
)
from gradio.utils import get_space
from groq import Groq

class VoiceCodingHandler(AsyncStreamHandler):
    """FastRTC stream handler that turns incoming audio into generated HTML.

    For an incoming audio frame the handler obtains a transcription
    (currently a mock that returns a fixed request), sends it to the Groq
    chat-completions API together with the code generated so far, and
    publishes the outcome on ``output_queue`` as ``AdditionalOutputs``
    payloads.

    Every payload is a dict with the keys ``"type"`` (``"loading"``,
    ``"code_generated"`` or ``"error"``), ``"message"``, ``"history"`` and
    ``"code"``.
    """

    # Matches a complete HTML document, with or without a leading doctype,
    # regardless of letter case (models often emit ``<!doctype html>``).
    _HTML_DOCUMENT_RE = re.compile(
        r"(?:<!DOCTYPE html>|<html\b).*?</html>", re.DOTALL | re.IGNORECASE
    )

    def __init__(self, groq_client: Groq):
        """Create a handler bound to ``groq_client``.

        Args:
            groq_client: Client whose ``chat.completions.create`` is used to
                generate code.
        """
        super().__init__(
            expected_layout="mono",
            output_sample_rate=24000,
            input_sample_rate=16000,
        )
        self.groq_client = groq_client
        self.input_queue = asyncio.Queue()  # not used inside this class
        self.output_queue = asyncio.Queue()
        self.is_active = False
        # The single in-flight processing task (or None). Holding it here
        # both limits the handler to one request at a time and keeps a strong
        # reference so the task cannot be garbage-collected mid-run.
        self._current_task = None

        # Prompts
        self.system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response. Respond in Vietnamese when appropriate."
        self.user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"

        self.current_history = [{"role": "system", "content": self.system_prompt}]
        self.current_code = ""

    def copy(self):
        """Return a fresh handler that shares only the Groq client."""
        return VoiceCodingHandler(self.groq_client)

    def extract_html_content(self, text):
        """Return the first complete HTML document found in ``text``.

        Args:
            text: Raw model output; may be empty or ``None``.

        Returns:
            ``None`` when ``text`` is empty, the matched HTML document when
            one is present, otherwise ``text`` unchanged.
        """
        if not text:
            return None
        match = self._HTML_DOCUMENT_RE.search(text)
        return match.group(0) if match else text  # full text if no HTML found

    async def start_up(self):
        """Mark the handler as active so that ``receive`` accepts frames."""
        self.is_active = True
        print("✅ Voice Coding Handler started")

    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
        """Accept one audio frame and start processing it in the background.

        Frames are ignored while the handler is inactive or while a previous
        request is still being processed; otherwise every frame would start
        its own model request.

        Args:
            frame: ``(sample_rate, samples)`` pair.
        """
        if not self.is_active:
            return

        if self._current_task is not None and not self._current_task.done():
            return  # a request is already in flight; drop this frame

        sample_rate, array = frame
        array = array.squeeze()

        # Process in the background so that receive() returns immediately.
        self._current_task = asyncio.create_task(
            self._process_audio(array, sample_rate)
        )

    async def _process_audio(self, audio_data: np.ndarray, sample_rate: int):
        """Transcribe the audio and, if a request was heard, generate code."""
        try:
            print("🎤 Processing audio for voice coding...")

            # Temporary: a mock stands in for real speech recognition
            # (VOSK/Whisper integration is still to be done).
            transcription = await self._mock_transcribe_audio()

            if transcription:
                print(f"🎯 Received request: {transcription}")

                # Tell the consumer that generation has started. The history
                # is copied so later appends do not alter this payload.
                await self.output_queue.put(AdditionalOutputs({
                    "type": "loading",
                    "message": "🦙 Llama đang code...",
                    "history": list(self.current_history),
                    "code": self.current_code
                }))

                await self._generate_code(transcription)

        except Exception as e:
            print(f"❌ Lỗi xử lý audio: {e}")

    async def _mock_transcribe_audio(self) -> str:
        """Return a fixed request; placeholder for a real ASR integration."""
        return "Tạo trang web hello world với màu nền xanh và chữ màu trắng"

    async def _generate_code(self, user_message: str):
        """Ask the model for code and publish the result on ``output_queue``.

        On success the user/assistant exchange is appended to
        ``current_history`` and ``current_code`` is replaced. On failure the
        state is left untouched and an ``"error"`` payload is published.

        Args:
            user_message: The (transcribed) request text.
        """
        try:
            user_entry = {
                "role": "user",
                "content": self.user_prompt.format(
                    user_message=user_message,
                    code=self.current_code,
                ),
            }
            # Build the request from a copy so a failed call leaves no
            # dangling user message in the stored history.
            messages = [*self.current_history, user_entry]

            print("🦙 Generating code with Llama...")
            # The Groq client call is synchronous; run it in a worker thread
            # so the event loop is not blocked while waiting for the reply.
            response = await asyncio.to_thread(
                self.groq_client.chat.completions.create,
                model="llama-3.1-8b-instant",  # use an available model
                messages=messages,
                temperature=0.7,
                max_tokens=1024,
                top_p=0.9,
                stream=False,
            )

            output = response.choices[0].message.content
            if not output:
                # An empty reply would otherwise store None as both the
                # assistant message and the current code.
                raise ValueError("Model returned an empty response")
            print("✅ Code generated successfully")

            html_code = self.extract_html_content(output)

            # Commit the exchange only after everything succeeded.
            self.current_history.append(user_entry)
            self.current_history.append({"role": "assistant", "content": output})
            self.current_code = html_code

            await self.output_queue.put(AdditionalOutputs({
                "type": "code_generated",
                "history": list(self.current_history),
                "code": html_code,
                "message": "✅ Code đã được generate!"
            }))

        except Exception as e:
            print(f"❌ Lỗi generate code: {e}")
            await self.output_queue.put(AdditionalOutputs({
                "type": "error",
                "message": f"❌ Lỗi: {str(e)}",
                "history": list(self.current_history),
                "code": self.current_code
            }))

    async def emit(self):
        """Return the next queued output, or ``None`` if waiting fails."""
        try:
            return await wait_for_item(self.output_queue)
        except Exception as e:
            print(f"❌ Lỗi emit: {e}")
            return None

    async def shutdown(self):
        """Deactivate the handler and cancel any request still in flight."""
        self.is_active = False
        if self._current_task is not None and not self._current_task.done():
            self._current_task.cancel()
        self._current_task = None
        print("🛑 Voice Coding Handler stopped")
class VoiceCodingService:
    """Voice-coding service built on FastRTC.

    Holds the Groq client and WebRTC configuration, builds the audio
    ``Stream``, and renders generated code as HTML for a preview pane.
    """

    # Exact value returned by extract_html_content() for empty input.
    _NO_CODE_PLACEHOLDER = "<!-- No code generated -->"
    # Prefix of the (already escaped) markup that extract_html_content()
    # returns when the text contains no HTML document.
    _FALLBACK_MARKER = "<!-- Generated Code -->"
    # Substrings whose presence makes display_in_sandbox() treat the code as
    # an HTML page (compared against the lower-cased code).
    _HTML_TAG_HINTS = ("<html", "<!doctype", "<body", "<head")
    # Matches a complete HTML document, with or without a leading doctype,
    # regardless of letter case.
    _HTML_DOCUMENT_RE = re.compile(
        r"(?:<!DOCTYPE html>|<html\b).*?</html>", re.DOTALL | re.IGNORECASE
    )

    def __init__(self, groq_client: "Groq"):
        """Store the client, fetch TURN credentials and set up HTML templates.

        Args:
            groq_client: Client passed on to every ``VoiceCodingHandler``.
        """
        self.groq_client = groq_client

        # Try the free Cloudflare TURN service; fall back to None (no TURN
        # configuration, intended for local development) on any failure.
        # NOTE(review): credentials are fetched once here; if they are
        # time-limited, long-running processes may need to refresh them —
        # confirm against the FastRTC documentation.
        try:
            self.rtc_configuration = asyncio.run(get_cloudflare_turn_credentials_async())
            print("✅ Using Cloudflare TURN servers")
        except Exception as e:
            print(f"⚠️ Cannot get TURN credentials, using None: {e}")
            self.rtc_configuration = None  # expected to work on a local network

        # HTML templates
        self.sandbox_html = """
        <div style="text-align: center; padding: 20px; border: 2px dashed #ccc; border-radius: 10px;">
            <h3>🎮 Sandbox Preview</h3>
            <p>Code sẽ được hiển thị ở đây sau khi generate</p>
            <p><small>Chức năng voice đang được phát triển. Vui lòng sử dụng text input.</small></p>
        </div>
        """

        self.loading_html = """
        <div style="text-align: center; padding: 20px;">
            <div class="spinner"></div>
            <p>🦙 Llama đang code...</p>
        </div>
        <style>
        .spinner {
            border: 4px solid #f3f3f3;
            border-top: 4px solid #3498db;
            border-radius: 50%;
            width: 40px;
            height: 40px;
            animation: spin 2s linear infinite;
            margin: 0 auto;
        }
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
        </style>
        """

    def extract_html_content(self, text):
        """Return the first complete HTML document found in ``text``.

        Args:
            text: Raw model output; may be empty or ``None``.

        Returns:
            A "no code" HTML comment when ``text`` is empty, the matched HTML
            document when one is present, otherwise ``text`` HTML-escaped and
            wrapped in a ``<pre>`` element behind a marker comment.
        """
        if not text:
            return self._NO_CODE_PLACEHOLDER
        match = self._HTML_DOCUMENT_RE.search(text)
        if match:
            return match.group(0)
        # Escape so that stray "<" / "&" in plain text cannot act as markup.
        return f"{self._FALLBACK_MARKER}\n<pre>{html.escape(text)}</pre>"

    def create_stream(self):
        """Build the FastRTC audio ``Stream`` backed by a new handler."""
        return Stream(
            VoiceCodingHandler(self.groq_client),
            modality="audio",
            mode="send-receive",
            rtc_configuration=self.rtc_configuration,
            concurrency_limit=3,
            time_limit=120,
        )

    def display_in_sandbox(self, code):
        """Render ``code`` as HTML suitable for a preview pane.

        Args:
            code: Generated code or text; may be empty or ``None``.

        Returns:
            * the placeholder ``sandbox_html`` when there is no code (empty
              input or the exact "no code" comment),
            * an ``<iframe>`` with the page embedded as a base64 ``data:``
              URI when the code looks like an HTML page,
            * otherwise the content shown as text inside a ``<pre>`` block,
            * a red error ``<div>`` if rendering raises.
        """
        # Compare against the exact sentinel: a substring test would also
        # hide real pages that merely contain the words "No code".
        if not code or code.strip() == self._NO_CODE_PLACEHOLDER:
            return self.sandbox_html

        try:
            lowered = code.lower()
            if any(tag in lowered for tag in self._HTML_TAG_HINTS):
                encoded_html = base64.b64encode(code.encode("utf-8")).decode("utf-8")
                data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
                return f'<iframe src="{data_uri}" width="100%" height="600px" style="border: 1px solid #ccc; border-radius: 5px;"></iframe>'

            # Not an HTML page: show it as text. This markup is inserted into
            # the host page (not the iframe), so raw text must be escaped.
            # Output of extract_html_content()'s fallback is already escaped
            # and is passed through unchanged.
            if code.startswith(self._FALLBACK_MARKER):
                content = code
            else:
                content = html.escape(code)
            return f'<div style="padding: 20px; background: #f5f5f5; border-radius: 5px;"><h4>Generated Content:</h4><pre style="white-space: pre-wrap;">{content}</pre></div>'
        except Exception as e:
            print(f"❌ Lỗi display sandbox: {e}")
            return f'<div style="color: red; padding: 20px;">Lỗi hiển thị sandbox: {html.escape(str(e))}</div>'