AC2513 committed on
Commit
ceb2ea0
·
1 Parent(s): 7f350a5
Files changed (2) hide show
  1. app.py +2 -198
  2. utils.py +213 -0
app.py CHANGED
@@ -8,15 +8,12 @@ from transformers import (
8
  Gemma3nForConditionalGeneration,
9
  )
10
  import spaces
11
- import tempfile
12
  from threading import Thread
13
  import gradio as gr
14
  import os
15
  from dotenv import load_dotenv, find_dotenv
16
- import cv2
17
  from loguru import logger
18
- from PIL import Image
19
- import fitz
20
 
21
  dotenv_path = find_dotenv()
22
 
@@ -25,9 +22,6 @@ load_dotenv(dotenv_path)
25
  model_12_id = os.getenv("MODEL_12_ID", "google/gemma-3-12b-it")
26
  model_3n_id = os.getenv("MODEL_3N_ID", "google/gemma-3n-E4B-it")
27
 
28
- MAX_VIDEO_SIZE = 100 * 1024 * 1024 # 100 MB
29
- MAX_IMAGE_SIZE = 10 * 1024 * 1024 # 10 MB
30
-
31
  input_processor = Gemma3Processor.from_pretrained(model_12_id)
32
 
33
  model_12 = Gemma3ForConditionalGeneration.from_pretrained(
@@ -45,164 +39,6 @@ model_3n = Gemma3nForConditionalGeneration.from_pretrained(
45
  )
46
 
47
 
48
- def check_file_size(file_path: str) -> bool:
49
- if not os.path.exists(file_path):
50
- raise ValueError(f"File not found: {file_path}")
51
-
52
- file_size = os.path.getsize(file_path)
53
-
54
- if file_path.lower().endswith((".mp4", ".mov")):
55
- if file_size > MAX_VIDEO_SIZE:
56
- raise ValueError(f"Video file too large: {file_size / (1024*1024):.1f}MB. Maximum allowed: {MAX_VIDEO_SIZE / (1024*1024):.0f}MB")
57
- else:
58
- if file_size > MAX_IMAGE_SIZE:
59
- raise ValueError(f"Image file too large: {file_size / (1024*1024):.1f}MB. Maximum allowed: {MAX_IMAGE_SIZE / (1024*1024):.0f}MB")
60
-
61
- return True
62
-
63
-
64
- def get_frames(video_path: str, max_images: int) -> list[tuple[Image.Image, float]]:
65
- check_file_size(video_path)
66
-
67
- frames: list[tuple[Image.Image, float]] = []
68
- capture = cv2.VideoCapture(video_path)
69
- if not capture.isOpened():
70
- raise ValueError(f"Could not open video file: {video_path}")
71
-
72
- fps = capture.get(cv2.CAP_PROP_FPS)
73
- total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
74
-
75
- frame_interval = max(total_frames // max_images, 1)
76
- max_position = min(total_frames, max_images * frame_interval)
77
- i = 0
78
-
79
- while i < max_position and len(frames) < max_images:
80
- capture.set(cv2.CAP_PROP_POS_FRAMES, i)
81
- success, image = capture.read()
82
- if success:
83
- image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
84
- pil_image = Image.fromarray(image)
85
- timestamp = round(i / fps, 2)
86
- frames.append((pil_image, timestamp))
87
-
88
- i += frame_interval
89
-
90
- capture.release()
91
- return frames
92
-
93
-
94
- def process_video(video_path: str, max_images: int) -> list[dict]:
95
- result_content = []
96
- frames = get_frames(video_path, max_images)
97
- for frame in frames:
98
- image, timestamp = frame
99
- with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
100
- image.save(temp_file.name)
101
- result_content.append({"type": "text", "text": f"Frame {timestamp}:"})
102
- result_content.append({"type": "image", "url": temp_file.name})
103
- logger.debug(
104
- f"Processed {len(frames)} frames from video {video_path} with frames {result_content}"
105
- )
106
- return result_content
107
-
108
-
109
- def extract_pdf_text(pdf_path: str) -> str:
110
- check_file_size(pdf_path)
111
-
112
- try:
113
- doc = fitz.open(pdf_path)
114
- text_content = []
115
-
116
- for page_num in range(len(doc)):
117
- page = doc.load_page(page_num)
118
- text = page.get_text()
119
- if text.strip(): # Only add non-empty pages
120
- text_content.append(f"Page {page_num + 1}:\n{text}")
121
-
122
- doc.close()
123
-
124
- if not text_content:
125
- return "No text content found in the PDF."
126
-
127
- return "\n\n".join(text_content)
128
-
129
- except Exception as e:
130
- logger.error(f"Error extracting text from PDF {pdf_path}: {e}")
131
- raise ValueError(f"Failed to extract text from PDF: {str(e)}")
132
-
133
-
134
- def process_user_input(message: dict, max_images: int) -> list[dict]:
135
- if not message["files"]:
136
- return [{"type": "text", "text": message["text"]}]
137
-
138
- result_content = [{"type": "text", "text": message["text"]}]
139
-
140
- for file_path in message["files"]:
141
- try:
142
- check_file_size(file_path)
143
- except ValueError as e:
144
- logger.error(f"File size check failed: {e}")
145
- result_content.append({"type": "text", "text": f"Error: {str(e)}"})
146
- continue
147
-
148
- if file_path.endswith((".mp4", ".mov")):
149
- try:
150
- result_content = [*result_content, *process_video(file_path, max_images)]
151
- except Exception as e:
152
- logger.error(f"Video processing failed: {e}")
153
- result_content.append({"type": "text", "text": f"Error processing video: {str(e)}"})
154
- elif file_path.lower().endswith(".pdf"):
155
- try:
156
- logger.info(f"Processing PDF file: {file_path}")
157
- pdf_text = extract_pdf_text(file_path)
158
- logger.debug(f"PDF text extracted successfully, length: {len(pdf_text)} characters")
159
- result_content.append({"type": "text", "text": f"PDF Content:\n{pdf_text}"})
160
- except ValueError as ve:
161
- logger.error(f"PDF validation failed: {ve}")
162
- result_content.append({"type": "text", "text": f"Error processing PDF: {str(ve)}"})
163
- except Exception as e:
164
- logger.error(f"PDF processing failed: {e}")
165
- result_content.append({"type": "text", "text": f"Error processing PDF: {str(e)}"})
166
- else:
167
- result_content = [*result_content, {"type": "image", "url": file_path}]
168
-
169
- return result_content
170
-
171
- def process_history(history: list[dict]) -> list[dict]:
172
- messages = []
173
- content_buffer = []
174
-
175
- for item in history:
176
- if item["role"] == "assistant":
177
- if content_buffer:
178
- messages.append({"role": "user", "content": content_buffer})
179
- content_buffer = []
180
-
181
- messages.append(
182
- {
183
- "role": "assistant",
184
- "content": [{"type": "text", "text": item["content"]}],
185
- }
186
- )
187
- else:
188
- content = item["content"]
189
- if isinstance(content, str):
190
- content_buffer.append({"type": "text", "text": content})
191
- elif isinstance(content, tuple) and len(content) > 0:
192
- file_path = content[0]
193
- if file_path.endswith((".mp4", ".mov")):
194
- content_buffer.append({"type": "text", "text": "[Video uploaded previously]"})
195
- elif file_path.lower().endswith(".pdf"):
196
- content_buffer.append({"type": "text", "text": "[PDF uploaded previously]"})
197
- else:
198
- content_buffer.append({"type": "image", "url": file_path})
199
-
200
- if content_buffer:
201
- messages.append({"role": "user", "content": content_buffer})
202
-
203
- return messages
204
-
205
-
206
  @spaces.GPU(duration=120)
207
  def run(
208
  message: dict,
@@ -219,19 +55,7 @@ def run(
219
  ) -> Iterator[str]:
220
 
221
  # Define preset system prompts
222
- preset_prompts = {
223
- "General Assistant": "You are a helpful AI assistant capable of analyzing images, videos, and PDF documents. Provide clear, accurate, and helpful responses to user queries.",
224
-
225
- "Document Analyzer": "You are a specialized document analysis assistant. Focus on extracting key information, summarizing content, and answering specific questions about uploaded documents. For PDFs, provide structured analysis including main topics, key points, and relevant details. For images containing text, perform OCR-like analysis.",
226
-
227
- "Visual Content Expert": "You are an expert in visual content analysis. When analyzing images, provide detailed descriptions of visual elements, composition, colors, objects, people, and scenes. For videos, describe the sequence of events, movements, and changes between frames. Identify artistic techniques, styles, and visual storytelling elements.",
228
-
229
- "Educational Tutor": "You are a patient and encouraging educational tutor. Break down complex concepts into simple, understandable explanations. When analyzing educational materials (images, videos, or documents), focus on learning objectives, key concepts, and provide additional context or examples to enhance understanding.",
230
-
231
- "Technical Reviewer": "You are a technical expert specializing in analyzing technical documents, diagrams, code screenshots, and instructional videos. Provide detailed technical insights, identify potential issues, suggest improvements, and explain technical concepts with precision and accuracy.",
232
-
233
- "Creative Storyteller": "You are a creative storyteller who brings visual content to life through engaging narratives. When analyzing images or videos, create compelling stories, describe scenes with rich detail, and help users explore the creative and emotional aspects of visual content.",
234
- }
235
 
236
  # Determine which system prompt to use
237
  if system_prompt_preset == "Custom Prompt":
@@ -466,26 +290,6 @@ demo = gr.ChatInterface(
466
  stop_btn=False,
467
  )
468
 
469
- # Add JavaScript to update custom prompt when preset changes
470
- def update_custom_prompt(preset_choice):
471
- preset_prompts = {
472
- "General Assistant": "You are a helpful AI assistant capable of analyzing images, videos, and PDF documents. Provide clear, accurate, and helpful responses to user queries.",
473
-
474
- "Document Analyzer": "You are a specialized document analysis assistant. Focus on extracting key information, summarizing content, and answering specific questions about uploaded documents. For PDFs, provide structured analysis including main topics, key points, and relevant details. For images containing text, perform OCR-like analysis.",
475
-
476
- "Visual Content Expert": "You are an expert in visual content analysis. When analyzing images, provide detailed descriptions of visual elements, composition, colors, objects, people, and scenes. For videos, describe the sequence of events, movements, and changes between frames. Identify artistic techniques, styles, and visual storytelling elements.",
477
-
478
- "Educational Tutor": "You are a patient and encouraging educational tutor. Break down complex concepts into simple, understandable explanations. When analyzing educational materials (images, videos, or documents), focus on learning objectives, key concepts, and provide additional context or examples to enhance understanding.",
479
-
480
- "Technical Reviewer": "You are a technical expert specializing in analyzing technical documents, diagrams, code screenshots, and instructional videos. Provide detailed technical insights, identify potential issues, suggest improvements, and explain technical concepts with precision and accuracy.",
481
-
482
- "Creative Storyteller": "You are a creative storyteller who brings visual content to life through engaging narratives. When analyzing images or videos, create compelling stories, describe scenes with rich detail, and help users explore the creative and emotional aspects of visual content.",
483
-
484
- "Custom Prompt": ""
485
- }
486
-
487
- return preset_prompts.get(preset_choice, "")
488
-
489
  # Connect the dropdown to update the textbox
490
  with demo:
491
  preset_dropdown = demo.additional_inputs[0]
 
8
  Gemma3nForConditionalGeneration,
9
  )
10
  import spaces
 
11
  from threading import Thread
12
  import gradio as gr
13
  import os
14
  from dotenv import load_dotenv, find_dotenv
 
15
  from loguru import logger
16
+ from utils import *
 
17
 
18
  dotenv_path = find_dotenv()
19
 
 
22
  model_12_id = os.getenv("MODEL_12_ID", "google/gemma-3-12b-it")
23
  model_3n_id = os.getenv("MODEL_3N_ID", "google/gemma-3n-E4B-it")
24
 
 
 
 
25
  input_processor = Gemma3Processor.from_pretrained(model_12_id)
26
 
27
  model_12 = Gemma3ForConditionalGeneration.from_pretrained(
 
39
  )
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  @spaces.GPU(duration=120)
43
  def run(
44
  message: dict,
 
55
  ) -> Iterator[str]:
56
 
57
  # Define preset system prompts
58
+ preset_prompts = get_preset_prompts()
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  # Determine which system prompt to use
61
  if system_prompt_preset == "Custom Prompt":
 
290
  stop_btn=False,
291
  )
292
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
  # Connect the dropdown to update the textbox
294
  with demo:
295
  preset_dropdown = demo.additional_inputs[0]
utils.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import fitz
4
+ import tempfile
5
+ from PIL import Image
6
+ from loguru import logger
7
+
8
+ # Constants
9
+ MAX_VIDEO_SIZE = 100 * 1024 * 1024 # 100 MB
10
+ MAX_IMAGE_SIZE = 10 * 1024 * 1024 # 10 MB
11
+
12
+
13
def check_file_size(file_path: str) -> bool:
    """Check that ``file_path`` is an existing file within the size limits.

    Paths ending in ``.mp4`` or ``.mov`` (case-insensitive) are compared
    against ``MAX_VIDEO_SIZE``; every other path is compared against
    ``MAX_IMAGE_SIZE``.

    Args:
        file_path: Path of the file to validate.

    Returns:
        ``True`` when the file exists and is within its limit.

    Raises:
        ValueError: If the path is not an existing regular file, or the file
            is larger than the limit for its category.
    """
    # isfile rather than exists: a directory also "exists" and reports a
    # size, but it is never a valid upload.
    if not os.path.isfile(file_path):
        raise ValueError(f"File not found: {file_path}")

    file_size = os.path.getsize(file_path)
    bytes_per_mb = 1024 * 1024

    if file_path.lower().endswith((".mp4", ".mov")):
        label, limit = "Video", MAX_VIDEO_SIZE
    else:
        label, limit = "Image", MAX_IMAGE_SIZE

    if file_size > limit:
        raise ValueError(
            f"{label} file too large: {file_size / bytes_per_mb:.1f}MB. "
            f"Maximum allowed: {limit / bytes_per_mb:.0f}MB"
        )

    return True
28
+
29
+
30
def get_frames(video_path: str, max_images: int) -> list[tuple[Image.Image, float]]:
    """Sample up to ``max_images`` frames from a video file.

    Frames are read at positions ``0, k, 2k, ...`` where ``k`` is
    ``total_frames // max_images`` (at least 1). Each frame is converted from
    OpenCV's BGR channel order to RGB and wrapped in a PIL image.

    Args:
        video_path: Path of the video to read.
        max_images: Maximum number of frames to return. Values ``<= 0``
            yield an empty list.

    Returns:
        A list of ``(image, timestamp)`` pairs, where ``timestamp`` is the
        frame index divided by the reported FPS, rounded to two decimals.
        If the container reports no usable FPS the timestamp is ``0.0``.

    Raises:
        ValueError: If the size check fails or the video cannot be opened.
    """
    check_file_size(video_path)

    # Guard before dividing by max_images below.
    if max_images <= 0:
        return []

    frames: list[tuple[Image.Image, float]] = []
    capture = cv2.VideoCapture(video_path)
    try:
        if not capture.isOpened():
            raise ValueError(f"Could not open video file: {video_path}")

        fps = capture.get(cv2.CAP_PROP_FPS)
        total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        if fps <= 0:
            logger.warning(
                f"Video {video_path} reports no FPS; frame timestamps default to 0.0"
            )

        frame_interval = max(total_frames // max_images, 1)
        max_position = min(total_frames, max_images * frame_interval)

        # The range yields at most max_images positions because
        # max_position never exceeds max_images * frame_interval.
        for position in range(0, max_position, frame_interval):
            capture.set(cv2.CAP_PROP_POS_FRAMES, position)
            success, image = capture.read()
            if not success:
                # Unreadable frame: skip it and try the next sample point.
                continue
            rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            timestamp = round(position / fps, 2) if fps > 0 else 0.0
            frames.append((Image.fromarray(rgb), timestamp))
    finally:
        # Always free the decoder handle, including on error paths.
        capture.release()

    return frames
59
+
60
+
61
def process_video(video_path: str, max_images: int) -> list[dict]:
    """Turn a video into alternating text/image content entries.

    Each frame returned by ``get_frames`` is written to its own ``.png``
    temporary file and contributes two entries: a ``"Frame <timestamp>:"``
    text entry followed by an image entry whose ``url`` is the temp file path.

    Args:
        video_path: Path of the video to sample.
        max_images: Maximum number of frames to extract.

    Returns:
        The list of content dictionaries, two per extracted frame.
    """
    sampled = get_frames(video_path, max_images)
    content: list[dict] = []

    for pil_frame, seconds in sampled:
        # delete=False: only the path is handed on, so the file has to
        # outlive this function.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as png_file:
            png_path = png_file.name
            pil_frame.save(png_path)
        content.extend(
            (
                {"type": "text", "text": f"Frame {seconds}:"},
                {"type": "image", "url": png_path},
            )
        )

    logger.debug(
        f"Processed {len(sampled)} frames from video {video_path} with frames {content}"
    )
    return content
75
+
76
+
77
def extract_pdf_text(pdf_path: str) -> str:
    """Extract the text of every non-empty page of a PDF.

    Args:
        pdf_path: Path of the PDF file.

    Returns:
        The page texts joined by blank lines, each prefixed with
        ``"Page <n>:"`` (1-based), or a fixed notice string when no page
        contains text.

    Raises:
        ValueError: If the size check fails, or if opening/reading the PDF
            raises; the original exception is attached as ``__cause__``.
    """
    check_file_size(pdf_path)

    try:
        doc = fitz.open(pdf_path)
        try:
            text_content = []
            for page_num in range(len(doc)):
                text = doc.load_page(page_num).get_text()
                if text.strip():  # Only add non-empty pages
                    text_content.append(f"Page {page_num + 1}:\n{text}")
        finally:
            # Close the document even when a page fails to load.
            doc.close()
    except Exception as e:
        logger.error(f"Error extracting text from PDF {pdf_path}: {e}")
        raise ValueError(f"Failed to extract text from PDF: {str(e)}") from e

    if not text_content:
        return "No text content found in the PDF."

    return "\n\n".join(text_content)
101
+
102
+
103
def process_user_input(message: dict, max_images: int) -> list[dict]:
    """Convert a user message and its attached files into model content.

    The result always starts with a text entry holding ``message["text"]``.
    Each path in ``message["files"]`` is then size-checked and dispatched on
    its extension (case-insensitive): ``.mp4``/``.mov`` go through
    ``process_video``, ``.pdf`` through ``extract_pdf_text``, and anything
    else is passed along as an image entry. Failures are logged and reported
    as ``"Error..."`` text entries instead of being raised.

    Args:
        message: Dictionary with a ``"text"`` string and a ``"files"``
            collection of file paths (may be empty or falsy).
        max_images: Maximum number of frames to extract per video.

    Returns:
        The list of content dictionaries for the model.
    """
    result_content = [{"type": "text", "text": message["text"]}]

    if not message["files"]:
        return result_content

    for file_path in message["files"]:
        try:
            check_file_size(file_path)
        except ValueError as e:
            logger.error(f"File size check failed: {e}")
            result_content.append({"type": "text", "text": f"Error: {str(e)}"})
            continue

        # Lower-case once so every extension test agrees with
        # check_file_size, which is case-insensitive as well.
        lowered = file_path.lower()

        if lowered.endswith((".mp4", ".mov")):
            try:
                result_content.extend(process_video(file_path, max_images))
            except Exception as e:
                logger.error(f"Video processing failed: {e}")
                result_content.append({"type": "text", "text": f"Error processing video: {str(e)}"})
        elif lowered.endswith(".pdf"):
            try:
                logger.info(f"Processing PDF file: {file_path}")
                pdf_text = extract_pdf_text(file_path)
                logger.debug(f"PDF text extracted successfully, length: {len(pdf_text)} characters")
                result_content.append({"type": "text", "text": f"PDF Content:\n{pdf_text}"})
            except ValueError as ve:
                logger.error(f"PDF validation failed: {ve}")
                result_content.append({"type": "text", "text": f"Error processing PDF: {str(ve)}"})
            except Exception as e:
                logger.error(f"PDF processing failed: {e}")
                result_content.append({"type": "text", "text": f"Error processing PDF: {str(e)}"})
        else:
            result_content.append({"type": "image", "url": file_path})

    return result_content
140
+
141
+
142
def process_history(history: list[dict]) -> list[dict]:
    """Convert chat history into the message format expected by the model.

    Consecutive non-assistant items are merged into a single ``"user"``
    message. String content becomes a text entry. Tuple content is treated
    as a file reference whose first element is the path: videos and PDFs
    (matched case-insensitively by extension) are replaced by a short text
    placeholder, and any other file is passed along as an image entry.
    Content of any other type is ignored.

    Args:
        history: Items with a ``"role"`` and a ``"content"`` key.

    Returns:
        A list of ``{"role": ..., "content": [...]}`` messages.
    """
    messages = []
    content_buffer = []

    for item in history:
        if item["role"] == "assistant":
            # Flush the pending user entries before the assistant turn.
            if content_buffer:
                messages.append({"role": "user", "content": content_buffer})
                content_buffer = []

            messages.append(
                {
                    "role": "assistant",
                    "content": [{"type": "text", "text": item["content"]}],
                }
            )
            continue

        content = item["content"]
        if isinstance(content, str):
            content_buffer.append({"type": "text", "text": content})
        elif isinstance(content, tuple) and len(content) > 0:
            file_path = content[0]
            # Compare extensions case-insensitively so "clip.MP4" is not
            # mistaken for an image.
            lowered = file_path.lower()
            if lowered.endswith((".mp4", ".mov")):
                content_buffer.append({"type": "text", "text": "[Video uploaded previously]"})
            elif lowered.endswith(".pdf"):
                content_buffer.append({"type": "text", "text": "[PDF uploaded previously]"})
            else:
                content_buffer.append({"type": "image", "url": file_path})

    if content_buffer:
        messages.append({"role": "user", "content": content_buffer})

    return messages
176
+
177
+
178
def update_custom_prompt(preset_choice: str) -> str:
    """Return the system prompt text for the selected preset.

    Args:
        preset_choice: Name of the preset.

    Returns:
        The preset's prompt text, or an empty string for ``"Custom Prompt"``
        and for any name that is not a known preset.
    """
    # Single source of truth: look the preset up in get_preset_prompts()
    # instead of keeping a second copy of the prompt texts here.
    # "Custom Prompt" is not a key there, so it falls through to "".
    return get_preset_prompts().get(preset_choice, "")


def get_preset_prompts() -> dict[str, str]:
    """Return the dictionary of preset prompts for the main application.

    A new dictionary is built on every call, so callers may modify the
    result without affecting later calls.

    Returns:
        Mapping of preset name to system prompt text.
    """
    return {
        "General Assistant": "You are a helpful AI assistant capable of analyzing images, videos, and PDF documents. Provide clear, accurate, and helpful responses to user queries.",

        "Document Analyzer": "You are a specialized document analysis assistant. Focus on extracting key information, summarizing content, and answering specific questions about uploaded documents. For PDFs, provide structured analysis including main topics, key points, and relevant details. For images containing text, perform OCR-like analysis.",

        "Visual Content Expert": "You are an expert in visual content analysis. When analyzing images, provide detailed descriptions of visual elements, composition, colors, objects, people, and scenes. For videos, describe the sequence of events, movements, and changes between frames. Identify artistic techniques, styles, and visual storytelling elements.",

        "Educational Tutor": "You are a patient and encouraging educational tutor. Break down complex concepts into simple, understandable explanations. When analyzing educational materials (images, videos, or documents), focus on learning objectives, key concepts, and provide additional context or examples to enhance understanding.",

        "Technical Reviewer": "You are a technical expert specializing in analyzing technical documents, diagrams, code screenshots, and instructional videos. Provide detailed technical insights, identify potential issues, suggest improvements, and explain technical concepts with precision and accuracy.",

        "Creative Storyteller": "You are a creative storyteller who brings visual content to life through engaging narratives. When analyzing images or videos, create compelling stories, describe scenes with rich detail, and help users explore the creative and emotional aspects of visual content.",
    }