Update document_generator.py
document_generator.py  CHANGED  (+216 -23)
@@ -81,6 +81,156 @@ FORMAT YOUR OUTPUT AS A TEMPLATE ENCLOSED IN <response></response> tags
 DOCUMENT_TEMPLATE_SECTION_PROMPT_USER = """<prompt>Output the content for the section "{section_or_subsection_title}" formatted as markdown. Follow this instruction: {content_instruction}</prompt>"""
 
 
+# File: llm_observability.py
+
+import sqlite3
+import json
+from datetime import datetime
+from typing import Dict, Any, List, Optional
+
+class LLMObservabilityManager:
+    def __init__(self, db_path: str = "llm_observability.db"):
+        self.db_path = db_path
+        self.create_table()
+
+    def create_table(self):
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                CREATE TABLE IF NOT EXISTS llm_observations (
+                    id TEXT PRIMARY KEY,
+                    conversation_id TEXT,
+                    created_at DATETIME,
+                    status TEXT,
+                    request TEXT,
+                    response TEXT,
+                    model TEXT,
+                    total_tokens INTEGER,
+                    prompt_tokens INTEGER,
+                    completion_tokens INTEGER,
+                    latency FLOAT,
+                    user TEXT
+                )
+            ''')
+
+    def insert_observation(self, response: Dict[str, Any], conversation_id: str, status: str, request: str, latency: float, user: str):
+        created_at = datetime.fromtimestamp(response['created'])
+
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+                INSERT INTO llm_observations
+                (id, conversation_id, created_at, status, request, response, model, total_tokens, prompt_tokens, completion_tokens, latency, user)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+            ''', (
+                response['id'],
+                conversation_id,
+                created_at,
+                status,
+                request,
+                json.dumps(response['choices'][0]['message']),
+                response['model'],
+                response['usage']['total_tokens'],
+                response['usage']['prompt_tokens'],
+                response['usage']['completion_tokens'],
+                latency,
+                user
+            ))
+
+    def get_observations(self, conversation_id: Optional[str] = None) -> List[Dict[str, Any]]:
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+            if conversation_id:
+                cursor.execute('SELECT * FROM llm_observations WHERE conversation_id = ? ORDER BY created_at', (conversation_id,))
+            else:
+                cursor.execute('SELECT * FROM llm_observations ORDER BY created_at')
+            rows = cursor.fetchall()
+
+            column_names = [description[0] for description in cursor.description]
+            return [dict(zip(column_names, row)) for row in rows]
+
+    def get_all_observations(self) -> List[Dict[str, Any]]:
+        return self.get_observations()
+
+
+# aiclient.py
+
+class AIClient:
+    def __init__(self):
+        self.client = OpenAI(
+            base_url="https://openrouter.ai/api/v1",
+            api_key="sk-or-v1-" + os.environ['OPENROUTER_API_KEY']
+        )
+        self.observability_manager = LLMObservabilityManager()
+
+    @log_execution
+    def generate_response(
+        self,
+        messages: List[Dict[str, str]],
+        model: str = "openai/gpt-4o-mini",
+        max_tokens: int = 32000,
+        conversation_id: Optional[str] = None,
+        user: str = "anonymous"
+    ) -> Optional[str]:
+        if not messages:
+            return None
+
+        start_time = time.time()
+        response = self.client.chat.completions.create(
+            model=model,
+            messages=messages,
+            max_tokens=max_tokens,
+            stream=False
+        )
+        end_time = time.time()
+        latency = end_time - start_time
+
+        # Log the observation
+        self.observability_manager.insert_observation(
+            response=response.dict(),
+            conversation_id=conversation_id or "default",
+            status="success",
+            request=json.dumps(messages),
+            latency=latency,
+            user=user
+        )
+
+        return response.choices[0].message.content
+
+    @log_execution
+    def generate_vision_response(
+        self,
+        messages: List[Dict[str, Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]]],
+        model: str = "google/gemini-flash-1.5-8b",
+        max_tokens: int = 32000,
+        conversation_id: Optional[str] = None,
+        user: str = "anonymous"
+    ) -> Optional[str]:
+        if not messages:
+            return None
+
+        start_time = time.time()
+        response = self.client.chat.completions.create(
+            model=model,
+            messages=messages,
+            max_tokens=max_tokens,
+            stream=False
+        )
+        end_time = time.time()
+        latency = end_time - start_time
+
+        # Log the observation
+        self.observability_manager.insert_observation(
+            response=response.dict(),
+            conversation_id=conversation_id or "default",
+            status="success",
+            request=json.dumps(messages),
+            latency=latency,
+            user=user
+        )
+
+        return response.choices[0].message.content
+
 
 # File: app.py
 import os
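Usage note (illustrative, not part of the diff): the sketch below shows how the pieces added above fit together, assuming document_generator.py is importable as a module, that the OPENROUTER_API_KEY environment variable is set, and that the made-up conversation and user names are placeholders. AIClient.generate_response records every call through LLMObservabilityManager, which can then be queried per conversation.

    # Hypothetical round-trip: make one call, then read the logged observation back.
    from document_generator import AIClient, LLMObservabilityManager

    client = AIClient()  # __init__ also creates llm_observability.db if it is missing
    client.generate_response(
        messages=[{"role": "user", "content": "Say hello"}],
        conversation_id="demo-1",
        user="alice",
    )

    manager = LLMObservabilityManager()
    for obs in manager.get_observations(conversation_id="demo-1"):
        print(obs["model"], obs["total_tokens"], f"{obs['latency']:.2f}s")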
@@ -138,30 +288,17 @@ class DatabaseManager:
         """
         cur.execute(insert_query, (user_id, user_query, response))
 
-
-
-
-
-
-        )
-
-
-
-        self,
-        messages: List[Dict[str, str]],
-        model: str = "openai/gpt-4o-mini",
-        max_tokens: int = 32000
-    ) -> Optional[str]:
-        if not messages:
-            return None
-        response = self.client.chat.completions.create(
-            model=model,
-            messages=messages,
-            max_tokens=max_tokens,
-            stream=False
-        )
-        return response.choices[0].message.content
+def log_execution(func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        print(f"{func.__name__} executed in {end_time - start_time:.2f} seconds")
+        return result
+    return wrapper
 
+
 class DocumentGenerator:
     def __init__(self, ai_client: AIClient):
         self.ai_client = ai_client
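Side note on log_execution (observation, not a change): the decorator relies on time and functools.wraps being imported near the top of the file, and since @log_execution is applied inside the AIClient class body added earlier in the file, it must already be defined when that class body executes if the module runs top to bottom. A minimal self-contained illustration of its behavior, with the decorated function invented for the example:

    import time
    from functools import wraps

    def log_execution(func):  # copied from the diff above so the example runs standalone
        @wraps(func)
        def wrapper(*args, **kwargs):
            start_time = time.time()
            result = func(*args, **kwargs)
            end_time = time.time()
            print(f"{func.__name__} executed in {end_time - start_time:.2f} seconds")
            return result
        return wrapper

    @log_execution
    def slow_add(a, b):  # made-up function, purely for demonstration
        time.sleep(0.1)
        return a + b

    slow_add(1, 2)  # prints something like: slow_add executed in 0.10 seconds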
@@ -395,6 +532,62 @@ async def generate_markdown_document_stream_endpoint(request: MarkdownDocumentRe
 
     return StreamingResponse(stream_generator(), media_type="application/octet-stream")
 
+
+## OBSERVABILITY
+from uuid import uuid4
+import csv
+from io import StringIO
+
+def create_csv_response(observations: List[Dict]) -> StreamingResponse:
+    def iter_csv(data):
+        output = StringIO()
+        writer = csv.DictWriter(output, fieldnames=data[0].keys() if data else [])
+        writer.writeheader()
+        for row in data:
+            writer.writerow(row)
+        output.seek(0)
+        yield output.read()
+
+    headers = {
+        'Content-Disposition': 'attachment; filename="observations.csv"'
+    }
+    return StreamingResponse(iter_csv(observations), media_type="text/csv", headers=headers)
+
+
+@router.get("/last-observations/{limit}")
+async def get_last_observations(limit: int = 10, format: str = "json"):
+    observability_manager = LLMObservabilityManager()
+
+    try:
+        # Get all observations, sorted by created_at in descending order
+        all_observations = observability_manager.get_observations()
+        all_observations.sort(key=lambda x: x['created_at'], reverse=True)
+
+        # Get the last conversation_id
+        if all_observations:
+            last_conversation_id = all_observations[0]['conversation_id']
+
+            # Filter observations for the last conversation
+            last_conversation_observations = [
+                obs for obs in all_observations
+                if obs['conversation_id'] == last_conversation_id
+            ][:limit]
+
+            if format.lower() == "csv":
+                return create_csv_response(last_conversation_observations)
+            else:
+                return ObservationResponse(observations=last_conversation_observations)
+        else:
+            if format.lower() == "csv":
+                return create_csv_response([])
+            else:
+                return ObservationResponse(observations=[])
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Failed to retrieve observations: {str(e)}")
+
+
+
+
 ###########################################
 class MarkdownDocumentResponse(BaseModel):
     markdown_document: str
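Illustrative client call for the new endpoint (assumptions: the router is mounted at the application root, the server listens on localhost:8000, and the requests package is available; none of this is specified in the diff). With format=json the endpoint returns an ObservationResponse instead of a CSV attachment.

    import requests

    # GET /last-observations/{limit}?format=csv returns up to `limit` observations
    # from the most recent conversation as a CSV attachment.
    resp = requests.get(
        "http://localhost:8000/last-observations/10",
        params={"format": "csv"},
        timeout=30,
    )
    resp.raise_for_status()
    with open("observations.csv", "wb") as f:
        f.write(resp.content)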