Update services/llm_service.py

services/llm_service.py  (+14 -12)  CHANGED
@@ -14,8 +14,8 @@ class LLMService:
         self.config = config.config
 
         self.anthropic_client = None
-        self.mistral_client = None
-        self.openai_async_client = None
+        self.mistral_client = None
+        self.openai_async_client = None
 
         self._initialize_clients()
 
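Note: _initialize_clients() itself is outside this diff. A minimal sketch of what it plausibly does, offered purely as an assumption for readers; the config key names, SDK choices, and guard logic below are hypothetical, not taken from the repo:

import anthropic
import openai
from mistralai.client import MistralClient  # pre-1.0 mistralai SDK, whose client exposes the synchronous .chat() used later in this diff

def _initialize_clients(self) -> None:
    """Create a client for every provider that has an API key configured; leave the rest as None."""
    if self.config.get("ANTHROPIC_API_KEY"):          # hypothetical key name
        self.anthropic_client = anthropic.Anthropic(api_key=self.config["ANTHROPIC_API_KEY"])
    if self.config.get("MISTRAL_API_KEY"):            # hypothetical key name
        self.mistral_client = MistralClient(api_key=self.config["MISTRAL_API_KEY"])
    if self.config.get("OPENAI_API_KEY"):             # hypothetical key name
        self.openai_async_client = openai.AsyncOpenAI(api_key=self.config["OPENAI_API_KEY"])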
@@ -53,7 +53,7 @@ class LLMService:
     async def generate_text(self, prompt: str, model: str = "auto", max_tokens: int = 1000, temperature: float = 0.7) -> str:
         """Generate text using the specified model, with new priority for 'auto'."""
         try:
-            selected_model_name_for_call: str = ""
+            selected_model_name_for_call: str = ""
 
             if model == "auto":
                 # New Priority: 1. OpenAI, 2. Mistral, 3. Anthropic
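Note: the body of the "auto" branch is not shown in this hunk. A hedged sketch of the priority fallback the comment describes; the client attribute names come from the diff, but the provider tags and default model names are assumptions, not the repo's actual values:

if model == "auto":
    # Prefer OpenAI, then Mistral, then Anthropic, using whichever client was initialized.
    if self.openai_async_client is not None:
        provider, selected_model_name_for_call = "openai", "gpt-4o-mini"                 # assumed default
    elif self.mistral_client is not None:
        provider, selected_model_name_for_call = "mistral", "mistral-small-latest"       # assumed default
    elif self.anthropic_client is not None:
        provider, selected_model_name_for_call = "anthropic", "claude-3-haiku-20240307"  # assumed default
    else:
        raise RuntimeError("No LLM provider is configured")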
@@ -132,7 +132,7 @@ class LLMService:
                 response = await loop.run_in_executor(
                     None,
                     lambda: self.anthropic_client.messages.create(
-                        model=model_name,
+                        model=model_name,
                         max_tokens=max_tokens,
                         temperature=temperature,
                         messages=[
@@ -160,7 +160,7 @@ class LLMService:
                 response = await loop.run_in_executor(
                     None,
                     lambda: self.mistral_client.chat(
-                        model=model_name,
+                        model=model_name,
                         messages=[{"role": "user", "content": prompt}],
                         max_tokens=max_tokens,
                         temperature=temperature
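Note: both hunks above wrap a synchronous SDK call (messages.create, chat) in loop.run_in_executor so it runs on a worker thread instead of blocking the event loop, and the lambda defers the call until that thread executes it. A self-contained illustration of the pattern, not code from this repo:

import asyncio
import time

def blocking_call(x: int) -> int:
    # Stand-in for a synchronous SDK request.
    time.sleep(0.1)
    return x * 2

async def main() -> None:
    loop = asyncio.get_running_loop()
    # None selects the default ThreadPoolExecutor; the lambda packages the call
    # and its arguments so they run on the worker thread, not on the event loop.
    result = await loop.run_in_executor(None, lambda: blocking_call(21))
    print(result)  # 42

asyncio.run(main())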
@@ -265,21 +265,23 @@ class LLMService:
             context = context[:max_context_length] + "..."
             logger.warning(f"Context truncated to {max_context_length} characters for question answering.")
 
-        prompt = f"""You are
-
-
+        prompt = f"""You are an expert Q&A assistant. Your task is to synthesize an answer to the user's question based *only* on the provided source documents.
+Analyze all the source documents provided in the context below.
+If the information is present, provide a comprehensive answer.
 
-
----
+Here are the source documents:
+--- START OF CONTEXT ---
 {context}
----
+--- END OF CONTEXT ---
+
+Based on the context above, please provide a clear and concise answer to the following question.
 
 Question: {question}
 
 Answer:"""
 
         try:
-            answer = await self.generate_text(prompt, model="auto", max_tokens=
+            answer = await self.generate_text(prompt, model="auto", max_tokens=800, temperature=0.5)
             return answer.strip()
         except Exception as e:
             logger.error(f"Error answering question: {str(e)}")
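Note: the new prompt wraps the retrieved documents between explicit START/END markers and instructs the model to answer only from them, and the call now passes max_tokens=800 and temperature=0.5 explicitly. A hedged usage sketch, assuming this hunk lives in an async answer_question(question, context) method on LLMService; that method name and signature are inferred from the variables shown, not confirmed by the diff:

import asyncio

async def demo(service: "LLMService") -> None:
    context = "Widget v2 ships with a two-year warranty covering manufacturing defects."
    answer = await service.answer_question(
        question="How long is the Widget v2 warranty?",
        context=context,
    )
    print(answer)

# asyncio.run(demo(LLMService(config)))  # wiring of config and LLMService is repo-specific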