Abhinav Gavireddi committed · Commit a1d050d · Parent(s): 80de6a9

[fix]: fixed logger issues

Files changed:
- .github/workflows/ci.yaml +3 -2
- src/__init__.py +4 -11
- src/qa.py +2 -2
- src/retriever.py +1 -1
- src/utils.py +3 -2
.github/workflows/ci.yaml CHANGED

@@ -19,14 +19,15 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install
+          pip install uv
+          uv pip install -r requirements.txt
       # - name: Run tests
       #   run: |
       #     if [ -f tests/test.py ]; then python -m unittest discover -s tests; fi
 
   deploy-to-hf:
     runs-on: ubuntu-latest
-
+    needs: build-and-test
     environment: prod
     steps:
       - name: Checkout code
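Note on the workflow change: `needs: build-and-test` makes the deploy-to-hf job wait for the build-and-test job to complete successfully before it runs, and the previously truncated `pip install` line is replaced by installing uv and using it to install requirements.txt.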
src/__init__.py CHANGED

@@ -1,6 +1,7 @@
 import os
 from dotenv import load_dotenv
 import bleach
+from loguru import logger
 
 load_dotenv()
 
@@ -40,16 +41,8 @@ class GPPConfig:
     DEDUP_SIM_THRESHOLD = float(os.getenv('DEDUP_SIM_THRESHOLD', 0.9))
     EXPANSION_SIM_THRESHOLD = float(os.getenv('EXPANSION_SIM_THRESHOLD', 0.85))
     COREF_CONTEXT_SIZE = int(os.getenv('COREF_CONTEXT_SIZE', 3))
-
-class GPPConfig:
-    """
-    Configuration for GPP pipeline.
-    """
-
-    CHUNK_TOKEN_SIZE = 256
-    DEDUP_SIM_THRESHOLD = 0.9
-    EXPANSION_SIM_THRESHOLD = 0.85
-    COREF_CONTEXT_SIZE = 3
     HNSW_EF_CONSTRUCTION = int(os.getenv("HNSW_EF_CONSTRUCTION", "200"))
     HNSW_M = int(os.getenv("HNSW_M", "16"))
-    HNSW_EF_SEARCH = int(os.getenv("HNSW_EF_SEARCH", "50"))
+    HNSW_EF_SEARCH = int(os.getenv("HNSW_EF_SEARCH", "50"))
+
+
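The other files in this commit import `logger` from the package root, so after this change src/__init__.py both removes the duplicate GPPConfig definition and re-exports loguru's ready-made logger. A minimal sketch of the logging side of that arrangement follows; only the `from loguru import logger` line is confirmed by the diff, and the explicit sink configuration is an assumption shown for illustration.

# Sketch of the logging setup in src/__init__.py after this commit.
# Only `from loguru import logger` is confirmed by the diff; the explicit
# sink configuration below is an illustrative assumption.
import os
import sys

from dotenv import load_dotenv
from loguru import logger

load_dotenv()

# loguru ships pre-configured with a stderr sink, so the import alone is enough
# for `from src import logger` to work in the other modules. Re-adding a sink
# with an explicit level is optional:
logger.remove()
logger.add(sys.stderr, level=os.getenv("LOG_LEVEL", "INFO"))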
src/qa.py CHANGED

@@ -11,8 +11,8 @@ Each component is modular and can be swapped or extended (e.g., add HyDE retriev
 import os
 from typing import List, Dict, Any, Tuple
 
-from src import RerankerConfig
-from src.utils import LLMClient
+from src import RerankerConfig, logger
+from src.utils import LLMClient
 from src.retriever import Retriever, RetrieverConfig
 
 class Reranker:
src/retriever.py CHANGED

@@ -2,7 +2,7 @@ import os
 from typing import List, Dict, Any
 
 from src.config import RetrieverConfig
-from src
+from src import logger  # Use logger from src/__init__.py
 
 class Retriever:
     """
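With the truncated `from src` line repaired, retriever code can use the same shared logger as the rest of the package. A hypothetical use inside Retriever is sketched below; the constructor, the `search` method, and its log messages are illustrative and not part of the commit.

# Hypothetical illustration of using the shared logger in src/retriever.py;
# the constructor, `search` method, and log calls are not taken from the commit.
from typing import Any, Dict, List

from src import logger  # shared loguru logger from src/__init__.py
from src.config import RetrieverConfig


class Retriever:
    """Retrieves candidate chunks for a query (real body elided in the diff)."""

    def __init__(self, config: RetrieverConfig):
        self.config = config

    def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        logger.info(f"Retrieving top {top_k} chunks for query of length {len(query)}")
        results: List[Dict[str, Any]] = []  # actual vector search elided
        if not results:
            logger.warning("Retriever returned no results for the query")
        return results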
src/utils.py CHANGED

@@ -6,6 +6,7 @@ import openai
 from typing import List
 from openai import AzureOpenAI
 from langchain_openai import AzureOpenAIEmbeddings
+from src import logger  # Import logger from src/__init__.py
 
 
 class LLMClient:
@@ -21,7 +22,7 @@ class LLMClient:
         openai_model_name = model or os.getenv('OPENAI_MODEL', 'gpt-4o')
 
         if not (azure_api_key or azure_endpoint or azure_api_version or openai_model_name):
-
+            logger.error('OPENAI_API_KEY is not set')
             raise EnvironmentError('Missing OPENAI_API_KEY')
         client = AzureOpenAI(
             api_key=azure_api_key,
@@ -40,7 +41,7 @@ class LLMClient:
             text = resp.choices[0].message.content.strip()
             return text
         except Exception as e:
-
+            logger.error(f'LLM generation failed: {e}')
             raise
 
 
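Taken together, the two src/utils.py hunks give LLMClient the usual log-then-raise pattern. The sketch below assembles it from the visible context; the method name `generate`, the Azure environment-variable names, and the chat-completion call are assumptions inferred from the surrounding lines, not confirmed by the diff.

# Sketch of the resulting pattern in src/utils.py. Variable names visible in the
# diff context are kept; everything else (env var names, method signature, the
# chat call) is an assumption for illustration.
import os

from openai import AzureOpenAI
from src import logger  # Import logger from src/__init__.py


class LLMClient:
    def __init__(self, model: str | None = None):
        azure_api_key = os.getenv('AZURE_OPENAI_API_KEY')          # assumed env var name
        azure_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT')        # assumed env var name
        azure_api_version = os.getenv('AZURE_OPENAI_API_VERSION')  # assumed env var name
        openai_model_name = model or os.getenv('OPENAI_MODEL', 'gpt-4o')

        if not (azure_api_key or azure_endpoint or azure_api_version or openai_model_name):
            logger.error('OPENAI_API_KEY is not set')  # log before raising, as added in this commit
            raise EnvironmentError('Missing OPENAI_API_KEY')
        self.client = AzureOpenAI(
            api_key=azure_api_key,
            azure_endpoint=azure_endpoint,
            api_version=azure_api_version,
        )
        self.model = openai_model_name

    def generate(self, prompt: str) -> str:
        try:
            resp = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
            )
            text = resp.choices[0].message.content.strip()
            return text
        except Exception as e:
            logger.error(f'LLM generation failed: {e}')  # log the failure, then re-raise
            raise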