VIRTUS committed on
Commit
030d3cf
·
1 Parent(s): d45fd05

feat: changing router model, create s3 access file

Browse files
.gitignore CHANGED
@@ -2,4 +2,5 @@
2
  .idea/
3
  data/
4
  vector_stores/
5
- */__pycache__/
 
 
2
  .idea/
3
  data/
4
  vector_stores/
5
+ */__pycache__/
6
+ .env
app.py CHANGED
@@ -3,6 +3,7 @@ import gradio as gr
3
  from langchain.agents import create_agent
4
  from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
5
  from langgraph.checkpoint.memory import InMemorySaver
 
6
 
7
 
8
  class GradioAgent:
@@ -17,15 +18,13 @@ class GradioAgent:
17
  )
18
 
19
  with gr.Blocks() as demo:
20
- with gr.Sidebar():
21
- gr.LoginButton()
22
  chatbot.render()
23
 
24
  demo.launch()
25
 
26
  def __create_agent(self):
27
  hf_model = HuggingFaceEndpoint(
28
- repo_id="meta-llama/Llama-3.1-8B-Instruct",
29
  task="text-generation",
30
  provider="auto",
31
  huggingfacehub_api_token=os.getenv("HF_TOKEN")
@@ -35,7 +34,8 @@ class GradioAgent:
35
  return create_agent(
36
  tools=[],
37
  model=llm,
38
- checkpointer=InMemorySaver()
 
39
  )
40
 
41
  def _respond(
@@ -53,5 +53,6 @@ class GradioAgent:
53
 
54
 
55
  if __name__ == "__main__":
 
56
  gradio = GradioAgent()
57
  gradio.inicialize()
 
3
  from langchain.agents import create_agent
4
  from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
5
  from langgraph.checkpoint.memory import InMemorySaver
6
+ from dotenv import load_dotenv
7
 
8
 
9
  class GradioAgent:
 
18
  )
19
 
20
  with gr.Blocks() as demo:
 
 
21
  chatbot.render()
22
 
23
  demo.launch()
24
 
25
  def __create_agent(self):
26
  hf_model = HuggingFaceEndpoint(
27
+ repo_id="Qwen/Qwen3-30B-A3B-Instruct-2507",
28
  task="text-generation",
29
  provider="auto",
30
  huggingfacehub_api_token=os.getenv("HF_TOKEN")
 
34
  return create_agent(
35
  tools=[],
36
  model=llm,
37
+ checkpointer=InMemorySaver(),
38
+ system_prompt="You are a helpful and usefull assistant."
39
  )
40
 
41
  def _respond(
 
53
 
54
 
55
  if __name__ == "__main__":
56
+ load_dotenv()
57
  gradio = GradioAgent()
58
  gradio.inicialize()
rag/BooksRag.py ADDED
@@ -0,0 +1 @@
 
 
1
+
rag/PDFRag.py DELETED
@@ -1,40 +0,0 @@
1
- from langchain_community.document_loaders import PyPDFLoader
2
- from langchain_huggingface import HuggingFaceEmbeddings
3
- from langchain_text_splitters import RecursiveCharacterTextSplitter
4
- from langchain_chroma import Chroma
5
-
6
-
7
- class PDFRag:
8
-
9
- def __init__(self, vs_path: str, embedding_model: str):
10
- self.embedding = HuggingFaceEmbeddings(
11
- model_name=embedding_model
12
- )
13
- self.vector_store = Chroma(
14
- collection_name="pdf",
15
- embedding_function=self.embedding,
16
- persist_directory=vs_path,
17
- )
18
-
19
-
20
- def add(self, pdf_path: str):
21
- loader = PyPDFLoader(pdf_path)
22
- docs = loader.load()
23
-
24
- text_splitter = RecursiveCharacterTextSplitter(
25
- chunk_size=1000,
26
- chunk_overlap=200,
27
- add_start_index=True
28
- )
29
- all_splits = text_splitter.split_documents(docs)
30
-
31
- return self.vector_store.add_documents(documents=all_splits)
32
-
33
-
34
- if __name__ == "__main__":
35
- pdf_rag = PDFRag(
36
- "../vector_stores/",
37
- "sentence-transformers/all-mpnet-base-v2"
38
- )
39
- aux = pdf_rag.add("../data/Designing_Data-Intensive_Applications.pdf")
40
- print(aux)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
rag/__init__.py CHANGED
@@ -1 +1 @@
1
- from .PDFRag import PDFRag
 
1
+ from .BooksRag import *
requirements.txt CHANGED
@@ -8,6 +8,8 @@ attrs==25.4.0
8
  Authlib==1.6.5
9
  backoff==2.2.1
10
  bcrypt==5.0.0
 
 
11
  Brotli==1.1.0
12
  build==1.3.0
13
  cachetools==6.2.1
@@ -47,6 +49,7 @@ importlib_metadata==8.7.0
47
  importlib_resources==6.5.2
48
  itsdangerous==2.2.0
49
  Jinja2==3.1.6
 
50
  joblib==1.5.2
51
  jsonpatch==1.33
52
  jsonpointer==3.0.0
@@ -130,6 +133,7 @@ rich==14.2.0
130
  rpds-py==0.27.1
131
  rsa==4.9.1
132
  ruff==0.14.0
 
133
  safehttpx==0.1.6
134
  safetensors==0.6.2
135
  scikit-learn==1.7.2
 
8
  Authlib==1.6.5
9
  backoff==2.2.1
10
  bcrypt==5.0.0
11
+ boto3==1.40.57
12
+ botocore==1.40.57
13
  Brotli==1.1.0
14
  build==1.3.0
15
  cachetools==6.2.1
 
49
  importlib_resources==6.5.2
50
  itsdangerous==2.2.0
51
  Jinja2==3.1.6
52
+ jmespath==1.0.1
53
  joblib==1.5.2
54
  jsonpatch==1.33
55
  jsonpointer==3.0.0
 
133
  rpds-py==0.27.1
134
  rsa==4.9.1
135
  ruff==0.14.0
136
+ s3transfer==0.14.0
137
  safehttpx==0.1.6
138
  safetensors==0.6.2
139
  scikit-learn==1.7.2
tools/CCBooksToolAgent.py DELETED
@@ -1,31 +0,0 @@
1
from langchain.chains.retrieval_qa.base import RetrievalQA
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.tools import Tool
from rag import PDFRag


def CC_books_rag(hf_token) -> Tool:
    """Build an agent Tool that answers CS questions from the book index.

    Args:
        hf_token: HuggingFace API token used by the inference endpoint.

    Returns:
        A LangChain Tool wrapping a RetrievalQA chain over the PDF store.
    """
    # Reuse the Chroma store that PDFRag persisted to disk.
    store = PDFRag(
        "./vector_stores/",
        "sentence-transformers/all-mpnet-base-v2"
    ).vector_store
    # Retrieve the 3 most similar chunks per query.
    retriever = store.as_retriever(search_kwargs={"k": 3})

    endpoint = HuggingFaceEndpoint(
        repo_id="Qwen/Qwen3-4B-Instruct-2507",
        task="text-generation",
        provider="hf-inference",
        huggingfacehub_api_token=hf_token
    )
    # "stuff" chain: concatenate retrieved chunks into a single prompt.
    qa_chain = RetrievalQA.from_chain_type(
        llm=endpoint,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True
    )

    return Tool(
        name="Computer Science Books KnowledgeBaseSearch",
        func=lambda q: qa_chain.run(q),
        description="Use this tool when asked about computer science knowledge"
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools/__init__.py CHANGED
@@ -1 +0,0 @@
1
- from .CCBooksToolAgent import CC_books_rag
 
 
util/aws/S3Access.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import boto3


def retrieve_s3_data(
    bucket_name: str,
    prefix: str,
    access_key: str,
    secret_key: str,
    region: str
):
    """Download every object stored under *prefix* in an S3 bucket.

    Args:
        bucket_name: Name of the S3 bucket.
        prefix: Key prefix used to filter objects (a "folder" path).
        access_key: AWS access key id.
        secret_key: AWS secret access key.
        region: AWS region the bucket lives in.

    Returns:
        A list of ``{"name": key, "content": bytes}`` dicts, one per
        non-"folder" object found under the prefix. Empty list when the
        prefix matches nothing.
    """
    s3 = boto3.client(
        's3',
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        region_name=region
    )

    data = []
    # Paginate: a single list_objects_v2 call returns at most 1000 keys,
    # which would silently truncate larger buckets.
    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        # 'Contents' is absent from the response when no keys match,
        # so index access would raise KeyError — default to empty.
        for obj in page.get('Contents', []):
            key = obj['Key']
            if key.endswith('/'):
                # Skip zero-byte "directory" placeholder keys.
                continue

            content = s3.get_object(Bucket=bucket_name, Key=key)['Body'].read()
            data.append({
                "name": key,
                "content": content
            })

    return data