smitb2005 committed on
Commit
18489eb
·
1 Parent(s): 175c1e6

initial commit

Browse files
Files changed (3) hide show
  1. Dockerfile +20 -0
  2. app/main.py +147 -0
  3. app/requirements.txt +118 -0
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11
2
+
3
+ ENV PATH="/home/user/.local/bin:$PATH"
4
+
5
+ WORKDIR /app
6
+
7
+ COPY app/requirements.txt /app/
8
+
9
+
10
+ RUN python -m venv /venv && \
11
+ /venv/bin/pip install --upgrade pip && \
12
+ /venv/bin/pip install -r /app/requirements.txt
13
+
14
+ COPY . .
15
+
16
+ ENV PATH="/venv/bin:$PATH"
17
+
18
+ EXPOSE 8000
19
+
20
+ ENTRYPOINT [ "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2" ]
app/main.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from apify_client import ApifyClient
2
+ from fastapi import FastAPI
3
+ from astrapy import DataAPIClient
4
+
5
+ from groq import Groq
6
+ from langchain.chat_models import init_chat_model
7
+ from langchain_core.messages import HumanMessage, SystemMessage
8
+ from langchain_core.vectorstores import InMemoryVectorStore
9
+ from langchain_core.documents import Document
10
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
11
+ from langchain_community.document_loaders import UnstructuredMarkdownLoader
12
+ from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
13
+ from langchain import hub
14
+ from langgraph.graph import START, StateGraph
15
+
16
+ from pydantic.main import BaseModel
17
+ from typing_extensions import List, TypedDict
18
+
19
+ from transformers import pipeline
20
+
21
+ import os
22
+ import dotenv
23
+
24
# --- Module-level setup (runs at import time) ---
dotenv.load_dotenv()

# BUG FIX: the Apify client was originally assigned to `client` and then
# immediately clobbered by `Groq()` below, losing the handle. It is kept
# under a distinct name so the endpoints can use it.
apify_client = ApifyClient(os.getenv("APIFY_API_TOKEN"))

dbclient = DataAPIClient(os.getenv("ASTRA_DB_TOKEN"))
db = dbclient.get_database_by_api_endpoint(
    "https://654d738f-1326-4e94-a2a0-cf79bd1ac826-us-east-2.apps.astra.datastax.com"
)
# Groq client for chat completions (reads GROQ_API_KEY from the environment).
client = Groq()
# llm = init_chat_model("deepseek-r1-distill-llama-70b", model_provider="groq", api_key=os.getenv("GROQ_API_KEY"))

print(f"Connected to Astra DB: {db.list_collection_names()}")

coll_cursor = db.list_collections()
# "posts" collection handle (despite the name, this is a Collection, not a cursor).
cursor = db.get_collection("posts")

app = FastAPI()
40
+
41
@app.get("/fetch/{username}/{posts}")
async def root(username: str, posts: int):
    """Scrape the latest `posts` Instagram posts for `username` via Apify
    and cache any not-yet-seen ones in the Astra DB "posts" collection.
    """
    run_input = {
        "directUrls": [f"https://www.instagram.com/{username}/"],
        "resultsType": "posts",
        "resultsLimit": posts,
        "searchType": "hashtag",
        "searchLimit": 1,
        "addParentData": False,
    }

    # BUG FIX: the module-level name `client` is the Groq client (the Apify
    # client assigned to it earlier was clobbered), so `client.actor(...)`
    # raised AttributeError. Build a dedicated Apify client here instead.
    apify = ApifyClient(os.getenv("APIFY_API_TOKEN"))

    # "shu8hvrXbJbY3Eb9W" is the Apify Instagram-scraper actor id.
    run = apify.actor("shu8hvrXbJbY3Eb9W").call(run_input=run_input)
    for item in apify.dataset(run["defaultDatasetId"]).iterate_items():
        result = cursor.find_one({"id": item["id"]})
        # `is None` instead of `== None`; only insert posts we have not cached.
        if result is None:
            cursor.insert_one(item, vectorize=item['id'])
        else:
            print(f"Post is cached already! ({item['id']})")
62
+
63
class Query(BaseModel):
    """Request payload for the /chat/{username} endpoint."""
    # The user's natural-language question about the account's posts.
    question: str
65
+
66
@app.get("/chat/{username}")
async def chat(username: str, request: Query):
    """Answer a question about `username`'s cached posts with a Groq LLM.

    If no posts are cached, scrapes the 2 most recent via `root` and retries.
    Returns the model's answer as a plain string, or an error message string
    when no posts can be found at all.
    """
    # Project only the fields the model needs, to keep the prompt small.
    projection = {
        "type": True, "caption": True, "commentsCount": True, "alt": True,
        "likesCount": True, "ownerFullName": True, "videoDuration": True,
        "videoViewCount": True, "videoPlayCount": True,
    }
    knowledge = list(cursor.find({"ownerUsername": username}, projection=projection))

    if not knowledge:
        # Nothing cached yet: fetch a couple of posts, then retry with the
        # same slim projection (the original re-queried without it, pulling
        # full documents into the prompt).
        await root(username, 2)
        knowledge = list(cursor.find({"ownerUsername": username}, projection=projection))
        if not knowledge:
            return "No posts found even after fetching."

    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": f"you will solve the users queries about social media with your data {knowledge} hide any calculations you perform."
            },
            {
                "role": "user",
                # BUG FIX: send the question text itself; f"{request}" sent the
                # repr of the whole Query model ("question='...'").
                "content": request.question,
            }
        ],
        model="llama-3.3-70b-versatile",
        temperature=0.7,
        max_completion_tokens=1024,
        top_p=1,
        stop=None,
        stream=False,
    )

    return chat_completion.choices[0].message.content
102
+
103
# NOTE(review): mid-file import — conventionally this belongs at the top of
# the module with the other imports.
from statistics import mean

# Hugging Face sentiment classifier, loaded once at import time. No model is
# pinned, so transformers falls back to its default sentiment-analysis
# checkpoint (and logs a warning); pin a model name for reproducible results.
sentiment_pipeline = pipeline("sentiment-analysis")
106
+
107
@app.get("/analysis/{username}")
async def analysis(username: str):
    """Summarize the sentiment of the latest comments on `username`'s cached posts.

    Runs the sentiment classifier over every non-empty comment, aggregates
    average POSITIVE/NEGATIVE scores and counts, then asks the Groq LLM for a
    short human-readable interpretation. Returns the LLM's answer as a string,
    or an error dict when no usable comments exist.
    """
    results = list(cursor.find({"ownerUsername": username}, projection={"latestComments": True}))
    # Use .get so a comment document missing a "text" field cannot raise
    # KeyError (the original indexed comment["text"] directly).
    texts = [
        comment.get("text", "")
        for doc in results
        for comment in doc.get("latestComments", [])
        if comment.get("text", "").strip()
    ]

    if not texts:
        return {"error": "No valid comments found"}

    sentiment_scores = sentiment_pipeline(texts)

    positive_scores = [s["score"] for s in sentiment_scores if s["label"] == "POSITIVE"]
    negative_scores = [s["score"] for s in sentiment_scores if s["label"] == "NEGATIVE"]

    # 0 (not NaN/exception) when one polarity is absent entirely.
    scores = {
        "average_positive_sentiment": mean(positive_scores) if positive_scores else 0,
        "count_positive": len(positive_scores),
        "average_negative_sentiment": mean(negative_scores) if negative_scores else 0,
        "count_negative": len(negative_scores),
    }

    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                # Plain string: the original used an f-string with no placeholders.
                "content": "Help the user interpret the sentiment score of their comments be conscise and clear and straight to the point"
            },
            {
                "role": "user",
                "content": f"{scores}",
            }
        ],
        model="llama-3.3-70b-versatile",
        temperature=0.7,
        max_completion_tokens=1024,
        top_p=1,
        stop=None,
        stream=False,
    )

    return chat_completion.choices[0].message.content
app/requirements.txt ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohappyeyeballs==2.6.1
2
+ aiohttp==3.11.14
3
+ aiosignal==1.3.2
4
+ annotated-types==0.7.0
5
+ anyio==4.9.0
6
+ apify==2.4.0
7
+ apify_client==1.9.2
8
+ apify_fingerprint_datapoints==0.0.2
9
+ apify_shared==1.3.2
10
+ astrapy==1.5.2
11
+ attrs==25.3.0
12
+ Brotli==1.1.0
13
+ browserforge==1.2.3
14
+ cachetools==5.5.2
15
+ certifi==2025.1.31
16
+ cffi==1.17.1
17
+ charset-normalizer==3.4.1
18
+ click==8.1.8
19
+ colorama==0.4.6
20
+ crawlee==0.6.5
21
+ cryptography==44.0.2
22
+ dataclasses-json==0.6.7
23
+ deprecation==2.1.0
24
+ distro==1.9.0
25
+ dnspython==2.7.0
26
+ email_validator==2.2.0
27
+ eval_type_backport==0.2.2
28
+ fastapi==0.115.11
29
+ fastapi-cli==0.0.7
30
+ filelock==3.18.0
31
+ frozenlist==1.5.0
32
+ fsspec==2025.3.0
33
+ groq==0.20.0
34
+ h11==0.14.0
35
+ h2==4.2.0
36
+ hpack==4.1.0
37
+ httpcore==1.0.7
38
+ httptools==0.6.4
39
+ httpx==0.28.1
40
+ httpx-sse==0.4.0
41
+ huggingface-hub==0.29.3
42
+ hyperframe==6.1.0
43
+ idna==3.10
44
+ Jinja2==3.1.6
45
+ jsonpatch==1.33
46
+ jsonpointer==3.0.0
47
+ langchain==0.3.21
48
+ langchain-community==0.3.20
49
+ langchain-core==0.3.46
50
+ langchain-groq==0.3.0
51
+ langchain-text-splitters==0.3.7
52
+ langgraph==0.3.18
53
+ langgraph-checkpoint==2.0.21
54
+ langgraph-prebuilt==0.1.3
55
+ langgraph-sdk==0.1.58
56
+ langsmith==0.3.18
57
+ lazy-object-proxy==1.10.0
58
+ markdown-it-py==3.0.0
59
+ MarkupSafe==3.0.2
60
+ marshmallow==3.26.1
61
+ mdurl==0.1.2
62
+ more-itertools==10.6.0
63
+ mpmath==1.3.0
64
+ msgpack==1.1.0
65
+ multidict==6.2.0
66
+ mypy-extensions==1.0.0
67
+ networkx==3.4.2
68
+ numpy==2.2.4
69
+ orjson==3.10.15
70
+ packaging==24.2
71
+ pillow==11.1.0
72
+ propcache==0.3.0
73
+ psutil==7.0.0
74
+ pycparser==2.22
75
+ pydantic==2.10.6
76
+ pydantic-settings==2.6.1
77
+ pydantic_core==2.27.2
78
+ pyee==13.0.0
79
+ Pygments==2.19.1
80
+ pymongo==4.11.3
81
+ python-dotenv==1.0.1
82
+ python-multipart==0.0.20
83
+ PyYAML==6.0.2
84
+ regex==2024.11.6
85
+ requests==2.32.3
86
+ requests-file==2.1.0
87
+ requests-toolbelt==1.0.0
88
+ rich==13.9.4
89
+ rich-toolkit==0.13.2
90
+ safetensors==0.5.3
91
+ setuptools==77.0.3
92
+ shellingham==1.5.4
93
+ sniffio==1.3.1
94
+ sortedcollections==2.1.0
95
+ sortedcontainers==2.4.0
96
+ SQLAlchemy==2.0.39
97
+ starlette==0.46.1
98
+ sympy==1.13.1
99
+ tenacity==9.0.0
100
+ tldextract==5.1.3
101
+ tokenizers==0.21.1
102
+ toml==0.10.2
103
+ torch==2.6.0
104
+ torchaudio==2.6.0
105
+ torchvision==0.21.0
106
+ tqdm==4.67.1
107
+ transformers==4.49.0
108
+ typer==0.15.2
109
+ typing-inspect==0.9.0
110
+ typing_extensions==4.12.2
111
+ urllib3==2.3.0
112
+ uuid6==2024.7.10
113
+ uvicorn==0.34.0
114
+ uvloop==0.21.0
115
+ watchfiles==1.0.4
116
+ websockets==15.0.1
117
+ yarl==1.18.3
118
+ zstandard==0.23.0