Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,6 @@ from llama_index.llms.huggingface import HuggingFaceInferenceAPI
|
|
| 4 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 5 |
from llama_index.core import Settings
|
| 6 |
from youtube_transcript_api import YouTubeTranscriptApi
|
| 7 |
-
from langchain_community.document_loaders import WebBaseLoader
|
| 8 |
import shutil
|
| 9 |
import os
|
| 10 |
import time
|
|
@@ -40,13 +39,8 @@ def data_ingestion():
|
|
| 40 |
index.storage_context.persist(persist_dir=PERSIST_DIR)
|
| 41 |
|
| 42 |
def remove_old_files():
|
| 43 |
-
# Specify the directory path you want to clear
|
| 44 |
directory_path = "data"
|
| 45 |
-
|
| 46 |
-
# Remove all files and subdirectories in the specified directory
|
| 47 |
shutil.rmtree(directory_path)
|
| 48 |
-
|
| 49 |
-
# Recreate an empty directory if needed
|
| 50 |
os.makedirs(directory_path)
|
| 51 |
|
| 52 |
def extract_transcript_details(youtube_video_url):
|
|
@@ -64,18 +58,6 @@ def extract_transcript_details(youtube_video_url):
|
|
| 64 |
except Exception as e:
|
| 65 |
st.error(e)
|
| 66 |
|
| 67 |
-
def get_url_text(url_link):
|
| 68 |
-
try:
|
| 69 |
-
loader = WebBaseLoader(url_link)
|
| 70 |
-
loader.requests_per_second = 1
|
| 71 |
-
docs = loader.aload()
|
| 72 |
-
extracted_text = ""
|
| 73 |
-
for page in docs:
|
| 74 |
-
extracted_text += page.page_content
|
| 75 |
-
return extracted_text
|
| 76 |
-
except Exception as e:
|
| 77 |
-
st.error(e)
|
| 78 |
-
|
| 79 |
def handle_query(query):
|
| 80 |
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
|
| 81 |
index = load_index_from_storage(storage_context)
|
|
@@ -134,13 +116,6 @@ with st.sidebar:
|
|
| 134 |
print(filepath)
|
| 135 |
with open(filepath, "wb") as f:
|
| 136 |
f.write(uploaded_file.getbuffer())
|
| 137 |
-
|
| 138 |
-
if uploaded_url:
|
| 139 |
-
url_text = get_url_text(uploaded_url)
|
| 140 |
-
print(url_text)
|
| 141 |
-
with open("data/url_text.txt", "w") as fil:
|
| 142 |
-
fil.write(url_text)
|
| 143 |
-
print(os.listdir("data"))
|
| 144 |
|
| 145 |
if video_url:
|
| 146 |
extracted_text = extract_transcript_details(video_url)
|
|
|
|
| 4 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 5 |
from llama_index.core import Settings
|
| 6 |
from youtube_transcript_api import YouTubeTranscriptApi
|
|
|
|
| 7 |
import shutil
|
| 8 |
import os
|
| 9 |
import time
|
|
|
|
| 39 |
index.storage_context.persist(persist_dir=PERSIST_DIR)
|
| 40 |
|
| 41 |
def remove_old_files(directory_path="data"):
    """Empty the upload directory, leaving a fresh, empty directory behind.

    Deletes ``directory_path`` together with everything inside it and then
    recreates it, so later writes into the directory start from a clean slate.

    Args:
        directory_path: Directory to reset. Defaults to ``"data"``, the
            location the original implementation hard-coded.

    Raises:
        OSError: If the path exists but cannot be removed or recreated
            (e.g. permission problems, or the path is a regular file).
    """
    try:
        shutil.rmtree(directory_path)
    except FileNotFoundError:
        # Nothing to clear yet (e.g. first run); the directory is simply
        # created below instead of aborting the caller.
        pass
    # exist_ok guards against the directory being recreated concurrently
    # between the removal above and this call.
    os.makedirs(directory_path, exist_ok=True)
|
| 45 |
|
| 46 |
def extract_transcript_details(youtube_video_url):
|
|
|
|
| 58 |
except Exception as e:
|
| 59 |
st.error(e)
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
def handle_query(query):
|
| 62 |
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
|
| 63 |
index = load_index_from_storage(storage_context)
|
|
|
|
| 116 |
print(filepath)
|
| 117 |
with open(filepath, "wb") as f:
|
| 118 |
f.write(uploaded_file.getbuffer())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
if video_url:
|
| 121 |
extracted_text = extract_transcript_details(video_url)
|