Spaces:
Sleeping
Sleeping
| import sqlalchemy | |
| from langchain.document_loaders import PyPDFLoader | |
| import pandas as pd | |
| from llama_index.objects import ( | |
| SQLTableNodeMapping, | |
| ObjectIndex, | |
| SQLTableSchema, | |
| ) | |
| from llama_index import SQLDatabase | |
| from llama_index.indices.vector_store.base import VectorStoreIndex | |
| from llama_index.indices.struct_store import SQLTableRetrieverQueryEngine | |
| def read_context_pdf(file): | |
| filepath = file.name | |
| loader = PyPDFLoader(filepath) | |
| pages = loader.load() | |
| content = "".join([page.page_content for page in pages]) | |
| content = [c.lstrip() for c in content.split(";")] | |
| content = [c.split(":") for c in content] | |
| return content | |
| def query(engine, sql_query): | |
| with engine.begin() as conn: | |
| df = pd.read_sql_query(sqlalchemy.text(sql_query), conn) | |
| return df | |
| def create_query_engine(context_pdf, username, password, host, port, mydatabase): | |
| # Parse context pdf | |
| context = read_context_pdf(context_pdf) | |
| # create sql engine | |
| pg_uri = f"postgresql+psycopg2://{username}:{password}@{host}:{port}/{mydatabase}" | |
| engine = sqlalchemy.create_engine(pg_uri) | |
| sql_database = SQLDatabase(engine) | |
| # create context mapping | |
| table_node_mapping = SQLTableNodeMapping(sql_database) | |
| table_schema_objs = [(SQLTableSchema(table_name=c[0], context_str=c[1])) for c in context] | |
| obj_index = ObjectIndex.from_objects( | |
| table_schema_objs, | |
| table_node_mapping, | |
| VectorStoreIndex, | |
| ) | |
| query_engine = SQLTableRetrieverQueryEngine( | |
| sql_database, obj_index.as_retriever(similarity_top_k=3) | |
| ) | |
| return query_engine, engine, "Connection good" | |