Spaces:
Runtime error
Runtime error
| import os | |
| import re | |
| from typing import Tuple, List | |
| from dotenv import load_dotenv | |
| from msal import ConfidentialClientApplication | |
| from langchain.schema import format_document | |
def init_env():
    """Load environment variables from a local ``.env`` file, best-effort.

    Ordinary errors raised while loading are logged as a warning and
    otherwise ignored, so a missing or unreadable ``.env`` file never stops
    start-up. ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted.
    """
    import logging  # function-scope import keeps this block self-contained

    try:
        load_dotenv()
    except Exception:
        # Deliberately non-fatal: the process may already have every variable
        # it needs in its real environment.
        logging.getLogger(__name__).warning(
            "Could not load .env file; continuing with the existing environment",
            exc_info=True,
        )
def get_token() -> str | None:
    """Request an access token for this application from Azure AD via MSAL.

    The client id, client secret, tenant id and scope are read from the
    ``CLIENT_ID``, ``CLIENT_SECRET``, ``TENANT_ID`` and ``SCOPE`` environment
    variables.

    Returns:
        The access token string, or ``None`` when MSAL returns nothing or a
        response without an ``access_token`` entry (e.g. an error response).
    """
    app = ConfidentialClientApplication(
        client_id=os.getenv("CLIENT_ID"),
        client_credential=os.getenv("CLIENT_SECRET"),
        authority=f"https://login.microsoftonline.com/{os.getenv('TENANT_ID')}",
    )
    result = app.acquire_token_for_client(scopes=[os.getenv("SCOPE")])
    if result is None:
        return None
    # A failed request yields a dict describing the error instead of a token;
    # .get() turns that into the documented ``None`` rather than a KeyError.
    return result.get("access_token")
def get_llm():
    """Create an ``AzureChatOpenAI`` model configured through the environment.

    A fresh token from ``get_token()`` is stored in ``OPENAI_API_KEY`` and the
    full chat-completions URL (built from ``OPENAI_API_ENDPOINT``,
    ``DEPLOYMENT_ID`` and ``OPENAI_API_VERSION``) is stored in
    ``AZURE_OPENAI_ENDPOINT`` before the model is instantiated.

    Returns:
        A new ``AzureChatOpenAI`` instance built with no explicit arguments.

    Raises:
        RuntimeError: if ``get_token()`` returns no token.
    """
    # Imported here because the module's import block does not provide it.
    # NOTE(review): confirm this import path against the installed langchain
    # version.
    from langchain.chat_models import AzureChatOpenAI

    token = get_token()
    if token is None:
        # os.environ only accepts strings; fail with an actionable message
        # instead of an opaque TypeError from the assignment below.
        raise RuntimeError(
            "Could not acquire an access token; check CLIENT_ID, "
            "CLIENT_SECRET, TENANT_ID and SCOPE"
        )
    os.environ["OPENAI_API_KEY"] = token
    os.environ["AZURE_OPENAI_ENDPOINT"] = (
        f"{os.getenv('OPENAI_API_ENDPOINT')}{os.getenv('DEPLOYMENT_ID')}/chat/completions?api-version={os.getenv('OPENAI_API_VERSION')}"
    )
    return AzureChatOpenAI()
| def _combine_documents(docs, document_prompt, document_separator="\n\n"): | |
| doc_strings = [ | |
| f"Document {i}: \n'''\n{format_document(doc, document_prompt)}\n'''" | |
| for i, doc in enumerate(docs, 1) | |
| ] | |
| return document_separator.join(doc_strings) | |
| def _format_chat_history(chat_history: List[Tuple]) -> str: | |
| turn = 1 | |
| buffer = [] | |
| for dialogue in chat_history: | |
| buffer.append(("Human: " if turn else "Assistant: ") + dialogue.content) | |
| turn ^= 1 | |
| return "\n".join(buffer) + "\n" | |
def make_pairs(lst):
    """Group a sequence of even length into consecutive 2-tuples.

    ``[a, b, c, d]`` becomes ``[(a, b), (c, d)]``. A sequence of odd length
    raises ``IndexError`` when the final, unpaired element is reached.
    """
    pairs = []
    for start in range(0, len(lst), 2):
        pairs.append((lst[start], lst[start + 1]))
    return pairs
def make_html_source(i, doc):
    """Render one retrieved document as an HTML "card".

    Args:
        i: Number shown in the card title and used in the ``doc{i}`` element
            id.
        doc: Object exposing ``page_content`` and a ``metadata`` mapping with
            at least a ``"source"`` key. When the source is ``"ESRS"`` the
            keys ``"ESRS_filter"``, ``"DR"`` and ``"Data type"`` are read too.

    Returns:
        An HTML snippet. Both variants carry ``id="doc{i}"`` so that
        in-page ``#doc{i}`` links can target the card.

    Note:
        Values are interpolated without HTML escaping.
    """
    if doc.metadata["source"] == "ESRS":
        return f"""
<div class="card" id="doc{i}">
    <div class="card-content">
        <h3>Doc {i}</h3>
        <p>{doc.page_content}</p>
    </div>
    <div class="card-footer">
        <span>{doc.metadata['ESRS_filter']} \n</span>
        <span>DR: {doc.metadata['DR']} \n</span>
        <span>Data type: {doc.metadata['Data type']} \n</span>
    </div>
</div>
"""
    # Any other source: same card layout, footer shows only the source name.
    return f"""
<div class="card" id="doc{i}">
    <div class="card-content">
        <h3>Doc {i}</h3>
        <p>{doc.page_content}</p>
    </div>
    <div class="card-footer">
        <span>Source: {doc.metadata['source']} \n</span>
    </div>
</div>
"""
def parse_output_llm_with_sources(output):
    """Replace ``[Doc N]`` citations in LLM output with HTML anchor links.

    Bracketed references such as ``[Doc 1]`` or ``[Doc 1, Doc 3]`` are
    replaced by one superscript link per document number, each pointing at
    the in-page anchor ``#doc<N>``. All other text is returned unchanged,
    including ordinary text that merely starts with "Doc".

    Args:
        output: Raw text produced by the model.

    Returns:
        The text with every citation replaced by its HTML link(s).
    """
    # The pattern has exactly one capturing group, so re.split alternates:
    # even positions are plain text, odd positions are the captured
    # "Doc 1, Doc 2" reference lists.
    segments = re.split(r"\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]", output)
    rendered = []
    for position, segment in enumerate(segments):
        if position % 2 == 0:
            rendered.append(segment)
            continue
        numbers = [
            reference.lower().replace("doc", "").strip()
            for reference in segment.split(",")
        ]
        rendered.append(
            "".join(
                f"""<a href="#doc{number}" class="a-doc-ref" target="_self"><span class='doc-ref'><sup>{number}</sup></span></a>"""
                for number in numbers
            )
        )
    return "".join(rendered)