Switched to OpenAI instead of Azure OAI
Browse files- app.py +1 -1
- climateqa/engine/llm.py +0 -26
- climateqa/engine/llm/__init__.py +15 -0
- climateqa/engine/llm/azure.py +99 -0
- climateqa/engine/llm/mistral.py +0 -0
- climateqa/engine/llm/openai.py +22 -0
- requirements.txt +3 -1
    	
        app.py
    CHANGED
    
    | @@ -90,7 +90,7 @@ def parse_output_llm_with_sources(output): | |
| 90 |  | 
| 91 | 
             
            # Create vectorstore and retriever
         | 
| 92 | 
             
            vectorstore = get_pinecone_vectorstore(embeddings_function)
         | 
| 93 | 
            -
            llm = get_llm(max_tokens = 1024,temperature = 0.0)
         | 
| 94 |  | 
| 95 |  | 
| 96 | 
             
            def make_pairs(lst):
         | 
|  | |
| 90 |  | 
| 91 | 
             
            # Create vectorstore and retriever
         | 
| 92 | 
             
            vectorstore = get_pinecone_vectorstore(embeddings_function)
         | 
| 93 | 
            +
            llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
         | 
| 94 |  | 
| 95 |  | 
| 96 | 
             
            def make_pairs(lst):
         | 
    	
        climateqa/engine/llm.py
    DELETED
    
    | @@ -1,26 +0,0 @@ | |
| 1 | 
            -
            from langchain_community.chat_models import AzureChatOpenAI
         | 
| 2 | 
            -
            import os
         | 
| 3 | 
            -
            # LOAD ENVIRONMENT VARIABLES
         | 
| 4 | 
            -
            try:
         | 
| 5 | 
            -
                from dotenv import load_dotenv
         | 
| 6 | 
            -
                load_dotenv()
         | 
| 7 | 
            -
            except:
         | 
| 8 | 
            -
                pass
         | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
            def get_llm(max_tokens = 1024,temperature = 0.0,verbose = True,streaming = False, **kwargs):
         | 
| 12 | 
            -
             | 
| 13 | 
            -
                llm = AzureChatOpenAI(
         | 
| 14 | 
            -
                    openai_api_base=os.environ["AZURE_OPENAI_API_BASE_URL"],
         | 
| 15 | 
            -
                    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
         | 
| 16 | 
            -
                    deployment_name=os.environ["AZURE_OPENAI_API_DEPLOYMENT_NAME"],
         | 
| 17 | 
            -
                    openai_api_key=os.environ["AZURE_OPENAI_API_KEY"],
         | 
| 18 | 
            -
                    openai_api_type = "azure",
         | 
| 19 | 
            -
                    max_tokens = max_tokens,
         | 
| 20 | 
            -
                    temperature = temperature,
         | 
| 21 | 
            -
                    request_timeout = 60,
         | 
| 22 | 
            -
                    verbose = verbose,
         | 
| 23 | 
            -
                    streaming = streaming,
         | 
| 24 | 
            -
                    **kwargs,
         | 
| 25 | 
            -
                )
         | 
| 26 | 
            -
                return llm
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
    	
        climateqa/engine/llm/__init__.py
    ADDED
    
    | @@ -0,0 +1,15 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from climateqa.engine.llm.openai import get_llm as get_openai_llm
         | 
| 2 | 
            +
            from climateqa.engine.llm.azure import get_llm as get_azure_llm
         | 
| 3 | 
            +
             | 
| 4 | 
            +
             | 
| 5 | 
            +
            def get_llm(provider="openai", **kwargs):
                """Build a chat model using the requested backend.

                Args:
                    provider: Backend name; "openai" and "azure" are the only
                        accepted values.
                    **kwargs: Forwarded unchanged to the selected backend factory.

                Returns:
                    Whatever the selected backend factory returns.

                Raises:
                    ValueError: If ``provider`` is neither "openai" nor "azure".
                """
                # Reject unsupported names up front so the dispatch below only
                # has to choose between the two known factories.
                if provider not in ("openai", "azure"):
                    raise ValueError(f"Unknown provider: {provider}")

                factory = get_openai_llm if provider == "openai" else get_azure_llm
                return factory(**kwargs)
         | 
| 13 | 
            +
                
         | 
| 14 | 
            +
                
         | 
| 15 | 
            +
             | 
    	
        climateqa/engine/llm/azure.py
    ADDED
    
    | @@ -0,0 +1,99 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import os
         | 
| 2 | 
            +
            import time
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            from langchain_openai import AzureChatOpenAI
         | 
| 5 | 
            +
            from msal import ConfidentialClientApplication
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            # Seconds to keep a cached Azure AD token before requesting a new one.
            # The default token duration is 1 hour (3600 s), so 3300 s refreshes early.
            DEFAULT_TOKEN_UPDATE_FREQUENCY = 3300

            # LOAD ENVIRONMENT VARIABLES
            # Best effort: any failure to import or run python-dotenv is ignored and
            # the process environment is used as-is.
            try:
                from dotenv import load_dotenv
                load_dotenv()
            except Exception:
                pass

            # Azure AD application settings; each value is None when its variable is unset.
            client_id = os.environ.get("AZURE_CLIENT_ID")
            client_credential = os.environ.get("AZURE_CLIENT_CREDENTIAL")
            tenant_name = os.environ.get("AZURE_TENANT_NAME")
            # Always a one-element list; the element is None when AZURE_SCOPE is unset.
            scopes = [os.environ.get("AZURE_SCOPE")]

            # Optional override of the refresh interval, in seconds.
            azure_ad_token_frequency = int(
                os.environ.get("TOKEN_UPDATE_FREQUENCY", DEFAULT_TOKEN_UPDATE_FREQUENCY)
            )

            # Module-level token cache: the last token obtained and the time.time()
            # value recorded when it was stored.
            azure_ad_token = None
            azure_ad_token_timestamp = 0.0
         | 
| 27 | 
            +
             | 
| 28 | 
            +
             | 
| 29 | 
            +
            def _get_azure_ad_token():
                """Return an Azure AD access token, reusing the cached one while it is fresh.

                The cached token is returned as long as
                ``azure_ad_token_timestamp + azure_ad_token_frequency`` is still in the
                future. Otherwise a new token is requested through
                ``ConfidentialClientApplication.acquire_token_for_client`` using the
                module-level ``client_id``, ``client_credential``, ``tenant_name`` and
                ``scopes`` settings.

                Returns:
                    The access token string.

                Raises:
                    ValueError: If the response carries no ``access_token``. When the
                        response includes ``error``, ``error_description`` or
                        ``correlation_id``, they are appended to the message so the
                        failure can be diagnosed.
                """
                global azure_ad_token
                global azure_ad_token_timestamp
                now = time.time()

                # Return current token if not outdated:
                if (azure_ad_token is not None) and (
                    azure_ad_token_timestamp + azure_ad_token_frequency > now
                ):
                    print("Using current token (not expired)...")
                    return azure_ad_token

                # Else, generate a new token:
                print("Generating new token...")
                app = ConfidentialClientApplication(
                    client_id=client_id,
                    client_credential=client_credential,
                    authority=f"https://login.microsoftonline.com/{tenant_name}",
                )
                result = app.acquire_token_for_client(scopes=scopes)
                if not result or "access_token" not in result:
                    # Surface whatever diagnostics the response carries instead of
                    # discarding them; an empty/None result is reported the same way.
                    failure = result or {}
                    details = "; ".join(
                        f"{key}={failure[key]}"
                        for key in ("error", "error_description", "correlation_id")
                        if failure.get(key)
                    )
                    message = "No access token in result"
                    raise ValueError(f"{message} ({details})" if details else message)

                # The cache (and its timestamp) only moves when a different token
                # string arrives; an identical token leaves the timestamp untouched.
                if result["access_token"] != azure_ad_token:
                    print("New token received.")
                    azure_ad_token = result["access_token"]
                    azure_ad_token_timestamp = now
                else:
                    print("Same token received.")

                return azure_ad_token
         | 
| 60 | 
            +
             | 
| 61 | 
            +
             | 
| 62 | 
            +
            def get_llm(
                max_tokens: int = 1024,
                temperature: float = 0.0,
                verbose: bool = True,
                streaming: bool = False,
                **kwargs,
            ) -> AzureChatOpenAI:
                """Create an ``AzureChatOpenAI`` client using one of two authentication modes.

                Azure AD mode is chosen when AZURE_CLIENT_ID, AZURE_CLIENT_CREDENTIAL and
                AZURE_TENANT_NAME were all set at import time; it also reads
                AZURE_OPENAI_API_BASE_URL and installs ``_get_azure_ad_token`` as the
                token provider. Otherwise API-key mode is used, which reads
                AZURE_OPENAI_API_DEPLOYMENT_NAME.

                Args:
                    max_tokens: Passed through to ``AzureChatOpenAI``.
                    temperature: Passed through to ``AzureChatOpenAI``.
                    verbose: Passed through to ``AzureChatOpenAI``.
                    streaming: Passed through to ``AzureChatOpenAI``.
                    **kwargs: Extra keyword arguments passed through to ``AzureChatOpenAI``.

                Returns:
                    The configured ``AzureChatOpenAI`` instance.

                Raises:
                    KeyError: If the environment variable required by the selected mode
                        is missing.
                """
                # Note: OPENAI_API_VERSION is automatically taken from environment variables.
                auth_dict = {"openai_api_type": "azure"}

                has_ad_credentials = all(
                    setting is not None
                    for setting in (client_id, client_credential, tenant_name)
                )

                if has_ad_credentials:
                    # First option: provide AZURE_OPENAI_API_BASE_URL, OPENAI_API_VERSION,
                    # AZURE_CLIENT_ID, AZURE_CLIENT_CREDENTIAL, AZURE_TENANT_NAME & AZURE_SCOPE.
                    print("Using Azure AD token")
                    auth_dict["openai_api_base"] = os.environ["AZURE_OPENAI_API_BASE_URL"]
                    auth_dict["azure_ad_token_provider"] = _get_azure_ad_token
                else:
                    # Second option: provide AZURE_OPENAI_ENDPOINT,
                    # AZURE_OPENAI_API_DEPLOYMENT_NAME, OPENAI_API_VERSION &
                    # AZURE_OPENAI_API_KEY.
                    print("Using AZURE_OPENAI_API_DEPLOYMENT_NAME and AZURE_OPENAI_API_KEY")
                    auth_dict["deployment_name"] = os.environ["AZURE_OPENAI_API_DEPLOYMENT_NAME"]
                    # Note: AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY are automatically
                    # taken from environment variable.

                return AzureChatOpenAI(
                    **auth_dict,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    verbose=verbose,
                    streaming=streaming,
                    **kwargs,
                )
         | 
    	
        climateqa/engine/llm/mistral.py
    ADDED
    
    | 
            File without changes
         | 
    	
        climateqa/engine/llm/openai.py
    ADDED
    
    | @@ -0,0 +1,22 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from langchain_openai import ChatOpenAI
         | 
| 2 | 
            +
            import os
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            try:
         | 
| 5 | 
            +
                from dotenv import load_dotenv
         | 
| 6 | 
            +
                load_dotenv()
         | 
| 7 | 
            +
            except Exception:
         | 
| 8 | 
            +
                pass
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            def get_llm(
                model="gpt-3.5-turbo-0125",
                max_tokens=1024,
                temperature=0.0,
                streaming=True,
                timeout=30,
                **kwargs,
            ):
                """Create a ``ChatOpenAI`` client.

                Args:
                    model: Model name handed to ``ChatOpenAI``.
                    max_tokens: Passed through to ``ChatOpenAI``.
                    temperature: Passed through to ``ChatOpenAI``.
                    streaming: Passed through to ``ChatOpenAI``.
                    timeout: Passed through to ``ChatOpenAI``.
                    **kwargs: Extra keyword arguments passed through to ``ChatOpenAI``.

                Returns:
                    The configured ``ChatOpenAI`` instance.
                """
                # The key is read from THEO_API_KEY; None is passed when it is unset.
                api_key = os.environ.get("THEO_API_KEY")

                return ChatOpenAI(
                    model=model,
                    api_key=api_key,
                    max_tokens=max_tokens,
                    streaming=streaming,
                    temperature=temperature,
                    timeout=timeout,
                    **kwargs,
                )
         | 
    	
        requirements.txt
    CHANGED
    
    | @@ -4,6 +4,8 @@ azure-storage-file-share==12.11.1 | |
| 4 | 
             
            azure-storage-blob
         | 
| 5 | 
             
            python-dotenv==1.0.0
         | 
| 6 | 
             
            langchain==0.1.4
         | 
|  | |
| 7 | 
             
            pinecone-client==3.0.2
         | 
| 8 | 
             
            sentence-transformers
         | 
| 9 | 
            -
            huggingface-hub
         | 
|  | 
|  | |
| 4 | 
             
            azure-storage-blob
         | 
| 5 | 
             
            python-dotenv==1.0.0
         | 
| 6 | 
             
            langchain==0.1.4
         | 
| 7 | 
            +
            langchain_openai==0.0.6
         | 
| 8 | 
             
            pinecone-client==3.0.2
         | 
| 9 | 
             
            sentence-transformers
         | 
| 10 | 
            +
            huggingface-hub
         | 
| 11 | 
            +
            msal
         | 

