prompts are now open source
Browse files
+ got rid of tqdm bars for every retriever call
    	
        app.py
    CHANGED
    
    | 
         @@ -13,17 +13,44 @@ import numpy as np 
     | 
|
| 13 | 
         
             
            from datetime import datetime
         
     | 
| 14 | 
         
             
            from azure.storage.fileshare import ShareServiceClient
         
     | 
| 15 | 
         | 
| 16 | 
         
            -
            # from dotenv import load_dotenv
         
     | 
| 17 | 
         
            -
            # load_dotenv()
         
     | 
| 18 | 
         
            -
             
     | 
| 19 | 
         
            -
            print(os.environ["content"], os.environ["sources"], sep="\n-\n" * 2)
         
     | 
| 20 | 
         | 
| 21 | 
         
             
            theme = gr.themes.Soft(
         
     | 
| 22 | 
         
             
                primary_hue="sky",
         
     | 
| 23 | 
         
             
                font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
         
     | 
| 24 | 
         
             
            )
         
     | 
| 25 | 
         | 
| 26 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 27 | 
         | 
| 28 | 
         
             
            openai.api_type = "azure"
         
     | 
| 29 | 
         
             
            openai.api_key = os.environ["api_key"]
         
     | 
| 
         @@ -37,6 +64,7 @@ retrieve_all = EmbeddingRetriever( 
     | 
|
| 37 | 
         
             
                ),
         
     | 
| 38 | 
         
             
                embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
         
     | 
| 39 | 
         
             
                model_format="sentence_transformers",
         
     | 
| 
         | 
|
| 40 | 
         
             
            )
         
     | 
| 41 | 
         | 
| 42 | 
         
             
            retrieve_giec = EmbeddingRetriever(
         
     | 
| 
         @@ -88,7 +116,7 @@ def chat( 
     | 
|
| 88 | 
         | 
| 89 | 
         
             
                reformulated_query = openai.Completion.create(
         
     | 
| 90 | 
         
             
                    engine="climateGPT",
         
     | 
| 91 | 
         
            -
                    prompt= 
     | 
| 92 | 
         
             
                    temperature=0,
         
     | 
| 93 | 
         
             
                    max_tokens=128,
         
     | 
| 94 | 
         
             
                    stop=["\n---\n", "<|im_end|>"],
         
     | 
| 
         @@ -105,7 +133,7 @@ def chat( 
     | 
|
| 105 | 
         
             
                            for i, d in enumerate(docs, 1)
         
     | 
| 106 | 
         
             
                        ]
         
     | 
| 107 | 
         
             
                    )
         
     | 
| 108 | 
         
            -
                    messages.append({"role": "system", "content": f"{ 
     | 
| 109 | 
         | 
| 110 | 
         
             
                    response = openai.Completion.create(
         
     | 
| 111 | 
         
             
                        engine="climateGPT",
         
     | 
| 
         @@ -255,11 +283,7 @@ Version 0.2-beta - This tool is under active development 
     | 
|
| 255 | 
         
             
                    with gr.Column(scale=1, variant="panel"):
         
     | 
| 256 | 
         
             
                        gr.Markdown("### Sources")
         
     | 
| 257 | 
         
             
                        sources_textbox = gr.Textbox(interactive=False, show_label=False, max_lines=50)
         
     | 
| 258 | 
         
            -
             
     | 
| 259 | 
         
            -
                #     ["IPCC only", "All available"],
         
     | 
| 260 | 
         
            -
                #     default="All available",
         
     | 
| 261 | 
         
            -
                #     label="Select reports",
         
     | 
| 262 | 
         
            -
                # ),
         
     | 
| 263 | 
         
             
                ask.submit(
         
     | 
| 264 | 
         
             
                    fn=chat,
         
     | 
| 265 | 
         
             
                    inputs=[
         
     | 
| 
         | 
|
| 13 | 
         
             
            from datetime import datetime
         
     | 
| 14 | 
         
             
            from azure.storage.fileshare import ShareServiceClient
         
     | 
| 15 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 16 | 
         | 
| 17 | 
         
             
            theme = gr.themes.Soft(
         
     | 
| 18 | 
         
             
                primary_hue="sky",
         
     | 
| 19 | 
         
             
                font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
         
     | 
| 20 | 
         
             
            )
         
     | 
| 21 | 
         | 
| 22 | 
         
            +
            init_prompt = (
         
     | 
| 23 | 
         
            +
                "You are ClimateGPT, an AI Assistant by Ekimetrics. "
         
     | 
| 24 | 
         
            +
                "You are given extracted parts of IPCC reports and a question."
         
     | 
| 25 | 
         
            +
                " Provide a clear and structured answer based on the context provided. "
         
     | 
| 26 | 
         
            +
                "When relevant, use bullet points and lists to structure your answers."
         
     | 
| 27 | 
         
            +
            )
         
     | 
| 28 | 
         
            +
            sources_prompt = (
         
     | 
| 29 | 
         
            +
                "When relevant, use facts and numbers from the following documents in your answer. "
         
     | 
| 30 | 
         
            +
                "Whenever you use information from a document, reference it at the end of the sentence (ex: [doc 2]). "
         
     | 
| 31 | 
         
            +
                "You don't have to use all documents, only if it makes sense in the conversation. "
         
     | 
| 32 | 
         
            +
                "If no relevant information to answer the question is present in the documents, "
         
     | 
| 33 | 
         
            +
                "just say you don't have enough information to answer."
         
     | 
| 34 | 
         
            +
            )
         
     | 
| 35 | 
         
            +
             
     | 
| 36 | 
         
            +
             
     | 
| 37 | 
         
            +
            def get_reformulation_prompt(query: str) -> str:
         
     | 
| 38 | 
         
            +
                return f"""Reformulate the following user message to be a short standalone question in English, in the context of an educationnal discussion about climate change.
         
     | 
| 39 | 
         
            +
            ---
         
     | 
| 40 | 
         
            +
            query: La technologie nous sauvera-t-elle ?
         
     | 
| 41 | 
         
            +
            standalone question: Can technology help humanity mitigate the effects of climate change?
         
     | 
| 42 | 
         
            +
            ---
         
     | 
| 43 | 
         
            +
            query: what are our reserves in fossil fuel?
         
     | 
| 44 | 
         
            +
            standalone question: What are the current reserves of fossil fuels and how long will they last?
         
     | 
| 45 | 
         
            +
            ---
         
     | 
| 46 | 
         
            +
            query: {query}
         
     | 
| 47 | 
         
            +
            standalone question:"""
         
     | 
| 48 | 
         
            +
             
     | 
| 49 | 
         
            +
             
     | 
| 50 | 
         
            +
            system_template = {
         
     | 
| 51 | 
         
            +
                "role": "system",
         
     | 
| 52 | 
         
            +
                "content": init_prompt,
         
     | 
| 53 | 
         
            +
            }
         
     | 
| 54 | 
         | 
| 55 | 
         
             
            openai.api_type = "azure"
         
     | 
| 56 | 
         
             
            openai.api_key = os.environ["api_key"]
         
     | 
| 
         | 
|
| 64 | 
         
             
                ),
         
     | 
| 65 | 
         
             
                embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
         
     | 
| 66 | 
         
             
                model_format="sentence_transformers",
         
     | 
| 67 | 
         
            +
                progress_bar=False,
         
     | 
| 68 | 
         
             
            )
         
     | 
| 69 | 
         | 
| 70 | 
         
             
            retrieve_giec = EmbeddingRetriever(
         
     | 
| 
         | 
|
| 116 | 
         | 
| 117 | 
         
             
                reformulated_query = openai.Completion.create(
         
     | 
| 118 | 
         
             
                    engine="climateGPT",
         
     | 
| 119 | 
         
            +
                    prompt=get_reformulation_prompt(query),
         
     | 
| 120 | 
         
             
                    temperature=0,
         
     | 
| 121 | 
         
             
                    max_tokens=128,
         
     | 
| 122 | 
         
             
                    stop=["\n---\n", "<|im_end|>"],
         
     | 
| 
         | 
|
| 133 | 
         
             
                            for i, d in enumerate(docs, 1)
         
     | 
| 134 | 
         
             
                        ]
         
     | 
| 135 | 
         
             
                    )
         
     | 
| 136 | 
         
            +
                    messages.append({"role": "system", "content": f"{sources_prompt}\n\n{sources}"})
         
     | 
| 137 | 
         | 
| 138 | 
         
             
                    response = openai.Completion.create(
         
     | 
| 139 | 
         
             
                        engine="climateGPT",
         
     | 
| 
         | 
|
| 283 | 
         
             
                    with gr.Column(scale=1, variant="panel"):
         
     | 
| 284 | 
         
             
                        gr.Markdown("### Sources")
         
     | 
| 285 | 
         
             
                        sources_textbox = gr.Textbox(interactive=False, show_label=False, max_lines=50)
         
     | 
| 286 | 
         
            +
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 287 | 
         
             
                ask.submit(
         
     | 
| 288 | 
         
             
                    fn=chat,
         
     | 
| 289 | 
         
             
                    inputs=[
         
     |