Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	Add initial version of PDF-based PPT generation
Browse files
    	
        .streamlit/config.toml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 | 
             
            [server]
         | 
| 2 | 
             
            runOnSave = true
         | 
| 3 | 
             
            headless = false
         | 
| 4 | 
            -
            maxUploadSize =  | 
| 5 |  | 
| 6 | 
             
            [browser]
         | 
| 7 | 
             
            gatherUsageStats = false
         | 
|  | |
| 1 | 
             
            [server]
         | 
| 2 | 
             
            runOnSave = true
         | 
| 3 | 
             
            headless = false
         | 
| 4 | 
            +
            maxUploadSize = 2
         | 
| 5 |  | 
| 6 | 
             
            [browser]
         | 
| 7 | 
             
            gatherUsageStats = false
         | 
    	
        app.py
    CHANGED
    
    | @@ -19,6 +19,7 @@ from dotenv import load_dotenv | |
| 19 | 
             
            from langchain_community.chat_message_histories import StreamlitChatMessageHistory
         | 
| 20 | 
             
            from langchain_core.messages import HumanMessage
         | 
| 21 | 
             
            from langchain_core.prompts import ChatPromptTemplate
         | 
|  | |
| 22 |  | 
| 23 | 
             
            import global_config as gcfg
         | 
| 24 | 
             
            from global_config import GlobalConfig
         | 
| @@ -266,8 +267,17 @@ def set_up_chat_ui(): | |
| 266 |  | 
| 267 | 
             
                if prompt := st.chat_input(
         | 
| 268 | 
             
                    placeholder=APP_TEXT['chat_placeholder'],
         | 
| 269 | 
            -
                    max_chars=GlobalConfig.LLM_MODEL_MAX_INPUT_LENGTH
         | 
|  | |
|  | |
| 270 | 
             
                ):
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 271 | 
             
                    provider, llm_name = llm_helper.get_provider_model(
         | 
| 272 | 
             
                        llm_provider_to_use,
         | 
| 273 | 
             
                        use_ollama=RUN_IN_OFFLINE_MODE
         | 
| @@ -279,20 +289,20 @@ def set_up_chat_ui(): | |
| 279 | 
             
                    api_ver = api_version.strip()
         | 
| 280 |  | 
| 281 | 
             
                    if not are_all_inputs_valid(
         | 
| 282 | 
            -
                             | 
| 283 | 
             
                            az_deployment, az_endpoint, api_ver
         | 
| 284 | 
             
                    ):
         | 
| 285 | 
             
                        return
         | 
| 286 |  | 
| 287 | 
             
                    logger.info(
         | 
| 288 | 
             
                        'User input: %s | #characters: %d | LLM: %s',
         | 
| 289 | 
            -
                         | 
| 290 | 
             
                    )
         | 
| 291 | 
            -
                    st.chat_message('user').write( | 
| 292 |  | 
| 293 | 
             
                    if _is_it_refinement():
         | 
| 294 | 
             
                        user_messages = _get_user_messages()
         | 
| 295 | 
            -
                        user_messages.append( | 
| 296 | 
             
                        list_of_msgs = [
         | 
| 297 | 
             
                            f'{idx + 1}. {msg}' for idx, msg in enumerate(user_messages)
         | 
| 298 | 
             
                        ]
         | 
| @@ -300,10 +310,16 @@ def set_up_chat_ui(): | |
| 300 | 
             
                            **{
         | 
| 301 | 
             
                                'instructions': '\n'.join(list_of_msgs),
         | 
| 302 | 
             
                                'previous_content': _get_last_response(),
         | 
|  | |
| 303 | 
             
                            }
         | 
| 304 | 
             
                        )
         | 
| 305 | 
             
                    else:
         | 
| 306 | 
            -
                        formatted_template = prompt_template.format( | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 307 |  | 
| 308 | 
             
                    progress_bar = st.progress(0, 'Preparing to call LLM...')
         | 
| 309 | 
             
                    response = ''
         | 
| @@ -392,7 +408,7 @@ def set_up_chat_ui(): | |
| 392 | 
             
                            )
         | 
| 393 | 
             
                        return
         | 
| 394 |  | 
| 395 | 
            -
                    history.add_user_message( | 
| 396 | 
             
                    history.add_ai_message(response)
         | 
| 397 |  | 
| 398 | 
             
                    # The content has been generated as JSON
         | 
| @@ -487,6 +503,30 @@ def generate_slide_deck(json_str: str) -> Union[pathlib.Path, None]: | |
| 487 | 
             
                return path
         | 
| 488 |  | 
| 489 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 490 | 
             
            def _is_it_refinement() -> bool:
         | 
| 491 | 
             
                """
         | 
| 492 | 
             
                Whether it is the initial prompt or a refinement.
         | 
|  | |
| 19 | 
             
            from langchain_community.chat_message_histories import StreamlitChatMessageHistory
         | 
| 20 | 
             
            from langchain_core.messages import HumanMessage
         | 
| 21 | 
             
            from langchain_core.prompts import ChatPromptTemplate
         | 
| 22 | 
            +
            from pypdf import PdfReader
         | 
| 23 |  | 
| 24 | 
             
            import global_config as gcfg
         | 
| 25 | 
             
            from global_config import GlobalConfig
         | 
|  | |
| 267 |  | 
| 268 | 
             
                if prompt := st.chat_input(
         | 
| 269 | 
             
                    placeholder=APP_TEXT['chat_placeholder'],
         | 
| 270 | 
            +
                    max_chars=GlobalConfig.LLM_MODEL_MAX_INPUT_LENGTH,
         | 
| 271 | 
            +
                    accept_file=True,
         | 
| 272 | 
            +
                    file_type=['pdf', ],
         | 
| 273 | 
             
                ):
         | 
| 274 | 
            +
                    print(f'{prompt=}')
         | 
| 275 | 
            +
                    prompt_text = prompt.text or ''
         | 
| 276 | 
            +
                    if prompt['files']:
         | 
| 277 | 
            +
                        additional_text = get_pdf_contents(prompt['files'][0])
         | 
| 278 | 
            +
                    else:
         | 
| 279 | 
            +
                        additional_text = ''
         | 
| 280 | 
            +
             | 
| 281 | 
             
                    provider, llm_name = llm_helper.get_provider_model(
         | 
| 282 | 
             
                        llm_provider_to_use,
         | 
| 283 | 
             
                        use_ollama=RUN_IN_OFFLINE_MODE
         | 
|  | |
| 289 | 
             
                    api_ver = api_version.strip()
         | 
| 290 |  | 
| 291 | 
             
                    if not are_all_inputs_valid(
         | 
| 292 | 
            +
                            prompt_text, provider, llm_name, user_key,
         | 
| 293 | 
             
                            az_deployment, az_endpoint, api_ver
         | 
| 294 | 
             
                    ):
         | 
| 295 | 
             
                        return
         | 
| 296 |  | 
| 297 | 
             
                    logger.info(
         | 
| 298 | 
             
                        'User input: %s | #characters: %d | LLM: %s',
         | 
| 299 | 
            +
                        prompt_text, len(prompt_text), llm_name
         | 
| 300 | 
             
                    )
         | 
| 301 | 
            +
                    st.chat_message('user').write(prompt_text)
         | 
| 302 |  | 
| 303 | 
             
                    if _is_it_refinement():
         | 
| 304 | 
             
                        user_messages = _get_user_messages()
         | 
| 305 | 
            +
                        user_messages.append(prompt_text)
         | 
| 306 | 
             
                        list_of_msgs = [
         | 
| 307 | 
             
                            f'{idx + 1}. {msg}' for idx, msg in enumerate(user_messages)
         | 
| 308 | 
             
                        ]
         | 
|  | |
| 310 | 
             
                            **{
         | 
| 311 | 
             
                                'instructions': '\n'.join(list_of_msgs),
         | 
| 312 | 
             
                                'previous_content': _get_last_response(),
         | 
| 313 | 
            +
                                'additional_info': additional_text,
         | 
| 314 | 
             
                            }
         | 
| 315 | 
             
                        )
         | 
| 316 | 
             
                    else:
         | 
| 317 | 
            +
                        formatted_template = prompt_template.format(
         | 
| 318 | 
            +
                            **{
         | 
| 319 | 
            +
                                'question': prompt_text,
         | 
| 320 | 
            +
                                'additional_info': additional_text,
         | 
| 321 | 
            +
                            }
         | 
| 322 | 
            +
                        )
         | 
| 323 |  | 
| 324 | 
             
                    progress_bar = st.progress(0, 'Preparing to call LLM...')
         | 
| 325 | 
             
                    response = ''
         | 
|  | |
| 408 | 
             
                            )
         | 
| 409 | 
             
                        return
         | 
| 410 |  | 
| 411 | 
            +
                    history.add_user_message(prompt_text)
         | 
| 412 | 
             
                    history.add_ai_message(response)
         | 
| 413 |  | 
| 414 | 
             
                    # The content has been generated as JSON
         | 
|  | |
| 503 | 
             
                return path
         | 
| 504 |  | 
| 505 |  | 
| 506 | 
            +
            def get_pdf_contents(
         | 
| 507 | 
            +
                    pdf_file: st.runtime.uploaded_file_manager.UploadedFile,
         | 
| 508 | 
            +
                    max_pages: int = 20
         | 
| 509 | 
            +
            ) -> str:
         | 
| 510 | 
            +
                """
         | 
| 511 | 
            +
                Extract the text contents from a PDF file.
         | 
| 512 | 
            +
             | 
| 513 | 
            +
                :param pdf_file: The uploaded PDF file.
         | 
| 514 | 
            +
                :param max_pages: The max no. of pages to extract contents from.
         | 
| 515 | 
            +
                :return: The contents.
         | 
| 516 | 
            +
                """
         | 
| 517 | 
            +
             | 
| 518 | 
            +
                print(f'{type(pdf_file)=}')
         | 
| 519 | 
            +
                reader = PdfReader(pdf_file)
         | 
| 520 | 
            +
                n_pages = min(max_pages, len(reader.pages))
         | 
| 521 | 
            +
                text = ''
         | 
| 522 | 
            +
             | 
| 523 | 
            +
                for page in range(n_pages):
         | 
| 524 | 
            +
                    page = reader.pages[page]
         | 
| 525 | 
            +
                    text += page.extract_text()
         | 
| 526 | 
            +
             | 
| 527 | 
            +
                return text
         | 
| 528 | 
            +
             | 
| 529 | 
            +
             | 
| 530 | 
             
            def _is_it_refinement() -> bool:
         | 
| 531 | 
             
                """
         | 
| 532 | 
             
                Whether it is the initial prompt or a refinement.
         | 
    	
        langchain_templates/chat_prompts/initial_template_v4_two_cols_img.txt
    CHANGED
    
    | @@ -5,6 +5,10 @@ Include main headings for each slide, detailed bullet points for each slide. | |
| 5 | 
             
            Add relevant, detailed content to each slide. When relevant, add one or two EXAMPLES to illustrate the concept.
         | 
| 6 | 
             
            For two or three important slides, generate the key message that those slides convey.
         | 
| 7 |  | 
|  | |
|  | |
|  | |
|  | |
| 8 | 
             
            Identify if a slide describes a step-by-step/sequential process, then begin the bullet points with a special marker >>.
         | 
| 9 | 
             
            Limit this to max two or three slides.
         | 
| 10 |  | 
| @@ -16,7 +20,7 @@ In addition, create one slide containing 4 TO 6 icons (pictograms) illustrating | |
| 16 | 
             
            In this slide, each line of text will begin with the name of a relevant icon enclosed between [[ and ]], e.g., [[machine-learning]] and [[fairness]].
         | 
| 17 | 
             
            Insert icons only in this slide.
         | 
| 18 |  | 
| 19 | 
            -
            Your output, i.e., the content of each slide should be  | 
| 20 | 
             
            Each bullet point should be detailed and explanatory, not just short phrases.
         | 
| 21 |  | 
| 22 | 
             
            ALWAYS add a concluding slide at the end, containing a list of the key takeaways and an optional call-to-action if relevant to the context.
         | 
| @@ -102,5 +106,10 @@ The output must be only a valid and syntactically correct JSON adhering to the f | |
| 102 | 
             
            }}
         | 
| 103 |  | 
| 104 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 105 | 
             
            ### Output:
         | 
| 106 | 
             
            ```json
         | 
|  | |
| 5 | 
             
            Add relevant, detailed content to each slide. When relevant, add one or two EXAMPLES to illustrate the concept.
         | 
| 6 | 
             
            For two or three important slides, generate the key message that those slides convey.
         | 
| 7 |  | 
| 8 | 
            +
            The <ADDITIONAL_INFO> may provide additional information. If available, you should incorporate them while making the slides.
         | 
| 9 | 
            +
            Rather than simply listing them line by line, try to understand these concepts and data provided and present them appropriately in the slides.
         | 
| 10 | 
            +
            If <ADDITIONAL_INFO> is empty, ignore it.
         | 
| 11 | 
            +
             | 
| 12 | 
             
            Identify if a slide describes a step-by-step/sequential process, then begin the bullet points with a special marker >>.
         | 
| 13 | 
             
            Limit this to max two or three slides.
         | 
| 14 |  | 
|  | |
| 20 | 
             
            In this slide, each line of text will begin with the name of a relevant icon enclosed between [[ and ]], e.g., [[machine-learning]] and [[fairness]].
         | 
| 21 | 
             
            Insert icons only in this slide.
         | 
| 22 |  | 
| 23 | 
            +
            Your output, i.e., the content of each slide should be very detailed and descriptive but not way too verbose (you're creating a presentation, not a report).
         | 
| 24 | 
             
            Each bullet point should be detailed and explanatory, not just short phrases.
         | 
| 25 |  | 
| 26 | 
             
            ALWAYS add a concluding slide at the end, containing a list of the key takeaways and an optional call-to-action if relevant to the context.
         | 
|  | |
| 106 | 
             
            }}
         | 
| 107 |  | 
| 108 |  | 
| 109 | 
            +
            <ADDITIONAL_INFO>
         | 
| 110 | 
            +
            {additional_info}
         | 
| 111 | 
            +
            </ADDITIONAL_INFO>
         | 
| 112 | 
            +
             | 
| 113 | 
            +
             | 
| 114 | 
             
            ### Output:
         | 
| 115 | 
             
            ```json
         | 
    	
        langchain_templates/chat_prompts/refinement_template_v4_two_cols_img.txt
    CHANGED
    
    | @@ -8,6 +8,10 @@ Include main headings for each slide, detailed bullet points for each slide. | |
| 8 | 
             
            Add relevant, detailed content to each slide. When relevant, add one or two EXAMPLES to illustrate the concept.
         | 
| 9 | 
             
            For two or three important slides, generate the key message that those slides convey.
         | 
| 10 |  | 
|  | |
|  | |
|  | |
|  | |
| 11 | 
             
            Identify if a slide describes a step-by-step/sequential process, then begin the bullet points with a special marker >>. Limit this to max two or three slides.
         | 
| 12 | 
             
            Also, add at least one slide with a double column layout by generating appropriate content based on the description in the JSON schema provided below.
         | 
| 13 | 
             
            In addition, for each slide, add image keywords based on the content of the respective slides.
         | 
| @@ -18,7 +22,7 @@ In this slide, each line of text will begin with the name of a relevant icon enc | |
| 18 | 
             
            Insert icons only in this slide.
         | 
| 19 | 
             
            Do not repeat any icons or the icons slide.
         | 
| 20 |  | 
| 21 | 
            -
            Your output, i.e., the content of each slide should be  | 
| 22 | 
             
            Each bullet point should be detailed and explanatory, not just short phrases.
         | 
| 23 |  | 
| 24 | 
             
            ALWAYS add a concluding slide at the end, containing a list of the key takeaways and an optional call-to-action if relevant to the context.
         | 
| @@ -108,5 +112,10 @@ The output must be only a valid and syntactically correct JSON adhering to the f | |
| 108 | 
             
            }}
         | 
| 109 |  | 
| 110 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 111 | 
             
            ### Output:
         | 
| 112 | 
             
            ```json
         | 
|  | |
| 8 | 
             
            Add relevant, detailed content to each slide. When relevant, add one or two EXAMPLES to illustrate the concept.
         | 
| 9 | 
             
            For two or three important slides, generate the key message that those slides convey.
         | 
| 10 |  | 
| 11 | 
            +
            The <ADDITIONAL_INFO> may provide additional information. If available, you should incorporate them while making the slides.
         | 
| 12 | 
            +
            Rather than simply listing them line by line, try to understand these concepts and data provided and present them appropriately in the slides.
         | 
| 13 | 
            +
            If <ADDITIONAL_INFO> is empty, ignore it.
         | 
| 14 | 
            +
             | 
| 15 | 
             
            Identify if a slide describes a step-by-step/sequential process, then begin the bullet points with a special marker >>. Limit this to max two or three slides.
         | 
| 16 | 
             
            Also, add at least one slide with a double column layout by generating appropriate content based on the description in the JSON schema provided below.
         | 
| 17 | 
             
            In addition, for each slide, add image keywords based on the content of the respective slides.
         | 
|  | |
| 22 | 
             
            Insert icons only in this slide.
         | 
| 23 | 
             
            Do not repeat any icons or the icons slide.
         | 
| 24 |  | 
| 25 | 
            +
            Your output, i.e., the content of each slide should be very detailed and descriptive but not way too verbose (you're creating a presentation, not a report).
         | 
| 26 | 
             
            Each bullet point should be detailed and explanatory, not just short phrases.
         | 
| 27 |  | 
| 28 | 
             
            ALWAYS add a concluding slide at the end, containing a list of the key takeaways and an optional call-to-action if relevant to the context.
         | 
|  | |
| 112 | 
             
            }}
         | 
| 113 |  | 
| 114 |  | 
| 115 | 
            +
            <ADDITIONAL_INFO>
         | 
| 116 | 
            +
            {additional_info}
         | 
| 117 | 
            +
            </ADDITIONAL_INFO>
         | 
| 118 | 
            +
             | 
| 119 | 
            +
             | 
| 120 | 
             
            ### Output:
         | 
| 121 | 
             
            ```json
         | 
    	
        requirements.txt
    CHANGED
    
    | @@ -15,11 +15,12 @@ langchain-cohere==0.3.3 | |
| 15 | 
             
            langchain-together==0.3.0
         | 
| 16 | 
             
            langchain-ollama==0.2.1
         | 
| 17 | 
             
            langchain-openai==0.3.3
         | 
| 18 | 
            -
            streamlit~=1. | 
| 19 |  | 
| 20 | 
             
            python-pptx~=1.0.2
         | 
| 21 | 
             
            json5~=0.9.14
         | 
| 22 | 
             
            requests~=2.32.3
         | 
|  | |
| 23 |  | 
| 24 | 
             
            transformers>=4.48.0
         | 
| 25 | 
             
            torch==2.4.0
         | 
|  | |
| 15 | 
             
            langchain-together==0.3.0
         | 
| 16 | 
             
            langchain-ollama==0.2.1
         | 
| 17 | 
             
            langchain-openai==0.3.3
         | 
| 18 | 
            +
            streamlit~=1.44.0
         | 
| 19 |  | 
| 20 | 
             
            python-pptx~=1.0.2
         | 
| 21 | 
             
            json5~=0.9.14
         | 
| 22 | 
             
            requests~=2.32.3
         | 
| 23 | 
            +
            pypdf~=5.4.0
         | 
| 24 |  | 
| 25 | 
             
            transformers>=4.48.0
         | 
| 26 | 
             
            torch==2.4.0
         |