general_chat

Running

App Files Community

pvanand committed on Oct 8, 2024

Commit

db1b7b5

verified ·

1 Parent(s): 3c03878

Update document_generator.py

Browse files

Files changed (1) hide show

document_generator.py +74 -68

document_generator.py CHANGED Viewed

@@ -40,29 +40,28 @@ FORMAT YOUR OUTPUT AS MARKDOWN ENCLOSED IN <response></response> tags
 DOCUMENT_SECTION_PROMPT_USER = """<prompt>Output the content for the section "{section_or_subsection_title}" formatted as markdown. Follow this instruction: {content_instruction}</prompt>"""
 # File: app.py
-import os
 import json
 import re
-import time
 import asyncio
 from typing import List, Dict, Optional, Any, Callable
 from openai import OpenAI
 import logging
 import functools
-from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel
 from fastapi_cache.decorator import cache
-from starlette.responses import StreamingResponse
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 def log_execution(func: Callable) -> Callable:
     @functools.wraps(func)
-    async def wrapper(*args: Any, **kwargs: Any) -> Any:
         logger.info(f"Executing {func.__name__}")
         try:
-            result = await func(*args, **kwargs)
             logger.info(f"{func.__name__} completed successfully")
             return result
         except Exception as e:
@@ -78,7 +77,7 @@ class AIClient:
         )
     @log_execution
-    async def generate_response(
         self,
         messages: List[Dict[str, str]],
         model: str = "openai/gpt-4o-mini",
@@ -86,14 +85,12 @@ class AIClient:
     ) -> Optional[str]:
         if not messages:
             return None
-        loop = asyncio.get_event_loop()
-        response = await loop.run_in_executor(None, functools.partial(
-            self.client.chat.completions.create,
             model=model,
             messages=messages,
             max_tokens=max_tokens,
             stream=False
-        ))
         return response.choices[0].message.content
 class DocumentGenerator:
@@ -123,14 +120,14 @@ class DocumentGenerator:
         return content.lstrip()
     @log_execution
-    async def generate_document_outline(self, query: str, max_retries: int = 3) -> Optional[Dict]:
         messages = [
             {"role": "system", "content": DOCUMENT_OUTLINE_PROMPT_SYSTEM},
             {"role": "user", "content": DOCUMENT_OUTLINE_PROMPT_USER.format(query=query)}
         ]
         for attempt in range(max_retries):
-            outline_response = await self.ai_client.generate_response(messages, model="openai/gpt-4o")
             outline_json_text = self.extract_between_tags(outline_response, "output")
             try:
@@ -145,7 +142,7 @@ class DocumentGenerator:
                     return None
     @log_execution
-    async def generate_content(self, title: str, content_instruction: str, section_number: str) -> str:
         self.content_messages.append({
             "role": "user",
             "content": DOCUMENT_SECTION_PROMPT_USER.format(
@@ -153,7 +150,7 @@ class DocumentGenerator:
                 content_instruction=content_instruction
             )
         })
-        section_response = await self.ai_client.generate_response(self.content_messages)
         content = self.extract_between_tags(section_response, "response")
         content = self.remove_duplicate_title(content, title, section_number)
         self.content_messages.append({
@@ -162,44 +159,6 @@ class DocumentGenerator:
         })
         return content
-    @log_execution
-    async def generate_full_document(self, document_outline: Dict, query: str):
-        self.document_outline = document_outline
-        overall_objective = query
-        document_layout = json.dumps(self.document_outline, indent=2)
-        self.content_messages = [
-            {
-                "role": "system",
-                "content": DOCUMENT_SECTION_PROMPT_SYSTEM.format(
-                    overall_objective=overall_objective,
-                    document_layout=document_layout
-                )
-            }
-        ]
-        for section in self.document_outline["Document"].get("Sections", []):
-            section_title = section.get("Title", "")
-            section_number = section.get("SectionNumber", "")
-            content_instruction = section.get("Content", "")
-            logger.info(f"Generating content for section: {section_title}")
-            section["Content"] = await self.generate_content(section_title, content_instruction, section_number)
-            yield json.dumps({"type": "document_section", "content": section}) + "\n"
-            for subsection in section.get("Subsections", []):
-                subsection_title = subsection.get("Title", "")
-                subsection_number = subsection.get("SectionNumber", "")
-                subsection_content_instruction = subsection.get("Content", "")
-                logger.info(f"Generating content for subsection: {subsection_title}")
-                subsection["Content"] = await self.generate_content(subsection_title, subsection_content_instruction, subsection_number)
-                yield json.dumps({"type": "document_subsection", "content": subsection}) + "\n"
-        # Generate the complete markdown document
-        full_document = self.document_outline
-        markdown_document = MarkdownConverter.convert_to_markdown(full_document["Document"])
-        yield json.dumps({"type": "complete_document", "content": markdown_document}) + "\n"
 class MarkdownConverter:
     @staticmethod
     def slugify(text: str) -> str:
@@ -225,17 +184,14 @@ class MarkdownConverter:
     @classmethod
     def convert_to_markdown(cls, document: Dict) -> str:
-        # First page with centered content
         markdown = "<div style='text-align: center; padding-top: 33vh;'>\n\n"
         markdown += f"<h1 style='color: #2c3e50; border-bottom: 2px solid #3498db; padding-bottom: 10px; display: inline-block;'>{document['Title']}</h1>\n\n"
         markdown += f"<p style='color: #7f8c8d;'><em>By {document['Author']}</em></p>\n\n"
         markdown += f"<p style='color: #95a5a6;'>Version {document['Version']} | {document['Date']}</p>\n\n"
         markdown += "</div>\n\n"
-        # Table of Contents on the second page
         markdown += cls.generate_toc(document['Sections'])
-        # Main content
         markdown += "<div style='max-width: 800px; margin: 0 auto; font-family: \"Segoe UI\", Arial, sans-serif; line-height: 1.6;'>\n\n"
         for section in document['Sections']:
@@ -266,8 +222,56 @@ class MarkdownDocumentRequest(BaseModel):
     json_document: Dict
     query: str
-class MarkdownDocumentResponse(BaseModel):
-    markdown_document: str
 @cache(expire=600*24*7)
 @router.post("/generate-document/json", response_model=JsonDocumentResponse)
@@ -276,8 +280,7 @@ async def generate_document_outline_endpoint(request: DocumentRequest):
     document_generator = DocumentGenerator(ai_client)
     try:
-        # Generate the document outline
-        json_document = await document_generator.generate_document_outline(request.query)
         if json_document is None:
             raise HTTPException(status_code=500, detail="Failed to generate a valid document outline")
@@ -286,20 +289,23 @@ async def generate_document_outline_endpoint(request: DocumentRequest):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
-@router.post("/generate-document/markdown")
-async def generate_markdown_document_endpoint(request: MarkdownDocumentRequest):
     ai_client = AIClient()
     document_generator = DocumentGenerator(ai_client)
-    async def event_stream():
         try:
-            # Generate the full document content and stream it
-            async for section in document_generator.generate_full_document(request.json_document, request.query):
-                yield section
         except Exception as e:
-            yield json.dumps({"type": "error", "message": str(e)}) + "\n"
-    return StreamingResponse(event_stream(), media_type="application/json")
 @router.post("/generate-document-test", response_model=MarkdownDocumentResponse)
 async def test_generate_document_endpoint(request: DocumentRequest):

 DOCUMENT_SECTION_PROMPT_USER = """<prompt>Output the content for the section "{section_or_subsection_title}" formatted as markdown. Follow this instruction: {content_instruction}</prompt>"""
 # File: app.py
+import os
 import json
 import re
 import asyncio
 from typing import List, Dict, Optional, Any, Callable
 from openai import OpenAI
 import logging
 import functools
+from fastapi import APIRouter, HTTPException, Request
+from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
 from fastapi_cache.decorator import cache
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 def log_execution(func: Callable) -> Callable:
     @functools.wraps(func)
+    def wrapper(*args: Any, **kwargs: Any) -> Any:
         logger.info(f"Executing {func.__name__}")
         try:
+            result = func(*args, **kwargs)
             logger.info(f"{func.__name__} completed successfully")
             return result
         except Exception as e:
         )
     @log_execution
+    def generate_response(
         self,
         messages: List[Dict[str, str]],
         model: str = "openai/gpt-4o-mini",
     ) -> Optional[str]:
         if not messages:
             return None
+        response = self.client.chat.completions.create(
             model=model,
             messages=messages,
             max_tokens=max_tokens,
             stream=False
+        )
         return response.choices[0].message.content
 class DocumentGenerator:
         return content.lstrip()
     @log_execution
+    def generate_document_outline(self, query: str, max_retries: int = 3) -> Optional[Dict]:
         messages = [
             {"role": "system", "content": DOCUMENT_OUTLINE_PROMPT_SYSTEM},
             {"role": "user", "content": DOCUMENT_OUTLINE_PROMPT_USER.format(query=query)}
         ]
         for attempt in range(max_retries):
+            outline_response = self.ai_client.generate_response(messages, model="openai/gpt-4o")
             outline_json_text = self.extract_between_tags(outline_response, "output")
             try:
                     return None
     @log_execution
+    def generate_content(self, title: str, content_instruction: str, section_number: str) -> str:
         self.content_messages.append({
             "role": "user",
             "content": DOCUMENT_SECTION_PROMPT_USER.format(
                 content_instruction=content_instruction
             )
         })
+        section_response = self.ai_client.generate_response(self.content_messages)
         content = self.extract_between_tags(section_response, "response")
         content = self.remove_duplicate_title(content, title, section_number)
         self.content_messages.append({
         })
         return content
 class MarkdownConverter:
     @staticmethod
     def slugify(text: str) -> str:
     @classmethod
     def convert_to_markdown(cls, document: Dict) -> str:
         markdown = "<div style='text-align: center; padding-top: 33vh;'>\n\n"
         markdown += f"<h1 style='color: #2c3e50; border-bottom: 2px solid #3498db; padding-bottom: 10px; display: inline-block;'>{document['Title']}</h1>\n\n"
         markdown += f"<p style='color: #7f8c8d;'><em>By {document['Author']}</em></p>\n\n"
         markdown += f"<p style='color: #95a5a6;'>Version {document['Version']} | {document['Date']}</p>\n\n"
         markdown += "</div>\n\n"
         markdown += cls.generate_toc(document['Sections'])
         markdown += "<div style='max-width: 800px; margin: 0 auto; font-family: \"Segoe UI\", Arial, sans-serif; line-height: 1.6;'>\n\n"
         for section in document['Sections']:
     json_document: Dict
     query: str
+async def generate_document_stream(document_generator: DocumentGenerator, document_outline: Dict, query: str):
+    document_generator.document_outline = document_outline
+    overall_objective = query
+    document_layout = json.dumps(document_generator.document_outline, indent=2)
+    document_generator.content_messages = [
+        {
+            "role": "system",
+            "content": DOCUMENT_SECTION_PROMPT_SYSTEM.format(
+                overall_objective=overall_objective,
+                document_layout=document_layout
+            )
+        }
+    ]
+    for section in document_generator.document_outline["Document"].get("Sections", []):
+        section_title = section.get("Title", "")
+        section_number = section.get("SectionNumber", "")
+        content_instruction = section.get("Content", "")
+        logging.info(f"Generating content for section: {section_title}")
+        content = document_generator.generate_content(section_title, content_instruction, section_number)
+        section["Content"] = content
+        yield json.dumps({
+            "type": "document_section",
+            "content": {
+                "section_number": section_number,
+                "section_title": section_title,
+                "content": content
+            }
+        }) + "\n"
+        for subsection in section.get("Subsections", []):
+            subsection_title = subsection.get("Title", "")
+            subsection_number = subsection.get("SectionNumber", "")
+            subsection_content_instruction = subsection.get("Content", "")
+            logging.info(f"Generating content for subsection: {subsection_title}")
+            content = document_generator.generate_content(subsection_title, subsection_content_instruction, subsection_number)
+            subsection["Content"] = content
+            yield json.dumps({
+                "type": "document_section",
+                "content": {
+                    "section_number": subsection_number,
+                    "section_title": subsection_title,
+                    "content": content
+                }
+            }) + "\n"
+    markdown_document = MarkdownConverter.convert_to_markdown(document_generator.document_outline["Document"])
+    yield json.dumps({"type": "complete_document", "content": markdown_document}) + "\n"
 @cache(expire=600*24*7)
 @router.post("/generate-document/json", response_model=JsonDocumentResponse)
     document_generator = DocumentGenerator(ai_client)
     try:
+        json_document = document_generator.generate_document_outline(request.query)
         if json_document is None:
             raise HTTPException(status_code=500, detail="Failed to generate a valid document outline")
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
+@router.post("/generate-document/markdown-stream")
+async def generate_markdown_document_stream_endpoint(request: MarkdownDocumentRequest):
     ai_client = AIClient()
     document_generator = DocumentGenerator(ai_client)
+    async def stream_generator():
         try:
+            async for chunk in generate_document_stream(document_generator, request.json_document, request.query):
+                yield chunk
         except Exception as e:
+            yield json.dumps({"type": "error", "content": str(e)}) + "\n"
+    return StreamingResponse(stream_generator(), media_type="application/x-ndjson")
+###########################################
+class MarkdownDocumentResponse(BaseModel):
+    markdown_document: str
 @router.post("/generate-document-test", response_model=MarkdownDocumentResponse)
 async def test_generate_document_endpoint(request: DocumentRequest):