Update logging.py
Browse files
- climateqa/logging.py +4 -4
    	
        climateqa/logging.py
    CHANGED
    
    | @@ -8,9 +8,9 @@ import pandas as pd | |
| 8 | 
             
            import io
         | 
| 9 | 
             
            from typing import TypedDict, List
         | 
| 10 | 
             
            from climateqa.constants import DOCUMENT_METADATA_DEFAULT_VALUES
         | 
|  | |
| 11 |  | 
| 12 | 
            -
             | 
| 13 | 
            -
            def serialize_docs(docs:list)->list:
         | 
| 14 | 
             
                """Convert document objects to a simplified format compatible with Hugging Face datasets.
         | 
| 15 |  | 
| 16 | 
             
                This function processes document objects by extracting their page content and metadata,
         | 
| @@ -27,13 +27,13 @@ def serialize_docs(docs:list)->list: | |
| 27 | 
             
                for doc in docs:
         | 
| 28 | 
             
                    # Make sure we have a clean doc format
         | 
| 29 | 
             
                    new_doc = {
         | 
| 30 | 
            -
                        "page_content": doc. | 
| 31 | 
             
                        "metadata": {}
         | 
| 32 | 
             
                    }
         | 
| 33 |  | 
| 34 | 
             
                    # Ensure all metadata fields exist with defaults if missing
         | 
| 35 | 
             
                    for field, default_value in DOCUMENT_METADATA_DEFAULT_VALUES.items():
         | 
| 36 | 
            -
                        new_value =   | 
| 37 | 
             
                        try:
         | 
| 38 | 
             
                            new_doc["metadata"][field] = type(default_value)(new_value)
         | 
| 39 | 
             
                        except:
         | 
|  | |
| 8 | 
             
            import io
         | 
| 9 | 
             
            from typing import TypedDict, List
         | 
| 10 | 
             
            from climateqa.constants import DOCUMENT_METADATA_DEFAULT_VALUES
         | 
| 11 | 
            +
            from langchain_core.documents import Document
         | 
| 12 |  | 
| 13 | 
            +
            def serialize_docs(docs:list[Document])->list:
         | 
|  | |
| 14 | 
             
                """Convert document objects to a simplified format compatible with Hugging Face datasets.
         | 
| 15 |  | 
| 16 | 
             
                This function processes document objects by extracting their page content and metadata,
         | 
|  | |
| 27 | 
             
                for doc in docs:
         | 
| 28 | 
             
                    # Make sure we have a clean doc format
         | 
| 29 | 
             
                    new_doc = {
         | 
| 30 | 
            +
                        "page_content": doc.page_content,
         | 
| 31 | 
             
                        "metadata": {}
         | 
| 32 | 
             
                    }
         | 
| 33 |  | 
| 34 | 
             
                    # Ensure all metadata fields exist with defaults if missing
         | 
| 35 | 
             
                    for field, default_value in DOCUMENT_METADATA_DEFAULT_VALUES.items():
         | 
| 36 | 
            +
                        new_value =  doc.metadata.get(field, default_value)
         | 
| 37 | 
             
                        try:
         | 
| 38 | 
             
                            new_doc["metadata"][field] = type(default_value)(new_value)
         | 
| 39 | 
             
                        except:
         | 

