First version
Browse files- app.py +157 -0
- config.py +7 -0
- core/__init__.py +0 -0
- core/agent.py +17 -0
- core/ai_enrichment.py +41 -0
- core/components.py +23 -0
- core/components.pyi +29 -0
- core/database.py +81 -0
- core/parser.py +30 -0
- core/processing.py +42 -0
- core/storage.py +58 -0
- core/summarizer.py +25 -0
- core/utils.py +23 -0
- data/article_url.txt +0 -0
- data/document1.pdf +0 -0
- data/sample_note.txt +0 -0
- mcp_tools.py +122 -0
- requirements.txt +12 -0
    	
        app.py
    ADDED
    
    | @@ -0,0 +1,157 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import os
         | 
| 2 | 
            +
            import uuid
         | 
| 3 | 
            +
            import gradio as gr
         | 
| 4 | 
            +
            from gradio import components
         | 
| 5 | 
            +
            from fastmcp import FastMCP
         | 
| 6 | 
            +
            # from core.parser import parse_document, parse_url
         | 
| 7 | 
            +
            from core.parser import parse_document, parse_url
         | 
| 8 | 
            +
            from core.summarizer import summarize_content, tag_content
         | 
| 9 | 
            +
            from core.storage import add_document, search_documents
         | 
| 10 | 
            +
            from core.agent import answer_question
         | 
| 11 | 
            +
            # from core.components import DocumentViewer
         | 
| 12 | 
            +
            import plotly.graph_objects as go
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            # Initialize the FastMCP server (for agentic tools)
            # NOTE(review): `mcp` is not referenced again anywhere in app.py; the UI is
            # launched via `demo.launch(mcp_server=True)` instead. Confirm whether this
            # instance is still needed.
            mcp = FastMCP("IntelligentContentOrganizer")
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            # Gradio UI functions
         | 
| 18 | 
            +
            def process_content(file_obj, url, tags_input):
                """
                Handle file upload or URL input: parse content, summarize, tag, store.

                Args:
                    file_obj: Uploaded file (an object exposing ``.name``) or a plain
                        path string; ``None`` when nothing was uploaded.
                    url: URL to fetch when no file is given. Surrounding whitespace
                        is ignored; ``None`` is treated as empty.
                    tags_input: Optional comma-separated tags. When it contains at
                        least one non-blank tag, these replace the detected tags.

                Returns:
                    A 4-tuple ``(content_text, summary, tags_csv, status_message)``,
                    one value per output textbox wired to this callback.
                """
                url = (url or "").strip()
                if file_obj is not None:
                    # Accept both upload objects carrying a .name and bare path strings.
                    source = getattr(file_obj, "name", file_obj)
                    content_text = parse_document(source)
                elif url:
                    content_text = parse_url(url)
                    source = url
                else:
                    return "No document provided.", "", "", ""

                # Do not summarize or store a document with no extractable text.
                if not content_text or not content_text.strip():
                    return "", "", "", f"No text could be extracted from {source}."

                # Summarize and tag (simulated)
                summary = summarize_content(content_text)
                tags = tag_content(content_text)

                # Allow user to override or confirm tags via input. Input made only of
                # commas/blanks yields no tags and must not wipe the detected ones.
                if tags_input:
                    user_tags = [t.strip() for t in tags_input.split(",") if t.strip()]
                    if user_tags:
                        tags = user_tags

                # Store in ChromaDB with a unique ID
                doc_id = str(uuid.uuid4())
                metadata = {"source": source, "tags": tags}
                add_document(doc_id, content_text, metadata)

                return content_text, summary, ", ".join(tags), f"Document stored with ID: {doc_id}"
         | 
| 50 | 
            +
             | 
| 51 | 
            +
            def generate_graph():
                """
                Create a simple Plotly graph of documents.
                Nodes = documents, edges = shared tags.

                Returns:
                    A ``plotly.graph_objects.Figure``. The figure is empty when the
                    store holds no documents.
                """
                from itertools import combinations

                # Fetch all documents from ChromaDB
                from core.storage import get_all_documents
                docs = get_all_documents()
                if not docs:
                    return go.Figure()  # empty

                nodes = {doc["id"]: doc for doc in docs}
                # Normalize each document's tags once instead of once per pair.
                tag_lists = {doc_id: _doc_tags(doc) for doc_id, doc in nodes.items()}
                tag_sets = {doc_id: set(tags) for doc_id, tags in tag_lists.items()}

                # Build graph connections: if two docs share a tag, connect them
                edges = [
                    (first, second)
                    for first, second in combinations(nodes, 2)
                    if tag_sets[first] & tag_sets[second]
                ]

                # Use networkx to compute layout; the fixed seed keeps it reproducible.
                import networkx as nx
                G = nx.Graph()
                G.add_nodes_from(nodes.keys())
                G.add_edges_from(edges)
                pos = nx.spring_layout(G, seed=42)

                # Create Plotly traces. A None entry after each segment breaks the
                # line so separate edges are not joined together.
                edge_x = []
                edge_y = []
                for src, dst in edges:
                    x0, y0 = pos[src]
                    x1, y1 = pos[dst]
                    edge_x += [x0, x1, None]
                    edge_y += [y0, y1, None]
                edge_trace = go.Scatter(
                    x=edge_x, y=edge_y,
                    line=dict(width=1, color='#888'),
                    hoverinfo='none',
                    mode='lines')

                node_x = []
                node_y = []
                node_text = []
                for node_id in G.nodes():
                    x, y = pos[node_id]
                    node_x.append(x)
                    node_y.append(y)
                    source = (nodes[node_id].get("metadata") or {}).get("source", "")
                    # Plotly renders "<br>" as a line break; "\n" is not honoured.
                    node_text.append(f"{source}<br>Tags: {', '.join(tag_lists[node_id])}")

                node_trace = go.Scatter(
                    x=node_x, y=node_y,
                    mode='markers+text',
                    marker=dict(size=10, color='skyblue'),
                    text=node_text, hoverinfo='text', textposition="bottom center")

                fig = go.Figure(data=[edge_trace, node_trace],
                                layout=go.Layout(title="Document Knowledge Graph",
                                                 showlegend=False,
                                                 margin=dict(l=20, r=20, b=20, t=30)))
                return fig


            def _doc_tags(doc):
                """Return a document's tags as a list, accepting a list or a comma-separated string."""
                raw = (doc.get("metadata") or {}).get("tags") or []
                if isinstance(raw, str):
                    raw = raw.split(",")
                return [str(tag).strip() for tag in raw if str(tag).strip()]
         | 
| 113 | 
            +
             | 
| 114 | 
            +
            def handle_query(question):
                """
                Forward a non-empty question to the Q&A agent and return its reply.

                An empty or missing question short-circuits with a prompt asking the
                user to type one.
                """
                return answer_question(question) if question else "Please enter a question."
         | 
| 123 | 
            +
             | 
| 124 | 
            +
            # Build Gradio interface with Blocks
         | 
| 125 | 
            +
            # Three-tab UI: ingest a document, view the tag graph, ask a question.
            with gr.Blocks(title="Intelligent Content Organizer") as demo:
                gr.Markdown("# Intelligent Content Organizer")
                # Tab 1: ingest a file or URL via process_content.
                with gr.Tab("Upload / Fetch Content"):
                    gr.Markdown("**Add a document:** Upload a file or enter a URL.")
                    with gr.Row():
                        file_in = gr.File(label="Upload Document (PDF, TXT, etc.)")
                        url_in = gr.Textbox(label="Document URL", placeholder="https://example.com/article")
                    tags_in = gr.Textbox(label="Tags (comma-separated)", placeholder="Enter tags or leave blank")
                    process_btn = gr.Button("Parse & Add Document")
                    doc_view = gr.Textbox(label="Document Preview", lines=10, interactive=False)
                    summary_out = gr.Textbox(label="Summary", interactive=False)
                    tags_out = gr.Textbox(label="Detected Tags", interactive=False)
                    status_out = gr.Textbox(label="Status/Info", interactive=False)
                    # The four outputs line up with the 4-tuple returned by process_content.
                    process_btn.click(fn=process_content, inputs=[file_in, url_in, tags_in],
                                      outputs=[doc_view, summary_out, tags_out, status_out])

                # Tab 2: the graph is only rebuilt when the user clicks the refresh button.
                with gr.Tab("Knowledge Graph"):
                    gr.Markdown("**Document relationships:** Shared tags indicate edges.")
                    graph_plot = gr.Plot(label="Knowledge Graph")
                    refresh_btn = gr.Button("Refresh Graph")
                    refresh_btn.click(fn=generate_graph, inputs=None, outputs=graph_plot)

                # Tab 3: question answering via handle_query.
                with gr.Tab("Ask a Question"):
                    gr.Markdown("**AI Q&A:** Ask a question about your documents.")
                    question_in = gr.Textbox(label="Your Question")
                    answer_out = gr.Textbox(label="Answer", interactive=False)
                    ask_btn = gr.Button("Get Answer")
                    ask_btn.click(fn=handle_query, inputs=question_in, outputs=answer_out)

            if __name__ == "__main__":
                # Launch Gradio app (Hugging Face Spaces will auto-launch this)
                # demo.queue().launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))
                demo.launch(mcp_server=True)
         | 
    	
        config.py
    ADDED
    
    | @@ -0,0 +1,7 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # config.py
         | 
| 2 | 
            +
            import os
         | 
| 3 | 
            +
            from dotenv import load_dotenv
         | 
| 4 | 
            +
            load_dotenv()  # loads from .env if present
         | 
| 5 | 
            +
            # API keys read from the environment; each is None when its variable is unset.
            MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY")
            CLAUDE_API_KEY  = os.environ.get("CLAUDE_API_KEY")
            BRAVE_API_KEY   = os.environ.get("BRAVE_API_KEY")
            # Read by core/database.py to choose OpenAI embeddings over its dummy fallback.
            OPENAI_API_KEY  = os.environ.get("OPENAI_API_KEY")
         | 
    	
        core/__init__.py
    ADDED
    
    | 
            File without changes
         | 
    	
        core/agent.py
    ADDED
    
    | @@ -0,0 +1,17 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import json
         | 
| 2 | 
            +
            from core.storage import search_documents
         | 
| 3 | 
            +
            # For Q&A we can use a simple retrieval + QA pipeline (stubbed here)
         | 
| 4 | 
            +
            # In a real app, you might use LangChain or a HuggingFace question-answering model.
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            def answer_question(question: str) -> str:
                """
                Agent stub: look up the top 3 matching documents and report on them.

                The reply only echoes the question and the number of documents
                retrieved; no answer is generated from their text.
                """
                hits = search_documents(question, top_k=3)
                passages = hits.get("documents", [[]])[0]
                # Only the presence of non-blank text matters for the stub reply.
                if " ".join(passages).strip():
                    return f"Answered question: '{question}' (based on {len(passages)} documents)."
                return "No relevant documents found."
         | 
    	
        core/ai_enrichment.py
    ADDED
    
    | @@ -0,0 +1,41 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # core/ai_enrichment.py
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            from mistralai import Mistral
         | 
| 4 | 
            +
            import config
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            def generate_tags(text: str) -> list[str]:
                """
                Use Mistral AI to generate 5-7 relevant tags for the text.

                Args:
                    text: The document text to tag.

                Returns:
                    The tags parsed from the model's comma-separated reply, or an
                    empty list when the reply carries no plain-text content.
                """
                with Mistral(api_key=config.MISTRAL_API_KEY) as client:
                    response = client.chat.complete(
                        model="mistral-small-latest",
                        messages=[{
                            "role": "user",
                            "content": f"Generate 5-7 relevant tags (comma-separated) for the following text:\n\n{text}"
                        }]
                    )
                # The client returns a response object, not a dict, so subscripting
                # it raises TypeError. Read the fields through attributes instead.
                try:
                    content = response.choices[0].message.content
                except (AttributeError, IndexError, TypeError):
                    return []
                if not isinstance(content, str):
                    return []
                return [tag.strip() for tag in content.split(",") if tag.strip()]
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            def summarize_text(text: str) -> str:
                """
                Use Mistral AI to generate a concise summary of the text.

                Args:
                    text: The document text to summarize.

                Returns:
                    The stripped summary, or an empty string when the reply carries
                    no plain-text content.
                """
                with Mistral(api_key=config.MISTRAL_API_KEY) as client:
                    response = client.chat.complete(
                        model="mistral-small-latest",
                        messages=[{
                            "role": "user",
                            "content": f"Summarize the following text in a concise manner:\n\n{text}"
                        }]
                    )
                # The client returns a response object, not a dict, so subscripting
                # it raises TypeError. Read the fields through attributes instead.
                try:
                    content = response.choices[0].message.content
                except (AttributeError, IndexError, TypeError):
                    return ""
                if not isinstance(content, str):
                    return ""
                return content.strip()
         | 
    	
        core/components.py
    ADDED
    
    | @@ -0,0 +1,23 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import gradio as gr
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            class DocumentViewer(gr.components.Component):
                """
                Stub Gradio component meant for document preview and tag editing.

                It currently passes input through unchanged and renders at most the
                first ten lines of the text it is given.
                """

                def __init__(self, label=None):
                    super().__init__(label=label, value=None)
                    # Always shown, never editable.
                    self.visible = True
                    self.interactive = False

                def preprocess(self, x):
                    """Return the incoming value (file path or object) untouched."""
                    return x

                def postprocess(self, x):
                    """Return the first ten lines of *x*, or "" when *x* is empty."""
                    return "\n".join(x.splitlines()[:10]) if x else ""
         | 
    	
        core/components.pyi
    ADDED
    
    | @@ -0,0 +1,29 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import gradio as gr
         | 
| 2 | 
            +
            from gradio.events import Dependency
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            class DocumentViewer(gr.components.Component):
                """
                Custom Gradio component for document preview and tag editing.
                (Stub implementation)
                """
                def __init__(self, label=None):
                    super().__init__(label=label, value=None)
                    self.visible = True
                    self.interactive = False

                def preprocess(self, x):
                    # Input is a file path (or object); just return as-is
                    return x

                def postprocess(self, x):
                    # x is the raw document text; display first few lines as preview
                    if not x:
                        return ""
                    lines = x.splitlines()
                    preview = "\n".join(lines[:10])
                    return preview
                # NOTE(review): the imports below sit inside the class body. This
                # .pyi presumably comes from a stub generator; confirm before
                # editing it by hand.
                from typing import Callable, Literal, Sequence, Any, TYPE_CHECKING
                from gradio.blocks import Block
                if TYPE_CHECKING:
                    from gradio.components import Timer
                    from gradio.components.base import Component
         | 
    	
        core/database.py
    ADDED
    
    | @@ -0,0 +1,81 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # core/database.py
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            import chromadb
         | 
| 4 | 
            +
            from chromadb.config import Settings
         | 
| 5 | 
            +
            from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction
         | 
| 6 | 
            +
            import config
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            def init_chroma():
                """
                Initialize a ChromaDB client and collection with an embedding function.
                Uses OpenAI embeddings if API key is available, otherwise a dummy embedding.

                The dummy embedding maps each text to a single number (its length),
                so similarity search under it compares lengths, not meaning.

                Returns:
                    The "documents" collection, created if it does not exist yet.
                """
                # Initialize Chroma client (in-memory by default)
                client = chromadb.Client(Settings())

                # config may not define OPENAI_API_KEY at all; treat that as unset.
                openai_key = getattr(config, "OPENAI_API_KEY", None)

                if openai_key:
                    embedding_fn = OpenAIEmbeddingFunction(
                        api_key=openai_key,
                        model_name="text-embedding-ada-002"
                    )
                else:
                    # Dummy embedding: one-dimensional embedding based on text length
                    class DummyEmbedding:
                        # Chroma checks that __call__ takes exactly (self, input),
                        # so the parameter name has to shadow the builtin.
                        def __call__(self, input):
                            return [[float(len(text))] for text in input]
                    embedding_fn = DummyEmbedding()

                # Create or get collection named "documents"
                return client.get_or_create_collection(
                    name="documents",
                    embedding_function=embedding_fn
                )
         | 
| 41 | 
            +
             | 
| 42 | 
            +
            def add_document(collection, doc_id: str, text: str, tags: list[str], summary: str, source: str):
                """
                Add a document to the ChromaDB collection with metadata.

                Args:
                    collection: Target collection; only its ``add`` method is used.
                    doc_id: Unique identifier for the document.
                    text: Document body; the collection embeds it on insert.
                    tags: Tags for the document. Stored as one comma-separated
                        string because Chroma metadata values must be scalars.
                    summary: Short summary stored alongside the document.
                    source: Where the document came from (file path or URL).
                """
                # A list is not a valid Chroma metadata value, so flatten the tags.
                if isinstance(tags, str):
                    tags_value = tags
                else:
                    tags_value = ", ".join(tags or [])
                metadata = {"tags": tags_value, "summary": summary, "source": source}
                # Add document (Chroma will generate embeddings using the collection's embedding function)
                collection.add(
                    ids=[doc_id],
                    documents=[text],
                    metadatas=[metadata]
                )
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            def search_documents(collection, query: str, top_n: int = 5) -> list[dict]:
                """
                Search for semantically similar documents in the collection.
                Returns top N results with their metadata.

                Args:
                    collection: Collection to query; only its ``query`` method is used.
                    query: Free-text query.
                    top_n: Maximum number of hits to request.

                Returns:
                    One dict per hit with keys ``id``, ``score``, ``source``,
                    ``tags``, ``summary`` and ``document``. ``score`` is the
                    distance value reported by the query. ``tags`` is a list when
                    the stored value is a list or a comma-separated string.
                    Fields missing from the response are ``None``.
                """
                results = collection.query(
                    query_texts=[query],
                    n_results=top_n,
                    include=["metadatas", "documents", "distances"]
                )

                def first_batch(key):
                    # Results are nested one list per query text and only one query
                    # is sent. A field may also be absent or None.
                    batches = results.get(key) or [[]]
                    return batches[0] or []

                ids = first_batch("ids")
                documents = first_batch("documents")
                metadatas = first_batch("metadatas")
                distances = first_batch("distances")

                hits = []
                for i, doc_id in enumerate(ids):
                    # A document stored without metadata comes back as None.
                    metadata = (metadatas[i] if i < len(metadatas) else None) or {}
                    tags = metadata.get("tags")
                    if isinstance(tags, str):
                        tags = [tag.strip() for tag in tags.split(",") if tag.strip()]
                    hits.append({
                        "id": doc_id,
                        "score": distances[i] if i < len(distances) else None,
                        "source": metadata.get("source"),
                        "tags": tags,
                        "summary": metadata.get("summary"),
                        "document": documents[i] if i < len(documents) else None
                    })
                return hits
         | 
    	
        core/parser.py
    ADDED
    
    | @@ -0,0 +1,30 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import requests
         | 
| 2 | 
            +
            from bs4 import BeautifulSoup
         | 
| 3 | 
            +
            from unstructured.partition.auto import partition
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            def parse_document(file_path: str) -> str:
                """
                Extract the text of a document file (PDF, DOCX, TXT, etc.) with Unstructured.

                Returns the text of every element that has non-empty text, joined by
                newlines. Any failure is reported as an "Error parsing document: ..."
                string rather than raised.
                """
                try:
                    pieces = []
                    for element in partition(file_path):
                        # Elements without text contribute nothing to the output
                        if element.text:
                            pieces.append(element.text)
                    return "\n".join(pieces)
                except Exception as e:
                    return f"Error parsing document: {e}"
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            def parse_url(url: str) -> str:
                """
                Fetch a web page and return the text of its headings, paragraphs and list items.

                Args:
                    url: Address of the page to download.

                Returns:
                    The text of every <p>, <h1>, <h2>, <h3> and <li> element joined by
                    newlines, or an "Error fetching URL: ..." string if the request
                    fails, the server answers with an HTTP error status, or parsing fails.
                """
                try:
                    headers = {"User-Agent": "Mozilla/5.0"}
                    response = requests.get(url, headers=headers, timeout=10)
                    # Treat HTTP 4xx/5xx as failures so an error page is not returned as content
                    response.raise_for_status()
                    soup = BeautifulSoup(response.text, 'html.parser')
                    # Extract visible text from paragraphs, headings and list items
                    paragraphs = soup.find_all(['p', 'h1', 'h2', 'h3', 'li'])
                    return "\n".join(p.get_text() for p in paragraphs)
                except Exception as e:
                    return f"Error fetching URL: {e}"
         | 
    	
        core/processing.py
    ADDED
    
    | @@ -0,0 +1,42 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # core/processing.py
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            import requests
         | 
| 4 | 
            +
            from unstructured.partition.html import partition_html
         | 
| 5 | 
            +
            from unstructured.partition.auto import partition
         | 
| 6 | 
            +
            import config
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            def fetch_web_content(url: str) -> str:
                """
                Fetch and parse web content from the given URL into structured text.

                Unstructured is first asked to download and partition the page itself.
                If that raises, the page is downloaded with ``requests`` and the HTML
                text is handed to ``partition_html``.

                Args:
                    url: Address of the page to fetch.

                Returns:
                    The non-empty element texts joined by blank lines, or an empty
                    string when both attempts fail.
                """
                try:
                    # Use Unstructured to fetch and parse HTML content directly from the URL
                    elements = partition_html(url=url)
                    return "\n\n".join(
                        [elem.text for elem in elements if hasattr(elem, 'text') and elem.text]
                    )
                except Exception:
                    # If Unstructured parsing fails, attempt a simple HTTP GET as a fallback
                    try:
                        # A timeout keeps an unresponsive server from blocking the caller forever
                        response = requests.get(url, timeout=10)
                        response.raise_for_status()
                        # The body is an HTML string, so it must be passed as text=...;
                        # partition(file=...) expects a file object and rejects a str.
                        elements = partition_html(text=response.text)
                        return "\n\n".join(
                            [elem.text for elem in elements if hasattr(elem, 'text') and elem.text]
                        )
                    except Exception:
                        # On failure, return empty string
                        return ""
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            def parse_local_file(file_path: str) -> str:
                """
                Convert a local file to plain text with the Unstructured library.

                Supports various file formats (e.g., PDF, DOCX, TXT). The text of every
                element that has non-empty text is joined with blank lines; an empty
                string is returned if parsing raises.
                """
                try:
                    chunks = []
                    for element in partition(filename=file_path):
                        # Missing or empty text is skipped
                        content = getattr(element, 'text', None)
                        if content:
                            chunks.append(content)
                    return "\n\n".join(chunks)
                except Exception:
                    # Parsing is best-effort: failure is reported as empty text
                    return ""
         | 
    	
        core/storage.py
    ADDED
    
    | @@ -0,0 +1,58 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import chromadb
         | 
| 2 | 
            +
            import os
         | 
| 3 | 
            +
            from mistralai import Mistral
         | 
| 4 | 
            +
            import config
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            # Initialize a persistent ChromaDB client. The storage directory defaults to
            # "db/" and can be overridden through the CHROMA_DB_DIR environment variable.
            # (chromadb.Client() is in-memory only and would ignore chroma_db_path.)
            chroma_db_path = os.getenv("CHROMA_DB_DIR", "db/")
            client = chromadb.PersistentClient(path=chroma_db_path)
            collection = client.get_or_create_collection("documents")
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            # Use Mistral API for embeddings
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            def get_mistral_embedding(text: str) -> list[float]:
                """
                Get the embedding vector for the given text using the Mistral API.

                Args:
                    text: The text to embed.

                Returns:
                    The embedding of ``text`` as a list of floats.
                """
                # Named mistral_client so it does not shadow the module-level Chroma client
                with Mistral(api_key=config.MISTRAL_API_KEY) as mistral_client:
                    response = mistral_client.embeddings.create(
                        model="mistral-embed",
                        # The v1 SDK takes the texts via `inputs`
                        inputs=[text],
                    )
                    # The API returns one embedding per input; the response is an
                    # object with attributes, not a subscriptable dict.
                    return response.data[0].embedding
         | 
| 24 | 
            +
             | 
| 25 | 
            +
             | 
| 26 | 
            +
            def add_document(doc_id: str, text: str, metadata: dict):
                """
                Add a document's text and metadata to the ChromaDB collection.

                Args:
                    doc_id: Identifier under which the document is stored.
                    text: Document text; it is embedded with ``get_mistral_embedding``.
                    metadata: Metadata stored alongside the document.

                Returns:
                    True once the document has been handed to the collection.
                """
                embedding = get_mistral_embedding(text)
                collection.add(ids=[doc_id], embeddings=[embedding], documents=[text], metadatas=[metadata])
                # Older chromadb clients need an explicit persist(); chromadb >= 0.4
                # removed the method, so only call it when the client provides it.
                persist = getattr(client, "persist", None)
                if callable(persist):
                    persist()
                return True
         | 
| 35 | 
            +
             | 
| 36 | 
            +
             | 
| 37 | 
            +
            def search_documents(query: str, top_k: int = 5) -> dict:
                """
                Search for documents semantically similar to the query.

                Args:
                    query: Free-text query; it is embedded with ``get_mistral_embedding``.
                    top_k: Maximum number of results to request.

                Returns:
                    The raw ChromaDB query result (ids, distances, documents, metadatas).
                """
                query_vec = get_mistral_embedding(query)
                # 'ids' is not a valid `include` entry: ids are always part of the result,
                # and listing it makes Chroma reject the query.
                results = collection.query(
                    query_embeddings=[query_vec],
                    n_results=top_k,
                    include=['distances', 'documents', 'metadatas'],
                )
                return results
         | 
| 46 | 
            +
             | 
| 47 | 
            +
             | 
| 48 | 
            +
            def get_all_documents() -> list:
                """
                Retrieve metadata for all documents in the collection.

                Returns:
                    A list of ``{"id": ..., "metadata": ...}`` dictionaries, one per
                    stored document.
                """
                # A single get() returns ids and metadatas in matching order, avoiding
                # one extra round-trip per document.
                records = collection.get(include=["metadatas"])
                doc_ids = records.get("ids") or []
                metadatas = records.get("metadatas") or []
                return [
                    {"id": doc_id, "metadata": metadata}
                    for doc_id, metadata in zip(doc_ids, metadatas)
                ]
         | 
    	
        core/summarizer.py
    ADDED
    
    | @@ -0,0 +1,25 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            def summarize_content(text: str) -> str:
                """
                Produce a short summary of the text. (Stub standing in for a Claude 3 Haiku call.)

                The text is stripped, newlines become spaces, and the result is cut to
                its first 100 characters with "..." appended when anything was cut off.
                """
                # A real implementation might call the Anthropic Claude 3 API here.
                flattened = text.strip().replace("\n", " ")
                if len(flattened) > 100:
                    snippet = flattened[:100] + "..."
                else:
                    snippet = flattened
                return f"Summary: {snippet}"
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            def tag_content(text: str) -> list:
                """
                Generate tags for the text. (This is a stub simulating a Mistral 7B call.)

                Matching is case-insensitive. Lower-case keywords match anywhere in the
                text; upper-case acronyms such as "AI" must appear as a whole word,
                because a substring test for "ai" would also hit words like "said".

                Args:
                    text: The text to tag.

                Returns:
                    The matching keywords in their canonical spelling and in keyword
                    order, or ``["general"]`` when none match.
                """
                import re

                # In a real app, you might call a tag-generation model or use embeddings.
                common_words = ["data", "analysis", "python", "research", "AI"]
                lower = text.lower()
                # Whole-word lookup set for acronym keywords
                words = set(re.findall(r"[a-z0-9]+", lower))
                tags = []
                for word in common_words:
                    if word.isupper():
                        matched = word.lower() in words
                    else:
                        matched = word in lower
                    if matched:
                        tags.append(word)
                if not tags:
                    tags = ["general"]
                return tags
         | 
    	
        core/utils.py
    ADDED
    
    | @@ -0,0 +1,23 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # core/utils.py
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            import re
         | 
| 4 | 
            +
            from datetime import datetime
         | 
| 5 | 
            +
            import hashlib
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            def clean_text(text: str) -> str:
                """
                Normalize whitespace in the text.

                Every run of whitespace is collapsed to a single space and leading and
                trailing whitespace is removed. Falsy input (empty string, None) yields "".
                """
                if text:
                    # Collapse whitespace runs first, then trim both ends
                    return re.sub(r'\s+', ' ', text).strip()
                return ""
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            def generate_doc_id(source: str) -> str:
                """
                Build a document ID from the source identifier and the current time.

                The ID is the hexadecimal MD5 digest of "<source>-<ISO timestamp>".
                """
                stamp = datetime.now().isoformat()
                digest = hashlib.md5(f"{source}-{stamp}".encode())
                return digest.hexdigest()
         | 
    	
        data/article_url.txt
    ADDED
    
    | 
            File without changes
         | 
    	
        data/document1.pdf
    ADDED
    
    | 
            File without changes
         | 
    	
        data/sample_note.txt
    ADDED
    
    | 
            File without changes
         | 
    	
        mcp_tools.py
    ADDED
    
    | @@ -0,0 +1,122 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # # mcp_tools.py
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # from fastmcp import FastMCP
         | 
| 4 | 
            +
            # import core.processing as processing
         | 
| 5 | 
            +
            # import core.ai_enrichment as ai_enrichment
         | 
| 6 | 
            +
            # import core.database as db
         | 
| 7 | 
            +
            # import core.utils as utils
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            # # Initialize the FastMCP server instance
         | 
| 10 | 
            +
            # mcp = FastMCP(name="IntelligentContentOrganizer")
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            # # Initialize the ChromaDB collection (shared for all tools)
         | 
| 13 | 
            +
            # collection = db.init_chroma()
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            # @mcp.tool()
         | 
| 16 | 
            +
            # def process_content(url: str) -> dict:
         | 
| 17 | 
            +
            #     """
         | 
| 18 | 
            +
            #     Process content from a web URL: fetch, enrich, and store.
         | 
| 19 | 
            +
            #     Returns document ID, tags, summary, and source.
         | 
| 20 | 
            +
            #     """
         | 
| 21 | 
            +
            #     content = processing.fetch_web_content(url)
         | 
| 22 | 
            +
            #     text = utils.clean_text(content)
         | 
| 23 | 
            +
            #     tags = ai_enrichment.generate_tags(text) if text else []
         | 
| 24 | 
            +
            #     summary = ai_enrichment.summarize_text(text) if text else ""
         | 
| 25 | 
            +
            #     doc_id = utils.generate_doc_id(url)
         | 
| 26 | 
            +
            #     # Add the document to the database collection
         | 
| 27 | 
            +
            #     db.add_document(collection, doc_id, text, tags, summary, source=url)
         | 
| 28 | 
            +
            #     return {"id": doc_id, "tags": tags, "summary": summary, "source": url}
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            # @mcp.tool()
         | 
| 31 | 
            +
            # def upload_local_file(file_path: str) -> dict:
         | 
| 32 | 
            +
            #     """
         | 
| 33 | 
            +
            #     Process a local file: parse, enrich, and store.
         | 
| 34 | 
            +
            #     Returns document ID, tags, summary, and source.
         | 
| 35 | 
            +
            #     """
         | 
| 36 | 
            +
            #     content = processing.parse_local_file(file_path)
         | 
| 37 | 
            +
            #     text = utils.clean_text(content)
         | 
| 38 | 
            +
            #     tags = ai_enrichment.generate_tags(text) if text else []
         | 
| 39 | 
            +
            #     summary = ai_enrichment.summarize_text(text) if text else ""
         | 
| 40 | 
            +
            #     doc_id = utils.generate_doc_id(file_path)
         | 
| 41 | 
            +
            #     db.add_document(collection, doc_id, text, tags, summary, source=file_path)
         | 
| 42 | 
            +
            #     return {"id": doc_id, "tags": tags, "summary": summary, "source": file_path}
         | 
| 43 | 
            +
             | 
| 44 | 
            +
            # @mcp.tool()
         | 
| 45 | 
            +
            # def semantic_search(query: str, top_n: int = 5) -> list:
         | 
| 46 | 
            +
            #     """
         | 
| 47 | 
            +
            #     Search for documents semantically similar to the query.
         | 
| 48 | 
            +
            #     Returns top N results as a list of dictionaries.
         | 
| 49 | 
            +
            #     """
         | 
| 50 | 
            +
            #     results = db.search_documents(collection, query, top_n)
         | 
| 51 | 
            +
            #     return results
         | 
| 52 | 
            +
             | 
| 53 | 
            +
             | 
| 54 | 
            +
            from fastmcp import FastMCP
         | 
| 55 | 
            +
            from core.parser import parse_document, parse_url
         | 
| 56 | 
            +
            from core.summarizer import summarize_content, tag_content
         | 
| 57 | 
            +
            from core.storage import add_document, search_documents
         | 
| 58 | 
            +
            from core.agent import answer_question
         | 
| 59 | 
            +
            import json
         | 
| 60 | 
            +
             | 
| 61 | 
            +
            mcp = FastMCP("IntelligentContentOrganizer_MCP")
         | 
| 62 | 
            +
             | 
| 63 | 
            +
            @mcp.tool(name="parse_document")
            def mcp_parse_document(file_path: str) -> str:
                """
                MCP tool: extract the text of the document at ``file_path``.

                Delegates to ``parse_document`` and returns its result unchanged.
                """
                return parse_document(file_path)
         | 
| 70 | 
            +
             | 
| 71 | 
            +
            @mcp.tool(name="parse_url")
            def mcp_parse_url(url: str) -> str:
                """
                MCP tool: fetch the web page at ``url`` and return its parsed text.

                Delegates to ``parse_url`` and returns its result unchanged.
                """
                return parse_url(url)
         | 
| 78 | 
            +
             | 
| 79 | 
            +
            @mcp.tool(name="summarize")
            def mcp_summarize(text: str) -> str:
                """
                MCP tool: return a summary of ``text`` as produced by ``summarize_content``.
                """
                summary = summarize_content(text)
                return summary
         | 
| 85 | 
            +
             | 
| 86 | 
            +
            @mcp.tool(name="tag")
            def mcp_tag(text: str) -> str:
                """
                MCP tool: tag ``text`` with ``tag_content`` and return the tags as a JSON list.
                """
                return json.dumps(tag_content(text))
         | 
| 93 | 
            +
             | 
| 94 | 
            +
            @mcp.tool(name="add_to_db")
            def mcp_add_to_db(doc_id: str, text: str, metadata_json: str) -> str:
                """
                MCP tool: Add a document to ChromaDB with given ID and metadata (JSON).

                Args:
                    doc_id: Identifier to store the document under.
                    text: Document text.
                    metadata_json: JSON-encoded object holding the document metadata.

                Returns:
                    A confirmation message containing the document ID.

                Raises:
                    ValueError: If ``metadata_json`` is not valid JSON
                        (``json.JSONDecodeError`` is a ``ValueError``) or does not
                        decode to a JSON object.
                """
                metadata = json.loads(metadata_json)
                # Valid JSON can still be a list, string or number; reject those here
                # with a clear message instead of passing them on as metadata.
                if not isinstance(metadata, dict):
                    raise ValueError("metadata_json must decode to a JSON object")
                add_document(doc_id, text, metadata)
                return "Document added with ID: " + doc_id
         | 
| 102 | 
            +
             | 
| 103 | 
            +
            @mcp.tool(name="search_db")
            def mcp_search_db(query: str, top_k: int = 5) -> str:
                """
                MCP tool: run a semantic search for ``query`` and return the results as JSON.

                ``top_k`` is forwarded to ``search_documents`` as the number of results.
                """
                return json.dumps(search_documents(query, top_k=top_k))
         | 
| 110 | 
            +
             | 
| 111 | 
            +
            @mcp.tool(name="answer_question")
            def mcp_answer_question(question: str) -> str:
                """
                MCP tool: answer ``question`` through the agentic workflow.

                Delegates to ``answer_question`` and returns its result unchanged.
                """
                return answer_question(question)
         | 
| 118 | 
            +
             | 
| 119 | 
            +
            if __name__ == "__main__":
         | 
| 120 | 
            +
                # Run the MCP server over streamable HTTP (for web integration)
         | 
| 121 | 
            +
                mcp.run(transport="streamable-http", host="0.0.0.0", port=7861, path="/mcp")
         | 
| 122 | 
            +
             | 
    	
        requirements.txt
    ADDED
    
    | @@ -0,0 +1,12 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
             | 
| 2 | 
            +
            mistralai
         | 
| 3 | 
            +
            python-dotenv
         | 
| 4 | 
            +
            gradio>=4.0
         | 
| 5 | 
            +
            fastmcp>=2.0
         | 
| 6 | 
            +
            chromadb
         | 
| 7 | 
            +
            sentence-transformers
         | 
| 8 | 
            +
            unstructured
         | 
| 9 | 
            +
            requests
         | 
| 10 | 
            +
            beautifulsoup4
         | 
| 11 | 
            +
            plotly
         | 
| 12 | 
            +
            networkx
         | 

