File size: 5,558 Bytes
fc367cb
5d9de5b
 
 
 
fc367cb
5d9de5b
80e7c69
5d9de5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import html
import os
import shutil

import fitz  # PyMuPDF
import streamlit as st

from simple_rag import generate_data_store, ask_question, DATA_PATH, CHROMA_PATH  # <- Added CHROMA_PATH

# --- Page Configuration ---
st.set_page_config(layout="wide", page_title="SEALION v3.5 8B RAG")

# --- DARK MODE STYLING ---
# Stylesheet for the chat bubbles and context text. Colors are taken from CSS
# custom properties, with fixed accent colors for the user/bot left borders.
_CHAT_CSS = """
    <style>
    body { background-color: var(--background-color); }
    .stChatBubble {
        padding: 0.8rem 1rem;
        border-radius: 0.6rem;
        margin-bottom: 1rem;
    }
    .user {
        background-color: var(--primary-color-bg);
        border-left: 4px solid #4285F4;
    }
    .bot {
        background-color: var(--secondary-color-bg);
        border-left: 4px solid #34A853;
    }
    .context {
        font-size: 0.9rem;
        color: var(--text-color);
    }
    </style>
"""
# Raw HTML must be explicitly allowed for the <style> tag to be emitted.
st.markdown(_CHAT_CSS, unsafe_allow_html=True)

# --- Set up state ---
# Seed each session key only when it is absent, so values already stored in
# the session are never overwritten.
_SESSION_DEFAULTS = {"chat_history": [], "dark_mode": False}
for _state_key, _initial_value in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# --- Sidebar: Settings + Clear Vector Store ---
with st.sidebar:
    st.title("βš™οΈ Settings")

    # --- Clear Chroma Vector Store Option ---
    # Deletes the CHROMA_PATH directory tree on request and reports the
    # outcome (cleared / nothing to clear / failure) in the sidebar.
    clear_store_clicked = st.button("πŸ—‘οΈ Clear Vector Store")
    if clear_store_clicked:
        try:
            if not os.path.exists(CHROMA_PATH):
                st.info("ℹ️ No vector store found.")
            else:
                shutil.rmtree(CHROMA_PATH)
                st.success("βœ… Vector store cleared.")
        except Exception as e:
            st.error(f"❌ Failed to clear vector store: {e}")

# --- Title ---
st.title("πŸ’¬ DocChat – Ask Anything From Your PDFs")

# --- File Upload Sidebar ---
with st.sidebar:
    st.subheader("πŸ“‚ Uploaded PDFs")
    # Multi-file uploader restricted to the "pdf" extension.
    uploaded_files = st.file_uploader(
        label="Upload PDF files",
        type=["pdf"],
        accept_multiple_files=True,
    )

    def render_pdf_preview(pdf_path, highlights=None, max_pages=3):
        """Render the leading pages of a PDF as PNG images, marking cited text.

        Args:
            pdf_path: Path of the PDF file to open with PyMuPDF.
            highlights: Optional iterable of dicts with "filename", "page" and
                "text" keys. An entry is applied when its filename equals the
                basename of ``pdf_path`` and its page equals the 1-based
                number of the page being rendered; matches of its text on
                that page are covered with a yellow rectangle.
            max_pages: Maximum number of pages (from the start) to render.

        Returns:
            A list of PNG-encoded ``bytes`` objects, one per rendered page.
        """
        max_hits = 5  # cap on highlighted matches per chunk and page
        pdf_name = os.path.basename(pdf_path)
        images = []

        doc = fitz.open(pdf_path)
        try:
            for page_num in range(min(len(doc), max_pages)):
                page = doc.load_page(page_num)

                for h in highlights or ():
                    if h["filename"] != pdf_name or h["page"] != page_num + 1:
                        continue
                    needle = h["text"]
                    if not needle:
                        # Nothing to search for.
                        continue
                    # `hit_max` was dropped from Page.search_for in newer
                    # PyMuPDF releases; slicing keeps the cap without
                    # depending on that keyword.
                    for rect in page.search_for(needle)[:max_hits]:
                        # Semi-transparent fill so the highlighted text stays
                        # readable underneath the overlay.
                        page.draw_rect(
                            rect,
                            color=(1, 1, 0),
                            fill=(1, 1, 0),
                            fill_opacity=0.4,
                            overlay=True,
                        )

                pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
                images.append(pix.tobytes("png"))
        finally:
            # Release the document even when searching or rendering fails.
            doc.close()
        return images

    if uploaded_files:
        # Rebuild the data directory from scratch so it contains exactly the
        # files currently held by the uploader.
        if os.path.exists(DATA_PATH):
            shutil.rmtree(DATA_PATH)
        os.makedirs(DATA_PATH, exist_ok=True)

        file_names = []
        for uploaded_file in uploaded_files:
            # The file name is supplied by the client: keep only its final
            # path component so a crafted name such as "../x.pdf" cannot be
            # written outside DATA_PATH.
            safe_name = os.path.basename(uploaded_file.name)
            if not safe_name:
                continue
            file_path = os.path.join(DATA_PATH, safe_name)
            file_names.append(safe_name)
            with open(file_path, "wb") as f:
                # getvalue() returns the full buffer regardless of the
                # stream's current read position.
                f.write(uploaded_file.getvalue())

        st.success(f"Uploaded {len(file_names)} file(s).")

        if st.button("⚑ Generate Vector Store"):
            with st.spinner("Processing and indexing..."):
                generate_data_store()
            st.success("βœ… Knowledge base ready!")

        st.markdown("### 🧾 File Manager")
        # Collect every context chunk cited in the chat so far once, rather
        # than re-walking the whole history for each file.
        cited_chunks = [
            chunk
            for chat in st.session_state.chat_history
            for chunk in chat["context"]
        ]
        for name in file_names:
            st.markdown(f"- πŸ“„ `{name}`")
            with st.expander("πŸ–Ό Preview Pages"):
                file_path = os.path.join(DATA_PATH, name)
                file_highlights = [
                    chunk for chunk in cited_chunks if chunk["filename"] == name
                ]

                # UI boundary: one unreadable upload should not take down the
                # whole page, so report the failure and move on.
                try:
                    images = render_pdf_preview(file_path, highlights=file_highlights)
                except Exception as e:
                    st.error(f"Could not render preview for {name}: {e}")
                else:
                    for img in images:
                        st.image(img, use_container_width=True)

# --- Main Chat Section ---
st.markdown("### 🧠 Ask Your Question")
query = st.text_input(
    "Type your question here...",
    placeholder="e.g. What is the conclusion of the document?",
)

col1, col2 = st.columns([1, 1])

# Left column: run the query and record the exchange in the session history.
with col1:
    ask_clicked = st.button("πŸš€ Get Answer")
    if ask_clicked and query.strip():
        with st.spinner("Thinking..."):
            answer, context_chunks = ask_question(query)

        st.session_state.chat_history.append(
            {"question": query, "answer": answer, "context": context_chunks}
        )
    elif ask_clicked:
        # Button pressed with an empty / whitespace-only question.
        st.warning("Please enter a question.")

# Right column: reset the stored conversation.
with col2:
    clear_chat_clicked = st.button("πŸ—‘οΈ Clear Chat")
    if clear_chat_clicked:
        st.session_state.chat_history = []

# --- Display Chat History ---
# Renders the stored exchanges newest-first. Questions, answers and PDF
# excerpts are untrusted text that is embedded in raw HTML
# (unsafe_allow_html=True), so each value is HTML-escaped before interpolation
# to keep stray markup or scripts from being injected into the page.
if st.session_state.chat_history:
    st.markdown("### πŸ—‚οΈ Chat History")
    for chat in reversed(st.session_state.chat_history):
        question_html = html.escape(str(chat["question"]))
        answer_html = html.escape(str(chat["answer"]))
        st.markdown(f'<div class="stChatBubble user"><b>Q:</b> {question_html}</div>', unsafe_allow_html=True)
        st.markdown(f'<div class="stChatBubble bot"><b>A:</b> {answer_html}</div>', unsafe_allow_html=True)
        with st.expander("🧾 View Context Used"):
            for chunk in chat["context"]:
                # str() first: the page value may not be a string.
                source_html = html.escape(str(chunk["filename"]))
                page_html = html.escape(str(chunk["page"]))
                text_html = html.escape(str(chunk["text"]))
                st.markdown(f"""
                <div style="background-color: #fef9c3; padding: 10px; border-radius: 6px; margin-bottom: 10px;">
                    <b>πŸ“„ {source_html} – Page {page_html}</b>
                    <pre style="white-space: pre-wrap;">{text_html}</pre>
                </div>
                """, unsafe_allow_html=True)