DeepSeek-R1-32b-api

Sleeping

App Files Files Community

ruslanmv committed on Jan 28

Commit

3b55d4b

verified ·

1 Parent(s): c7e00fb

Update app.py

Browse files

Files changed (1) hide show

app.py +158 -104

app.py CHANGED Viewed

@@ -1,109 +1,163 @@
 import streamlit as st
-from models import demo  # Import the demo object from models.py
-# --- Streamlit App Configuration ---
-st.set_page_config(
-    page_title="DeepSeek Chatbot",
-    page_icon="🤖",
-    layout="wide"
-)
-# --- App Title and Description ---
-st.title("DeepSeek Chatbot")
-st.markdown("""
-    Created by [ruslanmv.com](https://ruslanmv.com/)
-    This is a demo of different DeepSeek models. Select a model, type your message, and click "Submit".
-    You can also adjust optional parameters like system message, max new tokens, temperature, and top-p.
-""")
-# --- Sidebar for Model Selection and Parameters ---
-with st.sidebar:
-    st.header("Options")
-    model_choice = st.radio(
-        "Choose a Model",
-        options=["DeepSeek-R1-Distill-Qwen-32B", "DeepSeek-R1", "DeepSeek-R1-Zero"],
-        index=1  # Default to "DeepSeek-R1"
     )
-    with st.expander("Optional Parameters", expanded=False):
-        system_message = st.text_area(
-            "System Message",
-            value="You are a friendly Chatbot created by ruslanmv.com",
-            height=100
-        )
-        max_new_tokens = st.slider(
-            "Max New Tokens",
-            min_value=1,
-            max_value=4000,
-            value=200
-        )
-        temperature = st.slider(
-            "Temperature",
-            min_value=0.10,
-            max_value=4.00,
-            value=0.70
-        )
-        top_p = st.slider(
-            "Top-p (nucleus sampling)",
-            min_value=0.10,
-            max_value=1.00,
-            value=0.90
-        )
-# --- Chatbot Function ---
-def chatbot(input_text, history, model_choice, system_message, max_new_tokens, temperature, top_p):
-    # Create payload for the model
-    payload = {
-        "messages": [{"role": "user", "content": input_text}],
-        "system": system_message,
-        "max_tokens": max_new_tokens,
-        "temperature": temperature,
-        "top_p": top_p
-    }
-    # Run inference using the selected model
-    try:
-        response = demo(payload)  # Use the demo object directly
-        if isinstance(response, dict) and "choices" in response:
-            assistant_response = response["choices"][0]["message"]["content"]
         else:
-            assistant_response = "Unexpected model response format."
-    except Exception as e:
-        assistant_response = f"Error: {str(e)}"
-    # Append user and assistant messages to history
-    history.append((input_text, assistant_response))
-    return history
-# --- Chat History Management ---
-if "chat_history" not in st.session_state:
-    st.session_state.chat_history = []
-# --- Chat Interface ---
-st.header("Chat with DeepSeek")
-# Display chat history
-for user_msg, assistant_msg in st.session_state.chat_history:
-    with st.chat_message("user"):
-        st.write(user_msg)
-    with st.chat_message("assistant"):
-        st.write(assistant_msg)
-# Input for new message
-input_text = st.chat_input("Type your message here...")
-# Handle new message submission
-if input_text:
-    # Update chat history
-    st.session_state.chat_history = chatbot(
-        input_text,
-        st.session_state.chat_history,
-        model_choice,
-        system_message,
-        max_new_tokens,
-        temperature,
-        top_p
     )
-    # Rerun the app to display the updated chat history
-    st.rerun()

+########################################
+# app.py
+########################################
 import streamlit as st
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+# We define a cache to load pipelines for each model only once.
+@st.cache_resource
+def load_text_generation_pipeline(model_name: str):
+    """
+    Build a "text-generation" pipeline for the model called ``model_name``.
+
+    The function is wrapped in ``st.cache_resource``, so the tokenizer and
+    model are only loaded the first time a given model name is requested.
+    """
+    hub_tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+    # Both the dtype and the device placement are left to the "auto" setting.
+    causal_lm = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype="auto",
+        device_map="auto",
+    )
+    return pipeline("text-generation", model=causal_lm, tokenizer=hub_tokenizer)
+def generate_response(
+    text_generation,
+    system_prompt: str,
+    conversation_history: list,
+    user_query: str,
+    max_new_tokens: int,
+    temperature: float,
+    top_p: float
+):
+    """
+    Generate the assistant's reply to ``user_query``.
+
+    The prompt is the stripped system prompt, then one "User: ..." or
+    "Assistant: ..." line per history turn, then the new query followed by a
+    bare "Assistant:" cue for the model to complete.
+
+    Args:
+        text_generation: Callable invoked as ``text_generation(prompt,
+            max_new_tokens=..., temperature=..., top_p=..., do_sample=True)``;
+            it must return a sequence whose first item maps "generated_text"
+            to a string.
+        system_prompt: Instruction text placed at the top of the prompt.
+        conversation_history: ``(speaker, text)`` pairs for earlier turns; any
+            speaker other than "user" is rendered as the assistant. The list
+            is not modified.
+        user_query: The new user message to answer.
+        max_new_tokens: Forwarded unchanged to ``text_generation``.
+        temperature: Forwarded unchanged to ``text_generation``.
+        top_p: Forwarded unchanged to ``text_generation``.
+
+    Returns:
+        The reply text with surrounding whitespace removed.
+    """
+    turns = list(conversation_history)
+    # A caller may already have recorded the new query as the latest turn;
+    # drop that copy so the query is not sent to the model twice.
+    if turns and tuple(turns[-1]) == ("user", user_query):
+        turns.pop()
+    prompt_lines = [system_prompt.strip()]
+    prompt_lines.extend(
+        f"{'User' if speaker == 'user' else 'Assistant'}: {text}"
+        for speaker, text in turns
+    )
+    prompt_lines.append(f"User: {user_query}")
+    prompt_lines.append("Assistant:")
+    full_prompt = "\n".join(prompt_lines)
+    outputs = text_generation(
+        full_prompt,
+        max_new_tokens=max_new_tokens,
+        temperature=temperature,
+        top_p=top_p,
+        do_sample=True
+    )
+    # Only the first returned sequence is used.
+    generated_text = outputs[0]["generated_text"]
+    if generated_text.startswith(full_prompt):
+        # The prompt was echoed back verbatim: the reply is whatever follows
+        # it. Searching for "Assistant:" instead would cut a reply that
+        # happens to contain that marker itself.
+        completion = generated_text[len(full_prompt):]
+    else:
+        # Prompt not echoed verbatim: fall back to the text after the last
+        # "Assistant:" marker (the whole text when the marker is absent).
+        completion = generated_text.rpartition("Assistant:")[2]
+    # The model may keep writing and invent the next user turn; keep only
+    # the assistant's own turn.
+    completion = completion.split("\nUser:", 1)[0]
+    return completion.strip()
+def main():
+    """
+    Render the chat page and handle a single Streamlit script run.
+
+    Draws the title, the sidebar controls (model, max new tokens, temperature,
+    top-p), the conversation stored in ``st.session_state["conversation"]``
+    and the message box. Pressing "Send" with a non-blank message records the
+    user turn, generates a reply, records it and reruns the script so both
+    turns are displayed. "Clear Conversation" empties the history and reruns.
+    """
+    import html  # local import: used to escape chat text embedded in HTML
+    # st.experimental_rerun is the deprecated name of st.rerun; prefer the
+    # current name and fall back only where st.rerun does not exist.
+    rerun = getattr(st, "rerun", None) or st.experimental_rerun
+    st.title("Streamlit Chatbot with Model Selection")
+    st.markdown(
+        """
+        **System message**: You are a friendly Chatbot created by [ruslanmv.com](https://ruslanmv.com)
+        Below you can select the model, adjust parameters, and begin chatting!
+        """
+    )
+    # Sidebar for model selection and parameters
+    st.sidebar.header("Select Model & Parameters")
+    model_name = st.sidebar.selectbox(
+        "Choose a model:",
+        [
+            "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+            "deepseek-ai/DeepSeek-R1",
+            "deepseek-ai/DeepSeek-R1-Zero"
+        ]
+    )
+    max_new_tokens = st.sidebar.slider(
+        "Max new tokens",
+        min_value=1,
+        max_value=4000,
+        value=1024,
+        step=1
+    )
+    temperature = st.sidebar.slider(
+        "Temperature",
+        min_value=0.1,
+        max_value=4.0,
+        value=1.0,
+        step=0.1
+    )
+    top_p = st.sidebar.slider(
+        "Top-p (nucleus sampling)",
+        min_value=0.1,
+        max_value=1.0,
+        value=0.9,
+        step=0.05
+    )
+    # The system "role" content
+    system_message = (
+        "You are a friendly Chatbot created by ruslanmv.com. "
+        "You answer user questions in a concise and helpful way."
+    )
+    # Load the chosen model
+    text_generation_pipeline = load_text_generation_pipeline(model_name)
+    # Conversation history lives in session_state as (speaker, text) tuples
+    if "conversation" not in st.session_state:
+        st.session_state["conversation"] = []
+    # Display conversation so far
+    for speaker, text in st.session_state["conversation"]:
+        # The text comes from the user or the model and is embedded in raw
+        # HTML below, so escape it to keep stray markup from being rendered.
+        safe_text = html.escape(text)
+        if speaker == "user":
+            st.markdown(f"<div style='text-align:left; color:blue'><strong>User:</strong> {safe_text}</div>", unsafe_allow_html=True)
+        else:
+            st.markdown(f"<div style='text-align:left; color:green'><strong>Assistant:</strong> {safe_text}</div>", unsafe_allow_html=True)
+    # User input text box
+    user_input = st.text_input("Your message", "")
+    # When user hits "Send"
+    if st.button("Send"):
+        query = user_input.strip()
+        if query:
+            # Snapshot the earlier turns before recording the new one:
+            # generate_response adds the query to the prompt itself, so
+            # passing a history that already contains it would send it twice.
+            earlier_turns = list(st.session_state["conversation"])
+            # 1) Add user query to conversation
+            st.session_state["conversation"].append(("user", query))
+            # 2) Generate a response
+            with st.spinner("Thinking..."):
+                answer = generate_response(
+                    text_generation=text_generation_pipeline,
+                    system_prompt=system_message,
+                    conversation_history=earlier_turns,
+                    user_query=query,
+                    max_new_tokens=max_new_tokens,
+                    temperature=temperature,
+                    top_p=top_p
+                )
+            # 3) Add assistant answer to conversation
+            st.session_state["conversation"].append(("assistant", answer))
+            # 4) Rerun to display
+            rerun()
+    # Optional: Provide a button to clear the conversation
+    if st.button("Clear Conversation"):
+        st.session_state["conversation"] = []
+        rerun()
+# Start the app only when this file is run as the main script, not on import.
+if __name__ == "__main__":
+    main()