import os

import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login

# App title
st.set_page_config(page_title="GenAI Chat Bot")

# Hugging Face credentials and model settings
with st.sidebar:
    st.title('GenAI Chat Bot')
    #st.write('This is a generative AI Chat Bot.')

    # Read the Hugging Face API key from the "llama3" environment variable
    api_key = os.getenv("llama3")
    if not api_key:
        st.error("API key is missing!")
        st.stop()

    # Authenticate with the Hugging Face Hub
    try:
        login(api_key)
        st.success('API successfully authenticated!', icon='✅')
    except Exception as e:
        st.error(f"Authentication failed: {e}")
        st.stop()

    st.subheader('Models and parameters')

    # Model selection categories
    model_options = {
        "Basic": ["meta-llama/Llama-3.2-1B"],
        "Basic-Medium": ["meta-llama/Llama-3.2-1B-Instruct"],
        "Medium-Fine": ["meta-llama/Llama-3.2-3B"],
        "Finest": ["meta-llama/Llama-3.2-3B-Instruct"],
    }

    # Select a category (default: "Basic-Medium")
    selected_category = st.selectbox('Select Model Category', list(model_options), index=1)

    # Resolve the model for the chosen category
    selected_model = model_options[selected_category][0]

    # Sampling parameters
    temperature = st.slider('temperature', min_value=0.01, max_value=1.0, value=0.3, step=0.01)
    top_p = st.slider('top_p', min_value=0.01, max_value=1.0, value=0.9, step=0.01)
    max_length = st.slider('max_length', min_value=20, max_value=80, value=65, step=5)

    st.markdown("Disclaimer: The performance and speed of this GenAI tool depend on the machine configuration and the selected model.")

# Store LLM-generated responses
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]

# Display chat history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])

def clear_chat_history():
    st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}]

st.sidebar.button('Clear Chat History', on_click=clear_chat_history)

# Load the tokenizer and model; cached so the weights are not reloaded on
# every Streamlit rerun (each widget interaction re-executes this script)
@st.cache_resource
def load_model(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
    return tokenizer, model

try:
    tokenizer, model = load_model(selected_model)
except Exception as e:
    st.error(f"Error loading model: {e}")
    st.stop()

# Generate a response with the Hugging Face model
def generate_huggingface_response(prompt_input):
    inputs = tokenizer(prompt_input, return_tensors="pt").to(model.device)
    try:
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=max_length,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,  # Llama defines no pad token; silences a warning
            )
        # Decode only the newly generated tokens, not the echoed prompt
        response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
        return response
    except Exception as e:
        st.error(f"Error generating response: {e}")
        return "Oops! Something went wrong."
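# Note: generate_huggingface_response() feeds the raw prompt string to the
# model. The *-Instruct variants are trained on a chat template, so a minimal
# sketch of template-based encoding (apply_chat_template is part of the
# transformers tokenizer API; the single-turn message list here is an
# illustrative assumption) would replace the tokenizer call with:
#
#   input_ids = tokenizer.apply_chat_template(
#       [{"role": "user", "content": prompt_input}],
#       add_generation_prompt=True,
#       return_tensors="pt",
#   ).to(model.device)
#
# The base Llama-3.2-1B/3B checkpoints define no chat template, so the
# plain-text path above remains the fallback for the "Basic" and
# "Medium-Fine" categories.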
# User-provided prompt
if prompt := st.chat_input(disabled=not api_key):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

# Generate a new response if the last message is not from the assistant.
# The response arrives as a complete string, so it is written once rather
# than replayed character by character to simulate streaming.
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = generate_huggingface_response(prompt)
        st.write(response)
    st.session_state.messages.append({"role": "assistant", "content": response})
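# The response above is rendered only after generation finishes. For true
# token-by-token streaming, a minimal sketch using transformers'
# TextIteratorStreamer (generate() runs in a background thread while the
# streamer yields decoded chunks; stream_huggingface_response is a
# hypothetical replacement for the function defined earlier):
#
#   from threading import Thread
#   from transformers import TextIteratorStreamer
#
#   def stream_huggingface_response(prompt_input):
#       inputs = tokenizer(prompt_input, return_tensors="pt").to(model.device)
#       streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
#       kwargs = dict(**inputs, streamer=streamer, max_new_tokens=max_length,
#                     temperature=temperature, top_p=top_p, do_sample=True)
#       Thread(target=model.generate, kwargs=kwargs).start()
#       yield from streamer  # yields decoded text chunks as tokens are produced
#
#   # In the assistant block, recent Streamlit versions can consume this directly:
#   # response = st.write_stream(stream_huggingface_response(prompt))
#
# Usage (assumed filename app.py):
#   export llama3=hf_...      # Hugging Face token with access to the Llama 3.2 models
#   streamlit run app.py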