NanaAkwasiAbayieBoateng committed on
Commit 61211fa · 1 Parent(s): 180d9f2

Add application file

Files changed (2)
  1. huggingface_streamlit.py +69 -0
  2. requirements.txt +16 -0
huggingface_streamlit.py ADDED
@@ -0,0 +1,69 @@
+ import streamlit as st
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import asyncio
+
+ # Create a Streamlit app object
+ st.sidebar.title("Lmstudio Demo")
+
+ # Model selection options
+ model_options = ["HuggingFaceTB/SmolLM-1.7B-Instruct", "HuggingFaceTB/SmolLM-360M-Instruct", "HuggingFaceTB/SmolLM-135M-Instruct"]
+ default_model_option = model_options[1]
+ model_selectbox = st.sidebar.selectbox("Language Model", options=model_options, index=1)
+
+ # Temperature slider
+ temperature_value = 0.5
+ temperature_slider = st.sidebar.slider("Temperature", min_value=0.0, max_value=1.0, step=0.05, value=temperature_value, help="Controls the randomness of the generated text.")
+
+ # Seed slider
+ seed_value = 5238
+ seed_slider = st.sidebar.slider("Seed", min_value=0, max_value=99999, step=1, value=seed_value, help="Controls randomness of token selection.")
+
+ # Top P slider
+ top_p_value = 0.75
+ top_p_slider = st.sidebar.slider("Top P", min_value=0.0, max_value=1.0, step=0.05, value=top_p_value, help="Controls randomness and creativity.")
+
+ # Response token length slider
+ length_value = 256
+ response_token_length_slider = st.sidebar.slider("Response Token", min_value=1, max_value=99999, step=16, value=length_value, help="Maximum tokens in response.")
+
+ # Device selection ("cuda" is the valid torch device string for GPU)
+ device_type = "cpu"
+ device_selectbox = st.sidebar.selectbox("Device Type", options=["cpu", "cuda"], index=0)
+
+ # Asynchronous text generation function
+ async def generate_response(prompt, device, model, top_p, temperature, max_new_tokens):
+     tokenizer = AutoTokenizer.from_pretrained(model)
+     model_instance = AutoModelForCausalLM.from_pretrained(model).to(device)
+     messages = [{"role": "user", "content": prompt}]
+     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
+
+     # Run the blocking model.generate call in the default executor so it does not block the event loop
+     loop = asyncio.get_running_loop()
+     output = await loop.run_in_executor(None, lambda: model_instance.generate(inputs, temperature=temperature, top_p=top_p, max_new_tokens=max_new_tokens, do_sample=True))
+
+     generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+     return generated_text
+
+ async def main():
+     st.title("Huggingface LLM App")
+     user_input = st.text_input("Enter your prompt:")
+
+     if st.button("Generate"):
+         if user_input:
+             with st.spinner("Generating response..."):
+                 response = await generate_response(
+                     prompt=user_input,
+                     device=device_selectbox,
+                     model=model_selectbox,
+                     top_p=top_p_slider,
+                     temperature=temperature_slider,
+                     max_new_tokens=response_token_length_slider,
+                 )
+             st.write("Model Response:")
+             st.write(response)
+         else:
+             st.warning("Please enter a prompt.")
+
+ if __name__ == "__main__":
+     asyncio.run(main())
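
The sidebar exposes a Seed slider, but the script above never applies it, so sampling is not reproducible from run to run. Below is a minimal standalone sketch of how the seed could take effect, assuming transformers.set_seed (which seeds the Python, NumPy, and torch RNGs); generate_with_seed and the sample prompt are illustrative names, not part of the committed file:

    from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed

    def generate_with_seed(prompt, seed=5238, model_id="HuggingFaceTB/SmolLM-360M-Instruct",
                           device="cpu", temperature=0.5, top_p=0.75, max_new_tokens=256):
        set_seed(seed)  # fix the sampling RNGs so repeated runs give the same output
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForCausalLM.from_pretrained(model_id).to(device)
        text = tokenizer.apply_chat_template([{"role": "user", "content": prompt}],
                                             tokenize=False, add_generation_prompt=True)
        inputs = tokenizer.encode(text, return_tensors="pt").to(device)
        output = model.generate(inputs, do_sample=True, temperature=temperature,
                                top_p=top_p, max_new_tokens=max_new_tokens)
        return tokenizer.decode(output[0], skip_special_tokens=True)

    if __name__ == "__main__":
        print(generate_with_seed("Write a short greeting."))

In the Streamlit app this would amount to passing seed_slider into generate_response and calling set_seed(seed) before model_instance.generate.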
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ chromadb==0.6.1
+ langchain==0.3.13
+ langchain-community==0.3.13
+ langchain-core==0.3.28
+ langchain-ollama==0.2.2
+ langchain-text-splitters==0.3.4
+ langsmith==0.2.4
+ llvmlite==0.43.0
+ ollama==0.4.4
+ pydantic==2.10.4
+ pydantic-settings==2.7.0
+ pydantic_core==2.27.2
+ streamlit==1.33.0
+ sentencepiece==0.2.0
+ asyncio==3.4.3
+ transformers==4.47.1
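
The app needs torch for AutoModelForCausalLM and .to(device), but torch is not pinned above, and asyncio is already part of the Python standard library. A rough way to try the app locally, with torch installed explicitly as an assumption rather than something documented in the commit:

    pip install -r requirements.txt torch
    streamlit run huggingface_streamlit.py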