import streamlit as st
from huggingface_hub import InferenceClient
# HF_TOKEN MUST BE SET AS A SECRET IN THE HUGGING FACE SPACE SETTINGS (READ HERE VIA st.secrets)
HF_TOKEN = st.secrets["HF_TOKEN"]
# INIT THE INFERENCE CLIENT WITH YOUR HF TOKEN
client = InferenceClient(
    provider="hf-inference",
    api_key=HF_TOKEN,
)
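# NOTE (optional, sketch only): the target model can also be pinned when the client is
# created, e.g. InferenceClient(model="HuggingFaceH4/zephyr-7b-beta", token=HF_TOKEN),
# in which case the per-call "model" argument below could be omitted.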
# STREAMLIT TEXT INPUT WIDGET THAT COLLECTS THE USER PROMPT
user_input = st.text_input(
    "Place your prompt here",
    "This is a placeholder",
    key="placeholder",
)
# THIS IS THE INFERENCE CLIENT CALL
completion = client.chat.completions.create(
    model="HuggingFaceH4/zephyr-7b-beta",
    messages=[
        {
            "role": "user",
            "content": user_input,
        }
    ],
    max_tokens=512,
)
# EXTRACT THE ASSISTANT MESSAGE FROM THE COMPLETION
ai_response = completion.choices[0].message.content
# DISPLAY THE RESPONSE IN THE APP
st.text(ai_response)
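# OPTIONAL VARIANT (sketch only, not part of the flow above): gate the API call behind a
# button and show a spinner while waiting, so the placeholder text is not sent on every rerun.
# if st.button("Generate") and user_input:
#     with st.spinner("Querying the model..."):
#         completion = client.chat.completions.create(
#             model="HuggingFaceH4/zephyr-7b-beta",
#             messages=[{"role": "user", "content": user_input}],
#             max_tokens=512,
#         )
#     st.text(completion.choices[0].message.content)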
### WRONG WAY TO TRY TO LOAD MODELS IN A SPACE:
# Load model directly
# from transformers import AutoTokenizer, AutoModelForCausalLM
# tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-Prover-V2-671B", trust_remote_code=True)
# model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-Prover-V2-671B", trust_remote_code=True)
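# A 671B-parameter checkpoint needs hundreds of GB of weights, far more than a Space can
# download or hold in memory, which is why the hosted InferenceClient call above is used instead.
# If local loading were genuinely needed, only a much smaller model would be practical.
# Minimal sketch (assumption: a small demo model such as "gpt2" is acceptable for illustration):
# from transformers import pipeline
# pipe = pipeline("text-generation", model="gpt2")
# st.text(pipe(user_input, max_new_tokens=50)[0]["generated_text"])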