Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from transformers import pipeline | |
| from PIL import Image | |
| import io | |
| from gtts import gTTS | |
| import tempfile | |
# Page header: the demo's title followed by a one-line description.
_PAGE_TITLE = "🖼️ → 📖 Image-to-Story Demo"
_PAGE_INTRO = "Upload an image and watch as it’s captioned, turned into a short story, and even read aloud!"

st.title(_PAGE_TITLE)
st.write(_PAGE_INTRO)
@st.cache_resource
def load_captioner():
    """Build the image-captioning pipeline once and reuse it across reruns.

    Streamlit re-executes the whole script on every widget interaction.
    ``st.cache_resource`` keeps one pipeline instance alive so the model
    is not rebuilt on each rerun.

    Returns:
        The ``transformers`` "image-to-text" pipeline for the
        ``unography/blip-large-long-cap`` model.
    """
    return pipeline("image-to-text", model="unography/blip-large-long-cap")
@st.cache_resource
def load_story_gen():
    """Build the story-generation pipeline once and reuse it across reruns.

    Streamlit re-executes the whole script on every widget interaction.
    ``st.cache_resource`` keeps one pipeline instance alive so the model
    is not rebuilt on each rerun.

    Returns:
        The ``transformers`` "text-generation" pipeline using the ``gpt2``
        model and tokenizer.
    """
    return pipeline("text-generation", model="gpt2", tokenizer="gpt2")
# Instantiate both pipelines up front (captioner first, then the story
# generator) so the upload handler below can call them directly.
captioner, story_gen = load_captioner(), load_story_gen()
# Main flow: upload an image -> caption it -> expand the caption into a
# story -> synthesize speech for the story. Each stage's result is kept in
# st.session_state so reruns triggered by widgets do not recompute it.
uploaded = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"], key="image")

if uploaded:
    # Fingerprint the upload so results cached for a previous image are
    # discarded when the user picks a different file. getvalue() does not
    # move the read position, so Image.open() below still sees the whole file.
    fingerprint = (uploaded.name, hash(uploaded.getvalue()))
    if st.session_state.get("image_fingerprint") != fingerprint:
        for stale_key in ("caption", "story", "audio_bytes"):
            if stale_key in st.session_state:
                del st.session_state[stale_key]
        st.session_state.image_fingerprint = fingerprint

    img = Image.open(uploaded)
    st.image(img, use_column_width=True)

    # Caption
    if "caption" not in st.session_state:
        with st.spinner("Generating caption…"):
            caps = captioner(img)
        first = caps[0] if isinstance(caps, list) else caps
        # The pipeline yields {"generated_text": ...} records; store the plain
        # string so it can be displayed and used as the story prompt.
        if isinstance(first, dict):
            first = first.get("generated_text", "")
        st.session_state.caption = str(first)
    st.write("**Caption:**", st.session_state.caption)

    # Story
    if "story" not in st.session_state:
        with st.spinner("Spinning up a story…"):
            out = story_gen(
                st.session_state.caption,
                max_length=200,
                num_return_sequences=1,
                do_sample=True,
                top_p=0.9,
            )
        st.session_state.story = out[0]["generated_text"]
    st.write("**Story:**", st.session_state.story)

    # Prepare audio bytes once
    if "audio_bytes" not in st.session_state:
        with st.spinner("Generating audio…"):
            tts = gTTS(text=st.session_state.story, lang="en")
            buf = io.BytesIO()
            tts.write_to_fp(buf)
        st.session_state.audio_bytes = buf.getvalue()

    # Play button
    if st.button("🔊 Play Story Audio"):
        # st.audio accepts raw bytes, so no temporary file is needed (the
        # previous delete=False temp file was never cleaned up).
        st.audio(st.session_state.audio_bytes, format="audio/mp3")