"""Streamlit demo app exposing three Hugging Face pipelines.

The sidebar selects one of three tasks:

* visual question answering over an uploaded image,
* English-to-Urdu translation,
* short text ("story") generation from a prompt.

Each model pipeline is created lazily the first time its task is used and
is then kept in Streamlit's resource cache, so later button clicks and
script reruns reuse the already-loaded model instead of rebuilding it.
"""

import streamlit as st
from PIL import Image
from transformers import pipeline

# Task labels are shared by the selectbox and the dispatch below; keeping
# them as constants prevents the two from silently drifting apart.
TASK_VQA = "🖼️ Visual Question Answering"
TASK_TRANSLATE = "🌐 Translate to Urdu"
TASK_STORY = "📖 Story Generator"


# show_spinner=False: every call site already wraps loading in its own
# st.spinner with a task-specific message.
@st.cache_resource(show_spinner=False)
def _load_vqa_pipeline():
    """Return the cached visual-question-answering pipeline."""
    return pipeline(
        "visual-question-answering",
        model="dandelin/vilt-b32-finetuned-vqa",
    )


@st.cache_resource(show_spinner=False)
def _load_translation_pipeline():
    """Return the cached translation pipeline."""
    return pipeline("translation", model="facebook/nllb-200-distilled-600M")


@st.cache_resource(show_spinner=False)
def _load_text_generation_pipeline():
    """Return the cached text-generation pipeline."""
    return pipeline("text-generation", model="openai-community/gpt2")


def _render_vqa():
    """Render the image upload + question form and show the model's answer."""
    st.header("🖼️ Visual Question Answering")
    uploaded_file = st.file_uploader(
        "📤 Upload an image", type=["jpg", "png", "jpeg"]
    )
    question = st.text_input("❓ Ask a question about the image")

    # The action button only appears once both inputs are provided.
    if not (uploaded_file and question):
        return

    image = Image.open(uploaded_file)
    if st.button("🔍 Ask Question"):
        with st.spinner('⏳ Loading VQA model...'):
            vqa_pipe = _load_vqa_pipeline()
            result = vqa_pipe(image, question)
        st.image(image, caption="🖼️ Uploaded Image")
        # The first entry of the pipeline output is the one displayed.
        st.success(f"✅ **Answer:** {result[0]['answer']}")


def _render_translation():
    """Render the English text box and show its Urdu translation."""
    st.header("🌐 English to Urdu Translation")
    input_text = st.text_area("✏️ Enter English text")

    if st.button("🌍 Translate"):
        # Do not run the model on empty / whitespace-only input.
        if not input_text.strip():
            st.warning("Please enter some English text to translate.")
            return
        with st.spinner('⏳ Loading Translation model...'):
            translator = _load_translation_pipeline()
            translation = translator(
                input_text, src_lang="eng_Latn", tgt_lang="urd_Arab"
            )
        st.success(
            f"✅ **Urdu Translation:** {translation[0]['translation_text']}"
        )


def _render_story_generator():
    """Render the prompt box and show the generated continuation."""
    st.header("📝 Story Generator")
    prompt = st.text_input("💡 Enter a prompt")

    if st.button("✍️ Generate Story"):
        # Do not run the model on empty / whitespace-only input.
        if not prompt.strip():
            st.warning("Please enter a prompt to generate a story.")
            return
        with st.spinner('⏳ Loading Text Generation model...'):
            text_gen_pipe = _load_text_generation_pipeline()
            result = text_gen_pipe(
                prompt, max_length=100, num_return_sequences=1
            )
        st.success(f"✅ **Generated Text:** {result[0]['generated_text']}")


st.title("Multimodal AI App 🤖")
st.sidebar.header("🔧 Choose Task")
task = st.sidebar.selectbox(
    "📂 Select task",
    [TASK_VQA, TASK_TRANSLATE, TASK_STORY],
)

if task == TASK_VQA:
    _render_vqa()
elif task == TASK_TRANSLATE:
    _render_translation()
elif task == TASK_STORY:
    _render_story_generator()