Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import requests | |
| from PIL import Image | |
| from transformers import BlipProcessor, BlipForQuestionAnswering | |
# Model Loading
@st.cache_resource
def _load_vqa_model():
    """Load the BLIP VQA processor and model, cached across script reruns.

    Streamlit re-executes this script from the top on every widget
    interaction; without caching, both objects would be re-instantiated each
    time the user edits the URL or the question.

    Returns:
        A ``(processor, model)`` tuple built from the same checkpoint.
    """
    checkpoint = "Salesforce/blip-vqa-capfilt-large"
    return (
        BlipProcessor.from_pretrained(checkpoint),
        BlipForQuestionAnswering.from_pretrained(checkpoint),
    )


# Keep the original module-level names so the rest of the script is unchanged.
processor, model = _load_vqa_model()

# Streamlit App Structure
st.title("Visual Question Answering ")
def get_image():
    """Ask for an image URL, download the image, preview it, and return it.

    Returns:
        The downloaded picture converted to an RGB PIL image, or ``None``
        when the URL field is empty or the image could not be fetched or
        decoded (an error message is shown in the app in that case).
    """
    # Local import: only this function needs an in-memory byte buffer.
    from io import BytesIO

    img_url = st.text_input(
        "Enter Image URL",
        value='https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg',
    )
    if not img_url:
        return None

    try:
        # A timeout keeps a stalled host from hanging the app indefinitely.
        response = requests.get(img_url, timeout=10)
        # Fail on 4xx/5xx instead of handing an HTML error page to PIL.
        response.raise_for_status()
        # response.content is fully downloaded and content-decoded, unlike
        # the raw socket stream, and the connection is released afterwards.
        raw_image = Image.open(BytesIO(response.content)).convert('RGB')
    except (requests.RequestException, OSError) as exc:
        # OSError covers PIL's "cannot identify image file" failures.
        st.error(f"Could not load image from URL: {exc}")
        return None

    st.image(raw_image)
    return raw_image
def process_vqa(image, question):
    """Answer ``question`` about ``image`` and write the answer to the page.

    Does nothing unless both ``image`` and ``question`` are truthy.
    """
    if not (image and question):
        return

    # Encode the image/question pair into tensors the model accepts.
    encoded = processor(image, question, return_tensors="pt")
    generated = model.generate(**encoded)
    # Decode the first item of the generated output, dropping special tokens.
    st.write("Answer:", processor.decode(generated[0], skip_special_tokens=True))
# Collect inputs: the image URL field (with preview) is rendered first,
# followed by the question box.
image = get_image()
question = st.text_input(label="Ask your question about the image:")

# Run the model and show the answer once both inputs are available.
process_vqa(image=image, question=question)