import gradio as gr
import torch
import spaces
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer
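
# Load the ShareCaptioner checkpoint. trust_remote_code=True pulls in the
# custom methods used below (encode_text, encode_img, vis_processor,
# decode_text) that the stock transformers classes do not provide.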
model_name = "Lin-Chen/ShareCaptioner"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="cpu", torch_dtype=torch.float16, trust_remote_code=True).eval()
model.tokenizer = tokenizer
model.cuda()
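
# Conversation template: the encoded image is spliced between these two text
# segments ("<|User|>: ... <|Bot|>:") before generation.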
seg1 = '<|User|>:'
seg2 = f'Analyze the image in a comprehensive and detailed manner.{model.eoh}\n<|Bot|>:'
seg_emb1 = model.encode_text(seg1, add_special_tokens=True).cuda()
seg_emb2 = model.encode_text(seg2, add_special_tokens=False).cuda()
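

# The `spaces` package is imported for Hugging Face ZeroGPU; decorating the
# inference function with @spaces.GPU requests a GPU for the duration of each
# call (the import is otherwise unused).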
@spaces.GPU
def detailed_caption(img_path):
    # Preprocess the uploaded image into a single-image batch.
    image = Image.open(img_path).convert("RGB")
    subs = [model.vis_processor(image).unsqueeze(0)]
    subs = torch.cat(subs, dim=0).cuda()

    # Tile the prompt-segment embeddings to the batch size.
    tmp_bs = subs.shape[0]
    tmp_seg_emb1 = seg_emb1.repeat(tmp_bs, 1, 1)
    tmp_seg_emb2 = seg_emb2.repeat(tmp_bs, 1, 1)

    with torch.cuda.amp.autocast():
        with torch.no_grad():
            # Encode the image and splice it between the two prompt segments.
            subs = model.encode_img(subs)
            input_emb = torch.cat([tmp_seg_emb1, subs, tmp_seg_emb2], dim=1)
            out_embeds = model.internlm_model.generate(
                inputs_embeds=input_emb,
                max_length=500,
                num_beams=3,
                min_length=1,
                do_sample=True,
                repetition_penalty=1.5,
                length_penalty=1.0,
                temperature=1.,
                eos_token_id=model.tokenizer.eos_token_id,
                num_return_sequences=1,
            )
    return model.decode_text([out_embeds[0]])


block_css = """
#buttons button {
    min-width: min(120px, 100%);
}
"""

title_markdown = ("""
# ShareGPT4V: Improving Large Multi-modal Models with Better Captions
[[Project Page](https://sharegpt4v.github.io/)] [[Code](https://github.com/ShareGPT4Omni/ShareGPT4V)] [[Paper](https://github.com/InternLM/InternLM-XComposer/blob/main/projects/ShareGPT4V/ShareGPT4V.pdf)]
""")

tos_markdown = ("""
### Terms of use
By using this service, users are required to agree to the following terms:
The service is a research preview intended for non-commercial use only. It provides only limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes.
For an optimal experience, please use a desktop computer for this demo, as mobile devices may compromise its quality.
""")

learn_more_markdown = ("""
### License
The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
""")

ack_markdown = ("""
### Acknowledgement
The template for this web demo is adapted from [LLaVA](https://github.com/haotian-liu/LLaVA), and we are very grateful to the LLaVA team for their open-source contributions to the community!
""")
def build_demo():
    with gr.Blocks(title="Share-Captioner", theme=gr.themes.Default(), css=block_css) as demo:
        gr.Markdown(title_markdown)

        with gr.Row():
            with gr.Column(scale=5):
                with gr.Row(elem_id="Model ID"):
                    gr.Dropdown(
                        choices=['Share-Captioner'],
                        value='Share-Captioner',
                        interactive=True,
                        label='Model ID',
                        container=False)
                img_path = gr.Image(label="Image", type="filepath")
            with gr.Column(scale=8):
                with gr.Row():
                    caption = gr.Textbox(label='Caption')
                with gr.Row():
                    submit_btn = gr.Button(value="Generate", variant="primary")
                    regenerate_btn = gr.Button(value="Regenerate")

        gr.Markdown(tos_markdown)
        gr.Markdown(learn_more_markdown)
        gr.Markdown(ack_markdown)

        submit_btn.click(detailed_caption, inputs=[img_path], outputs=[caption])
        regenerate_btn.click(detailed_caption, inputs=[img_path], outputs=[caption])

    return demo


if __name__ == '__main__':
    demo = build_demo()
    demo.launch()