#!/usr/bin/env python
# this code is modified from https://huggingface.co/spaces/lykeven/visualglm-6b
import gradio as gr
import re
from PIL import Image
import torch
from io import BytesIO
import hashlib
import os
from transformers import LlamaForCausalLM, LlamaTokenizer, BlipImageProcessor, BitsAndBytesConfig, AutoModelForCausalLM

DESCRIPTION = '''# <a href="https://huggingface.co/IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1">Ziya-Blip2-14B</a>'''

MAINTENANCE_NOTICE1 = 'Hint 1: If the app reports "Something went wrong, connection error out", please turn off your proxy and retry.\nHint 2: If you upload a large image, e.g. 10MB, it may take some time to upload and process. Please be patient and wait.'
MAINTENANCE_NOTICE2 = '提示1: 如果应用报了“Something went wrong, connection error out”的错误,请关闭代理并重试。\n提示2: 如果你上传了很大的图片,比如10MB大小,那将需要一些时间来上传和处理,请耐心等待。'

NOTES = 'This app is adapted from <a href="https://huggingface.co/IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1">https://huggingface.co/IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1</a>. It is recommended to check out that repo if you want to see the details of the model. Most of the code attached to this demo is modified from <a href="https://huggingface.co/spaces/lykeven/visualglm-6b">lykeven/visualglm-6b</a>.'
import json

default_chatbox = []


def is_chinese(text):
    zh_pattern = re.compile(u'[\u4e00-\u9fa5]+')
    return zh_pattern.search(text)


AUTH_TOKEN = os.getenv("AUTH_TOKEN")
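
# Language model: Ziya-LLaMA-13B-v1, loaded in 4-bit via bitsandbytes and sharded
# automatically over the available devices so the 13B weights fit in GPU memory.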
LM_MODEL_PATH = "wuxiaojun/Ziya-LLaMA-13B-v1"
# LM_MODEL_PATH = "/cognitive_comp/wuxiaojun/pretrained/pytorch/huggingface/Ziya-LLaMA-13B-v1"
lm_model = LlamaForCausalLM.from_pretrained(
    LM_MODEL_PATH,
    device_map="auto",
    torch_dtype=torch.float16,
    use_auth_token=AUTH_TOKEN,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True))

TOKENIZER_PATH = "IDEA-CCNL/Ziya-LLaMA-13B-v1"
# TOKENIZER_PATH = "/cognitive_comp/wuxiaojun/pretrained/pytorch/huggingface/Ziya-LLaMA-13B-v1"
# tokenizer = LlamaTokenizer.from_pretrained(LM_MODEL_PATH, use_auth_token=AUTH_TOKEN)
tokenizer = LlamaTokenizer.from_pretrained(TOKENIZER_PATH)

# visual model
OPENAI_CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073]
OPENAI_CLIP_STD = [0.26862954, 0.26130258, 0.27577711]
# demo.py is in the project path, so we could use the local path ".". Otherwise use "IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1".
visual_model_path = "IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1"
# visual_model_path = "/cognitive_comp/wuxiaojun/pretrained/pytorch/huggingface/Ziya-BLIP2-14B-Visual-v1"
model = AutoModelForCausalLM.from_pretrained(
    visual_model_path,
    trust_remote_code=True, use_auth_token=AUTH_TOKEN,
    torch_dtype=torch.float16)
model.cuda()  # if you run on CPU, comment out this line
model.language_model = lm_model
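
# Image preprocessing: resize to the vision tower's input resolution and
# normalize with the OpenAI CLIP mean/std defined above.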
image_size = model.config.vision_config.image_size
image_processor = BlipImageProcessor(
    size={"height": image_size, "width": image_size},
    image_mean=OPENAI_CLIP_MEAN,
    image_std=OPENAI_CLIP_STD,
)
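
# Gradio callback for "Generate": receives the prompt, sampling parameters,
# uploaded image path, chat history, and the hash of the previously used image;
# returns the new textbox value, the updated history, and the current image hash.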
def post(
        input_text,
        temperature,
        top_p,
        image_prompt,
        result_previous,
        hidden_image
):
    result_text = [(ele[0], ele[1]) for ele in result_previous]
    previous_querys = []
    previous_outputs = []
    for i in range(len(result_text) - 1, -1, -1):
        if result_text[i][0] == "":
            del result_text[i]
        else:
            previous_querys.append(result_text[i][0])
            previous_outputs.append(result_text[i][1])

    is_zh = is_chinese(input_text)

    if image_prompt is None:
        print("Image empty")
        if is_zh:
            result_text.append((input_text, '图片为空!请上传图片并重试。'))
        else:
            result_text.append((input_text, 'Image empty! Please upload an image and retry.'))
        return input_text, result_text, hidden_image
    elif input_text == "":
        print("Text empty")
        result_text.append((input_text, 'Text empty! Please enter text and retry.'))
        return "", result_text, hidden_image
    generate_config = {
        "max_new_tokens": 128,
        "top_p": top_p,
        "temperature": temperature,
        "repetition_penalty": 1.18,
    }
    img = Image.open(image_prompt)
    pixel_values = image_processor(
        img,
        return_tensors="pt").pixel_values.to(
        model.device).to(model.dtype)
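
    # Hash the uploaded image: if it differs from the hash stored in the hidden
    # textbox, the user has switched images, so the multi-round history is reset.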
    output_buffer = BytesIO()
    img.save(output_buffer, "PNG")
    byte_data = output_buffer.getvalue()
    md = hashlib.md5()
    md.update(byte_data)
    img_hash = md.hexdigest()
    if img_hash != hidden_image:
        previous_querys = []
        previous_outputs = []
        result_text = []
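
    # model.chat comes from the remote modeling code of Ziya-BLIP2-14B-Visual-v1;
    # it encodes the image and generates a reply conditioned on the previous turns.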
    answer = model.chat(
        tokenizer=tokenizer,
        pixel_values=pixel_values,
        query=input_text,
        previous_querys=previous_querys,
        previous_outputs=previous_outputs,
        **generate_config,
    )

    result_text.append((input_text, answer))
    print(result_text)
    return "", result_text, img_hash
def clear_fn(value):
    return "", default_chatbox, None


def clear_fn2(value):
    return default_chatbox


def io_fn(a, b, c):
    print("call io_fn")
    return a, b


def change_language(value):
    if value == "Change hint to English":
        return "提示变为中文", MAINTENANCE_NOTICE1
    else:
        return "Change hint to English", MAINTENANCE_NOTICE2

def main():
    gr.close_all()
    examples = []
    with open("./examples/example_inputs.jsonl") as f:
        for line in f:
            data = json.loads(line)
            examples.append(data)
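
    # Each line of example_inputs.jsonl is expected to be a JSON object with
    # "text" and "image" fields, e.g. (illustrative values, not actual file contents):
    #   {"text": "What is in this picture?", "image": "examples/1.jpg"}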

    with gr.Blocks(css='style.css') as demo:
        with gr.Row():
            with gr.Column(scale=4.5):
                with gr.Group():
                    input_text = gr.Textbox(label='Input Text', placeholder='Please enter text prompt below and press ENTER.')
                    with gr.Row():
                        run_button = gr.Button('Generate')
                        clear_button = gr.Button('Clear')

                    image_prompt = gr.Image(type="filepath", label="Image Prompt", value=None)
                with gr.Row():
                    temperature = gr.Slider(maximum=1, value=0.7, minimum=0, label='Temperature')
                    top_p = gr.Slider(maximum=1, value=0.1, minimum=0, label='Top P')
                with gr.Group():
                    with gr.Row():
                        with gr.Column(scale=7):
                            maintenance_notice = gr.Markdown(MAINTENANCE_NOTICE1)
                        with gr.Column(scale=2):
                            change_button = gr.Button('Change hint to English', visible=False)
            with gr.Column(scale=5.5):
                result_text = gr.components.Chatbot(label='Multi-round conversation History', value=[]).style(height=550)
                hidden_image_hash = gr.Textbox(visible=False)

        gr_examples = gr.Examples(examples=[[example["text"], example["image"]] for example in examples],
                                  inputs=[input_text, image_prompt],
                                  label="Example Inputs (Click to insert an example into the input box)",
                                  examples_per_page=3)
        gr.Markdown(NOTES)
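
        # Wire up the events: both the Generate button and pressing ENTER in the
        # textbox call post(); Clear resets everything, and uploading or clearing
        # the image only resets the chat history.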
        print(gr.__version__)
        run_button.click(fn=post, inputs=[input_text, temperature, top_p, image_prompt, result_text, hidden_image_hash],
                         outputs=[input_text, result_text, hidden_image_hash])
        input_text.submit(fn=post, inputs=[input_text, temperature, top_p, image_prompt, result_text, hidden_image_hash],
                          outputs=[input_text, result_text, hidden_image_hash])
        clear_button.click(fn=clear_fn, inputs=clear_button, outputs=[input_text, result_text, image_prompt])
        image_prompt.upload(fn=clear_fn2, inputs=clear_button, outputs=[result_text])
        image_prompt.clear(fn=clear_fn2, inputs=clear_button, outputs=[result_text])

    print(gr.__version__)
    demo.queue(concurrency_count=10)
    demo.launch(server_name="0.0.0.0")


if __name__ == '__main__':
    main()