model_explorer4

Paused

App Files Files Community

donb-hf committed on Jun 3, 2024

Commit

ad6330a

1 Parent(s): a6d3ba4

refactor app

Browse files

Files changed (4) hide show

.gitignore +8 -0
app.py +11 -60
requirements.txt +3 -1
utils.py +73 -0

.gitignore CHANGED Viewed

@@ -1,2 +1,10 @@
 .venv/
 .python-version

+# Ignore Python cache files
+__pycache__/
+*.py[cod]
+# Ignore virtual environment
 .venv/
+# Ignore environment-specific files
+.env
 .python-version

app.py CHANGED Viewed

@@ -1,65 +1,13 @@
 import gradio as gr
-import os, requests
-import torch, torchvision, einops
-import spaces
-import subprocess
-from transformers import AutoModelForCausalLM, AutoModel, AutoModelForVision2Seq, PaliGemmaForConditionalGeneration, LlavaForConditionalGeneration, LlavaNextForConditionalGeneration
-from huggingface_hub import login
 # Install required package
-subprocess.run(
-    "pip install flash-attn --no-build-isolation",
-    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-    shell=True,
-)
 HF_TOKEN = os.getenv("HF_TOKEN")
-login(token=HF_TOKEN, add_to_git_credential=True)
-# Cache for storing loaded models and their summaries
-model_cache = {}
-# Function to get the model summary
-@spaces.GPU
-def get_model_summary(model_name):
-    if model_name in model_cache:
-        return model_cache[model_name], ""
-    try:
-        # Fetch the config.json file
-        config_url = f"https://huggingface.co/{model_name}/raw/main/config.json"
-        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
-        response = requests.get(config_url, headers=headers)
-        response.raise_for_status()
-        config = response.json()
-        architecture = config["architectures"][0]
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        # Select the correct model class based on the architecture
-        if architecture == "LlavaNextForConditionalGeneration":
-            from transformers import LlavaNextForConditionalGeneration
-            model = LlavaNextForConditionalGeneration.from_pretrained(model_name, trust_remote_code=True).to(device)
-        elif architecture == "LlavaForConditionalGeneration":
-            from transformers import LlavaForConditionalGeneration
-            model = LlavaForConditionalGeneration.from_pretrained(model_name, trust_remote_code=True).to(device)
-        elif architecture == "PaliGemmaForConditionalGeneration":
-            from transformers import PaliGemmaForConditionalGeneration
-            model = PaliGemmaForConditionalGeneration.from_pretrained(model_name, trust_remote_code=True).to(device)
-        elif architecture == "Idefics2ForConditionalGeneration":
-            from transformers import Idefics2ForConditionalGeneration
-            model = Idefics2ForConditionalGeneration.from_pretrained(model_name, trust_remote_code=True).to(device)
-        elif architecture == "MiniCPMV":
-            from transformers import MiniCPMV
-            model = MiniCPMV.from_pretrained(model_name, trust_remote_code=True).to(device)
-        else:
-            model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).to(device)
-        model_summary = str(model)
-        model_cache[model_name] = model_summary
-        return model_summary, ""
-    except Exception as e:
-        return "", str(e)
 # Create the Gradio Blocks interface
 with gr.Blocks() as demo:
@@ -69,13 +17,14 @@ with gr.Blocks() as demo:
             gr.Markdown("### Vision Models")
             vision_examples = gr.Examples(
                 examples=[
                     ["llava-hf/llava-v1.6-mistral-7b-hf"],
                     ["xtuner/llava-phi-3-mini-hf"],
                     ["xtuner/llava-llama-3-8b-v1_1-transformers"],
                     ["vikhyatk/moondream2"],
                     ["openbmb/MiniCPM-Llama3-V-2_5"],
                     ["microsoft/Phi-3-vision-128k-instruct"],
-                    ["google/paligemma-3b-mix-224"],
                     ["HuggingFaceM4/idefics2-8b-chatty"],
                     ["microsoft/llava-med-v1.5-mistral-7b"]
                 ],
@@ -85,10 +34,12 @@ with gr.Blocks() as demo:
             gr.Markdown("### Other Models")
             other_examples = gr.Examples(
                 examples=[
                     ["google/gemma-7b"],
                     ["microsoft/Phi-3-mini-4k-instruct"],
-                    ["meta-llama/Meta-Llama-3-8B"],
-                    ["mistralai/Mistral-7B-Instruct-v0.3"]
                 ],
                 inputs=textbox
             )

+import os
 import gradio as gr
+from utils import get_model_summary, install_flash_attn, authenticate_hf
 # Install required package
+install_flash_attn()
+# Authenticate with Hugging Face
 HF_TOKEN = os.getenv("HF_TOKEN")
+authenticate_hf(HF_TOKEN)
 # Create the Gradio Blocks interface
 with gr.Blocks() as demo:
             gr.Markdown("### Vision Models")
             vision_examples = gr.Examples(
                 examples=[
+                    ["google/paligemma-3b-mix-224"],
+                    ["google/paligemma-3b-ft-refcoco-seg-224"],
                     ["llava-hf/llava-v1.6-mistral-7b-hf"],
                     ["xtuner/llava-phi-3-mini-hf"],
                     ["xtuner/llava-llama-3-8b-v1_1-transformers"],
                     ["vikhyatk/moondream2"],
                     ["openbmb/MiniCPM-Llama3-V-2_5"],
                     ["microsoft/Phi-3-vision-128k-instruct"],
                     ["HuggingFaceM4/idefics2-8b-chatty"],
                     ["microsoft/llava-med-v1.5-mistral-7b"]
                 ],
             gr.Markdown("### Other Models")
             other_examples = gr.Examples(
                 examples=[
+                    ["dwb2023/mistral-7b-instruct-quantized"],
+                    ["mistralai/Mistral-7B-Instruct-v0.2"],
+                    ["mistralai/Mistral-7B-Instruct-v0.3"],
                     ["google/gemma-7b"],
                     ["microsoft/Phi-3-mini-4k-instruct"],
+                    ["meta-llama/Meta-Llama-3-8B"]
                 ],
                 inputs=textbox
             )

requirements.txt CHANGED Viewed

@@ -1,4 +1,6 @@
 git+https://github.com/huggingface/transformers.git
 spaces
 torchvision
-einops

 git+https://github.com/huggingface/transformers.git
 spaces
 torchvision
+einops
+accelerate
+bitsandbytes

utils.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import subprocess
+import os, requests
+import torch, torchvision
+from huggingface_hub import login
+from transformers import BitsAndBytesConfig, AutoModelForCausalLM, LlavaNextForConditionalGeneration, LlavaForConditionalGeneration, PaliGemmaForConditionalGeneration, Idefics2ForConditionalGeneration
+# Install required package
+def install_flash_attn():
+    """Install the ``flash-attn`` package with pip at runtime.
+
+    ``FLASH_ATTENTION_SKIP_CUDA_BUILD`` is set to ``"TRUE"`` for the pip
+    process. The flag is layered on top of the current environment because
+    ``subprocess.run(env=...)`` replaces the child's environment wholesale;
+    passing only the flag would drop ``PATH``, ``HOME`` and any proxy or
+    index settings the pip process would otherwise inherit.
+
+    The exit status is not checked, so installation remains best-effort.
+    """
+    pip_env = {**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}
+    subprocess.run(
+        "pip install flash-attn --no-build-isolation",
+        env=pip_env,
+        shell=True,
+    )
+# Authenticate with Hugging Face
+def authenticate_hf(token):
+    """Log in to the Hugging Face Hub using ``token``.
+
+    The token is also added to the git credential store
+    (``add_to_git_credential=True``).
+    """
+    login_options = {"token": token, "add_to_git_credential": True}
+    login(**login_options)
+# Function to get the model summary
+model_cache = {}
+def get_model_summary(model_name):
+    """Return a printable summary of a Hugging Face model's module tree.
+
+    Args:
+        model_name: Hub repository id, e.g. ``"google/gemma-7b"``.
+
+    Returns:
+        A ``(summary, error)`` pair of strings. On success ``summary`` is
+        ``str(model)`` and ``error`` is ``""``; on any failure ``summary`` is
+        ``""`` and ``error`` is the exception text. Successful summaries are
+        stored in ``model_cache`` and reused on later calls.
+    """
+    if model_name in model_cache:
+        return model_cache[model_name], ""
+    try:
+        # Fetch config.json to learn which architecture the checkpoint uses.
+        config_url = f"https://huggingface.co/{model_name}/raw/main/config.json"
+        # Only send credentials when a token is configured; otherwise the
+        # header would literally read "Bearer None".
+        hf_token = os.getenv("HF_TOKEN")
+        headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}
+        # A timeout keeps a stalled connection from blocking the request forever.
+        response = requests.get(config_url, headers=headers, timeout=30)
+        response.raise_for_status()
+        config = response.json()
+        architecture = config["architectures"][0]
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        # A repository is treated as quantized purely by its name.
+        is_quantized = "quantized" in model_name.lower()
+        load_kwargs = {"trust_remote_code": True}
+        if is_quantized:
+            # BitsAndBytesConfig belongs in ``quantization_config``; the
+            # ``config`` argument is for the model's own configuration.
+            load_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)
+        # Architectures with a dedicated class; anything else falls back to
+        # AutoModelForCausalLM.
+        model_classes = {
+            "LlavaNextForConditionalGeneration": LlavaNextForConditionalGeneration,
+            "LlavaForConditionalGeneration": LlavaForConditionalGeneration,
+            "PaliGemmaForConditionalGeneration": PaliGemmaForConditionalGeneration,
+            "Idefics2ForConditionalGeneration": Idefics2ForConditionalGeneration,
+        }
+        model_class = model_classes.get(architecture, AutoModelForCausalLM)
+        model = model_class.from_pretrained(model_name, **load_kwargs)
+        # Move to device only if the model is not quantized
+        if not is_quantized:
+            model = model.to(device)
+        model_summary = str(model)
+        model_cache[model_name] = model_summary
+        return model_summary, ""
+    except Exception as e:
+        # UI boundary: report the failure as text instead of raising.
+        return "", str(e)