bnb-my-repo

Running on A10G

App Files Community

MekkCyber committed on Mar 17

Commit

00178b2

1 Parent(s): 7bf7dc3

final maybe

Browse files

Files changed (2) hide show

app.py +221 -54
app_claude.py +385 -457

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel, BitsAndBytesConfig
 import tempfile
 from huggingface_hub import HfApi
 from huggingface_hub import list_models
@@ -17,14 +17,17 @@ def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) ->
         return "Hello Please Login to HuggingFace to use the BitsAndBytes Quantizer!"
     return f"Hello {profile.name} ! Welcome to BitsAndBytes Quantizer"
-def check_model_exists(oauth_token: gr.OAuthToken | None, username, model_name, quantized_model_name):
     """Check if a model exists in the user's Hugging Face repository."""
     try:
         models = list_models(author=username, token=oauth_token.token)
         model_names = [model.id for model in models]
-        if quantized_model_name :
             repo_name = f"{username}/{quantized_model_name}"
-        else :
             repo_name = f"{username}/{model_name.split('/')[-1]}-bnb-4bit"
         if repo_name in model_names:
@@ -34,7 +37,10 @@ def check_model_exists(oauth_token: gr.OAuthToken | None, username, model_name,
     except Exception as e:
         return f"Error checking model existence: {str(e)}"
-def create_model_card(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4):
     model_card = f"""---
 base_model:
 - {model_name}
@@ -58,23 +64,31 @@ You can use this model in your applications by loading it directly from the Hugg
 from transformers import AutoModel
 model = AutoModel.from_pretrained("{model_name}")"""
     return model_card
-def load_model(model_name, quantization_config, auth_token) :
-    return AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token)
 DTYPE_MAPPING = {
     "int8": torch.int8,
     "uint8": torch.uint8,
     "float16": torch.float16,
-    "float32": torch.float32,
     "bfloat16": torch.bfloat16,
 }
-def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, auth_token=None):
-    print(f"Quantizing model: {quant_type_4}")
     quantization_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type=quant_type_4,
@@ -83,61 +97,114 @@ def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, qua
         bnb_4bit_compute_dtype=DTYPE_MAPPING[compute_type_4],
     )
-    model = AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token, torch_dtype=torch.bfloat16)
-    for _ , module in model.named_modules():
         if isinstance(module, Linear4bit):
             module.to("cuda")
             module.to("cpu")
     return model
-def save_model(model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, username=None, auth_token=None, quantized_model_name=None, public=False):
-    print("Saving quantized model")
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        model.save_pretrained(tmpdirname, safe_serialization=True, use_auth_token=auth_token.token)
-        if quantized_model_name :
             repo_name = f"{username}/{quantized_model_name}"
-        else :
             repo_name = f"{username}/{model_name.split('/')[-1]}-bnb-4bit"
-        model_card = create_model_card(repo_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4)
         with open(os.path.join(tmpdirname, "README.md"), "w") as f:
             f.write(model_card)
         # Push to Hub
         api = HfApi(token=auth_token.token)
         api.create_repo(repo_name, exist_ok=True, private=not public)
         api.upload_folder(
             folder_path=tmpdirname,
             repo_id=repo_name,
             repo_type="model",
         )
     # Get model architecture as string
     import io
     from contextlib import redirect_stdout
     import html
     # Capture the model architecture string
     f = io.StringIO()
     with redirect_stdout(f):
         print(model)
     model_architecture_str = f.getvalue()
     # Escape HTML characters and format with line breaks
-    model_architecture_str_html = html.escape(model_architecture_str).replace('\n', '<br/>')
     # Format it for display in markdown with proper styling
     model_architecture_info = f"""
     <div class="model-architecture" style="max-height: 500px; overflow-y: auto; overflow-x: auto; background-color: #f5f5f5; padding: 5px; border-radius: 8px; font-family: monospace; white-space: pre-wrap;">
         <div style="line-height: 1.2; font-size: 0.75em;">{model_architecture_str_html}</div>
     </div>
     """
     return f'🔗 Quantized Model <br/><h1> 🤗 DONE</h1><br/>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank" style="text-decoration:underline">{repo_name}</a><br/><br/>📊 Model Architecture<br/>{model_architecture_info}'
-def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public):
-    if oauth_token is None :
         return """
         <div class="error-box">
             <h3>❌ Authentication Error</h3>
@@ -150,9 +217,11 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
             <h3>❌ Authentication Error</h3>
             <p>Please sign in to your HuggingFace account to use the quantizer.</p>
         </div>
-        """
-    exists_message = check_model_exists(oauth_token, profile.username, model_name, quantized_model_name)
-    if exists_message :
         return f"""
         <div class="warning-box">
             <h3>⚠️ Model Already Exists</h3>
@@ -160,10 +229,35 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
         </div>
         """
     try:
-        quantized_model = quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, oauth_token)
-        return save_model(quantized_model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, profile.username, oauth_token, quantized_model_name, public)
-    except Exception as e :
-        error_message = str(e).replace('\n', '<br/>')
         return f"""
         <div class="error-box">
             <h3>❌ Error Occurred</h3>
@@ -172,7 +266,7 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
         """
-css="""/* Custom CSS to allow scrolling */
 .gradio-container {overflow-y: auto;}
 /* Fix alignment for radio buttons and checkboxes */
@@ -342,7 +436,40 @@ button[variant="primary"]:hover {
 #toggle-button:hover::after {
     left: 100%;
 }
 """
@@ -358,8 +485,8 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
     m1 = gr.Markdown()
     demo.load(hello, inputs=None, outputs=m1)
-    instructions_visible = gr.State(False)
     with gr.Row():
         with gr.Column():
@@ -370,7 +497,7 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
                     search_type="model",
                 )
             with gr.Row():
-                with gr.Column():
                     gr.Markdown(
                         """
                         ### ⚙️ Model Quantization Type Settings
@@ -381,21 +508,21 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
                         choices=["fp4", "nf4"],
                         value="nf4",
                         visible=True,
-                        show_label=False
                     )
                     compute_type_4 = gr.Dropdown(
                         info="The compute type for the model",
                         choices=["float16", "bfloat16", "float32"],
                         value="bfloat16",
                         visible=True,
-                        show_label=False
                     )
                     quant_storage_4 = gr.Dropdown(
                         info="The storage type for the model",
                         choices=["float16", "float32", "int8", "uint8", "bfloat16"],
                         value="uint8",
                         visible=True,
-                        show_label=False
                     )
                     gr.Markdown(
                         """
@@ -404,11 +531,11 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
                     )
                     with gr.Row(elem_classes="option-row"):
                         double_quant_4 = gr.Radio(
-                            ["True", "False"],
-                            info="Use Double Quant",
-                            visible=True,
                             value="True",
-                            show_label=False
                         )
                     gr.Markdown(
                         """
@@ -424,26 +551,66 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
                             elem_classes="model-name-textbox",
                             show_label=False,
                         )
                     with gr.Row():
                         public = gr.Checkbox(
                             label="🌐 Make model public",
                             info="If checked, the model will be publicly accessible",
                             value=True,
                             interactive=True,
-                            show_label=True
                         )
         with gr.Column():
-            quantize_button = gr.Button("🚀 Quantize and Push to the Hub", variant="primary")
-            output_link = gr.Markdown("🔗 Quantized Model", container=True, min_height=100)
     quantize_button.click(
         fn=quantize_and_save,
-        inputs=[model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public],
-        outputs=[output_link]
     )
 if __name__ == "__main__":
     demo.launch(share=True)

 import gradio as gr
 import torch
+from transformers import AutoModel, BitsAndBytesConfig
 import tempfile
 from huggingface_hub import HfApi
 from huggingface_hub import list_models
         return "Hello Please Login to HuggingFace to use the BitsAndBytes Quantizer!"
     return f"Hello {profile.name} ! Welcome to BitsAndBytes Quantizer"
+def check_model_exists(
+    oauth_token: gr.OAuthToken | None, username, model_name, quantized_model_name
+):
     """Check if a model exists in the user's Hugging Face repository."""
     try:
         models = list_models(author=username, token=oauth_token.token)
         model_names = [model.id for model in models]
+        if quantized_model_name:
             repo_name = f"{username}/{quantized_model_name}"
+        else:
             repo_name = f"{username}/{model_name.split('/')[-1]}-bnb-4bit"
         if repo_name in model_names:
     except Exception as e:
         return f"Error checking model existence: {str(e)}"
+def create_model_card(
+    model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4
+):
     model_card = f"""---
 base_model:
 - {model_name}
 from transformers import AutoModel
 model = AutoModel.from_pretrained("{model_name}")"""
     return model_card
 DTYPE_MAPPING = {
     "int8": torch.int8,
     "uint8": torch.uint8,
     "float16": torch.float16,
+    "float32": torch.float32,
     "bfloat16": torch.bfloat16,
 }
+def quantize_model(
+    model_name,
+    quant_type_4,
+    double_quant_4,
+    compute_type_4,
+    quant_storage_4,
+    auth_token=None,
+    progress=gr.Progress(),
+):
+    progress(0, desc="Loading model")
+    # Configure quantization
     quantization_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type=quant_type_4,
         bnb_4bit_compute_dtype=DTYPE_MAPPING[compute_type_4],
     )
+    # Load model
+    model = AutoModel.from_pretrained(
+        model_name,
+        quantization_config=quantization_config,
+        device_map="cpu",
+        use_auth_token=auth_token.token,
+        torch_dtype=torch.bfloat16,
+    )
+    progress(0.33, desc="Quantizing")
+    # Quantize model
+    modules = list(model.named_modules())
+    for idx, (_, module) in enumerate(modules):
         if isinstance(module, Linear4bit):
             module.to("cuda")
             module.to("cpu")
+        progress(0.33 + (0.33 * idx / len(modules)), desc="Quantizing")
+    progress(0.66, desc="Quantized successfully")
     return model
+def save_model(
+    model,
+    model_name,
+    quant_type_4,
+    double_quant_4,
+    compute_type_4,
+    quant_storage_4,
+    username=None,
+    auth_token=None,
+    quantized_model_name=None,
+    public=False,
+    progress=gr.Progress(),
+):
+    progress(0.67, desc="Preparing to push")
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        # Save model
+        model.save_pretrained(
+            tmpdirname, safe_serialization=True, use_auth_token=auth_token.token
+        )
+        progress(0.75, desc="Preparing to push")
+        # Prepare repo name and model card
+        if quantized_model_name:
             repo_name = f"{username}/{quantized_model_name}"
+        else:
             repo_name = f"{username}/{model_name.split('/')[-1]}-bnb-4bit"
+        model_card = create_model_card(
+            repo_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4
+        )
         with open(os.path.join(tmpdirname, "README.md"), "w") as f:
             f.write(model_card)
+        progress(0.80, desc="Model card created")
         # Push to Hub
         api = HfApi(token=auth_token.token)
         api.create_repo(repo_name, exist_ok=True, private=not public)
+        progress(0.85, desc="Pushing to Hub")
+        # Upload files
         api.upload_folder(
             folder_path=tmpdirname,
             repo_id=repo_name,
             repo_type="model",
         )
+        progress(1.00, desc="Model pushed to Hub")
     # Get model architecture as string
     import io
     from contextlib import redirect_stdout
     import html
     # Capture the model architecture string
     f = io.StringIO()
     with redirect_stdout(f):
         print(model)
     model_architecture_str = f.getvalue()
     # Escape HTML characters and format with line breaks
+    model_architecture_str_html = html.escape(model_architecture_str).replace(
+        "\n", "<br/>"
+    )
     # Format it for display in markdown with proper styling
     model_architecture_info = f"""
     <div class="model-architecture" style="max-height: 500px; overflow-y: auto; overflow-x: auto; background-color: #f5f5f5; padding: 5px; border-radius: 8px; font-family: monospace; white-space: pre-wrap;">
         <div style="line-height: 1.2; font-size: 0.75em;">{model_architecture_str_html}</div>
     </div>
     """
     return f'🔗 Quantized Model <br/><h1> 🤗 DONE</h1><br/>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank" style="text-decoration:underline">{repo_name}</a><br/><br/>📊 Model Architecture<br/>{model_architecture_info}'
+def quantize_and_save(
+    profile: gr.OAuthProfile | None,
+    oauth_token: gr.OAuthToken | None,
+    model_name,
+    quant_type_4,
+    double_quant_4,
+    compute_type_4,
+    quant_storage_4,
+    quantized_model_name,
+    public,
+    progress=gr.Progress(),
+):
+    if oauth_token is None:
         return """
         <div class="error-box">
             <h3>❌ Authentication Error</h3>
             <h3>❌ Authentication Error</h3>
             <p>Please sign in to your HuggingFace account to use the quantizer.</p>
         </div>
+        """
+    exists_message = check_model_exists(
+        oauth_token, profile.username, model_name, quantized_model_name
+    )
+    if exists_message:
         return f"""
         <div class="warning-box">
             <h3>⚠️ Model Already Exists</h3>
         </div>
         """
     try:
+        # Download phase
+        progress(0, desc="Starting quantization process")
+        quantized_model = quantize_model(
+            model_name,
+            quant_type_4,
+            double_quant_4,
+            compute_type_4,
+            quant_storage_4,
+            oauth_token,
+            progress,
+        )
+        final_message = save_model(
+            quantized_model,
+            model_name,
+            quant_type_4,
+            double_quant_4,
+            compute_type_4,
+            quant_storage_4,
+            profile.username,
+            oauth_token,
+            quantized_model_name,
+            public,
+            progress,
+        )
+        return final_message
+    except Exception as e:
+        error_message = str(e).replace("\n", "<br/>")
         return f"""
         <div class="error-box">
             <h3>❌ Error Occurred</h3>
         """
+css = """/* Custom CSS to allow scrolling */
 .gradio-container {overflow-y: auto;}
 /* Fix alignment for radio buttons and checkboxes */
 #toggle-button:hover::after {
     left: 100%;
 }
+/* Progress Bar Styles */
+.progress-container {
+    font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+    padding: 20px;
+    background: white;
+    border-radius: 12px;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+}
+.progress-stage {
+    font-size: 0.9rem;
+    font-weight: 600;
+    color: #64748b;
+}
+.progress-stage .stage {
+    position: relative;
+    padding: 8px 12px;
+    border-radius: 6px;
+    background: #f1f5f9;
+    transition: all 0.3s ease;
+}
+.progress-stage .stage.completed {
+    background: #ecfdf5;
+}
+.progress-bar {
+    box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1);
+}
+.progress {
+    transition: width 0.8s cubic-bezier(0.4, 0, 0.2, 1);
+    box-shadow: 0 2px 4px rgba(59, 130, 246, 0.3);
+}
 """
     m1 = gr.Markdown()
     demo.load(hello, inputs=None, outputs=m1)
+    instructions_visible = gr.State(False)
     with gr.Row():
         with gr.Column():
                     search_type="model",
                 )
             with gr.Row():
+                with gr.Column():
                     gr.Markdown(
                         """
                         ### ⚙️ Model Quantization Type Settings
                         choices=["fp4", "nf4"],
                         value="nf4",
                         visible=True,
+                        show_label=False,
                     )
                     compute_type_4 = gr.Dropdown(
                         info="The compute type for the model",
                         choices=["float16", "bfloat16", "float32"],
                         value="bfloat16",
                         visible=True,
+                        show_label=False,
                     )
                     quant_storage_4 = gr.Dropdown(
                         info="The storage type for the model",
                         choices=["float16", "float32", "int8", "uint8", "bfloat16"],
                         value="uint8",
                         visible=True,
+                        show_label=False,
                     )
                     gr.Markdown(
                         """
                     )
                     with gr.Row(elem_classes="option-row"):
                         double_quant_4 = gr.Radio(
+                            ["True", "False"],
+                            info="Use Double Quant",
+                            visible=True,
                             value="True",
+                            show_label=False,
                         )
                     gr.Markdown(
                         """
                             elem_classes="model-name-textbox",
                             show_label=False,
                         )
                     with gr.Row():
                         public = gr.Checkbox(
                             label="🌐 Make model public",
                             info="If checked, the model will be publicly accessible",
                             value=True,
                             interactive=True,
+                            show_label=True,
                         )
         with gr.Column():
+            quantize_button = gr.Button(
+                "🚀 Quantize and Push to the Hub", variant="primary"
+            )
+            output_link = gr.Markdown(
+                "🔗 Quantized Model", container=True, min_height=100
+            )
     quantize_button.click(
         fn=quantize_and_save,
+        inputs=[
+            model_name,
+            quant_type_4,
+            double_quant_4,
+            compute_type_4,
+            quant_storage_4,
+            quantized_model_name,
+            public,
+        ],
+        outputs=[output_link],
+        show_progress="full",
     )
+    # Add information section about the app options
+    with gr.Accordion("📚 About this app", open=True):
+        gr.Markdown(
+            """
+            ## 📝 Notes on Quantization Options
+            ### Quantization Type (bnb_4bit_quant_type)
+            - **fp4**: Floating-point 4-bit quantization.
+            - **nf4**: Normal float 4-bit quantization.
+            ### Double Quantization
+            - **True**: Applies a second round of quantization to the quantization constants, further reducing memory usage.
+            - **False**: Uses standard quantization only.
+            ### Model Saving Options
+            - **Model Name**: Custom name for your quantized model on the Hub. If left empty, a default name will be generated.
+            - **Make model public**: If checked, anyone can access your quantized model. If unchecked, only you can access it.
+            ## 🔍 How It Works
+            This app uses the BitsAndBytes library to perform 4-bit quantization on Transformer models. The process:
+            1. Downloads the original model
+            2. Applies the selected quantization settings
+            3. Uploads the quantized model to your HuggingFace account
+            ## 📊 Memory Usage
+            4-bit quantization can reduce model size by up to 75% compared to FP16, allowing you to run larger models on consumer hardware.
+            """
+        )
 if __name__ == "__main__":
     demo.launch(share=True)

app_claude.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel, BitsAndBytesConfig
 import tempfile
 from huggingface_hub import HfApi
 from huggingface_hub import list_models
@@ -8,12 +8,14 @@ from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from bitsandbytes.nn import Linear4bit
 from packaging import version
 import os
 def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
     if profile is None:
-        return "👋 Hello! Sign in to get started with the BitsAndBytes Quantizer."
-    return f"👋 Hello {profile.name}! Welcome to the BitsAndBytes Quantizer."
 def check_model_exists(oauth_token: gr.OAuthToken | None, username, model_name, quantized_model_name):
     """Check if a model exists in the user's Hugging Face repository."""
@@ -23,7 +25,7 @@ def check_model_exists(oauth_token: gr.OAuthToken | None, username, model_name,
         if quantized_model_name :
             repo_name = f"{username}/{quantized_model_name}"
         else :
-            repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-INT4"
         if repo_name in model_names:
             return f"Model '{repo_name}' already exists in your repository."
@@ -59,9 +61,6 @@ model = AutoModel.from_pretrained("{model_name}")"""
     return model_card
-def load_model(model_name, quantization_config, auth_token) :
-    return AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token)
 DTYPE_MAPPING = {
     "int8": torch.int8,
     "uint8": torch.uint8,
@@ -71,7 +70,9 @@ DTYPE_MAPPING = {
 }
-def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, auth_token=None):
     print(f"Quantizing model: {quant_type_4}")
     quantization_config = BitsAndBytesConfig(
         load_in_4bit=True,
@@ -80,9 +81,9 @@ def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, qua
         bnb_4bit_quant_storage=DTYPE_MAPPING[quant_storage_4],
         bnb_4bit_compute_dtype=DTYPE_MAPPING[compute_type_4],
     )
-    model = AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token)
-    for _ , module in model.named_modules():
         if isinstance(module, Linear4bit):
             module.to("cuda")
             module.to("cpu")
@@ -91,12 +92,14 @@ def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, qua
 def save_model(model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, username=None, auth_token=None, quantized_model_name=None, public=False):
     print("Saving quantized model")
     with tempfile.TemporaryDirectory() as tmpdirname:
         model.save_pretrained(tmpdirname, safe_serialization=True, use_auth_token=auth_token.token)
         if quantized_model_name :
             repo_name = f"{username}/{quantized_model_name}"
         else :
-            repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-INT4"
         model_card = create_model_card(repo_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4)
         with open(os.path.join(tmpdirname, "README.md"), "w") as f:
             f.write(model_card)
@@ -108,15 +111,27 @@ def save_model(model, model_name, quant_type_4, double_quant_4, compute_type_4,
             repo_id=repo_name,
             repo_type="model",
         )
-    return f"""
-    <div class="success-box">
-        <h2>🎉 Quantization Complete!</h2>
-        <p>Your quantized model is now available at:</p>
-        <a href="https://huggingface.co/{repo_name}" target="_blank" class="model-link">
-            huggingface.co/{repo_name}
-        </a>
     </div>
     """
 def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public):
     if oauth_token is None :
@@ -132,7 +147,7 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
             <h3>❌ Authentication Error</h3>
             <p>Please sign in to your HuggingFace account to use the quantizer.</p>
         </div>
-        """
     exists_message = check_model_exists(oauth_token, profile.username, model_name, quantized_model_name)
     if exists_message :
         return f"""
@@ -142,537 +157,450 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
         </div>
         """
     try:
         quantized_model = quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, oauth_token)
-        return save_model(quantized_model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, profile.username, oauth_token, quantized_model_name, public)
     except Exception as e :
-        print(e)
         return f"""
         <div class="error-box">
             <h3>❌ Error Occurred</h3>
-            <p>{str(e)}</p>
         </div>
         """
-css = """
-:root {
-    --primary: #6366f1;
-    --primary-light: #818cf8;
-    --primary-dark: #4f46e5;
-    --secondary: #10b981;
-    --accent: #f97316;
-    --background: #f8fafc;
-    --text: #1e293b;
-    --card-bg: #ffffff;
-    --input-bg: #f1f5f9;
-    --error: #ef4444;
-    --warning: #f59e0b;
-    --success: #10b981;
-    --border-radius: 12px;
-    --shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
-    --transition: all 0.3s ease;
-}
-body, .gradio-container {
-    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', sans-serif;
-    color: var(--text);
-    background-color: var(--background);
-}
-h1 {
-    font-size: 2.5rem !important;
-    font-weight: 800 !important;
-    text-align: center;
-    background: linear-gradient(45deg, var(--primary), var(--accent));
-    -webkit-background-clip: text;
-    background-clip: text;
-    color: transparent !important;
-    margin-bottom: 1rem !important;
-    padding: 1rem 0 !important;
-}
-h2 {
-    font-size: 1.75rem !important;
-    font-weight: 700 !important;
-    color: var(--primary-dark) !important;
-    margin-top: 1.5rem !important;
-    margin-bottom: 1rem !important;
-}
-h3 {
-    font-size: 1.25rem !important;
-    font-weight: 600 !important;
-    color: var(--primary) !important;
-    margin-top: 1rem !important;
-    margin-bottom: 0.5rem !important;
-    border-bottom: 2px solid var(--primary-light);
-    padding-bottom: 0.5rem;
-    width: fit-content;
 }
-/* Main container styling */
-.main-container {
-    max-width: 1200px;
-    margin: 0 auto;
-    padding: 2rem;
-    background-color: var(--card-bg);
-    border-radius: var(--border-radius);
-    box-shadow: var(--shadow);
 }
-/* Button styling */
-button {
-    border-radius: var(--border-radius) !important;
-    font-weight: 600 !important;
-    transition: var(--transition) !important;
-    text-transform: uppercase;
-    letter-spacing: 0.5px;
 }
-button.primary {
-    background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important;
-    border: none !important;
-    color: white !important;
-    padding: 12px 24px !important;
-    box-shadow: 0 4px 6px -1px rgba(99, 102, 241, 0.4) !important;
 }
-button.primary:hover {
-    transform: translateY(-2px) !important;
-    box-shadow: 0 8px 15px -3px rgba(99, 102, 241, 0.5) !important;
 }
-/* Login button styling */
-#login-button {
-    margin: 1.5rem auto !important;
-    min-width: 200px !important;
-    background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important;
-    color: white !important;
-    font-weight: 600 !important;
-    padding: 12px 24px !important;
-    border-radius: var(--border-radius) !important;
-    border: none !important;
-    box-shadow: 0 4px 6px -1px rgba(99, 102, 241, 0.4) !important;
-    transition: var(--transition) !important;
 }
-#login-button:hover {
-    transform: translateY(-2px) !important;
-    box-shadow: 0 8px 15px -3px rgba(99, 102, 241, 0.5) !important;
 }
-/* Toggle button styling */
-#toggle-button {
-    background: transparent !important;
-    color: var(--primary) !important;
-    border: 2px solid var(--primary-light) !important;
-    padding: 8px 16px !important;
-    margin: 1rem 0 !important;
-    border-radius: var(--border-radius) !important;
-    transition: var(--transition) !important;
-    font-weight: 600 !important;
 }
-#toggle-button:hover {
-    background-color: var(--primary-light) !important;
     color: white !important;
 }
-/* Input fields styling */
-input, select, textarea {
-    border-radius: var(--border-radius) !important;
-    border: 2px solid var(--input-bg) !important;
-    padding: 10px 16px !important;
-    background-color: var(--input-bg) !important;
-    transition: var(--transition) !important;
-}
-input:focus, select:focus, textarea:focus {
-    border-color: var(--primary-light) !important;
-    box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.2) !important;
-}
-/* Dropdown styling with nice hover effects */
-.gradio-dropdown > div {
-    border-radius: var(--border-radius) !important;
-    border: 2px solid var(--input-bg) !important;
-    overflow: hidden !important;
-    transition: var(--transition) !important;
-}
-.gradio-dropdown > div:hover {
-    border-color: var(--primary-light) !important;
-}
-/* Radio and checkbox styling */
-.gradio-radio, .gradio-checkbox {
-    background-color: var(--card-bg) !important;
-    border-radius: var(--border-radius) !important;
-    padding: 12px !important;
-    margin-bottom: 16px !important;
-    transition: var(--transition) !important;
-    border: 2px solid var(--input-bg) !important;
 }
-.gradio-radio:hover, .gradio-checkbox:hover {
-    border-color: var(--primary-light) !important;
 }
-.gradio-radio input[type="radio"] + label {
-    padding: 8px 12px !important;
-    border-radius: 20px !important;
-    margin-right: 8px !important;
-    background-color: var(--input-bg) !important;
-    transition: var(--transition) !important;
 }
-.gradio-radio input[type="radio"]:checked + label {
-    background-color: var(--primary) !important;
     color: white !important;
 }
-/* Custom spacing and layout */
-.gradio-row {
-    margin-bottom: 24px !important;
-}
-.option-row {
-    display: flex !important;
-    gap: 16px !important;
-    margin-bottom: 16px !important;
-}
-/* Card-like sections */
-.card-section {
-    background-color: var(--card-bg) !important;
-    border-radius: var(--border-radius) !important;
-    padding: 20px !important;
-    margin-bottom: 24px !important;
-    box-shadow: var(--shadow) !important;
-    border: 1px solid rgba(0, 0, 0, 0.05) !important;
-}
-/* Search box styling */
-.search-box input {
-    border-radius: var(--border-radius) !important;
-    border: 2px solid var(--input-bg) !important;
-    padding: 12px 20px !important;
-    box-shadow: var(--shadow) !important;
-    transition: var(--transition) !important;
-}
-.search-box input:focus {
-    border-color: var(--primary) !important;
-    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.3) !important;
-}
-/* Model name textbox specific styling */
-.model-name-textbox {
-    border: 2px solid var(--input-bg) !important;
-    border-radius: var(--border-radius) !important;
-    transition: var(--transition) !important;
-}
-.model-name-textbox:focus-within {
-    border-color: var(--primary) !important;
-    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.3) !important;
-}
-/* Success, warning and error boxes */
-.success-box, .warning-box, .error-box {
-    border-radius: var(--border-radius) !important;
-    padding: 20px !important;
-    margin: 20px 0 !important;
-    box-shadow: var(--shadow) !important;
-    animation: fadeIn 0.5s ease-in-out;
 }
-.success-box {
-    background-color: rgba(16, 185, 129, 0.1) !important;
-    border: 2px solid var(--success) !important;
 }
-.warning-box {
-    background-color: rgba(245, 158, 11, 0.1) !important;
-    border: 2px solid var(--warning) !important;
 }
-.error-box {
-    background-color: rgba(239, 68, 68, 0.1) !important;
-    border: 2px solid var(--error) !important;
 }
-/* Model link styling */
-.model-link {
-    display: inline-block !important;
-    background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important;
     color: white !important;
-    text-decoration: none !important;
-    padding: 12px 24px !important;
-    border-radius: var(--border-radius) !important;
     font-weight: 600 !important;
-    margin-top: 16px !important;
-    box-shadow: 0 4px 6px -1px rgba(99, 102, 241, 0.4) !important;
-    transition: var(--transition) !important;
 }
-.model-link:hover {
     transform: translateY(-2px) !important;
-    box-shadow: 0 8px 15px -3px rgba(99, 102, 241, 0.5) !important;
-}
-/* Instructions section */
-.instructions-container {
-    background-color: rgba(99, 102, 241, 0.05) !important;
-    border-left: 4px solid var(--primary) !important;
-    padding: 16px !important;
-    margin: 24px 0 !important;
-    border-radius: 0 var(--border-radius) var(--border-radius) 0 !important;
 }
-/* Animations */
-@keyframes fadeIn {
-    from { opacity: 0; transform: translateY(10px); }
-    to { opacity: 1; transform: translateY(0); }
-}
-/* Responsive adjustments */
-@media (max-width: 768px) {
-    .option-row {
-        flex-direction: column !important;
-    }
-}
-/* Add a nice gradient splash to the app */
-.gradio-container::before {
     content: "";
     position: absolute;
     top: 0;
-    left: 0;
-    right: 0;
-    height: 10px;
-    background: linear-gradient(90deg, var(--primary), var(--accent));
-    z-index: 100;
-}
-/* Stylish header */
-.app-header {
-    display: flex;
-    flex-direction: column;
-    align-items: center;
-    margin-bottom: 2rem;
-    position: relative;
 }
-.app-header::after {
-    content: "";
-    position: absolute;
-    bottom: -10px;
-    left: 50%;
-    transform: translateX(-50%);
-    width: 80px;
-    height: 4px;
-    background: linear-gradient(90deg, var(--primary), var(--accent));
-    border-radius: 2px;
 }
-/* Section headers */
-.section-header {
-    display: flex;
-    align-items: center;
-    margin-bottom: 1rem;
 }
-.section-header::before {
-    content: "⚙️";
-    margin-right: 8px;
-    font-size: 1.25rem;
 }
-/* Quantize button special styling */
-#quantize-button {
-    background: linear-gradient(135deg, var(--primary), var(--accent)) !important;
-    color: white !important;
-    padding: 16px 32px !important;
-    font-size: 1.1rem !important;
-    font-weight: 700 !important;
-    border: none !important;
-    border-radius: var(--border-radius) !important;
-    box-shadow: 0 4px 15px -3px rgba(99, 102, 241, 0.5) !important;
-    transition: all 0.3s cubic-bezier(0.25, 0.8, 0.25, 1) !important;
     position: relative;
-    overflow: hidden;
 }
-#quantize-button:hover {
-    transform: translateY(-3px) !important;
-    box-shadow: 0 7px 20px -2px rgba(99, 102, 241, 0.6) !important;
 }
-#quantize-button::after {
-    content: "";
-    position: absolute;
-    top: 0;
-    left: 0;
-    width: 100%;
-    height: 100%;
-    background: linear-gradient(rgba(255, 255, 255, 0.2), rgba(255, 255, 255, 0));
-    transform: translateY(-100%);
-    transition: transform 0.6s cubic-bezier(0.25, 0.8, 0.25, 1);
 }
-#quantize-button:hover::after {
-    transform: translateY(0);
 }
 """
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald"), css=css) as demo:
-    with gr.Column(elem_classes="main-container"):
-        with gr.Row(elem_classes="app-header"):
-            gr.Markdown(
-                """
-                <h1 style="text-align: center; margin-bottom: 1rem; font-size: 1.2rem; color: #4b5563;"> 🤗 BitsAndBytes Model Quantizer</h1>
-                <div style="text-align: center; margin-bottom: 1rem; font-size: 1.2rem; color: #4b5563;">
-                 Welcome to the BitsAndBytes Model Quantizer!
-                </div>
-                """
-            )
-        gr.LoginButton(elem_id="login-button", elem_classes="login-button")
-        welcome_msg = gr.Markdown(elem_classes="welcome-message")
-        demo.load(hello, inputs=None, outputs=welcome_msg)
-        instructions = gr.Markdown(
-            """
-            <div class="instructions-container">
-                <h3>📋 Instructions</h3>
-                <ol>
-                    <li>Login to your HuggingFace account</li>
-                    <li>Enter the name of the Hugging Face LLM model you want to quantize</li>
-                    <li>Configure quantization settings based on your needs</li>
-                    <li>Optionally, specify a custom name for the quantized model</li>
-                    <li>Click "Quantize Model" to start the process</li>
-                </ol>
-                <p><strong>Note:</strong> Processing time depends on model size and your hardware. Check container logs for progress!</p>
-            </div>
-            """,
-            visible=False
         )
-        instructions_visible = gr.State(False)
-        toggle_button = gr.Button("▼ Show Instructions", elem_id="toggle-button", elem_classes="toggle-button")
-        def toggle_instructions(instructions_visible):
-            new_visibility = not instructions_visible
-            new_label = "▲ Hide Instructions" if new_visibility else "▼ Show Instructions"
-            return gr.update(visible=new_visibility), new_visibility, gr.update(value=new_label)
-        toggle_button.click(toggle_instructions, instructions_visible, [instructions, instructions_visible, toggle_button])
-        with gr.Row(elem_classes="app-content"):
-            with gr.Column(scale=1, elem_classes="card-section"):
-                with gr.Row(elem_classes="search-section"):
-                    model_name = HuggingfaceHubSearch(
-                        label="🔍 Select Model",
-                        placeholder="  Search for model on Huggingface Hub...",
-                        search_type="model",
-                        elem_classes="search-box"
                     )
-                with gr.Row(elem_classes="section-header"):
-                    gr.Markdown("### Quantization Settings")
-                with gr.Column(elem_classes="settings-group"):
-                    gr.Markdown("**Quantization Type**", elem_classes="setting-label")
                     quant_type_4 = gr.Dropdown(
                         choices=["fp4", "nf4"],
-                        value="fp4",
-                        label="Format",
-                        info="The quantization data type in bnb.nn.Linear4Bit layers",
                         show_label=False
                     )
-                    gr.Markdown("**Compute Settings**", elem_classes="setting-label")
                     compute_type_4 = gr.Dropdown(
                         choices=["float16", "bfloat16", "float32"],
-                        value="float32",
-                        label="Compute Type",
-                        info="The compute dtype for matrix multiplication"
                     )
                     quant_storage_4 = gr.Dropdown(
                         choices=["float16", "float32", "int8", "uint8", "bfloat16"],
                         value="uint8",
-                        label="Storage Type",
-                        info="The storage type for quantized weights"
                     )
-                    gr.Markdown("**Double Quantization**", elem_classes="setting-label")
-                    double_quant_4 = gr.Radio(
-                        ["False", "True"],
-                        label="Use Double Quantization",
-                        info="Further compress model size with nested quantization",
-                        value="False",
                     )
-                with gr.Row(elem_classes="section-header"):
-                    gr.Markdown("### Output Settings")
-                with gr.Column(elem_classes="settings-group"):
-                    quantized_model_name = gr.Textbox(
-                        label="Custom Model Name (Optional)",
-                        info="Leave blank to use default naming convention",
-                        placeholder="my-quantized-model",
-                        elem_classes="model-name-textbox"
                     )
-                    public = gr.Checkbox(
-                        label="Make model public",
-                        info="If checked, your model will be publicly accessible on Hugging Face Hub",
-                        value=False,
-                    )
-            with gr.Column(scale=1, elem_classes="card-section"):
-                with gr.Row():
-                    gr.Markdown("""
-                    ### 📊 Quantization Benefits
-                    <div style="background-color: rgba(99, 102, 241, 0.05); padding: 12px; border-radius: 8px; margin-bottom: 16px;">
-                        <p><strong>⚡ Lower Memory Usage:</strong> Reduce model size by up to 75%</p>
-                        <p><strong>🚀 Faster Inference:</strong> Achieve better performance on resource-constrained hardware</p>
-                        <p><strong>💻 Wider Compatibility:</strong> Run models on devices with limited VRAM</p>
-                    </div>
-                    ### 🔧 Configuration Guide
-                    <div style="background-color: rgba(16, 185, 129, 0.05); padding: 12px; border-radius: 8px;">
-                        <p><strong>Quantization Type:</strong></p>
-                        <ul>
-                            <li><code>fp4</code> - 4-bit floating point (better for most cases)</li>
-                            <li><code>nf4</code> - normalized float format (better for specific models)</li>
-                        </ul>
-                        <p><strong>Double Quantization:</strong> Enable for additional compression with minimal quality loss</p>
-                    </div>
-                    """)
-                with gr.Row():
-                    quantize_button = gr.Button("🚀 Quantize Model", variant="primary", elem_id="quantize-button")
-                output_link = gr.HTML(label="Results", elem_classes="results-container")
-        # Add interactive footer with links
-        gr.Markdown("""
-        <div style="margin-top: 2rem; text-align: center; padding: 1rem; border-top: 1px solid rgba(99, 102, 241, 0.2);">
-            <p>Powered by <a href="https://huggingface.co/" target="_blank" style="color: var(--primary); text-decoration: none; font-weight: 600;">Hugging Face</a> and <a href="https://github.com/TimDettmers/bitsandbytes" target="_blank" style="color: var(--primary); text-decoration: none; font-weight: 600;">BitsAndBytes</a></p>
-        </div>
-        """)
     quantize_button.click(
         fn=quantize_and_save,
         inputs=[model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public],
-        outputs=[output_link]
     )
 if __name__ == "__main__":
-    demo.launch(share=True)

 import gradio as gr
 import torch
+from transformers import AutoModel, BitsAndBytesConfig
 import tempfile
 from huggingface_hub import HfApi
 from huggingface_hub import list_models
 from bitsandbytes.nn import Linear4bit
 from packaging import version
 import os
+from tqdm import tqdm
 def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
+    # ^ expect a gr.OAuthProfile object as input to get the user's profile
+    # if the user is not logged in, profile will be None
     if profile is None:
+        return "Hello Please Login to HuggingFace to use the BitsAndBytes Quantizer!"
+    return f"Hello {profile.name} ! Welcome to BitsAndBytes Quantizer"
 def check_model_exists(oauth_token: gr.OAuthToken | None, username, model_name, quantized_model_name):
     """Check if a model exists in the user's Hugging Face repository."""
         if quantized_model_name :
             repo_name = f"{username}/{quantized_model_name}"
         else :
+            repo_name = f"{username}/{model_name.split('/')[-1]}-bnb-4bit"
         if repo_name in model_names:
             return f"Model '{repo_name}' already exists in your repository."
     return model_card
 DTYPE_MAPPING = {
     "int8": torch.int8,
     "uint8": torch.uint8,
 }
+def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, auth_token=None, progress=gr.Progress()):
+    progress(0, desc="Starting")
     print(f"Quantizing model: {quant_type_4}")
     quantization_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_storage=DTYPE_MAPPING[quant_storage_4],
         bnb_4bit_compute_dtype=DTYPE_MAPPING[compute_type_4],
     )
+    model = AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token, torch_dtype=torch.bfloat16)
+    for _ , module in progress.tqdm(model.named_modules(), desc="Quantizing model", total=len(list(model.named_modules())), unit="layers"):
         if isinstance(module, Linear4bit):
             module.to("cuda")
             module.to("cpu")
 def save_model(model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, username=None, auth_token=None, quantized_model_name=None, public=False):
     print("Saving quantized model")
     with tempfile.TemporaryDirectory() as tmpdirname:
         model.save_pretrained(tmpdirname, safe_serialization=True, use_auth_token=auth_token.token)
         if quantized_model_name :
             repo_name = f"{username}/{quantized_model_name}"
         else :
+            repo_name = f"{username}/{model_name.split('/')[-1]}-bnb-4bit"
         model_card = create_model_card(repo_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4)
         with open(os.path.join(tmpdirname, "README.md"), "w") as f:
             f.write(model_card)
             repo_id=repo_name,
             repo_type="model",
         )
+    # Get model architecture as string
+    import io
+    from contextlib import redirect_stdout
+    import html
+    # Capture the model architecture string
+    f = io.StringIO()
+    with redirect_stdout(f):
+        print(model)
+    model_architecture_str = f.getvalue()
+    # Escape HTML characters and format with line breaks
+    model_architecture_str_html = html.escape(model_architecture_str).replace('\n', '<br/>')
+    # Format it for display in markdown with proper styling
+    model_architecture_info = f"""
+    <div class="model-architecture" style="max-height: 500px; overflow-y: auto; overflow-x: auto; background-color: #f5f5f5; padding: 5px; border-radius: 8px; font-family: monospace; white-space: pre-wrap;">
+        <div style="line-height: 1.2; font-size: 0.75em;">{model_architecture_str_html}</div>
     </div>
     """
+    return f'🔗 Quantized Model <br/><h1> 🤗 DONE</h1><br/>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank" style="text-decoration:underline">{repo_name}</a><br/><br/>📊 Model Architecture<br/>{model_architecture_info}'
 def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public):
     if oauth_token is None :
             <h3>❌ Authentication Error</h3>
             <p>Please sign in to your HuggingFace account to use the quantizer.</p>
         </div>
+        """
     exists_message = check_model_exists(oauth_token, profile.username, model_name, quantized_model_name)
     if exists_message :
         return f"""
         </div>
         """
     try:
+        # Download phase
         quantized_model = quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, oauth_token)
+        final_message = save_model(quantized_model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, profile.username, oauth_token, quantized_model_name, public)
+        return final_message
     except Exception as e :
+        error_message = str(e).replace('\n', '<br/>')
         return f"""
         <div class="error-box">
             <h3>❌ Error Occurred</h3>
+            <p>{error_message}</p>
         </div>
         """
+css="""/* Custom CSS to allow scrolling */
+.gradio-container {overflow-y: auto;}
+/* Fix alignment for radio buttons and checkboxes */
+.gradio-radio {
+    display: flex !important;
+    align-items: center !important;
+    margin: 10px 0 !important;
 }
+.gradio-checkbox {
+    display: flex !important;
+    align-items: center !important;
+    margin: 10px 0 !important;
 }
+/* Ensure consistent spacing and alignment */
+.gradio-dropdown, .gradio-textbox, .gradio-radio, .gradio-checkbox {
+    margin-bottom: 12px !important;
+    width: 100% !important;
 }
+/* Align radio buttons and checkboxes horizontally */
+.option-row {
+    display: flex !important;
+    justify-content: space-between !important;
+    align-items: center !important;
+    gap: 20px !important;
+    margin-bottom: 12px !important;
 }
+.option-row .gradio-radio, .option-row .gradio-checkbox {
+    margin: 0 !important;
+    flex: 1 !important;
 }
+/* Horizontally align radio button options with text */
+.gradio-radio label {
+    display: flex !important;
+    align-items: center !important;
 }
+.gradio-radio input[type="radio"] {
+    margin-right: 5px !important;
 }
+/* Remove padding and margin from model name textbox for better alignment */
+.model-name-textbox {
+    padding-left: 0 !important;
+    padding-right: 0 !important;
+    margin-left: 0 !important;
+    margin-right: 0 !important;
 }
+/* Quantize button styling with glow effect */
+button[variant="primary"] {
+    background: linear-gradient(135deg, #3B82F6, #10B981) !important;
     color: white !important;
+    padding: 16px 32px !important;
+    font-size: 1.1rem !important;
+    font-weight: 700 !important;
+    border: none !important;
+    border-radius: 12px !important;
+    box-shadow: 0 0 15px rgba(59, 130, 246, 0.5) !important;
+    transition: all 0.3s cubic-bezier(0.25, 0.8, 0.25, 1) !important;
+    position: relative;
+    overflow: hidden;
+    animation: glow 1.5s ease-in-out infinite alternate;
 }
+button[variant="primary"]::before {
+    content: "✨ ";
 }
+button[variant="primary"]:hover {
+    transform: translateY(-5px) scale(1.05) !important;
+    box-shadow: 0 10px 25px rgba(59, 130, 246, 0.7) !important;
 }
+@keyframes glow {
+    from {
+        box-shadow: 0 0 10px rgba(59, 130, 246, 0.5);
+    }
+    to {
+        box-shadow: 0 0 20px rgba(59, 130, 246, 0.8), 0 0 30px rgba(16, 185, 129, 0.5);
+    }
 }
+/* Login button styling with glow effect */
+#login-button {
+    background: linear-gradient(135deg, #3B82F6, #10B981) !important;
     color: white !important;
+    font-weight: 700 !important;
+    border: none !important;
+    border-radius: 12px !important;
+    box-shadow: 0 0 15px rgba(59, 130, 246, 0.5) !important;
+    transition: all 0.3s cubic-bezier(0.25, 0.8, 0.25, 1) !important;
+    position: relative;
+    overflow: hidden;
+    animation: glow 1.5s ease-in-out infinite alternate;
+    max-width: 300px !important;
+    margin: 0 auto !important;
 }
+#login-button::before {
+    content: "🔑 ";
+    display: inline-block !important;
+    vertical-align: middle !important;
+    margin-right: 5px !important;
+    line-height: normal !important;
 }
+#login-button:hover {
+    transform: translateY(-3px) scale(1.03) !important;
+    box-shadow: 0 10px 25px rgba(59, 130, 246, 0.7) !important;
 }
+#login-button::after {
+    content: "";
+    position: absolute;
+    top: 0;
+    left: -100%;
+    width: 100%;
+    height: 100%;
+    background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
+    transition: 0.5s;
 }
+#login-button:hover::after {
+    left: 100%;
 }
+/* Toggle instructions button styling */
+#toggle-button {
+    background: linear-gradient(135deg, #3B82F6, #10B981) !important;
     color: white !important;
+    font-size: 0.85rem !important;
     font-weight: 600 !important;
+    padding: 8px 16px !important;
+    border: none !important;
+    border-radius: 8px !important;
+    box-shadow: 0 2px 10px rgba(59, 130, 246, 0.3) !important;
+    transition: all 0.3s ease !important;
+    margin: 0.5rem auto 1.5rem auto !important;
+    display: block !important;
+    max-width: 200px !important;
+    text-align: center !important;
+    position: relative;
+    overflow: hidden;
 }
+#toggle-button:hover {
     transform: translateY(-2px) !important;
+    box-shadow: 0 4px 12px rgba(59, 130, 246, 0.5) !important;
 }
+#toggle-button::after {
     content: "";
     position: absolute;
     top: 0;
+    left: -100%;
+    width: 100%;
+    height: 100%;
+    background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
+    transition: 0.5s;
 }
+#toggle-button:hover::after {
+    left: 100%;
 }
+/* Progress Bar Styles */
+.progress-container {
+    font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+    padding: 20px;
+    background: white;
+    border-radius: 12px;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
 }
+.progress-stage {
+    font-size: 0.9rem;
+    font-weight: 600;
+    color: #64748b;
 }
+.progress-stage .stage {
     position: relative;
+    padding: 8px 12px;
+    border-radius: 6px;
+    background: #f1f5f9;
+    transition: all 0.3s ease;
 }
+.progress-stage .stage.completed {
+    background: #ecfdf5;
 }
+.progress-bar {
+    box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1);
 }
+.progress {
+    transition: width 0.8s cubic-bezier(0.4, 0, 0.2, 1);
+    box-shadow: 0 2px 4px rgba(59, 130, 246, 0.3);
 }
 """
+def quantize_model_with_progress(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, auth_token, progress=gr.Progress()):
+    """Quantize model with progress updates."""
+    progress(0, desc="Loading model")
+    # Configure quantization
+    quantization_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_quant_type=quant_type_4,
+        bnb_4bit_use_double_quant=True if double_quant_4 == "True" else False,
+        bnb_4bit_quant_storage=DTYPE_MAPPING[quant_storage_4],
+        bnb_4bit_compute_dtype=DTYPE_MAPPING[compute_type_4],
+    )
+    # Load model
+    model = AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token, torch_dtype=torch.bfloat16)
+    progress(0.33, desc="Quantizing")
+    # Quantize model
+    modules = list(model.named_modules())
+    for idx, (_, module) in enumerate(modules):
+        if isinstance(module, Linear4bit):
+            module.to("cuda")
+            module.to("cpu")
+        progress(0.33 + (0.33 * idx / len(modules)), desc="Quantizing")
+    progress(0.66, desc="Quantized successfully")
+    return model
def save_model_with_progress(model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, username=None, auth_token=None, quantized_model_name=None, public=False, progress=gr.Progress()):
    """Save the quantized model, push it to the Hub and build the result HTML.

    Args:
        model: Quantized model; must provide ``save_pretrained``.
        model_name: Hub id of the source model, used to derive the default
            repository name.
        quant_type_4, double_quant_4, compute_type_4, quant_storage_4:
            Quantization settings forwarded unchanged to ``create_model_card``.
        username: Hub namespace the repository is created under (required).
        auth_token: Object exposing the access token as ``.token`` (required).
        quantized_model_name: Optional repository name overriding the default
            ``<model>-bnb-4bit``.
        public: When true the repository is created public, otherwise private.
        progress: Callable used to report a progress fraction and description.

    Returns:
        An HTML string linking to the repository and showing the model
        architecture.

    Raises:
        ValueError: If ``username`` or ``auth_token`` is missing.
    """
    import html

    # Fail before any expensive work: both values are needed to name the
    # repository and to authenticate against the Hub.
    if auth_token is None or not username:
        raise ValueError(
            "A username and an authentication token are required to push the model to the Hub."
        )

    progress(0.67, desc="Preparing to push")

    # Resolve the destination repository (explicit name wins over the default).
    if quantized_model_name:
        repo_name = f"{username}/{quantized_model_name}"
    else:
        repo_name = f"{username}/{model_name.split('/')[-1]}-bnb-4bit"

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Save model
        model.save_pretrained(tmpdirname, safe_serialization=True, use_auth_token=auth_token.token)
        progress(0.75, desc="Preparing to push")

        # Write the model card next to the weights so it is uploaded with them.
        model_card = create_model_card(repo_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4)
        with open(os.path.join(tmpdirname, "README.md"), "w", encoding="utf-8") as f:
            f.write(model_card)
        progress(0.80, desc="Model card created")

        # Push to Hub
        api = HfApi(token=auth_token.token)
        api.create_repo(repo_name, exist_ok=True, private=not public)
        progress(0.85, desc="Pushing to Hub")

        # Upload files
        api.upload_folder(
            folder_path=tmpdirname,
            repo_id=repo_name,
            repo_type="model",
        )
        progress(1.00, desc="Model pushed to Hub")

    # Render the architecture with str() instead of redirecting sys.stdout:
    # the redirect is process-global and would capture output written by
    # other threads while it is active. The trailing newline mirrors what
    # print() emitted so the generated HTML stays the same.
    model_architecture_str = str(model) + "\n"

    # Escape HTML characters and format with line breaks
    model_architecture_str_html = html.escape(model_architecture_str).replace('\n', '<br/>')

    # Format it for display in markdown with proper styling
    model_architecture_info = f"""
    <div class="model-architecture" style="max-height: 500px; overflow-y: auto; overflow-x: auto; background-color: #f5f5f5; padding: 5px; border-radius: 8px; font-family: monospace; white-space: pre-wrap;">
        <div style="line-height: 1.2; font-size: 0.75em;">{model_architecture_str_html}</div>
    </div>
    """
    return f'🔗 Quantized Model <br/><h1> 🤗 DONE</h1><br/>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank" style="text-decoration:underline">{repo_name}</a><br/><br/>📊 Model Architecture<br/>{model_architecture_info}'
def _status_box(css_class, title, body_html):
    """Wrap a title and an HTML body in the app's styled message-box markup."""
    return f"""
        <div class="{css_class}">
            <h3>{title}</h3>
            <p>{body_html}</p>
        </div>
        """


def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public, progress=gr.Progress()):
    """Quantize a Hub model and push the result, returning HTML for the UI.

    Args:
        profile: Profile of the signed-in user, or ``None`` when signed out.
        oauth_token: OAuth token of the signed-in user, or ``None``.
        model_name: Hub id of the model to quantize.
        quant_type_4, double_quant_4, compute_type_4, quant_storage_4:
            Quantization settings forwarded to the quantize and save steps.
        quantized_model_name: Optional name for the destination repository.
        public: Whether the destination repository should be public.
        progress: Callable used to report a progress fraction and description.

    Returns:
        An HTML string: the success summary produced by
        ``save_model_with_progress``, or a styled error/warning box.
        Exceptions raised while quantizing or saving are caught and rendered
        rather than propagated.
    """
    import html
    import logging

    # Both the token and the profile are needed; either one missing means the
    # user is not signed in.
    if oauth_token is None or not profile:
        return _status_box(
            "error-box",
            "❌ Authentication Error",
            "Please sign in to your HuggingFace account to use the quantizer.",
        )

    # Nothing selected in the search box: report it directly instead of
    # letting the later steps fail on an empty model id.
    if not model_name:
        return _status_box(
            "error-box",
            "❌ Error Occurred",
            "Please select a model to quantize.",
        )

    exists_message = check_model_exists(oauth_token, profile.username, model_name, quantized_model_name)
    if exists_message:
        return _status_box("warning-box", "⚠️ Model Already Exists", exists_message)

    try:
        # Download and quantize phase
        progress(0, desc="Starting quantization process")
        quantized_model = quantize_model_with_progress(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, oauth_token, progress)
        # Save and push phase
        return save_model_with_progress(quantized_model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, profile.username, oauth_token, quantized_model_name, public, progress)
    except Exception as e:
        # Top-level UI boundary: keep the traceback in the logs, show the
        # message to the user.
        logging.getLogger(__name__).exception("Quantization failed for %s", model_name)
        # Escape first so characters such as '<' or '&' in the exception text
        # are displayed literally instead of being interpreted as markup.
        error_message = html.escape(str(e)).replace('\n', '<br/>')
        return _status_box("error-box", "❌ Error Occurred", error_message)
# ---------------------------------------------------------------------------
# UI definition: settings on the left, action button and result on the right.
# ---------------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
    # Page title.
    gr.Markdown(
        value="""
        # 🤗 LLM Model BitsAndBytes Quantizer ✨
        """
    )

    # Login button and greeting, filled in by `hello` when the page loads.
    gr.LoginButton(
        min_width=250,
        elem_id="login-button",
        elem_classes="center-button",
    )
    m1 = gr.Markdown()
    demo.load(hello, inputs=None, outputs=m1)

    instructions_visible = gr.State(False)

    with gr.Row():
        # Left column: model selection and quantization options.
        with gr.Column():
            with gr.Row():
                model_name = HuggingfaceHubSearch(
                    search_type="model",
                    label="🔍 Hub Model ID",
                    placeholder="Search for model id on Huggingface",
                )

            with gr.Row():
                with gr.Column():
                    gr.Markdown(
                        value="""
                        ### ⚙️ Model Quantization Type Settings
                        """
                    )
                    quant_type_4 = gr.Dropdown(
                        choices=["fp4", "nf4"],
                        value="nf4",
                        info="The quantization data type in the bnb.nn.Linear4Bit layers",
                        show_label=False,
                        visible=True,
                    )
                    compute_type_4 = gr.Dropdown(
                        choices=["float16", "bfloat16", "float32"],
                        value="bfloat16",
                        info="The compute type for the model",
                        show_label=False,
                        visible=True,
                    )
                    quant_storage_4 = gr.Dropdown(
                        choices=["float16", "float32", "int8", "uint8", "bfloat16"],
                        value="uint8",
                        info="The storage type for the model",
                        show_label=False,
                        visible=True,
                    )

                    gr.Markdown(
                        value="""
                        ### 🔄 Double Quantization Settings
                        """
                    )
                    with gr.Row(elem_classes="option-row"):
                        double_quant_4 = gr.Radio(
                            choices=["True", "False"],
                            value="True",
                            info="Use Double Quant",
                            show_label=False,
                            visible=True,
                        )

                    gr.Markdown(
                        value="""
                        ### 💾 Saving Settings
                        """
                    )
                    with gr.Row():
                        quantized_model_name = gr.Textbox(
                            value="",
                            label="✏️ Model Name",
                            info="Model Name (optional : to override default)",
                            show_label=False,
                            interactive=True,
                            elem_classes="model-name-textbox",
                        )
                    with gr.Row():
                        public = gr.Checkbox(
                            value=True,
                            label="🌐 Make model public",
                            info="If checked, the model will be publicly accessible",
                            show_label=True,
                            interactive=True,
                        )

        # Right column: trigger and result area.
        with gr.Column():
            quantize_button = gr.Button(
                value="🚀 Quantize and Push to the Hub",
                variant="primary",
            )
            output_link = gr.Markdown(
                value="🔗 Quantized Model",
                container=True,
                min_height=100,
            )

    # Wire the button to the handler; the listed components are passed to
    # `quantize_and_save` in this order.
    quantize_button.click(
        fn=quantize_and_save,
        inputs=[
            model_name,
            quant_type_4,
            double_quant_4,
            compute_type_4,
            quant_storage_4,
            quantized_model_name,
            public,
        ],
        outputs=[output_link],
    )

# Start the app only when executed as a script.
if __name__ == "__main__":
    demo.launch(share=True)