Spaces:

HF-Quantization
/

bnb-my-repo

Running on A10G

App Files Files Community

MekkCyber committed on Mar 17

Commit

fa23c0d

1 Parent(s): 40a26a8

updating

Browse files

Files changed (3) hide show

app.py +275 -135
app_claude.py +678 -0
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import tempfile
 from huggingface_hub import HfApi
 from huggingface_hub import list_models
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from packaging import version
 import os
@@ -13,10 +14,10 @@ def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) ->
     # ^ expect a gr.OAuthProfile object as input to get the user's profile
     # if the user is not logged in, profile will be None
     if profile is None:
-        return "Hello !"
-    return f"Hello {profile.name} ! Welcome to BitsAndBytes Space"
-def check_model_exists(oauth_token: gr.OAuthToken | None, username, quantization_type, model_name, quantized_model_name):
     """Check if a model exists in the user's Hugging Face repository."""
     try:
         models = list_models(author=username, token=oauth_token.token)
@@ -24,7 +25,7 @@ def check_model_exists(oauth_token: gr.OAuthToken | None, username, quantization
         if quantized_model_name :
             repo_name = f"{username}/{quantized_model_name}"
         else :
-            repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-{quantization_type}"
         if repo_name in model_names:
             return f"Model '{repo_name}' already exists in your repository."
@@ -33,7 +34,7 @@ def check_model_exists(oauth_token: gr.OAuthToken | None, username, quantization
     except Exception as e:
         return f"Error checking model existence: {str(e)}"
-def create_model_card(model_name, quantization_type, threshold, quant_type_4, double_quant_4,):
     model_card = f"""---
 base_model:
 - {model_name}
@@ -42,17 +43,17 @@ base_model:
 # {model_name} (Quantized)
 ## Description
-This model is a quantized version of the original model `{model_name}`. It has been quantized using {quantization_type} quantization with bitsandbytes.
 ## Quantization Details
-- **Quantization Type**: {quantization_type}
-- **Threshold**: {threshold if quantization_type == "int8" else None}
-- **bnb_4bit_quant_type**: {quant_type_4 if quantization_type == "int4" else None}
-- **bnb_4bit_use_double_quant**: {double_quant_4 if quantization_type=="int4" else None}
 ## Usage
 You can use this model in your applications by loading it directly from the Hugging Face Hub:
 ```python
 from transformers import AutoModel
@@ -63,24 +64,33 @@ model = AutoModel.from_pretrained("{model_name}")"""
 def load_model(model_name, quantization_config, auth_token) :
     return AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token)
-def quantize_model(model_name, quantization_type, threshold, quant_type_4, double_quant_4, auth_token=None, username=None):
-    print(f"Quantizing model: {quantization_type}")
-    if quantization_type=="int4":
-        quantization_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_quant_type=quant_type_4,
-            bnb_4bit_use_double_quant=True if double_quant_4 == "True" else False,
-        )
-    else :
-        quantization_config = BitsAndBytesConfig(
-            load_in_8bit=True,
-            llm_int8_threshold=threshold,
-        )
-    model = load_model(model_name, quantization_config=quantization_config, auth_token=auth_token)
     return model
-def save_model(model, model_name, quantization_type, threshold, quant_type_4, double_quant_4, username=None, auth_token=None, quantized_model_name=None):
     print("Saving quantized model")
     with tempfile.TemporaryDirectory() as tmpdirname:
@@ -89,15 +99,15 @@ def save_model(model, model_name, quantization_type, threshold, quant_type_4, do
         if quantized_model_name :
             repo_name = f"{username}/{quantized_model_name}"
         else :
-            repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-{quantization_type}"
-        model_card = create_model_card(repo_name, quantization_type, threshold, quant_type_4, double_quant_4)
         with open(os.path.join(tmpdirname, "README.md"), "w") as f:
             f.write(model_card)
         # Push to Hub
         api = HfApi(token=auth_token.token)
-        api.create_repo(repo_name, exist_ok=True)
         api.upload_folder(
             folder_path=tmpdirname,
             repo_id=repo_name,
@@ -105,30 +115,17 @@ def save_model(model, model_name, quantization_type, threshold, quant_type_4, do
         )
     return f'<h1> 🤗 DONE</h1><br/>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank" style="text-decoration:underline">{repo_name}</a>'
-def is_float(value):
-    try:
-        float(value)
-        return True
-    except ValueError:
-        return False
-def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quantization_type, threshold, quant_type_4, double_quant_4, quantized_model_name):
     if oauth_token is None :
         return "Error : Please Sign In to your HuggingFace account to use the quantizer"
     if not profile:
         return "Error: Please Sign In to your HuggingFace account to use the quantizer"
-    exists_message = check_model_exists(oauth_token, profile.username, quantization_type, model_name, quantized_model_name)
     if exists_message :
         return exists_message
-    if not is_float(threshold) :
-        return "Threshold must be a float"
-    threshold = float(threshold)
     # try:
-    quantized_model = quantize_model(model_name, quantization_type, threshold, quant_type_4, double_quant_4, oauth_token, profile.username)
-    return save_model(quantized_model, model_name, quantization_type, threshold, quant_type_4, double_quant_4, profile.username, oauth_token, quantized_model_name)
     # except Exception as e :
     #     print(e)
     #     return f"An error occurred: {str(e)}"
@@ -136,16 +133,183 @@ def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToke
 css="""/* Custom CSS to allow scrolling */
 .gradio-container {overflow-y: auto;}
-.custom-radio {
-    margin-left: 20px;  /* Adjust the value as needed */
 }
 """
 with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
     gr.Markdown(
         """
-        # 🤗 LLM Model BitsAndBytes Quantization App
-        Quantize your favorite Hugging Face models using BitsAndBytes and save them to your profile!
         """
     )
@@ -153,117 +317,93 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
     m1 = gr.Markdown()
     demo.load(hello, inputs=None, outputs=m1)
-    # radio = gr.Radio(["show", "hide"], label="Show Instructions")
-    instructions = gr.Markdown(
-        """
-        ## Instructions
-        1. Login to your HuggingFace account
-        2. Enter the name of the Hugging Face LLM model you want to quantize (Make sure you have access to it)
-        3. Choose the quantization type.
-        4. Optionally, specify the group size.
-        5. Optionally, choose a custom name for the quantized model
-        6. Click "Quantize and Save Model" to start the process.
-        7. Once complete, you'll receive a link to the quantized model on Hugging Face.
-        Note: This process may take some time depending on the model size and your hardware you can check the container logs to see where are you at in the process!
-        """,
-        visible=False
-    )
-    instructions_visible = gr.State(False)
-    toggle_button = gr.Button("▼ Show Instructions", elem_id="toggle-button", elem_classes="toggle-button")
-    def toggle_instructions(instructions_visible):
-        new_visibility = not instructions_visible  # Toggle the state
-        new_label = "▲ Hide Instructions" if new_visibility else "▼ Show Instructions"  # Change label based on visibility
-        return gr.update(visible=new_visibility), new_visibility, gr.update(value=new_label)  # Toggle visibility and return new state
-    toggle_button.click(toggle_instructions, instructions_visible, [instructions, instructions_visible, toggle_button])
-    # def update_visibility(radio):  # Accept the event argument, even if not used
-    #     value = radio  # Get the selected value from the radio button
-    #     if value == "show":
-    #         return gr.Textbox(visible=True) #make it visible
-    #     else:
-    #         return gr.Textbox(visible=False)
-    # radio.change(update_visibility, radio, instructions)
     with gr.Row():
         with gr.Column():
             with gr.Row():
                 model_name = HuggingfaceHubSearch(
-                    label="Hub Model ID",
                     placeholder="Search for model id on Huggingface",
                     search_type="model",
                 )
             with gr.Row():
-                with gr.Column():
-                    quantization_type = gr.Dropdown(
-                        info="Quantization Type",
-                        choices=["int4", "int8"],
-                        value="int8",
-                        filterable=False,
-                        show_label=False,
-                    )
-                    threshold_8 = gr.Textbox(
-                        info="Outlier threshold",
-                        value=6,
-                        interactive=True,
-                        show_label=False,
-                        visible=True
                     )
                     quant_type_4 = gr.Dropdown(
                         info="The quantization data type in the bnb.nn.Linear4Bit layers",
                         choices=["fp4", "nf4"],
                         value="fp4",
-                        visible=False,
                         show_label=False
                     )
-                    radio_4 = gr.Radio(["False", "True"], info="Use Double Quant", visible=False, value="False", elem_classes="custom_radio")
-                    def update_visibility(quantization_type):
-                        return gr.update(visible=(quantization_type=="int8")), gr.update(visible=(quantization_type=="int4")), gr.update(visible=(quantization_type=="int4"))
-                    quantization_type.change(fn=update_visibility, inputs=quantization_type, outputs=[threshold_8, quant_type_4, radio_4])
-                    quantized_model_name = gr.Textbox(
-                        info="Model Name (optional : to override default)",
-                        value="",
-                        interactive=True,
                         show_label=False
                     )
         with gr.Column():
-            quantize_button = gr.Button("Quantize and Save Model", variant="primary")
-            output_link = gr.Markdown(label="Quantized Model Link", container=True, min_height=80)
-    # Adding CSS styles for the username box
-    demo.css = """
-    #username-box {
-        background-color: #f0f8ff; /* Light color */
-        border-radius: 8px;
-        padding: 10px;
-    }
-    """
-    demo.css = """
-    .center-button {
-        display: flex;
-        justify-content: center;
-        align-items: center;
-        margin: 0 auto; /* Center horizontally */
-    }
-    """
     quantize_button.click(
         fn=quantize_and_save,
-        inputs=[model_name, quantization_type, threshold_8, quant_type_4, radio_4, quantized_model_name],
         outputs=[output_link]
     )
 if __name__ == "__main__":
     demo.launch(share=True)
 # Launch the app
-# demo.launch(share=True, debug=True)

 from huggingface_hub import HfApi
 from huggingface_hub import list_models
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
+from bitsandbytes.nn import Linear4bit
 from packaging import version
 import os
     # ^ expect a gr.OAuthProfile object as input to get the user's profile
     # if the user is not logged in, profile will be None
     if profile is None:
+        return "Hello Please Login to HuggingFace to use the BitsAndBytes Quantizer!"
+    return f"Hello {profile.name} ! Welcome to BitsAndBytes Quantizer"
+def check_model_exists(oauth_token: gr.OAuthToken | None, username, model_name, quantized_model_name):
     """Check if a model exists in the user's Hugging Face repository."""
     try:
         models = list_models(author=username, token=oauth_token.token)
         if quantized_model_name :
             repo_name = f"{username}/{quantized_model_name}"
         else :
+            repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-INT4"
         if repo_name in model_names:
             return f"Model '{repo_name}' already exists in your repository."
     except Exception as e:
         return f"Error checking model existence: {str(e)}"
+def create_model_card(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4):
     model_card = f"""---
 base_model:
 - {model_name}
 # {model_name} (Quantized)
 ## Description
+This model is a quantized version of the original model `{model_name}`. It has been quantized using int4 quantization with bitsandbytes.
 ## Quantization Details
+- **Quantization Type**: int4
+- **bnb_4bit_quant_type**: {quant_type_4}
+- **bnb_4bit_use_double_quant**: {double_quant_4}
+- **bnb_4bit_compute_dtype**: {compute_type_4}
+- **bnb_4bit_quant_storage**: {quant_storage_4}
 ## Usage
 You can use this model in your applications by loading it directly from the Hugging Face Hub:
 ```python
 from transformers import AutoModel
 def load_model(model_name, quantization_config, auth_token) :
     return AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token)
+DTYPE_MAPPING = {
+    "int8": torch.int8,
+    "uint8": torch.uint8,
+    "float16": torch.float16,
+    "float32": torch.float32,
+    "bfloat16": torch.bfloat16,
+}
+def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, auth_token=None):
+    print(f"Quantizing model: {quant_type_4}")
+    quantization_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_quant_type=quant_type_4,
+        bnb_4bit_use_double_quant=True if double_quant_4 == "True" else False,
+        bnb_4bit_quant_storage=DTYPE_MAPPING[quant_storage_4],
+        bnb_4bit_compute_dtype=DTYPE_MAPPING[compute_type_4],
+    )
+    model = AutoModel.from_pretrained(model_name, quantization_config=quantization_config, device_map="cpu", use_auth_token=auth_token.token)
+    for _ , module in model.named_modules():
+        if isinstance(module, Linear4bit):
+            module.to("cuda")
+            module.to("cpu")
     return model
+def save_model(model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, username=None, auth_token=None, quantized_model_name=None, public=False):
     print("Saving quantized model")
     with tempfile.TemporaryDirectory() as tmpdirname:
         if quantized_model_name :
             repo_name = f"{username}/{quantized_model_name}"
         else :
+            repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-INT4"
+        model_card = create_model_card(repo_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4)
         with open(os.path.join(tmpdirname, "README.md"), "w") as f:
             f.write(model_card)
         # Push to Hub
         api = HfApi(token=auth_token.token)
+        api.create_repo(repo_name, exist_ok=True, private=not public)
         api.upload_folder(
             folder_path=tmpdirname,
             repo_id=repo_name,
         )
     return f'<h1> 🤗 DONE</h1><br/>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank" style="text-decoration:underline">{repo_name}</a>'
+def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public):
     if oauth_token is None :
         return "Error : Please Sign In to your HuggingFace account to use the quantizer"
     if not profile:
         return "Error: Please Sign In to your HuggingFace account to use the quantizer"
+    exists_message = check_model_exists(oauth_token, profile.username, model_name, quantized_model_name)
     if exists_message :
         return exists_message
     # try:
+    quantized_model = quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, oauth_token)
+    return save_model(quantized_model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, profile.username, oauth_token, quantized_model_name, public)
     # except Exception as e :
     #     print(e)
     #     return f"An error occurred: {str(e)}"
 css="""/* Custom CSS to allow scrolling */
 .gradio-container {overflow-y: auto;}
+/* Fix alignment for radio buttons and checkboxes */
+.gradio-radio {
+    display: flex !important;
+    align-items: center !important;
+    margin: 10px 0 !important;
+}
+.gradio-checkbox {
+    display: flex !important;
+    align-items: center !important;
+    margin: 10px 0 !important;
+}
+/* Ensure consistent spacing and alignment */
+.gradio-dropdown, .gradio-textbox, .gradio-radio, .gradio-checkbox {
+    margin-bottom: 12px !important;
+    width: 100% !important;
+}
+/* Align radio buttons and checkboxes horizontally */
+.option-row {
+    display: flex !important;
+    justify-content: space-between !important;
+    align-items: center !important;
+    gap: 20px !important;
+    margin-bottom: 12px !important;
+}
+.option-row .gradio-radio, .option-row .gradio-checkbox {
+    margin: 0 !important;
+    flex: 1 !important;
+}
+/* Horizontally align radio button options with text */
+.gradio-radio label {
+    display: flex !important;
+    align-items: center !important;
+}
+.gradio-radio input[type="radio"] {
+    margin-right: 5px !important;
+}
+/* Remove padding and margin from model name textbox for better alignment */
+.model-name-textbox {
+    padding-left: 0 !important;
+    padding-right: 0 !important;
+    margin-left: 0 !important;
+    margin-right: 0 !important;
+}
+/* Quantize button styling with glow effect */
+button[variant="primary"] {
+    background: linear-gradient(135deg, #3B82F6, #10B981) !important;
+    color: white !important;
+    padding: 16px 32px !important;
+    font-size: 1.1rem !important;
+    font-weight: 700 !important;
+    border: none !important;
+    border-radius: 12px !important;
+    box-shadow: 0 0 15px rgba(59, 130, 246, 0.5) !important;
+    transition: all 0.3s cubic-bezier(0.25, 0.8, 0.25, 1) !important;
+    position: relative;
+    overflow: hidden;
+    animation: glow 1.5s ease-in-out infinite alternate;
+}
+button[variant="primary"]::before {
+    content: "✨ ";
+}
+button[variant="primary"]:hover {
+    transform: translateY(-5px) scale(1.05) !important;
+    box-shadow: 0 10px 25px rgba(59, 130, 246, 0.7) !important;
+}
+@keyframes glow {
+    from {
+        box-shadow: 0 0 10px rgba(59, 130, 246, 0.5);
+    }
+    to {
+        box-shadow: 0 0 20px rgba(59, 130, 246, 0.8), 0 0 30px rgba(16, 185, 129, 0.5);
+    }
+}
+/* Login button styling with glow effect */
+#login-button {
+    background: linear-gradient(135deg, #3B82F6, #10B981) !important;
+    color: white !important;
+    font-weight: 700 !important;
+    border: none !important;
+    border-radius: 12px !important;
+    box-shadow: 0 0 15px rgba(59, 130, 246, 0.5) !important;
+    transition: all 0.3s cubic-bezier(0.25, 0.8, 0.25, 1) !important;
+    position: relative;
+    overflow: hidden;
+    animation: glow 1.5s ease-in-out infinite alternate;
+    max-width: 300px !important;
+    margin: 0 auto !important;
+}
+#login-button::before {
+    content: "🔑 ";
+    display: inline-block !important;
+    vertical-align: middle !important;
+    margin-right: 5px !important;
+    line-height: normal !important;
+}
+#login-button:hover {
+    transform: translateY(-3px) scale(1.03) !important;
+    box-shadow: 0 10px 25px rgba(59, 130, 246, 0.7) !important;
+}
+#login-button::after {
+    content: "";
+    position: absolute;
+    top: 0;
+    left: -100%;
+    width: 100%;
+    height: 100%;
+    background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
+    transition: 0.5s;
+}
+#login-button:hover::after {
+    left: 100%;
+}
+/* Toggle instructions button styling */
+#toggle-button {
+    background: linear-gradient(135deg, #3B82F6, #10B981) !important;
+    color: white !important;
+    font-size: 0.85rem !important;
+    font-weight: 600 !important;
+    padding: 8px 16px !important;
+    border: none !important;
+    border-radius: 8px !important;
+    box-shadow: 0 2px 10px rgba(59, 130, 246, 0.3) !important;
+    transition: all 0.3s ease !important;
+    margin: 0.5rem auto 1.5rem auto !important;
+    display: block !important;
+    max-width: 200px !important;
+    text-align: center !important;
+    position: relative;
+    overflow: hidden;
+}
+#toggle-button:hover {
+    transform: translateY(-2px) !important;
+    box-shadow: 0 4px 12px rgba(59, 130, 246, 0.5) !important;
+}
+#toggle-button::after {
+    content: "";
+    position: absolute;
+    top: 0;
+    left: -100%;
+    width: 100%;
+    height: 100%;
+    background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
+    transition: 0.5s;
+}
+#toggle-button:hover::after {
+    left: 100%;
 }
 """
 with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
     gr.Markdown(
         """
+        # 🤗 LLM Model BitsAndBytes Quantizer ✨
         """
     )
     m1 = gr.Markdown()
     demo.load(hello, inputs=None, outputs=m1)
+    instructions_visible = gr.State(False)
     with gr.Row():
         with gr.Column():
             with gr.Row():
                 model_name = HuggingfaceHubSearch(
+                    label="🔍 Hub Model ID",
                     placeholder="Search for model id on Huggingface",
                     search_type="model",
                 )
             with gr.Row():
+                with gr.Column():
+                    gr.Markdown(
+                        """
+                        ### ⚙️ Model Quantization Type Settings
+                        """
                     )
                     quant_type_4 = gr.Dropdown(
                         info="The quantization data type in the bnb.nn.Linear4Bit layers",
                         choices=["fp4", "nf4"],
                         value="fp4",
+                        visible=True,
                         show_label=False
                     )
+                    compute_type_4 = gr.Dropdown(
+                        info="The compute type for the model",
+                        choices=["float16", "bfloat16", "float32"],
+                        value="float32",
+                        visible=True,
                         show_label=False
                     )
+                    quant_storage_4 = gr.Dropdown(
+                        info="The storage type for the model",
+                        choices=["float16", "float32", "int8", "uint8", "bfloat16"],
+                        value="uint8",
+                        visible=True,
+                        show_label=False
+                    )
+                    gr.Markdown(
+                        """
+                        ### 🔄 Double Quantization Settings
+                        """
+                    )
+                    with gr.Row(elem_classes="option-row"):
+                        double_quant_4 = gr.Radio(
+                            ["False", "True"],
+                            info="Use Double Quant",
+                            visible=True,
+                            value="False",
+                            show_label=False
+                        )
+                    gr.Markdown(
+                        """
+                        ### 💾 Saving Settings
+                        """
+                    )
+                    with gr.Row():
+                        quantized_model_name = gr.Textbox(
+                            label="✏️ Model Name",
+                            info="Model Name (optional : to override default)",
+                            value="",
+                            interactive=True,
+                            elem_classes="model-name-textbox",
+                            show_label=False,
+                        )
+                    with gr.Row():
+                        public = gr.Checkbox(
+                            label="🌐 Make model public",
+                            info="If checked, the model will be publicly accessible",
+                            value=False,
+                            interactive=True,
+                            show_label=True
+                        )
         with gr.Column():
+            quantize_button = gr.Button("🚀 Quantize and Save Model", variant="primary")
+            output_link = gr.Markdown(label="🔗 Quantized Model Link", container=True, min_height=80)
     quantize_button.click(
         fn=quantize_and_save,
+        inputs=[model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public],
         outputs=[output_link]
     )
 if __name__ == "__main__":
     demo.launch(share=True)
 # Launch the app
+# demo.launch(share=True, debug=True)

app_claude.py ADDED Viewed

	@@ -0,0 +1,678 @@

+import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel, BitsAndBytesConfig
+import tempfile
+from huggingface_hub import HfApi
+from huggingface_hub import list_models
+from gradio_huggingfacehub_search import HuggingfaceHubSearch
+from bitsandbytes.nn import Linear4bit
+from packaging import version
+import os
+def hello(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
+    """Return the greeting message for the current visitor.
+
+    Args:
+        profile: OAuth profile of the signed-in user, or None when nobody is signed in.
+        oauth_token: OAuth token of the signed-in user; not read by this function.
+
+    Returns:
+        A personalised welcome when a profile is present, otherwise a sign-in prompt.
+    """
+    signed_in = profile is not None
+    if signed_in:
+        return f"👋 Hello {profile.name}! Welcome to the BitsAndBytes Quantizer."
+    return "👋 Hello! Sign in to get started with the BitsAndBytes Quantizer."
+def check_model_exists(oauth_token: gr.OAuthToken | None, username, model_name, quantized_model_name):
+    """Report whether the target repository already exists under the user's account.
+
+    The target is ``{username}/{quantized_model_name}`` when a custom name is given,
+    otherwise ``{username}/<last path segment of model_name>-BNB-INT4``.
+
+    Returns:
+        None when no model with that id is listed for the user. Otherwise a message
+        string: either an "already exists" notice or a description of the exception
+        raised while checking.
+    """
+    try:
+        # Gather every model id listed for this author before building the target name.
+        listed_ids = {entry.id for entry in list_models(author=username, token=oauth_token.token)}
+        suffix = quantized_model_name if quantized_model_name else f"{model_name.split('/')[-1]}-BNB-INT4"
+        repo_name = f"{username}/{suffix}"
+        if repo_name not in listed_ids:
+            return None  # Model does not exist
+        return f"Model '{repo_name}' already exists in your repository."
+    except Exception as err:
+        return f"Error checking model existence: {str(err)}"
+def create_model_card(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4):
+    """Render the README.md model card for a 4-bit BitsAndBytes quantized model.
+
+    Args:
+        model_name: Repository id written into the front matter, title, description
+            and usage snippet.
+        quant_type_4: Value reported as ``bnb_4bit_quant_type``.
+        double_quant_4: Value reported as ``bnb_4bit_use_double_quant``.
+        compute_type_4: Value reported as ``bnb_4bit_compute_dtype``.
+        quant_storage_4: Value reported as ``bnb_4bit_quant_storage``.
+
+    Returns:
+        The model card as a Markdown string with YAML front matter.
+    """
+    # The usage snippet's code fence is closed explicitly; an unterminated fence
+    # makes the rest of the rendered card part of the code block.
+    model_card = f"""---
+base_model:
+- {model_name}
+---
+# {model_name} (Quantized)
+## Description
+This model is a quantized version of the original model `{model_name}`. It has been quantized using int4 quantization with bitsandbytes.
+## Quantization Details
+- **Quantization Type**: int4
+- **bnb_4bit_quant_type**: {quant_type_4}
+- **bnb_4bit_use_double_quant**: {double_quant_4}
+- **bnb_4bit_compute_dtype**: {compute_type_4}
+- **bnb_4bit_quant_storage**: {quant_storage_4}
+## Usage
+You can use this model in your applications by loading it directly from the Hugging Face Hub:
+```python
+from transformers import AutoModel
+model = AutoModel.from_pretrained("{model_name}")
+```
+"""
+    return model_card
+def load_model(model_name, quantization_config, auth_token):
+    """Load ``model_name`` through ``AutoModel`` on CPU with the given quantization config.
+
+    Args:
+        model_name: Model id or path forwarded to ``AutoModel.from_pretrained``.
+        quantization_config: Forwarded as the ``quantization_config`` keyword.
+        auth_token: Object whose ``.token`` attribute is passed as ``use_auth_token``.
+
+    Returns:
+        Whatever ``AutoModel.from_pretrained`` returns.
+    """
+    load_kwargs = {
+        "quantization_config": quantization_config,
+        "device_map": "cpu",
+        "use_auth_token": auth_token.token,
+    }
+    return AutoModel.from_pretrained(model_name, **load_kwargs)
+# String dtype names mapped to the matching torch dtype objects; quantize_model
+# looks up its compute and storage dtype arguments here.
+DTYPE_MAPPING = {
+    dtype_name: getattr(torch, dtype_name)
+    for dtype_name in ("int8", "uint8", "float16", "float32", "bfloat16")
+}
+def quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, auth_token=None):
+    """Load ``model_name`` with a 4-bit BitsAndBytes configuration.
+
+    Args:
+        model_name: Model id forwarded to the loader.
+        quant_type_4: Value forwarded as ``bnb_4bit_quant_type``.
+        double_quant_4: The string "True" enables ``bnb_4bit_use_double_quant``;
+            any other value disables it.
+        compute_type_4: Key of ``DTYPE_MAPPING`` used for ``bnb_4bit_compute_dtype``.
+        quant_storage_4: Key of ``DTYPE_MAPPING`` used for ``bnb_4bit_quant_storage``.
+        auth_token: Object whose ``.token`` attribute is passed to the loader.
+
+    Returns:
+        The loaded model, after every ``Linear4bit`` module has been moved to
+        CUDA and back to CPU.
+
+    Raises:
+        KeyError: If a dtype name is not present in ``DTYPE_MAPPING``.
+    """
+    print(f"Quantizing model: {quant_type_4}")
+    use_double_quant = double_quant_4 == "True"
+    storage_dtype = DTYPE_MAPPING[quant_storage_4]
+    compute_dtype = DTYPE_MAPPING[compute_type_4]
+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_quant_type=quant_type_4,
+        bnb_4bit_use_double_quant=use_double_quant,
+        bnb_4bit_quant_storage=storage_dtype,
+        bnb_4bit_compute_dtype=compute_dtype,
+    )
+    model = load_model(model_name, bnb_config, auth_token)
+    # NOTE(review): the CUDA round trip presumably triggers the actual 4-bit
+    # packing of each layer — confirm against bitsandbytes behaviour.
+    for layer in (m for m in model.modules() if isinstance(m, Linear4bit)):
+        layer.to("cuda")
+        layer.to("cpu")
+    return model
+def save_model(model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, username=None, auth_token=None, quantized_model_name=None, public=False):
+    """Serialize the quantized model, add a model card, and upload both to the Hub.
+
+    Args:
+        model: Model object exposing ``save_pretrained``.
+        model_name: Source model id; its last path segment seeds the default repo name.
+        quant_type_4: Setting echoed into the model card.
+        double_quant_4: Setting echoed into the model card.
+        compute_type_4: Setting echoed into the model card.
+        quant_storage_4: Setting echoed into the model card.
+        username: Hub namespace that owns the target repository.
+        auth_token: Object whose ``.token`` attribute authenticates the Hub calls.
+        quantized_model_name: Optional repository name overriding the
+            ``<model>-BNB-INT4`` default.
+        public: When False (the default) the repository is created private.
+
+    Returns:
+        An HTML snippet linking to the uploaded repository.
+    """
+    print("Saving quantized model")
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        model.save_pretrained(tmpdirname, safe_serialization=True, use_auth_token=auth_token.token)
+        if quantized_model_name :
+            repo_name = f"{username}/{quantized_model_name}"
+        else :
+            repo_name = f"{username}/{model_name.split('/')[-1]}-BNB-INT4"
+        # NOTE(review): the card receives the target repo id, so its `base_model`
+        # field names the quantized repo rather than `model_name` — confirm intended.
+        model_card = create_model_card(repo_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4)
+        # Explicit UTF-8 so the README does not depend on the platform's default encoding.
+        with open(os.path.join(tmpdirname, "README.md"), "w", encoding="utf-8") as f:
+            f.write(model_card)
+        # Push to Hub
+        api = HfApi(token=auth_token.token)
+        api.create_repo(repo_name, exist_ok=True, private=not public)
+        api.upload_folder(
+            folder_path=tmpdirname,
+            repo_id=repo_name,
+            repo_type="model",
+        )
+    return f"""
+    <div class="success-box">
+        <h2>🎉 Quantization Complete!</h2>
+        <p>Your quantized model is now available at:</p>
+        <a href="https://huggingface.co/{repo_name}" target="_blank" class="model-link">
+            huggingface.co/{repo_name}
+        </a>
+    </div>
+    """
+def quantize_and_save(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public):
+    """Run the full quantize-and-upload flow and return an HTML status box.
+
+    Args:
+        profile: OAuth profile of the signed-in user, or None.
+        oauth_token: OAuth token of the signed-in user, or None.
+        model_name: Model id to quantize.
+        quant_type_4: Forwarded to ``quantize_model`` and ``save_model``.
+        double_quant_4: Forwarded to ``quantize_model`` and ``save_model``.
+        compute_type_4: Forwarded to ``quantize_model`` and ``save_model``.
+        quant_storage_4: Forwarded to ``quantize_model`` and ``save_model``.
+        quantized_model_name: Optional custom name for the target repository.
+        public: Forwarded to ``save_model``.
+
+    Returns:
+        An HTML string: an authentication error when either credential is
+        missing, a warning when ``check_model_exists`` returns a message, the
+        result of ``save_model`` on success, or an error box carrying the
+        exception text when quantizing or saving raises.
+    """
+    from html import escape  # local import keeps this block self-contained
+    if oauth_token is None or not profile:
+        return """
+        <div class="error-box">
+            <h3>❌ Authentication Error</h3>
+            <p>Please sign in to your HuggingFace account to use the quantizer.</p>
+        </div>
+        """
+    exists_message = check_model_exists(oauth_token, profile.username, model_name, quantized_model_name)
+    if exists_message :
+        # The message embeds user-supplied names or exception text; escape it
+        # so stray markup cannot break the rendered box.
+        return f"""
+        <div class="warning-box">
+            <h3>⚠️ Model Already Exists</h3>
+            <p>{escape(exists_message, quote=False)}</p>
+        </div>
+        """
+    try:
+        quantized_model = quantize_model(model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, oauth_token)
+        return save_model(quantized_model, model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, profile.username, oauth_token, quantized_model_name, public)
+    except Exception as e :
+        # Top-level UI boundary: log the failure and show it to the user
+        # instead of letting the callback crash.
+        print(e)
+        return f"""
+        <div class="error-box">
+            <h3>❌ Error Occurred</h3>
+            <p>{escape(str(e), quote=False)}</p>
+        </div>
+        """
+# Custom stylesheet passed to gr.Blocks(css=...) when the UI is built below.
+# It declares the colour palette as CSS variables and styles the element ids
+# (#login-button, #toggle-button, #quantize-button) and classes
+# (.main-container, .app-header, .card-section, .section-header, .search-box,
+# .model-name-textbox, .instructions-container) assigned to components via
+# elem_id / elem_classes, plus the .success-box / .warning-box / .error-box /
+# .model-link classes used by the HTML snippets the handlers return.
+# NOTE(review): some selectors (e.g. .option-row) are not assigned to any
+# component in the layout visible in this file section - confirm they are
+# still needed.
+css = """
+:root {
+    --primary: #6366f1;
+    --primary-light: #818cf8;
+    --primary-dark: #4f46e5;
+    --secondary: #10b981;
+    --accent: #f97316;
+    --background: #f8fafc;
+    --text: #1e293b;
+    --card-bg: #ffffff;
+    --input-bg: #f1f5f9;
+    --error: #ef4444;
+    --warning: #f59e0b;
+    --success: #10b981;
+    --border-radius: 12px;
+    --shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
+    --transition: all 0.3s ease;
+}
+body, .gradio-container {
+    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', sans-serif;
+    color: var(--text);
+    background-color: var(--background);
+}
+h1 {
+    font-size: 2.5rem !important;
+    font-weight: 800 !important;
+    text-align: center;
+    background: linear-gradient(45deg, var(--primary), var(--accent));
+    -webkit-background-clip: text;
+    background-clip: text;
+    color: transparent !important;
+    margin-bottom: 1rem !important;
+    padding: 1rem 0 !important;
+}
+h2 {
+    font-size: 1.75rem !important;
+    font-weight: 700 !important;
+    color: var(--primary-dark) !important;
+    margin-top: 1.5rem !important;
+    margin-bottom: 1rem !important;
+}
+h3 {
+    font-size: 1.25rem !important;
+    font-weight: 600 !important;
+    color: var(--primary) !important;
+    margin-top: 1rem !important;
+    margin-bottom: 0.5rem !important;
+    border-bottom: 2px solid var(--primary-light);
+    padding-bottom: 0.5rem;
+    width: fit-content;
+}
+/* Main container styling */
+.main-container {
+    max-width: 1200px;
+    margin: 0 auto;
+    padding: 2rem;
+    background-color: var(--card-bg);
+    border-radius: var(--border-radius);
+    box-shadow: var(--shadow);
+}
+/* Button styling */
+button {
+    border-radius: var(--border-radius) !important;
+    font-weight: 600 !important;
+    transition: var(--transition) !important;
+    text-transform: uppercase;
+    letter-spacing: 0.5px;
+}
+button.primary {
+    background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important;
+    border: none !important;
+    color: white !important;
+    padding: 12px 24px !important;
+    box-shadow: 0 4px 6px -1px rgba(99, 102, 241, 0.4) !important;
+}
+button.primary:hover {
+    transform: translateY(-2px) !important;
+    box-shadow: 0 8px 15px -3px rgba(99, 102, 241, 0.5) !important;
+}
+/* Login button styling */
+#login-button {
+    margin: 1.5rem auto !important;
+    min-width: 200px !important;
+    background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important;
+    color: white !important;
+    font-weight: 600 !important;
+    padding: 12px 24px !important;
+    border-radius: var(--border-radius) !important;
+    border: none !important;
+    box-shadow: 0 4px 6px -1px rgba(99, 102, 241, 0.4) !important;
+    transition: var(--transition) !important;
+}
+#login-button:hover {
+    transform: translateY(-2px) !important;
+    box-shadow: 0 8px 15px -3px rgba(99, 102, 241, 0.5) !important;
+}
+/* Toggle button styling */
+#toggle-button {
+    background: transparent !important;
+    color: var(--primary) !important;
+    border: 2px solid var(--primary-light) !important;
+    padding: 8px 16px !important;
+    margin: 1rem 0 !important;
+    border-radius: var(--border-radius) !important;
+    transition: var(--transition) !important;
+    font-weight: 600 !important;
+}
+#toggle-button:hover {
+    background-color: var(--primary-light) !important;
+    color: white !important;
+}
+/* Input fields styling */
+input, select, textarea {
+    border-radius: var(--border-radius) !important;
+    border: 2px solid var(--input-bg) !important;
+    padding: 10px 16px !important;
+    background-color: var(--input-bg) !important;
+    transition: var(--transition) !important;
+}
+input:focus, select:focus, textarea:focus {
+    border-color: var(--primary-light) !important;
+    box-shadow: 0 0 0 2px rgba(99, 102, 241, 0.2) !important;
+}
+/* Dropdown styling with nice hover effects */
+.gradio-dropdown > div {
+    border-radius: var(--border-radius) !important;
+    border: 2px solid var(--input-bg) !important;
+    overflow: hidden !important;
+    transition: var(--transition) !important;
+}
+.gradio-dropdown > div:hover {
+    border-color: var(--primary-light) !important;
+}
+/* Radio and checkbox styling */
+.gradio-radio, .gradio-checkbox {
+    background-color: var(--card-bg) !important;
+    border-radius: var(--border-radius) !important;
+    padding: 12px !important;
+    margin-bottom: 16px !important;
+    transition: var(--transition) !important;
+    border: 2px solid var(--input-bg) !important;
+}
+.gradio-radio:hover, .gradio-checkbox:hover {
+    border-color: var(--primary-light) !important;
+}
+.gradio-radio input[type="radio"] + label {
+    padding: 8px 12px !important;
+    border-radius: 20px !important;
+    margin-right: 8px !important;
+    background-color: var(--input-bg) !important;
+    transition: var(--transition) !important;
+}
+.gradio-radio input[type="radio"]:checked + label {
+    background-color: var(--primary) !important;
+    color: white !important;
+}
+/* Custom spacing and layout */
+.gradio-row {
+    margin-bottom: 24px !important;
+}
+.option-row {
+    display: flex !important;
+    gap: 16px !important;
+    margin-bottom: 16px !important;
+}
+/* Card-like sections */
+.card-section {
+    background-color: var(--card-bg) !important;
+    border-radius: var(--border-radius) !important;
+    padding: 20px !important;
+    margin-bottom: 24px !important;
+    box-shadow: var(--shadow) !important;
+    border: 1px solid rgba(0, 0, 0, 0.05) !important;
+}
+/* Search box styling */
+.search-box input {
+    border-radius: var(--border-radius) !important;
+    border: 2px solid var(--input-bg) !important;
+    padding: 12px 20px !important;
+    box-shadow: var(--shadow) !important;
+    transition: var(--transition) !important;
+}
+.search-box input:focus {
+    border-color: var(--primary) !important;
+    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.3) !important;
+}
+/* Model name textbox specific styling */
+.model-name-textbox {
+    border: 2px solid var(--input-bg) !important;
+    border-radius: var(--border-radius) !important;
+    transition: var(--transition) !important;
+}
+.model-name-textbox:focus-within {
+    border-color: var(--primary) !important;
+    box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.3) !important;
+}
+/* Success, warning and error boxes */
+.success-box, .warning-box, .error-box {
+    border-radius: var(--border-radius) !important;
+    padding: 20px !important;
+    margin: 20px 0 !important;
+    box-shadow: var(--shadow) !important;
+    animation: fadeIn 0.5s ease-in-out;
+}
+.success-box {
+    background-color: rgba(16, 185, 129, 0.1) !important;
+    border: 2px solid var(--success) !important;
+}
+.warning-box {
+    background-color: rgba(245, 158, 11, 0.1) !important;
+    border: 2px solid var(--warning) !important;
+}
+.error-box {
+    background-color: rgba(239, 68, 68, 0.1) !important;
+    border: 2px solid var(--error) !important;
+}
+/* Model link styling */
+.model-link {
+    display: inline-block !important;
+    background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important;
+    color: white !important;
+    text-decoration: none !important;
+    padding: 12px 24px !important;
+    border-radius: var(--border-radius) !important;
+    font-weight: 600 !important;
+    margin-top: 16px !important;
+    box-shadow: 0 4px 6px -1px rgba(99, 102, 241, 0.4) !important;
+    transition: var(--transition) !important;
+}
+.model-link:hover {
+    transform: translateY(-2px) !important;
+    box-shadow: 0 8px 15px -3px rgba(99, 102, 241, 0.5) !important;
+}
+/* Instructions section */
+.instructions-container {
+    background-color: rgba(99, 102, 241, 0.05) !important;
+    border-left: 4px solid var(--primary) !important;
+    padding: 16px !important;
+    margin: 24px 0 !important;
+    border-radius: 0 var(--border-radius) var(--border-radius) 0 !important;
+}
+/* Animations */
+@keyframes fadeIn {
+    from { opacity: 0; transform: translateY(10px); }
+    to { opacity: 1; transform: translateY(0); }
+}
+/* Responsive adjustments */
+@media (max-width: 768px) {
+    .option-row {
+        flex-direction: column !important;
+    }
+}
+/* Add a nice gradient splash to the app */
+.gradio-container::before {
+    content: "";
+    position: absolute;
+    top: 0;
+    left: 0;
+    right: 0;
+    height: 10px;
+    background: linear-gradient(90deg, var(--primary), var(--accent));
+    z-index: 100;
+}
+/* Stylish header */
+.app-header {
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    margin-bottom: 2rem;
+    position: relative;
+}
+.app-header::after {
+    content: "";
+    position: absolute;
+    bottom: -10px;
+    left: 50%;
+    transform: translateX(-50%);
+    width: 80px;
+    height: 4px;
+    background: linear-gradient(90deg, var(--primary), var(--accent));
+    border-radius: 2px;
+}
+/* Section headers */
+.section-header {
+    display: flex;
+    align-items: center;
+    margin-bottom: 1rem;
+}
+.section-header::before {
+    content: "⚙️";
+    margin-right: 8px;
+    font-size: 1.25rem;
+}
+/* Quantize button special styling */
+#quantize-button {
+    background: linear-gradient(135deg, var(--primary), var(--accent)) !important;
+    color: white !important;
+    padding: 16px 32px !important;
+    font-size: 1.1rem !important;
+    font-weight: 700 !important;
+    border: none !important;
+    border-radius: var(--border-radius) !important;
+    box-shadow: 0 4px 15px -3px rgba(99, 102, 241, 0.5) !important;
+    transition: all 0.3s cubic-bezier(0.25, 0.8, 0.25, 1) !important;
+    position: relative;
+    overflow: hidden;
+}
+#quantize-button:hover {
+    transform: translateY(-3px) !important;
+    box-shadow: 0 7px 20px -2px rgba(99, 102, 241, 0.6) !important;
+}
+#quantize-button::after {
+    content: "";
+    position: absolute;
+    top: 0;
+    left: 0;
+    width: 100%;
+    height: 100%;
+    background: linear-gradient(rgba(255, 255, 255, 0.2), rgba(255, 255, 255, 0));
+    transform: translateY(-100%);
+    transition: transform 0.6s cubic-bezier(0.25, 0.8, 0.25, 1);
+}
+#quantize-button:hover::after {
+    transform: translateY(0);
+}
+"""
+# Build the Gradio UI. The `css` string defined above supplies the custom
+# styling that the elem_id / elem_classes values below hook into. Components
+# are laid out in the order they are created, so statement order matters here.
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald"), css=css) as demo:
+    with gr.Column(elem_classes="main-container"):
+        # --- Header ---
+        # NOTE(review): the inline font-size / color on the <h1> compete with
+        # the `h1 { ... !important }` rules in `css`; the stylesheet rules
+        # presumably win - confirm which look is intended.
+        with gr.Row(elem_classes="app-header"):
+            gr.Markdown(
+                """
+                <h1 style="text-align: center; margin-bottom: 1rem; font-size: 1.2rem; color: #4b5563;"> 🤗 BitsAndBytes Model Quantizer</h1>
+                <div style="text-align: center; margin-bottom: 1rem; font-size: 1.2rem; color: #4b5563;">
+                 Welcome to the BitsAndBytes Model Quantizer!
+                </div>
+                """
+            )
+        # --- Login and welcome message ---
+        gr.LoginButton(elem_id="login-button", elem_classes="login-button")
+        welcome_msg = gr.Markdown(elem_classes="welcome-message")
+        # On page load, run `hello` and write its result into the welcome message.
+        demo.load(hello, inputs=None, outputs=welcome_msg)
+        # --- Collapsible instructions panel (hidden until toggled) ---
+        instructions = gr.Markdown(
+            """
+            <div class="instructions-container">
+                <h3>📋 Instructions</h3>
+                <ol>
+                    <li>Login to your HuggingFace account</li>
+                    <li>Enter the name of the Hugging Face LLM model you want to quantize</li>
+                    <li>Configure quantization settings based on your needs</li>
+                    <li>Optionally, specify a custom name for the quantized model</li>
+                    <li>Click "Quantize Model" to start the process</li>
+                </ol>
+                <p><strong>Note:</strong> Processing time depends on model size and your hardware. Check container logs for progress!</p>
+            </div>
+            """,
+            visible=False
+        )
+        # Tracks whether the instructions panel is currently shown; starts in
+        # sync with `visible=False` above.
+        instructions_visible = gr.State(False)
+        toggle_button = gr.Button("▼ Show Instructions", elem_id="toggle-button", elem_classes="toggle-button")
+        def toggle_instructions(instructions_visible):
+            """Flip the panel's visibility and return updates for the panel, the state, and the button label."""
+            new_visibility = not instructions_visible
+            new_label = "▲ Hide Instructions" if new_visibility else "▼ Show Instructions"
+            return gr.update(visible=new_visibility), new_visibility, gr.update(value=new_label)
+        # The three return values map, in order, onto the three outputs listed here.
+        toggle_button.click(toggle_instructions, instructions_visible, [instructions, instructions_visible, toggle_button])
+        with gr.Row(elem_classes="app-content"):
+            # --- Left column: model selection and quantization settings ---
+            with gr.Column(scale=1, elem_classes="card-section"):
+                with gr.Row(elem_classes="search-section"):
+                    model_name = HuggingfaceHubSearch(
+                        label="🔍 Select Model",
+                        placeholder="  Search for model on Huggingface Hub...",
+                        search_type="model",
+                        elem_classes="search-box"
+                    )
+                with gr.Row(elem_classes="section-header"):
+                    gr.Markdown("### Quantization Settings")
+                with gr.Column(elem_classes="settings-group"):
+                    gr.Markdown("**Quantization Type**", elem_classes="setting-label")
+                    quant_type_4 = gr.Dropdown(
+                        choices=["fp4", "nf4"],
+                        value="fp4",
+                        label="Format",
+                        info="The quantization data type in bnb.nn.Linear4Bit layers",
+                        show_label=False
+                    )
+                    gr.Markdown("**Compute Settings**", elem_classes="setting-label")
+                    compute_type_4 = gr.Dropdown(
+                        choices=["float16", "bfloat16", "float32"],
+                        value="float32",
+                        label="Compute Type",
+                        info="The compute dtype for matrix multiplication"
+                    )
+                    quant_storage_4 = gr.Dropdown(
+                        choices=["float16", "float32", "int8", "uint8", "bfloat16"],
+                        value="uint8",
+                        label="Storage Type",
+                        info="The storage type for quantized weights"
+                    )
+                    gr.Markdown("**Double Quantization**", elem_classes="setting-label")
+                    # NOTE(review): the choices are the strings "False" / "True",
+                    # not booleans; the quantization helpers presumably convert
+                    # them - confirm, since the string "False" is truthy.
+                    double_quant_4 = gr.Radio(
+                        ["False", "True"],
+                        label="Use Double Quantization",
+                        info="Further compress model size with nested quantization",
+                        value="False",
+                    )
+                with gr.Row(elem_classes="section-header"):
+                    gr.Markdown("### Output Settings")
+                with gr.Column(elem_classes="settings-group"):
+                    quantized_model_name = gr.Textbox(
+                        label="Custom Model Name (Optional)",
+                        info="Leave blank to use default naming convention",
+                        placeholder="my-quantized-model",
+                        elem_classes="model-name-textbox"
+                    )
+                    public = gr.Checkbox(
+                        label="Make model public",
+                        info="If checked, your model will be publicly accessible on Hugging Face Hub",
+                        value=False,
+                    )
+            # --- Right column: static help text, the action button, and the result area ---
+            with gr.Column(scale=1, elem_classes="card-section"):
+                with gr.Row():
+                    gr.Markdown("""
+                    ### 📊 Quantization Benefits
+                    <div style="background-color: rgba(99, 102, 241, 0.05); padding: 12px; border-radius: 8px; margin-bottom: 16px;">
+                        <p><strong>⚡ Lower Memory Usage:</strong> Reduce model size by up to 75%</p>
+                        <p><strong>🚀 Faster Inference:</strong> Achieve better performance on resource-constrained hardware</p>
+                        <p><strong>💻 Wider Compatibility:</strong> Run models on devices with limited VRAM</p>
+                    </div>
+                    ### 🔧 Configuration Guide
+                    <div style="background-color: rgba(16, 185, 129, 0.05); padding: 12px; border-radius: 8px;">
+                        <p><strong>Quantization Type:</strong></p>
+                        <ul>
+                            <li><code>fp4</code> - 4-bit floating point (better for most cases)</li>
+                            <li><code>nf4</code> - normalized float format (better for specific models)</li>
+                        </ul>
+                        <p><strong>Double Quantization:</strong> Enable for additional compression with minimal quality loss</p>
+                    </div>
+                    """)
+                with gr.Row():
+                    quantize_button = gr.Button("🚀 Quantize Model", variant="primary", elem_id="quantize-button")
+                # Receives the HTML snippet returned by quantize_and_save.
+                output_link = gr.HTML(label="Results", elem_classes="results-container")
+        # Add interactive footer with links
+        gr.Markdown("""
+        <div style="margin-top: 2rem; text-align: center; padding: 1rem; border-top: 1px solid rgba(99, 102, 241, 0.2);">
+            <p>Powered by <a href="https://huggingface.co/" target="_blank" style="color: var(--primary); text-decoration: none; font-weight: 600;">Hugging Face</a> and <a href="https://github.com/TimDettmers/bitsandbytes" target="_blank" style="color: var(--primary); text-decoration: none; font-weight: 600;">BitsAndBytes</a></p>
+        </div>
+        """)
+    # The seven inputs map, in order, onto quantize_and_save's parameters after
+    # `profile` and `oauth_token`; those two are not listed here and are
+    # presumably injected by Gradio from the handler's gr.OAuthProfile /
+    # gr.OAuthToken annotations - confirm against the Gradio OAuth docs.
+    quantize_button.click(
+        fn=quantize_and_save,
+        inputs=[model_name, quant_type_4, double_quant_4, compute_type_4, quant_storage_4, quantized_model_name, public],
+        outputs=[output_link]
+    )
+if __name__ == "__main__":
+    # NOTE(review): share=True requests a public share link; this app handles
+    # user OAuth tokens and is meant to run as a Space, so confirm that a
+    # share link is actually wanted here.
+    demo.launch(share=True)

requirements.txt CHANGED Viewed

@@ -2,4 +2,4 @@ transformers
 accelerate
 huggingface-hub
 gradio-huggingfacehub-search
-https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl

 accelerate
 huggingface-hub
 gradio-huggingfacehub-search
+bitsandbytes