	Split/shard support
app.py CHANGED
@@ -28,11 +28,51 @@ def script_to_use(model_id, api):
     arch = arch[0]
     return "convert.py" if arch in LLAMA_LIKE_ARCHS else "convert-hf-to-gguf.py"

-def process_model(model_id, q_method, private_repo, oauth_token: gr.OAuthToken | None):
+def split_upload_model(model_path, repo_id, oauth_token: gr.OAuthToken | None, split_max_tensors=256, split_max_size=None):
+    if oauth_token.token is None:
+        raise ValueError("You have to be logged in.")
+
+    split_cmd = f"llama.cpp/gguf-split --split --split-max-tensors {split_max_tensors}"
+    if split_max_size:
+        split_cmd += f" --split-max-size {split_max_size}"
+    split_cmd += f" {model_path} {model_path.split('.')[0]}"
+
+    print(f"Split command: {split_cmd}")
+
+    result = subprocess.run(split_cmd, shell=True, capture_output=True, text=True)
+    print(f"Split command stdout: {result.stdout}")
+    print(f"Split command stderr: {result.stderr}")
+
+    if result.returncode != 0:
+        raise Exception(f"Error splitting the model: {result.stderr}")
+    print("Model split successfully!")
+
+
+    sharded_model_files = [f for f in os.listdir('.') if f.startswith(model_path.split('.')[0])]
+    if sharded_model_files:
+        print(f"Sharded model files: {sharded_model_files}")
+        api = HfApi(token=oauth_token.token)
+        for file in sharded_model_files:
+            file_path = os.path.join('.', file)
+            print(f"Uploading file: {file_path}")
+            try:
+                api.upload_file(
+                    path_or_fileobj=file_path,
+                    path_in_repo=file,
+                    repo_id=repo_id,
+                )
+            except Exception as e:
+                raise Exception(f"Error uploading file {file_path}: {e}")
+    else:
+        raise Exception("No sharded files found.")
+
+    print("Sharded model has been uploaded successfully!")
+
+def process_model(model_id, q_method, private_repo, split_model, split_max_tensors, split_max_size, oauth_token: gr.OAuthToken | None):
     if oauth_token.token is None:
         raise ValueError("You must be logged in to use GGUF-my-repo")
     model_name = model_id.split('/')[-1]
-    fp16 = f"{model_name}…
+    fp16 = f"{model_name}.fp16.gguf"

     try:
         api = HfApi(token=oauth_token.token)
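The new `split_upload_model` helper shells out to llama.cpp's `gguf-split` and then uploads every file that matches the shard prefix. A minimal standalone sketch of the same flow outside Gradio, assuming llama.cpp is built under `llama.cpp/`, a token is available in `HF_TOKEN`, and the file and repo names below are illustrative:

```python
import os
import subprocess

from huggingface_hub import HfApi

model_path = "mistral-7b-instruct-v0.2-q4_k_m.gguf"             # illustrative quantized model
repo_id = "your-username/Mistral-7B-Instruct-v0.2-Q4_K_M-GGUF"  # illustrative target repo

# Same command shape the Space builds: split into shards of at most 256 tensors.
prefix = model_path.split('.')[0]
split_cmd = f"llama.cpp/gguf-split --split --split-max-tensors 256 {model_path} {prefix}"
result = subprocess.run(split_cmd, shell=True, capture_output=True, text=True)
if result.returncode != 0:
    raise RuntimeError(f"Error splitting the model: {result.stderr}")

# gguf-split names shards like <prefix>-00001-of-00002.gguf, so a prefix match
# collects them all, mirroring the os.listdir() loop in the diff.
api = HfApi(token=os.environ["HF_TOKEN"])
for shard in sorted(f for f in os.listdir('.') if f.startswith(prefix)):
    api.upload_file(path_or_fileobj=shard, path_in_repo=shard, repo_id=repo_id)
```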
@@ -54,7 +94,9 @@ def process_model(model_id, q_method, private_repo, oauth_token: gr.OAuthToken | None):
         dl_pattern += pattern

         api.snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
-        print("Model downloaded …
+        print("Model downloaded successfully!")
+        print(f"Current working directory: {os.getcwd()}")
+        print(f"Model directory contents: {os.listdir(model_name)}")

         conversion_script = script_to_use(model_id, api)
         fp16_conversion = f"python llama.cpp/{conversion_script} {model_name} --outtype f16 --outfile {fp16}"
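The `allow_patterns` filter keeps the download limited to the files the conversion actually needs. An illustration of the same `snapshot_download` call with an example pattern list (the Space computes its own `dl_pattern`; the repo and patterns here are only for demonstration):

```python
from huggingface_hub import HfApi

api = HfApi()  # pass token=... for private or gated repos
api.snapshot_download(
    repo_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",           # illustrative model
    local_dir="TinyLlama-1.1B-Chat-v1.0",
    allow_patterns=["*.safetensors", "*.json", "*.model"],   # weights, config, tokenizer
)
```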
@@ -62,17 +104,21 @@ def process_model(model_id, q_method, private_repo, oauth_token: gr.OAuthToken | None):
         print(result)
         if result.returncode != 0:
             raise Exception(f"Error converting to fp16: {result.stderr}")
-        print("Model converted to fp16 …
+        print("Model converted to fp16 successfully!")
+        print(f"Converted model path: {fp16}")

-        …
-        …
+        username = whoami(oauth_token.token)["name"]
+        quantized_gguf_name = f"{model_name.lower()}-{q_method.lower()}.gguf"
+        quantized_gguf_path = quantized_gguf_name
+        quantise_ggml = f"./llama.cpp/quantize {fp16} {quantized_gguf_path} {q_method}"
         result = subprocess.run(quantise_ggml, shell=True, capture_output=True)
         if result.returncode != 0:
             raise Exception(f"Error quantizing: {result.stderr}")
-        print("…
+        print(f"Quantized successfully with {q_method} option!")
+        print(f"Quantized model path: {quantized_gguf_path}")

         # Create empty repo
-        new_repo_url = api.create_repo(repo_id=f"{model_name}-{q_method}-GGUF", exist_ok=True, private=private_repo)
+        new_repo_url = api.create_repo(repo_id=f"{username}/{model_name}-{q_method}-GGUF", exist_ok=True, private=private_repo)
         new_repo_id = new_repo_url.repo_id
         print("Repo created successfully!", new_repo_url)

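Prefixing the repo id with the caller's username (via `whoami`) is what makes `create_repo` land under the logged-in account rather than under the Space. A small sketch, assuming a valid token in `HF_TOKEN`; the model name and quant method are illustrative:

```python
import os

from huggingface_hub import HfApi, whoami

token = os.environ["HF_TOKEN"]
username = whoami(token)["name"]                               # e.g. "your-username"
model_name, q_method = "TinyLlama-1.1B-Chat-v1.0", "Q4_K_M"    # illustrative

api = HfApi(token=token)
# exist_ok=True makes the call idempotent; private=True would hide the repo.
new_repo_url = api.create_repo(
    repo_id=f"{username}/{model_name}-{q_method}-GGUF",
    exist_ok=True,
    private=False,
)
print(new_repo_url.repo_id)  # your-username/TinyLlama-1.1B-Chat-v1.0-Q4_K_M-GGUF
```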
@@ -90,50 +136,49 @@ def process_model(model_id, q_method, private_repo, oauth_token: gr.OAuthToken | None):
             This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp via the ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/ggml-org/gguf-my-repo) space.
             Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
             ## Use with llama.cpp
-
             Install llama.cpp through brew.
-
             ```bash
             brew install ggerganov/ggerganov/llama.cpp
             ```
             Invoke the llama.cpp server or the CLI.
-
             CLI:
-
             ```bash
-            llama-cli --hf-repo {new_repo_id} --model {…
+            llama-cli --hf-repo {new_repo_id} --model {quantized_gguf_name} -p "The meaning to life and the universe is"
             ```
-
             Server:
-
             ```bash
-            llama-server --hf-repo {new_repo_id} --model {…
+            llama-server --hf-repo {new_repo_id} --model {quantized_gguf_name} -c 2048
             ```
-
             Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
-
             ```
-            git clone https://github.com/ggerganov/llama.cpp && …
-            cd llama.cpp && …
-            make && …
-            ./main -m {…
+            git clone https://github.com/ggerganov/llama.cpp && \\
+            cd llama.cpp && \\
+            make && \\
+            ./main -m {quantized_gguf_name} -n 128
             ```
             """
         )
-        card.save(…
-        …
-        …
-        …
-        …
-        …
-        …
+        card.save(f"README.md")
+
+        if split_model:
+            split_upload_model(quantized_gguf_path, new_repo_id, oauth_token, split_max_tensors, split_max_size)
+        else:
+            try:
+                print(f"Uploading quantized model: {quantized_gguf_path}")
+                api.upload_file(
+                    path_or_fileobj=quantized_gguf_path,
+                    path_in_repo=quantized_gguf_name,
+                    repo_id=new_repo_id,
+                )
+            except Exception as e:
+                raise Exception(f"Error uploading quantized model: {e}")

         api.upload_file(
-            path_or_fileobj=f"…
-            path_in_repo="README.md",
+            path_or_fileobj=f"README.md",
+            path_in_repo=f"README.md",
             repo_id=new_repo_id,
         )
-        print("Uploaded successfully!")
+        print(f"Uploaded successfully with {q_method} option!")

         return (
             f'Find your repo <a href=\'{new_repo_url}\' target="_blank" style="text-decoration:underline">here</a>',
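When the model is uploaded as shards, downstream users should not normally need to recombine them by hand, but `gguf-split` can also merge shards back into a single file. A hypothetical sketch (not part of this change), assuming the tool is built under `llama.cpp/` and the shard name below is illustrative:

```python
import subprocess

first_shard = "model-q4_k_m-00001-of-00003.gguf"   # illustrative first shard
merged_out = "model-q4_k_m.gguf"

# gguf-split's merge mode reads the first shard and writes one combined GGUF.
subprocess.run(
    f"llama.cpp/gguf-split --merge {first_shard} {merged_out}",
    shell=True,
    check=True,
)
```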
@@ -147,38 +192,75 @@ def process_model(model_id, q_method, private_repo, oauth_token: gr.OAuthToken | None):


 # Create Gradio interface
-iface = gr.Interface(
-    fn=process_model,
-    inputs=[
-        HuggingfaceHubSearch(
-            label="Hub Model ID",
-            placeholder="Search for model id on Huggingface",
-            search_type="model",
-        ),
-        gr.Dropdown(
-            ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
-            label="Quantization Method",
-            info="GGML quantisation type",
-            value="Q4_K_M",
-            filterable=False
-        ),
-        gr.Checkbox(
-            value=False,
-            label="Private Repo",
-            info="Create a private repo under your username."
-        ),
-    ],
-    outputs=[
-        gr.Markdown(label="output"),
-        gr.Image(show_label=False),
-    ],
-    title="Create your own GGUF Quants, blazingly fast ⚡!",
-    description="The space takes an HF repo as an input, quantises it and creates a Public repo containing the selected quant under your HF user namespace.",
-)
 with gr.Blocks() as demo:
     gr.Markdown("You must be logged in to use GGUF-my-repo.")
     gr.LoginButton(min_width=250)
-
+
+    model_id_input = HuggingfaceHubSearch(
+        label="Hub Model ID",
+        placeholder="Search for model id on Huggingface",
+        search_type="model",
+    )
+
+    q_method_input = gr.Dropdown(
+        ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
+        label="Quantization Method",
+        info="GGML quantization type",
+        value="Q4_K_M",
+        filterable=False
+    )
+
+    private_repo_input = gr.Checkbox(
+        value=False,
+        label="Private Repo",
+        info="Create a private repo under your username."
+    )
+
+    split_model_input = gr.Checkbox(
+        value=False,
+        label="Split Model",
+        info="Shard the model using gguf-split."
+    )
+
+    split_max_tensors_input = gr.Number(
+        value=256,
+        label="Max Tensors per File",
+        info="Maximum number of tensors per file when splitting model.",
+        visible=False
+    )
+
+    split_max_size_input = gr.Textbox(
+        label="Max File Size",
+        info="Maximum file size when splitting model (--split-max-size). May leave empty to use the default.",
+        visible=False
+    )
+
+    iface = gr.Interface(
+        fn=process_model,
+        inputs=[
+            model_id_input,
+            q_method_input,
+            private_repo_input,
+            split_model_input,
+            split_max_tensors_input,
+            split_max_size_input,
+        ],
+        outputs=[
+            gr.Markdown(label="output"),
+            gr.Image(show_label=False),
+        ],
+        title="Create your own GGUF Quants, blazingly fast ⚡!",
+        description="The space takes an HF repo as an input, quantizes it and creates a Public repo containing the selected quant under your HF user namespace.",
+    )
+
+    def update_visibility(split_model):
+        return gr.update(visible=split_model), gr.update(visible=split_model)
+
+    split_model_input.change(
+        fn=update_visibility,
+        inputs=split_model_input,
+        outputs=[split_max_tensors_input, split_max_size_input]
+    )

 def restart_space():
     HfApi().restart_space(repo_id="ggml-org/gguf-my-repo", token=HF_TOKEN, factory_reboot=True)
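The `split_model_input.change(...)` wiring at the end follows the usual Gradio show/hide pattern: one `gr.update` per dependent component, returned in the same order as `outputs`. A minimal self-contained version of just that pattern:

```python
import gradio as gr

with gr.Blocks() as demo:
    split = gr.Checkbox(value=False, label="Split Model")
    max_tensors = gr.Number(value=256, label="Max Tensors per File", visible=False)
    max_size = gr.Textbox(label="Max File Size", visible=False)

    def update_visibility(split_model):
        # Return one update per output component, in the same order as `outputs`.
        return gr.update(visible=split_model), gr.update(visible=split_model)

    split.change(update_visibility, inputs=split, outputs=[max_tensors, max_size])

if __name__ == "__main__":
    demo.launch()
```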