Spaces:
Runtime error
Runtime error
a100 kh
committed on
Commit
·
ffb63fa
1
Parent(s):
22f931e
granite
Browse files
- api_endpoints.json +13 -0
- local/hf_upload.py +9 -0
- local/local_setup +10 -0
- local/nginx +4 -3
api_endpoints.json
CHANGED
|
@@ -245,5 +245,18 @@
|
|
| 245 |
},
|
| 246 |
"text-arena": true,
|
| 247 |
"vision-arena": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
}
|
| 249 |
}
|
|
|
|
| 245 |
},
|
| 246 |
"text-arena": true,
|
| 247 |
"vision-arena": false
|
| 248 |
+
},
|
| 249 |
+
"ibm-granite/granite-3.0-8b-instruct-fp8": {
|
| 250 |
+
"model_name": "ibm-granite/granite-3.0-8b-instruct",
|
| 251 |
+
"api_type": "openai-custom-tanuki",
|
| 252 |
+
"api_end": "granite-8B",
|
| 253 |
+
"env_api_key": "VLLM_API_KEY",
|
| 254 |
+
"anony_only": false,
|
| 255 |
+
"recommended_config": {
|
| 256 |
+
"temperature": 0.7,
|
| 257 |
+
"top_p": 1.0
|
| 258 |
+
},
|
| 259 |
+
"text-arena": true,
|
| 260 |
+
"vision-arena": false
|
| 261 |
}
|
| 262 |
}
|
local/hf_upload.py
CHANGED
|
@@ -21,6 +21,15 @@ data["full"]["elo_rating_final"]
|
|
| 21 |
|
| 22 |
df2 = data["full"]["leaderboard_table_df"]
|
| 23 |
df2 = df2.sort_values("rating", ascending=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
ds2 = datasets.Dataset.from_pandas(df2)
|
| 25 |
ds2.push_to_hub("kanhatakeyama/chatbot-arena-ja-elo-rating")
|
| 26 |
|
|
|
|
| 21 |
|
| 22 |
df2 = data["full"]["leaderboard_table_df"]
|
| 23 |
df2 = df2.sort_values("rating", ascending=False)
|
| 24 |
+
|
| 25 |
+
# index名がdrop_listに含まれる行を削除
|
| 26 |
+
drop_list = [
|
| 27 |
+
"weblab-GENIAC/Tanuki-8x8B-dpo-v1.0",
|
| 28 |
+
"Llama-3.1-Swallow-70B-v0.1.Q8_0.gguf",
|
| 29 |
+
]
|
| 30 |
+
|
| 31 |
+
df2 = df2.reset_index()
|
| 32 |
+
df2 = df2[~df2["index"].isin(drop_list)]
|
| 33 |
ds2 = datasets.Dataset.from_pandas(df2)
|
| 34 |
ds2.push_to_hub("kanhatakeyama/chatbot-arena-ja-elo-rating")
|
| 35 |
|
local/local_setup
CHANGED
|
@@ -39,6 +39,16 @@ python -m vllm.entrypoints.openai.api_server --model tokyotech-llm/Llama-3.1-Swa
|
|
| 39 |
export CUDA_VISIBLE_DEVICES=1
|
| 40 |
python -m vllm.entrypoints.openai.api_server --model team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-AWQ --max-model-len 4096 --port 8020 --gpu-memory-utilization 0.35 --trust-remote-code --quantization awq --api-key $VLLM_API_KEY
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
#########################
|
| 43 |
#launch ngrok
|
| 44 |
ngrok http http://localhost:8765
|
|
|
|
| 39 |
export CUDA_VISIBLE_DEVICES=1
|
| 40 |
python -m vllm.entrypoints.openai.api_server --model team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-AWQ --max-model-len 4096 --port 8020 --gpu-memory-utilization 0.35 --trust-remote-code --quantization awq --api-key $VLLM_API_KEY
|
| 41 |
|
| 42 |
+
###################
|
| 43 |
+
#server2
|
| 44 |
+
export CUDA_VISIBLE_DEVICES=0
|
| 45 |
+
python -m vllm.entrypoints.openai.api_server --model ibm-granite/granite-3.0-8b-instruct --max-model-len 4096 \
|
| 46 |
+
--port 8020 --gpu-memory-utilization 0.4 --trust-remote-code \
|
| 47 |
+
--quantization fp8 \
|
| 48 |
+
--api-key $VLLM_API_KEY
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
|
| 52 |
#########################
|
| 53 |
#launch ngrok
|
| 54 |
ngrok http http://localhost:8765
|
local/nginx
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
|
| 5 |
|
| 6 |
server {
|
|
@@ -11,5 +11,6 @@ location /llm-jp-13b/ {proxy_pass http://localhost:8016/v1/;}
|
|
| 11 |
location /Tanuki-8B-dpo/ {proxy_pass http://localhost:8012/v1/;}
|
| 12 |
location /calm3-22b-chat/ {proxy_pass http://localhost:8011/v1/;}
|
| 13 |
location /Swallow-8B/ {proxy_pass http://localhost:8010/v1/;}
|
|
|
|
| 14 |
}
|
| 15 |
|
|
|
|
| 1 |
+
sudo vi /etc/nginx/sites-available/default
|
| 2 |
+
sudo systemctl stop nginx
|
| 3 |
+
sudo systemctl restart nginx
|
| 4 |
|
| 5 |
|
| 6 |
server {
|
|
|
|
| 11 |
location /Tanuki-8B-dpo/ {proxy_pass http://localhost:8012/v1/;}
|
| 12 |
location /calm3-22b-chat/ {proxy_pass http://localhost:8011/v1/;}
|
| 13 |
location /Swallow-8B/ {proxy_pass http://localhost:8010/v1/;}
|
| 14 |
+
location /granite-8B/ {proxy_pass http://192.168.11.2:8020/v1/;}
|
| 15 |
}
|
| 16 |
|