burtenshaw
committed on
Commit
·
ec60e9a
1
Parent(s):
afaf730
use percentile boundaries in app
Browse files
- app.py +40 -25
- default.jpg +0 -0
- default.png +0 -0
- images/empty.png +0 -0
- images/space.png +0 -0
- percentiles.json +1 -0
- pyproject.toml +3 -0
- stats_dataset.ipynb +166 -0
app.py
CHANGED
|
@@ -3,13 +3,28 @@ import gradio as gr
|
|
| 3 |
from urllib.parse import urlencode
|
| 4 |
import os
|
| 5 |
from datetime import datetime
|
|
|
|
| 6 |
|
| 7 |
# Load environment variables
|
| 8 |
|
| 9 |
DEFAULT_IMAGE = "https://hub-recap.imglab-cdn.net/default.jpg?width=1200&text=%3Cspan+size%3D%2212pt%22+weight%3D%22bold%22%3EHugging+Face++%E2%9D%A4%EF%B8%8F+bartowski+in+2024%3C%2Fspan%3E%0A%0A%3Cspan+weight%3D%22bold%22%3E2%2C020%2C552%3C%2Fspan%3E+model+downloads%0A%3Cspan+weight%3D%22bold%22%3E5%2C407%3C%2Fspan%3E+model+likes%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+downloads%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+likes%0A%0A%3Cspan+size%3D%2210pt%22%3EMost+Popular+Contributions%3A%3C%2Fspan%3E%0AModel%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgemma-2-9b-it-GGUF%3C%2Fspan%3E%0A++%2843%2C949+downloads%2C+196+likes%29%0ADataset%3A+%3Cspan+weight%3D%22bold%22%3ENone%3C%2Fspan%3E%0A++%280+downloads%2C+0+likes%29%0ASpace%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgguf-metadata-updater%3C%2Fspan%3E%0A++%287+likes%29&text-width=800&text-height=600&text-padding=60&text-color=39%2C71%2C111&text-x=460&text-y=40&format=png&dpr=2"
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
def create_image(stats, username):
|
|
@@ -19,39 +34,39 @@ def create_image(stats, username):
|
|
| 19 |
dataset_activity = total_stats["Dataset Downloads"] + total_stats["Dataset Likes"]
|
| 20 |
space_activity = total_stats["Space Likes"]
|
| 21 |
|
| 22 |
-
# Calculate percentiles based on
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
)
|
| 27 |
-
dataset_percentile = round(
|
| 28 |
-
(top_items["Top Dataset"]["likes"] / MAX_DATASET_ACTIVITY) * 100, 2
|
| 29 |
-
)
|
| 30 |
-
space_percentile = round(
|
| 31 |
-
(top_items["Top Space"]["likes"] / MAX_SPACE_ACTIVITY) * 100, 2
|
| 32 |
-
)
|
| 33 |
|
| 34 |
-
# Choose base image URL based on highest activity
|
| 35 |
-
# check if no activity in any category
|
| 36 |
-
# if everything is 0, we show the empty image
|
| 37 |
if model_activity == 0 and dataset_activity == 0 and space_activity == 0:
|
| 38 |
url = "https://hub-recap.imglab-cdn.net/images/empty.png"
|
| 39 |
avatar = "new! We couldn't find your stats on the Hub, maybe in 2025?"
|
| 40 |
elif model_activity >= max(dataset_activity, space_activity):
|
| 41 |
-
url = "https://hub-recap.imglab-cdn.net/images/
|
| 42 |
-
avatar = f"Model Pro (
|
|
|
|
|
|
|
| 43 |
elif dataset_activity >= max(model_activity, space_activity):
|
| 44 |
-
url = "https://hub-recap.imglab-cdn.net/images/
|
| 45 |
-
avatar = f"Dataset Guru (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
else:
|
| 47 |
-
url = "https://hub-recap.imglab-cdn.net/images/
|
| 48 |
-
avatar =
|
| 49 |
|
| 50 |
# Build text content with proper formatting
|
| 51 |
text_parts = []
|
| 52 |
|
| 53 |
text_parts.append(
|
| 54 |
-
f'<span size="11pt" weight="bold">Hugging Face
|
| 55 |
)
|
| 56 |
text_parts.append("") # Empty line for spacing
|
| 57 |
|
|
@@ -117,7 +132,7 @@ def create_image(stats, username):
|
|
| 117 |
|
| 118 |
# Update the avatar message with percentile
|
| 119 |
text_parts.append("") # Empty line for spacing
|
| 120 |
-
text_parts.append(f'<span size="9pt">You are a {avatar}
|
| 121 |
|
| 122 |
# Add additional percentile info if other categories are significant
|
| 123 |
other_percentiles = []
|
|
|
|
| 3 |
from urllib.parse import urlencode
|
| 4 |
import os
|
| 5 |
from datetime import datetime
|
| 6 |
+
import json
|
| 7 |
|
| 8 |
# Load environment variables
|
| 9 |
|
| 10 |
DEFAULT_IMAGE = "https://hub-recap.imglab-cdn.net/default.jpg?width=1200&text=%3Cspan+size%3D%2212pt%22+weight%3D%22bold%22%3EHugging+Face++%E2%9D%A4%EF%B8%8F+bartowski+in+2024%3C%2Fspan%3E%0A%0A%3Cspan+weight%3D%22bold%22%3E2%2C020%2C552%3C%2Fspan%3E+model+downloads%0A%3Cspan+weight%3D%22bold%22%3E5%2C407%3C%2Fspan%3E+model+likes%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+downloads%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+likes%0A%0A%3Cspan+size%3D%2210pt%22%3EMost+Popular+Contributions%3A%3C%2Fspan%3E%0AModel%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgemma-2-9b-it-GGUF%3C%2Fspan%3E%0A++%2843%2C949+downloads%2C+196+likes%29%0ADataset%3A+%3Cspan+weight%3D%22bold%22%3ENone%3C%2Fspan%3E%0A++%280+downloads%2C+0+likes%29%0ASpace%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgguf-metadata-updater%3C%2Fspan%3E%0A++%287+likes%29&text-width=800&text-height=600&text-padding=60&text-color=39%2C71%2C111&text-x=460&text-y=40&format=png&dpr=2"
|
| 11 |
+
|
| 12 |
+
# Load percentiles data
|
| 13 |
+
with open("percentiles.json") as f:
|
| 14 |
+
PERCENTILES = json.load(f)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def get_percentile_rank(likes, category):
|
| 18 |
+
if likes == 0:
|
| 19 |
+
return 0
|
| 20 |
+
percentiles = PERCENTILES[f"{category}_percentiles"]
|
| 21 |
+
if likes >= percentiles["p_99999"]:
|
| 22 |
+
return 99.999
|
| 23 |
+
elif likes >= percentiles["p_9999"]:
|
| 24 |
+
return 99.99
|
| 25 |
+
elif likes >= percentiles["p_999"]:
|
| 26 |
+
return 99.9
|
| 27 |
+
return 0
|
| 28 |
|
| 29 |
|
| 30 |
def create_image(stats, username):
|
|
|
|
| 34 |
dataset_activity = total_stats["Dataset Downloads"] + total_stats["Dataset Likes"]
|
| 35 |
space_activity = total_stats["Space Likes"]
|
| 36 |
|
| 37 |
+
# Calculate percentiles based on likes
|
| 38 |
+
model_percentile = get_percentile_rank(total_stats["Model Likes"], "model")
|
| 39 |
+
dataset_percentile = get_percentile_rank(total_stats["Dataset Likes"], "dataset")
|
| 40 |
+
space_percentile = get_percentile_rank(space_activity, "space")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
+
# Choose base image URL based on highest activity (keep using activity for image selection)
|
|
|
|
|
|
|
| 43 |
if model_activity == 0 and dataset_activity == 0 and space_activity == 0:
|
| 44 |
url = "https://hub-recap.imglab-cdn.net/images/empty.png"
|
| 45 |
avatar = "new! We couldn't find your stats on the Hub, maybe in 2025?"
|
| 46 |
elif model_activity >= max(dataset_activity, space_activity):
|
| 47 |
+
url = "https://hub-recap.imglab-cdn.net/images/model.png"
|
| 48 |
+
avatar = f"Model Pro" + (
|
| 49 |
+
f" (top {model_percentile}%)" if model_percentile > 0 else ""
|
| 50 |
+
)
|
| 51 |
elif dataset_activity >= max(model_activity, space_activity):
|
| 52 |
+
url = "https://hub-recap.imglab-cdn.net/images/dataset.png"
|
| 53 |
+
avatar = f"Dataset Guru" + (
|
| 54 |
+
f" (top {dataset_percentile}%)" if dataset_percentile > 0 else ""
|
| 55 |
+
)
|
| 56 |
+
elif space_activity >= max(model_activity, dataset_activity):
|
| 57 |
+
url = "https://hub-recap.imglab-cdn.net/images/space.png"
|
| 58 |
+
avatar = f"Space Artiste" + (
|
| 59 |
+
f" (top {space_percentile}%)" if space_percentile > 0 else ""
|
| 60 |
+
)
|
| 61 |
else:
|
| 62 |
+
url = "https://hub-recap.imglab-cdn.net/images/empty.png"
|
| 63 |
+
avatar = "new! We couldn't find your stats on the Hub, maybe in 2025?"
|
| 64 |
|
| 65 |
# Build text content with proper formatting
|
| 66 |
text_parts = []
|
| 67 |
|
| 68 |
text_parts.append(
|
| 69 |
+
f'<span size="11pt" weight="bold">Hugging Face ❤️ {username} in 2024</span>'
|
| 70 |
)
|
| 71 |
text_parts.append("") # Empty line for spacing
|
| 72 |
|
|
|
|
| 132 |
|
| 133 |
# Update the avatar message with percentile
|
| 134 |
text_parts.append("") # Empty line for spacing
|
| 135 |
+
text_parts.append(f'<span size="9pt">You are a {avatar}!</span>')
|
| 136 |
|
| 137 |
# Add additional percentile info if other categories are significant
|
| 138 |
other_percentiles = []
|
default.jpg
DELETED
|
Binary file (247 kB)
|
|
|
default.png
ADDED
|
images/empty.png
CHANGED
|
|
images/space.png
CHANGED
|
|
percentiles.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"dataset_percentiles": {"p_99999": 1299, "p_9999": 491, "p_999": 125}, "model_percentiles": {"p_99999": 3698, "p_9999": 949, "p_999": 143}, "space_percentiles": {"p_99999": 6040, "p_9999": 1552, "p_999": 326}}
|
pyproject.toml
CHANGED
|
@@ -5,6 +5,9 @@ description = "Add your description here"
|
|
| 5 |
readme = "README.md"
|
| 6 |
requires-python = ">=3.11"
|
| 7 |
dependencies = [
|
|
|
|
| 8 |
"gradio>=5.9.1",
|
|
|
|
|
|
|
| 9 |
"requests>=2.32.3",
|
| 10 |
]
|
|
|
|
| 5 |
readme = "README.md"
|
| 6 |
requires-python = ">=3.11"
|
| 7 |
dependencies = [
|
| 8 |
+
"datasets>=3.2.0",
|
| 9 |
"gradio>=5.9.1",
|
| 10 |
+
"ipykernel>=6.29.5",
|
| 11 |
+
"pandas>=2.2.3",
|
| 12 |
"requests>=2.32.3",
|
| 13 |
]
|
stats_dataset.ipynb
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 2,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"name": "stderr",
|
| 10 |
+
"output_type": "stream",
|
| 11 |
+
"text": [
|
| 12 |
+
"/Users/ben/code/hub-recap/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 13 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
| 14 |
+
]
|
| 15 |
+
}
|
| 16 |
+
],
|
| 17 |
+
"source": [
|
| 18 |
+
"from datasets import load_dataset\n",
|
| 19 |
+
"\n",
|
| 20 |
+
"ds = load_dataset(\"cfahlgren1/hub-stats\", \"datasets\")\n",
|
| 21 |
+
"ds_df = ds[\"train\"].to_pandas()"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"cell_type": "code",
|
| 26 |
+
"execution_count": 3,
|
| 27 |
+
"metadata": {},
|
| 28 |
+
"outputs": [],
|
| 29 |
+
"source": [
|
| 30 |
+
"ds = load_dataset(\"cfahlgren1/hub-stats\", \"models\")\n",
|
| 31 |
+
"md_df = ds[\"train\"].to_pandas()"
|
| 32 |
+
]
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"cell_type": "code",
|
| 36 |
+
"execution_count": 30,
|
| 37 |
+
"metadata": {},
|
| 38 |
+
"outputs": [
|
| 39 |
+
{
|
| 40 |
+
"name": "stderr",
|
| 41 |
+
"output_type": "stream",
|
| 42 |
+
"text": [
|
| 43 |
+
"Generating train split: 100%|██████████| 309714/309714 [00:00<00:00, 353713.86 examples/s]\n"
|
| 44 |
+
]
|
| 45 |
+
}
|
| 46 |
+
],
|
| 47 |
+
"source": [
|
| 48 |
+
"ds = load_dataset(\"cfahlgren1/hub-stats\", \"spaces\")\n",
|
| 49 |
+
"sp_df = ds[\"train\"].to_pandas()"
|
| 50 |
+
]
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"cell_type": "code",
|
| 54 |
+
"execution_count": 40,
|
| 55 |
+
"metadata": {},
|
| 56 |
+
"outputs": [
|
| 57 |
+
{
|
| 58 |
+
"name": "stdout",
|
| 59 |
+
"output_type": "stream",
|
| 60 |
+
"text": [
|
| 61 |
+
"{'p_99999': 1299, 'p_9999': 491, 'p_999': 125}\n"
|
| 62 |
+
]
|
| 63 |
+
}
|
| 64 |
+
],
|
| 65 |
+
"source": [
|
| 66 |
+
"dataset_percentiles = {\n",
|
| 67 |
+
" \"p_99999\": int(ds_df[\"likes\"].quantile(0.99999)),\n",
|
| 68 |
+
" \"p_9999\": int(ds_df[\"likes\"].quantile(0.9999)),\n",
|
| 69 |
+
" \"p_999\": int(ds_df[\"likes\"].quantile(0.999)),\n",
|
| 70 |
+
"}\n",
|
| 71 |
+
"print(dataset_percentiles)"
|
| 72 |
+
]
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"cell_type": "code",
|
| 76 |
+
"execution_count": 41,
|
| 77 |
+
"metadata": {},
|
| 78 |
+
"outputs": [
|
| 79 |
+
{
|
| 80 |
+
"name": "stdout",
|
| 81 |
+
"output_type": "stream",
|
| 82 |
+
"text": [
|
| 83 |
+
"{'p_99999': 3698, 'p_9999': 949, 'p_999': 143}\n"
|
| 84 |
+
]
|
| 85 |
+
}
|
| 86 |
+
],
|
| 87 |
+
"source": [
|
| 88 |
+
"model_percentiles = {\n",
|
| 89 |
+
" \"p_99999\": int(md_df[\"likes\"].quantile(0.99999)),\n",
|
| 90 |
+
" \"p_9999\": int(md_df[\"likes\"].quantile(0.9999)),\n",
|
| 91 |
+
" \"p_999\": int(md_df[\"likes\"].quantile(0.999)),\n",
|
| 92 |
+
"}\n",
|
| 93 |
+
"print(model_percentiles)"
|
| 94 |
+
]
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"cell_type": "code",
|
| 98 |
+
"execution_count": 42,
|
| 99 |
+
"metadata": {},
|
| 100 |
+
"outputs": [
|
| 101 |
+
{
|
| 102 |
+
"name": "stdout",
|
| 103 |
+
"output_type": "stream",
|
| 104 |
+
"text": [
|
| 105 |
+
"{'p_99999': 6040, 'p_9999': 1552, 'p_999': 326}\n"
|
| 106 |
+
]
|
| 107 |
+
}
|
| 108 |
+
],
|
| 109 |
+
"source": [
|
| 110 |
+
"space_percentiles = {\n",
|
| 111 |
+
" \"p_99999\": int(sp_df[\"likes\"].quantile(0.99999)),\n",
|
| 112 |
+
" \"p_9999\": int(sp_df[\"likes\"].quantile(0.9999)),\n",
|
| 113 |
+
" \"p_999\": int(sp_df[\"likes\"].quantile(0.999)),\n",
|
| 114 |
+
"}\n",
|
| 115 |
+
"print(space_percentiles)"
|
| 116 |
+
]
|
| 117 |
+
},
|
| 118 |
+
{
|
| 119 |
+
"cell_type": "code",
|
| 120 |
+
"execution_count": 43,
|
| 121 |
+
"metadata": {},
|
| 122 |
+
"outputs": [],
|
| 123 |
+
"source": [
|
| 124 |
+
"import json\n",
|
| 125 |
+
"\n",
|
| 126 |
+
"with open(\"percentiles.json\", \"w\") as f:\n",
|
| 127 |
+
" json.dump(\n",
|
| 128 |
+
" {\n",
|
| 129 |
+
" \"dataset_percentiles\": dataset_percentiles,\n",
|
| 130 |
+
" \"model_percentiles\": model_percentiles,\n",
|
| 131 |
+
" \"space_percentiles\": space_percentiles,\n",
|
| 132 |
+
" },\n",
|
| 133 |
+
" f,\n",
|
| 134 |
+
" )"
|
| 135 |
+
]
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"cell_type": "code",
|
| 139 |
+
"execution_count": null,
|
| 140 |
+
"metadata": {},
|
| 141 |
+
"outputs": [],
|
| 142 |
+
"source": []
|
| 143 |
+
}
|
| 144 |
+
],
|
| 145 |
+
"metadata": {
|
| 146 |
+
"kernelspec": {
|
| 147 |
+
"display_name": ".venv",
|
| 148 |
+
"language": "python",
|
| 149 |
+
"name": "python3"
|
| 150 |
+
},
|
| 151 |
+
"language_info": {
|
| 152 |
+
"codemirror_mode": {
|
| 153 |
+
"name": "ipython",
|
| 154 |
+
"version": 3
|
| 155 |
+
},
|
| 156 |
+
"file_extension": ".py",
|
| 157 |
+
"mimetype": "text/x-python",
|
| 158 |
+
"name": "python",
|
| 159 |
+
"nbconvert_exporter": "python",
|
| 160 |
+
"pygments_lexer": "ipython3",
|
| 161 |
+
"version": "3.11.10"
|
| 162 |
+
}
|
| 163 |
+
},
|
| 164 |
+
"nbformat": 4,
|
| 165 |
+
"nbformat_minor": 2
|
| 166 |
+
}
|