diff --git a/activation/impls/artifacts/benchmark/activation.jsonl b/activation/impls/artifacts/benchmark/activation.jsonl index e014dc32b9e6c116e3ee3407f5e64ef8451eee67..86f39aa830b4b5fba6d875e0c328a472550a56cb 100644 --- a/activation/impls/artifacts/benchmark/activation.jsonl +++ b/activation/impls/artifacts/benchmark/activation.jsonl @@ -1,9 +1,9 @@ -{"ts": "2025-10-31T20:01:08Z", "run": "c3623842075144ab92176d6468514bae", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T128_D768", "num_tokens": 128, "hidden_dim": 768, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.024230000008174102, "p50": 0.024741000004269154, "p90": 0.025410999967334646, "mean": 0.024872599999525846, "iqr": 0.0011599999538702832, "raw_times": [0.024251000013464363, 0.025730000004386966, 0.024230000008174102, 0.025410999967334646, 0.024741000004269154], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.03134100001034312, "peak_bytes": 1966080, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:08Z", "run": "c3623842075144ab92176d6468514bae", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T128_D1024", "num_tokens": 128, "hidden_dim": 1024, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.026611000009779673, "p50": 0.029731000040555955, "p90": 0.03027100001418148, "mean": 0.029349000021738902, "iqr": 0.0009999999974752427, "raw_times": [0.026611000009779673, 
0.029731000040555955, 0.030861000027471164, 0.03027100001418148, 0.02927100001670624], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.034871000025304966, "peak_bytes": 2621440, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:08Z", "run": "c3623842075144ab92176d6468514bae", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T128_D2048", "num_tokens": 128, "hidden_dim": 2048, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.027259999967554904, "p50": 0.02879100003383428, "p90": 0.030951000042023225, "mean": 0.029224800016436348, "iqr": 0.0029600000175378227, "raw_times": [0.027991000024485402, 0.031131000014283927, 0.02879100003383428, 0.030951000042023225, 0.027259999967554904], "has_warnings": true, "reps": 5, "warmup": 2}, "compile_ms": 0.0323909999906391, "peak_bytes": 5242880, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:08Z", "run": "c3623842075144ab92176d6468514bae", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T256_D768", "num_tokens": 256, "hidden_dim": 768, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.025391000008312403, "p50": 0.02888100004838634, "p90": 0.029160999986288516, "mean": 0.028055000007043418, "iqr": 0.001839999981712026, "raw_times": [0.025391000008312403, 0.02888100004838634, 
0.02952099998765334, 0.029160999986288516, 0.02732100000457649], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.031509999985246395, "peak_bytes": 3932160, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:08Z", "run": "c3623842075144ab92176d6468514bae", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T256_D1024", "num_tokens": 256, "hidden_dim": 1024, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.026630000036220736, "p50": 0.027450000004591857, "p90": 0.027921000025799003, "mean": 0.02735460001304091, "iqr": 0.0010800000040944724, "raw_times": [0.026630000036220736, 0.027450000004591857, 0.02684100002170453, 0.027921000025799003, 0.027930999976888415], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.03172099997073019, "peak_bytes": 5242880, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:08Z", "run": "c3623842075144ab92176d6468514bae", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T256_D2048", "num_tokens": 256, "hidden_dim": 2048, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.025049999976545223, "p50": 0.02733100001250932, "p90": 0.028329999963716546, "mean": 0.02741439998317219, "iqr": 0.0016189999882953998, "raw_times": [0.025049999976545223, 0.029649999987668707, 0.028329999963716546, 
0.02733100001250932, 0.026710999975421146], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.029950999987704563, "peak_bytes": 10485760, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:08Z", "run": "c3623842075144ab92176d6468514bae", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T512_D768", "num_tokens": 512, "hidden_dim": 768, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.028341000017917395, "p50": 0.02927099995986282, "p90": 0.029501000028631097, "mean": 0.02909080000108588, "iqr": 0.0009110000291912002, "raw_times": [0.028341000017917395, 0.02927099995986282, 0.029501000028631097, 0.029750999999578198, 0.028589999999439897], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.03009099998507736, "peak_bytes": 7864320, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:08Z", "run": "c3623842075144ab92176d6468514bae", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T512_D1024", "num_tokens": 512, "hidden_dim": 1024, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.024770999971224228, "p50": 0.02814099997294761, "p90": 0.028720999978304462, "mean": 0.0278467999919485, "iqr": 0.0007409999511764909, "raw_times": [0.024770999971224228, 0.02798000002712797, 0.028720999978304462, 0.02814099997294761, 
0.029621000010138232], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.031990999957542954, "peak_bytes": 10485760, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:08Z", "run": "c3623842075144ab92176d6468514bae", "impl": "hf_kernels_swiglu", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_T512_D2048", "num_tokens": 512, "hidden_dim": 2048, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.027751000004627713, "p50": 0.028230999987499672, "p90": 0.029471000004832604, "mean": 0.028608800005258672, "iqr": 0.0016500000015184924, "raw_times": [0.028230999987499672, 0.027751000004627713, 0.02782100000331411, 0.02977000002601926, 0.029471000004832604], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.030850999962694914, "peak_bytes": 20971520, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} +{"ts": "2025-11-10T22:11:36Z", "run": "b81f5729b90144f29ef4b2b3f014bb6b", "impl": "torch_eager", "tags": {"family": "hf-kernels", "backend": "eager"}, "wl": {"name": "cuda_T128_D768", "num_tokens": 128, "hidden_dim": 768, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.04274000002624234, "p50": 0.043191000031583826, "p90": 0.04467100006877445, "mean": 0.04373860001578578, "iqr": 0.0017300001218245598, "raw_times": [0.04467100006877445, 0.04515000000537839, 0.043191000031583826, 0.04274000002624234, 0.04294099994694989], 
"has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.04910000006930204, "peak_bytes": 1966080, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} +{"ts": "2025-11-10T22:11:36Z", "run": "b81f5729b90144f29ef4b2b3f014bb6b", "impl": "torch_eager", "tags": {"family": "hf-kernels", "backend": "eager"}, "wl": {"name": "cuda_T128_D1024", "num_tokens": 128, "hidden_dim": 1024, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.048549999974056846, "p50": 0.049830999842015444, "p90": 0.05033100001128332, "mean": 0.04977279995728168, "iqr": 0.0006400000529538374, "raw_times": [0.048549999974056846, 0.049690999958329485, 0.05033100001128332, 0.05046100000072329, 0.049830999842015444], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.05312100006449327, "peak_bytes": 2621440, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} +{"ts": "2025-11-10T22:11:36Z", "run": "b81f5729b90144f29ef4b2b3f014bb6b", "impl": "torch_eager", "tags": {"family": "hf-kernels", "backend": "eager"}, "wl": {"name": "cuda_T128_D2048", "num_tokens": 128, "hidden_dim": 2048, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.049481000132800546, "p50": 0.04955999997946492, "p90": 0.04985100008525478, "mean": 0.049792600020737154, "iqr": 0.000360000058208243, "raw_times": [0.04955999997946492, 0.050579999879118986, 0.049481000132800546, 0.04985100008525478, 0.04949100002704654], "has_warnings": false, "reps": 5, 
"warmup": 2}, "compile_ms": 0.052620999895225395, "peak_bytes": 5242880, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} +{"ts": "2025-11-10T22:11:36Z", "run": "b81f5729b90144f29ef4b2b3f014bb6b", "impl": "torch_eager", "tags": {"family": "hf-kernels", "backend": "eager"}, "wl": {"name": "cuda_T256_D768", "num_tokens": 256, "hidden_dim": 768, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.04747100001623039, "p50": 0.049561000196263194, "p90": 0.04995100016458309, "mean": 0.04936700006510364, "iqr": 0.0008900001375877764, "raw_times": [0.04747100001623039, 0.049561000196263194, 0.0507909999214462, 0.049061000026995316, 0.04995100016458309], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0509510000483715, "peak_bytes": 3932160, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} +{"ts": "2025-11-10T22:11:36Z", "run": "b81f5729b90144f29ef4b2b3f014bb6b", "impl": "torch_eager", "tags": {"family": "hf-kernels", "backend": "eager"}, "wl": {"name": "cuda_T256_D1024", "num_tokens": 256, "hidden_dim": 1024, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.04721999994217185, "p50": 0.04802100011147559, "p90": 0.048511000159123796, "mean": 0.0482608000311302, "iqr": 0.0008600002274761209, "raw_times": [0.04802100011147559, 0.04721999994217185, 0.048511000159123796, 0.0499010000112321, 0.047650999931647675], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 
0.051911000127802254, "peak_bytes": 5242880, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} +{"ts": "2025-11-10T22:11:37Z", "run": "b81f5729b90144f29ef4b2b3f014bb6b", "impl": "torch_eager", "tags": {"family": "hf-kernels", "backend": "eager"}, "wl": {"name": "cuda_T256_D2048", "num_tokens": 256, "hidden_dim": 2048, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.04553000007945229, "p50": 0.047661000053267344, "p90": 0.04845100011152681, "mean": 0.049852800020744326, "iqr": 0.0010610001481836662, "raw_times": [0.04553000007945229, 0.04738999996334314, 0.047661000053267344, 0.04845100011152681, 0.06023199989613204], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.04891099979431601, "peak_bytes": 10485760, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} +{"ts": "2025-11-10T22:11:37Z", "run": "b81f5729b90144f29ef4b2b3f014bb6b", "impl": "torch_eager", "tags": {"family": "hf-kernels", "backend": "eager"}, "wl": {"name": "cuda_T512_D768", "num_tokens": 512, "hidden_dim": 768, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.04606099992088275, "p50": 0.04722100015897013, "p90": 0.047730999995110324, "mean": 0.04745279998132901, "iqr": 0.0006210000265127746, "raw_times": [0.04606099992088275, 0.04914099986308429, 0.04722100015897013, 0.047730999995110324, 0.04710999996859755], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.05060099988440925, "peak_bytes": 
7864320, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} +{"ts": "2025-11-10T22:11:37Z", "run": "b81f5729b90144f29ef4b2b3f014bb6b", "impl": "torch_eager", "tags": {"family": "hf-kernels", "backend": "eager"}, "wl": {"name": "cuda_T512_D1024", "num_tokens": 512, "hidden_dim": 1024, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.047480999910476385, "p50": 0.04807099981007923, "p90": 0.04905100013274932, "mean": 0.049742999999580206, "iqr": 0.0014700001429446274, "raw_times": [0.047480999910476385, 0.047580999989804695, 0.04905100013274932, 0.0565310001547914, 0.04807099981007923], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.04896100017504068, "peak_bytes": 10485760, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} +{"ts": "2025-11-10T22:11:37Z", "run": "b81f5729b90144f29ef4b2b3f014bb6b", "impl": "torch_eager", "tags": {"family": "hf-kernels", "backend": "eager"}, "wl": {"name": "cuda_T512_D2048", "num_tokens": 512, "hidden_dim": 2048, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.046829999973851955, "p50": 0.04784099996868463, "p90": 0.0479610000638786, "mean": 0.047636799990868894, "iqr": 0.001030000021273736, "raw_times": [0.046829999973851955, 0.048620999905324425, 0.046931000042604865, 0.0479610000638786, 0.04784099996868463], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.05104100000608014, "peak_bytes": 20971520, "ok": true, "absmax": 0.0, 
"corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "swiglu_fp32"}, "err": null} diff --git a/activation/impls/cells/benchmark.py b/activation/impls/cells/benchmark.py index 04f9df27c14acf429b58dba6cf0677c00cbbbced..711af9e01652ef5081b507affd0f7df9ac99e644 100644 --- a/activation/impls/cells/benchmark.py +++ b/activation/impls/cells/benchmark.py @@ -4,7 +4,6 @@ # "numpy", # "torch==2.8.0", # "kernels-benchmark-tools", -# "kernels", # ] # # [tool.uv.sources] @@ -13,22 +12,17 @@ import torch import sys from kernels_benchmark_tools import KernelTypeEnum, run_benchmark -from kernels import get_kernel +import torch, torch.nn.functional as F -# Load the activation kernel -activation = get_kernel("kernels-community/activation") - -def hf_kernels_swiglu(input_tensor): - hidden_dim = input_tensor.shape[-1] // 2 - out_shape = input_tensor.shape[:-1] + (hidden_dim,) - out = torch.empty(out_shape, dtype=input_tensor.dtype, device=input_tensor.device) - return activation.silu_and_mul(out, input_tensor) +def swiglu_eager(x): + d = x.shape[-1] // 2 + return F.silu(x[..., :d]) * x[..., d:] run_benchmark( kernel_type=KernelTypeEnum.ACTIVATION, - impl_name="hf_kernels_swiglu", - impl_tags={"family": "hf-kernels", "backend": "cuda"}, - impl_func=hf_kernels_swiglu, + impl_name="torch_eager", + impl_tags={"family":"hf-kernels", "backend":"eager"}, + impl_func=swiglu_eager, ) \ No newline at end of file diff --git a/activation/impls/hf_kernels_swiglu.html b/activation/impls/hf_kernels_swiglu.html index 812f027418b96fc5dd3cda564134f577079c3349..e201f1058a1af2a3535b3e87d3ed46a2fef7c2dc 100644 --- a/activation/impls/hf_kernels_swiglu.html +++ b/activation/impls/hf_kernels_swiglu.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href 
= targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, 
:root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } 
:root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - 
+:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: 
var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] 
.tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ 
-707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] 
.cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 
100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ 
.artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - 
.copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 
1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; 
background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight 
.gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ 
+[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { 
color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ 
+[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* 
Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ 
+[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { 
+body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: 
none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4106,7 +3888,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: nv | 0.26s +Cell: nv | 0.22s | Raw @@ -4123,16 +3905,16 @@ Cell: nv | 0.26s
-
Fri Oct 31 20:00:17 2025       
+
Mon Nov 10 21:58:08 2025       
 +-----------------------------------------------------------------------------------------+
-| NVIDIA-SMI 570.195.03             Driver Version: 570.195.03     CUDA Version: 12.8     |
-|-----------------------------------------+------------------------+----------------------+
+| NVIDIA-SMI 580.95.05              Driver Version: 580.95.05      CUDA Version: 13.0     |
++-----------------------------------------+------------------------+----------------------+
 | GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
 | Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
 |                                         |                        |               MIG M. |
 |=========================================+========================+======================|
 |   0  NVIDIA L40S                    On  |   00000000:4D:00.0 Off |                    0 |
-| N/A   33C    P0            108W /  350W |       0MiB /  46068MiB |     88%      Default |
+| N/A   28C    P0             78W /  350W |       0MiB /  46068MiB |     11%      Default |
 |                                         |                        |                  N/A |
 +-----------------------------------------+------------------------+----------------------+
 
@@ -4156,7 +3938,7 @@ Cell: nv | 0.26s
 ▼ output
  ▶ uv-logs
  | 
-Cell: benchmark | 4.19s
+Cell: benchmark | 8.29s
  | 
 
 Raw
@@ -4213,17 +3995,17 @@ PROFILE TRACE: hf_kernels_swiglu | cuda_T128_D768
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us     105.055us      2585.65%     105.055us     105.055us             1  
-                                      hf_kernels_swiglu        11.41%     202.714us        99.64%       1.770ms       1.770ms       0.000us         0.00%       5.471us       5.471us             1  
-                      _activation_beeaae6::silu_and_mul         1.18%      21.050us        84.47%       1.501ms     500.190us       4.063us       100.00%       5.471us       1.824us             3  
-void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us       4.063us       100.00%       4.063us       1.354us             3  
-                                Activity Buffer Request        80.70%       1.434ms        80.70%       1.434ms       1.434ms       1.408us        34.65%       1.408us       1.408us             1  
-                                            aten::empty         3.76%      66.772us         3.76%      66.772us      22.257us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel         2.58%      45.872us         2.58%      45.872us      15.291us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.36%       6.420us         0.36%       6.420us       6.420us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      81.151us      1892.96%      81.151us      81.151us             1  
+                                      hf_kernels_swiglu         8.90%     185.545us        99.31%       2.071ms       2.071ms       0.000us         0.00%       5.727us       5.727us             1  
+                      _activation_beeaae6::silu_and_mul         0.90%      18.858us        88.30%       1.842ms     613.846us       4.287us       100.00%       5.727us       1.909us             3  
+void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us       4.287us       100.00%       4.287us       1.429us             3  
+                                Activity Buffer Request        85.28%       1.779ms        85.28%       1.779ms       1.779ms       1.440us        33.59%       1.440us       1.440us             1  
+                                            aten::empty         2.11%      44.080us         2.11%      44.080us      14.693us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         2.11%      44.091us         2.11%      44.091us      14.697us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.69%      14.370us         0.69%      14.370us      14.370us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.776ms
-Self CUDA time total: 4.063us
+Self CPU time total: 2.086ms
+Self CUDA time total: 4.287us
 
 
 
@@ -4233,17 +4015,17 @@ PROFILE TRACE: hf_kernels_swiglu | cuda_T128_D1024
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      61.119us      1540.69%      61.119us      61.119us             1  
-                                      hf_kernels_swiglu         6.50%     104.811us        99.67%       1.607ms       1.607ms       0.000us         0.00%       5.279us       5.279us             1  
-                      _activation_beeaae6::silu_and_mul         1.26%      20.331us        91.95%       1.482ms     494.073us       3.967us       100.00%       5.279us       1.760us             3  
-void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us       3.967us       100.00%       3.967us       1.322us             3  
-                                Activity Buffer Request        89.13%       1.437ms        89.13%       1.437ms       1.437ms       1.312us        33.07%       1.312us       1.312us             1  
-                                            aten::empty         1.22%      19.632us         1.22%      19.632us       6.544us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel         1.56%      25.120us         1.56%      25.120us       8.373us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.33%       5.360us         0.33%       5.360us       5.360us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      65.344us      1660.16%      65.344us      65.344us             1  
+                                      hf_kernels_swiglu         4.80%      90.161us        99.69%       1.871ms       1.871ms       0.000us         0.00%       5.280us       5.280us             1  
+                      _activation_beeaae6::silu_and_mul         1.05%      19.620us        93.88%       1.762ms     587.343us       3.936us       100.00%       5.280us       1.760us             3  
+void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us       3.936us       100.00%       3.936us       1.312us             3  
+                                Activity Buffer Request        91.30%       1.714ms        91.30%       1.714ms       1.714ms       1.344us        34.15%       1.344us       1.344us             1  
+                                            aten::empty         1.01%      18.871us         1.01%      18.871us       6.290us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         1.53%      28.801us         1.53%      28.801us       9.600us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.31%       5.880us         0.31%       5.880us       5.880us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.612ms
-Self CUDA time total: 3.967us
+Self CPU time total: 1.877ms
+Self CUDA time total: 3.936us
 
 
 
@@ -4253,17 +4035,17 @@ PROFILE TRACE: hf_kernels_swiglu | cuda_T128_D2048
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      63.488us      1288.31%      63.488us      63.488us             1  
-                                      hf_kernels_swiglu         6.89%     111.363us        99.67%       1.611ms       1.611ms       0.000us         0.00%       6.592us       6.592us             1  
-                      _activation_beeaae6::silu_and_mul         1.36%      22.028us        91.47%       1.479ms     492.912us       4.928us       100.00%       6.592us       2.197us             3  
-void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us       4.928us       100.00%       4.928us       1.643us             3  
-                                Activity Buffer Request        88.52%       1.431ms        88.52%       1.431ms       1.431ms       1.664us        33.77%       1.664us       1.664us             1  
-                                            aten::empty         1.30%      21.081us         1.30%      21.081us       7.027us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel         1.59%      25.652us         1.59%      25.652us       8.551us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.33%       5.390us         0.33%       5.390us       5.390us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      67.967us      1388.21%      67.967us      67.967us             1  
+                                      hf_kernels_swiglu         4.59%      88.711us        99.72%       1.927ms       1.927ms       0.000us         0.00%       6.560us       6.560us             1  
+                      _activation_beeaae6::silu_and_mul         0.94%      18.080us        94.11%       1.819ms     606.193us       4.896us       100.00%       6.560us       2.187us             3  
+void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us       4.896us       100.00%       4.896us       1.632us             3  
+                                Activity Buffer Request        91.80%       1.774ms        91.80%       1.774ms       1.774ms       1.664us        33.99%       1.664us       1.664us             1  
+                                            aten::empty         1.02%      19.730us         1.02%      19.730us       6.577us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         1.37%      26.441us         1.37%      26.441us       8.814us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.28%       5.470us         0.28%       5.470us       5.470us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.617ms
-Self CUDA time total: 4.928us
+Self CPU time total: 1.932ms
+Self CUDA time total: 4.896us
 
 
 
@@ -4273,17 +4055,17 @@ PROFILE TRACE: hf_kernels_swiglu | cuda_T256_D768
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      68.000us      1585.82%      68.000us      68.000us             1  
-                                      hf_kernels_swiglu         5.97%     106.915us        99.70%       1.784ms       1.784ms       0.000us         0.00%       5.760us       5.760us             1  
-                      _activation_beeaae6::silu_and_mul         1.16%      20.770us        92.62%       1.658ms     552.564us       4.288us       100.00%       5.760us       1.920us             3  
-void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us       4.288us       100.00%       4.288us       1.429us             3  
-                                Activity Buffer Request        80.58%       1.442ms        80.58%       1.442ms       1.442ms       1.472us        34.33%       1.472us       1.472us             1  
-                                            aten::empty         1.10%      19.770us         1.10%      19.770us       6.590us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        10.88%     194.785us        10.88%     194.785us      64.928us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.30%       5.350us         0.30%       5.350us       5.350us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      68.448us      1584.08%      68.448us      68.448us             1  
+                                      hf_kernels_swiglu         4.10%      87.981us        99.77%       2.141ms       2.141ms       0.000us         0.00%       5.794us       5.794us             1  
+                      _activation_beeaae6::silu_and_mul         0.89%      19.190us        94.80%       2.034ms     678.097us       4.321us       100.00%       5.794us       1.931us             3  
+void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us       4.321us       100.00%       4.321us       1.440us             3  
+                                Activity Buffer Request        83.35%       1.789ms        83.35%       1.789ms       1.789ms       1.473us        34.09%       1.473us       1.473us             1  
+                                            aten::empty         0.87%      18.670us         0.87%      18.670us       6.223us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel        10.55%     226.443us        10.55%     226.443us      75.481us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.23%       4.930us         0.23%       4.930us       4.930us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.790ms
-Self CUDA time total: 4.288us
+Self CPU time total: 2.146ms
+Self CUDA time total: 4.321us
 
 
 
@@ -4293,17 +4075,17 @@ PROFILE TRACE: hf_kernels_swiglu | cuda_T256_D1024
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      65.599us      1108.28%      65.599us      65.599us             1  
-                                      hf_kernels_swiglu        18.75%      89.073us        98.88%     469.813us     469.813us       0.000us         0.00%       7.903us       7.903us             1  
-                      _activation_beeaae6::silu_and_mul         4.69%      22.280us        76.20%     362.069us     120.690us       5.919us       100.00%       7.903us       2.634us             3  
-void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us       5.919us       100.00%       5.919us       1.973us             3  
-                                Activity Buffer Request        38.23%     181.645us        38.23%     181.645us     181.645us       1.984us        33.52%       1.984us       1.984us             1  
-                                            aten::empty         3.93%      18.671us         3.93%      18.671us       6.224us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        33.28%     158.144us        33.28%     158.144us      52.715us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         1.12%       5.330us         1.12%       5.330us       5.330us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      66.816us      1128.46%      66.816us      66.816us             1  
+                                      hf_kernels_swiglu         4.29%      87.791us        99.73%       2.043ms       2.043ms       0.000us         0.00%       7.906us       7.906us             1  
+                      _activation_beeaae6::silu_and_mul         1.03%      21.101us        94.53%       1.936ms     645.491us       5.921us       100.00%       7.906us       2.635us             3  
+void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us       5.921us       100.00%       5.921us       1.974us             3  
+                                Activity Buffer Request        84.88%       1.739ms        84.88%       1.739ms       1.739ms       1.985us        33.52%       1.985us       1.985us             1  
+                                            aten::empty         0.92%      18.779us         0.92%      18.779us       6.260us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         8.62%     176.604us         8.62%     176.604us      58.868us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.27%       5.500us         0.27%       5.500us       5.500us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 475.143us
-Self CUDA time total: 5.919us
+Self CPU time total: 2.049ms
+Self CUDA time total: 5.921us
 
 
 
@@ -4313,17 +4095,17 @@ PROFILE TRACE: hf_kernels_swiglu | cuda_T256_D2048
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      70.207us       906.60%      70.207us      70.207us             1  
-                                      hf_kernels_swiglu         6.12%     106.261us        99.74%       1.733ms       1.733ms       0.000us         0.00%      10.336us      10.336us             1  
-                      _activation_beeaae6::silu_and_mul         1.25%      21.782us        92.41%       1.606ms     535.254us       7.744us       100.00%      10.336us       3.445us             3  
-void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us       7.744us       100.00%       7.744us       2.581us             3  
-                                Activity Buffer Request        82.36%       1.431ms        82.36%       1.431ms       1.431ms       2.592us        33.47%       2.592us       2.592us             1  
-                                            aten::empty         1.21%      21.081us         1.21%      21.081us       7.027us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel         8.80%     152.893us         8.80%     152.893us      50.964us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.26%       4.511us         0.26%       4.511us       4.511us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      63.807us       824.06%      63.807us      63.807us             1  
+                                      hf_kernels_swiglu        17.99%      83.441us        98.85%     458.487us     458.487us       0.000us         0.00%      10.335us      10.335us             1  
+                      _activation_beeaae6::silu_and_mul         4.27%      19.820us        76.93%     356.816us     118.939us       7.743us       100.00%      10.335us       3.445us             3  
+void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us       7.743us       100.00%       7.743us       2.581us             3  
+                                Activity Buffer Request        37.06%     171.903us        37.06%     171.903us     171.903us       2.592us        33.48%       2.592us       2.592us             1  
+                                            aten::empty         3.93%      18.230us         3.93%      18.230us       6.077us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel        35.60%     165.093us        35.60%     165.093us      55.031us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         1.15%       5.320us         1.15%       5.320us       5.320us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.738ms
-Self CUDA time total: 7.744us
+Self CPU time total: 463.807us
+Self CUDA time total: 7.743us
 
 
 
@@ -4333,17 +4115,17 @@ PROFILE TRACE: hf_kernels_swiglu | cuda_T512_D768
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      69.214us      1045.06%      69.214us      69.214us             1  
-                                      hf_kernels_swiglu         7.00%     122.783us        99.73%       1.750ms       1.750ms       0.000us         0.00%       8.830us       8.830us             1  
-                      _activation_beeaae6::silu_and_mul         1.22%      21.430us        91.58%       1.607ms     535.694us       6.623us       100.00%       8.830us       2.943us             3  
-void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us       6.623us       100.00%       6.623us       2.208us             3  
-                                Activity Buffer Request        81.74%       1.434ms        81.74%       1.434ms       1.434ms       2.207us        33.32%       2.207us       2.207us             1  
-                                            aten::empty         1.15%      20.211us         1.15%      20.211us       6.737us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel         8.62%     151.304us         8.62%     151.304us      50.435us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.27%       4.780us         0.27%       4.780us       4.780us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      63.231us       959.06%      63.231us      63.231us             1  
+                                      hf_kernels_swiglu        19.32%      83.900us        98.89%     429.436us     429.436us       0.000us         0.00%       8.802us       8.802us             1  
+                      _activation_beeaae6::silu_and_mul         4.57%      19.830us        75.32%     327.085us     109.028us       6.593us       100.00%       8.802us       2.934us             3  
+void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us       6.593us       100.00%       6.593us       2.198us             3  
+                                Activity Buffer Request        34.73%     150.793us        34.73%     150.793us     150.793us       2.209us        33.51%       2.209us       2.209us             1  
+                                            aten::empty         4.25%      18.451us         4.25%      18.451us       6.150us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel        36.03%     156.462us        36.03%     156.462us      52.154us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         1.11%       4.800us         1.11%       4.800us       4.800us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.755ms
-Self CUDA time total: 6.623us
+Self CPU time total: 434.236us
+Self CUDA time total: 6.593us
 
 
 
@@ -4353,17 +4135,17 @@ PROFILE TRACE: hf_kernels_swiglu | cuda_T512_D1024
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      65.152us       692.52%      65.152us      65.152us             1  
-                                      hf_kernels_swiglu        21.62%      91.474us        98.93%     418.571us     418.571us       0.000us         0.00%      12.576us      12.576us             1  
-                      _activation_beeaae6::silu_and_mul         4.88%      20.631us        69.03%     292.067us      97.356us       9.408us       100.00%      12.576us       4.192us             3  
-void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us       9.408us       100.00%       9.408us       3.136us             3  
-                                Activity Buffer Request        28.63%     121.143us        28.63%     121.143us     121.143us       3.168us        33.67%       3.168us       3.168us             1  
-                                            aten::empty         8.28%      35.030us         8.28%      35.030us      11.677us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        35.52%     150.293us        35.52%     150.293us      50.098us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         1.07%       4.530us         1.07%       4.530us       4.530us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      68.544us       726.10%      68.544us      68.544us             1  
+                                      hf_kernels_swiglu         4.25%      86.402us        99.73%       2.027ms       2.027ms       0.000us         0.00%      12.608us      12.608us             1  
+                      _activation_beeaae6::silu_and_mul         1.00%      20.252us        94.52%       1.921ms     640.494us       9.440us       100.00%      12.608us       4.203us             3  
+void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us       9.440us       100.00%       9.440us       3.147us             3  
+                                Activity Buffer Request        85.77%       1.743ms        85.77%       1.743ms       1.743ms       3.168us        33.56%       3.168us       3.168us             1  
+                                            aten::empty         0.96%      19.489us         0.96%      19.489us       6.496us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         7.76%     157.752us         7.76%     157.752us      52.584us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.27%       5.440us         0.27%       5.440us       5.440us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 423.101us
-Self CUDA time total: 9.408us
+Self CPU time total: 2.033ms
+Self CUDA time total: 9.440us
 
 
 
@@ -4373,17 +4155,17 @@ PROFILE TRACE: hf_kernels_swiglu | cuda_T512_D2048
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      67.197us       514.72%      67.197us      67.197us             1  
-                                      hf_kernels_swiglu        22.39%      97.642us        98.93%     431.481us     431.481us       0.000us         0.00%      17.439us      17.439us             1  
-                      _activation_beeaae6::silu_and_mul         4.99%      21.781us        71.94%     313.789us     104.596us      13.055us       100.00%      17.439us       5.813us             3  
-void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us      13.055us       100.00%      13.055us       4.352us             3  
-                                Activity Buffer Request        32.48%     141.684us        32.48%     141.684us     141.684us       4.384us        33.58%       4.384us       4.384us             1  
-                                            aten::empty         4.60%      20.050us         4.60%      20.050us       6.683us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        34.47%     150.324us        34.47%     150.324us      50.108us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         1.07%       4.681us         1.07%       4.681us       4.681us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_swiglu         0.00%       0.000us         0.00%       0.000us       0.000us      61.247us       467.96%      61.247us      61.247us             1  
+                                      hf_kernels_swiglu        19.95%      80.811us        98.74%     399.916us     399.916us       0.000us         0.00%      17.504us      17.504us             1  
+                      _activation_beeaae6::silu_and_mul         4.55%      18.440us        74.43%     301.465us     100.488us      13.088us       100.00%      17.504us       5.835us             3  
+void vllm::act_and_mul_kernel<c10::BFloat16, &(c10::...         0.00%       0.000us         0.00%       0.000us       0.000us      13.088us       100.00%      13.088us       4.363us             3  
+                                Activity Buffer Request        32.08%     129.932us        32.08%     129.932us     129.932us       4.416us        33.74%       4.416us       4.416us             1  
+                                            aten::empty         4.36%      17.640us         4.36%      17.640us       5.880us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel        37.80%     153.093us        37.80%     153.093us      51.031us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         1.26%       5.090us         1.26%       5.090us       5.090us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 436.162us
-Self CUDA time total: 13.055us
+Self CPU time total: 405.006us
+Self CUDA time total: 13.088us
 
 
 impl                     wl                  p50(ms)  ok
@@ -4400,12 +4182,12 @@ hf_kernels_swiglu        cuda_T512_D768         0.03  True
 
▶ UV Install Logs
Fetching 7 files: 0%| | 0/7 [00:00<?, ?it/s] -Fetching 7 files: 71%|███████▏ | 5/7 [00:00<00:00, 15.31it/s] -Fetching 7 files: 100%|██████████| 7/7 [00:00<00:00, 21.41it/s]
+Fetching 7 files: 71%|███████▏ | 5/7 [00:00<00:00, 12.91it/s] +Fetching 7 files: 100%|██████████| 7/7 [00:00<00:00, 18.06it/s]

Artifacts:

activation.jsonl diff --git a/activation/impls/torch_swiglu.html b/activation/impls/torch_swiglu.html index 41f6e46a2626019e3e97d61016b7b71b844385d6..ff036bdd6c354316137b20ede89daa6792991e8a 100644 --- a/activation/impls/torch_swiglu.html +++ b/activation/impls/torch_swiglu.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { 
- color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } 
+:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: 
var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: 
flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); 
border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); 
background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { 
font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: 
var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid 
var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); 
@@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 
{ margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; 
} - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight 
.gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ 
+[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { 
color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* 
Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] 
.highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] 
.highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - 
} -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - ; -} - - { - % else % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ 
body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4106,7 +3888,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: nv | 0.26s +Cell: nv | 0.22s | Raw @@ -4122,16 +3904,16 @@ Cell: nv | 0.26s
-
Fri Oct 31 20:00:17 2025       
+
Mon Nov 10 21:58:08 2025       
 +-----------------------------------------------------------------------------------------+
-| NVIDIA-SMI 570.195.03             Driver Version: 570.195.03     CUDA Version: 12.8     |
-|-----------------------------------------+------------------------+----------------------+
+| NVIDIA-SMI 580.95.05              Driver Version: 580.95.05      CUDA Version: 13.0     |
++-----------------------------------------+------------------------+----------------------+
 | GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
 | Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
 |                                         |                        |               MIG M. |
 |=========================================+========================+======================|
 |   0  NVIDIA L40S                    On  |   00000000:4D:00.0 Off |                    0 |
-| N/A   33C    P0            108W /  350W |       0MiB /  46068MiB |     88%      Default |
+| N/A   28C    P0             78W /  350W |       0MiB /  46068MiB |     11%      Default |
 |                                         |                        |                  N/A |
 +-----------------------------------------+------------------------+----------------------+
 
@@ -4153,9 +3935,9 @@ Cell: nv | 0.26s
 
 ▼ code 
 ▼ output
- ▶ uv-logs
+ ▶ uv-logs
  | 
-Cell: benchmark | 7.02s
+Cell: benchmark | 3.61s
  | 
 
 Raw
@@ -4205,20 +3987,20 @@ PROFILE TRACE: torch_eager | cuda_T128_D768
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     188.575us      1476.70%     188.575us     188.575us             1  
-                                            torch_eager        11.13%     210.826us        99.56%       1.887ms       1.887ms       0.000us         0.00%      15.106us      15.106us             1  
-                                             aten::silu         3.37%      63.781us        82.44%       1.562ms     520.736us       6.497us        50.88%       8.833us       2.944us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.497us        50.88%       6.497us       2.166us             3  
-                                              aten::mul         1.86%      35.170us         2.95%      55.841us      18.614us       6.273us        49.12%       6.273us       2.091us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.273us        49.12%       6.273us       2.091us             3  
-                                Activity Buffer Request        76.78%       1.455ms        76.78%       1.455ms       1.455ms       2.336us        18.29%       2.336us       2.336us             1  
-                                            aten::slice         2.45%      46.380us         3.05%      57.842us       9.640us       0.000us         0.00%       0.000us       0.000us             6  
-                                       aten::as_strided         0.60%      11.462us         0.60%      11.462us       1.910us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel         3.38%      64.112us         3.38%      64.112us      10.685us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.44%       8.280us         0.44%       8.280us       8.280us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     172.065us      1357.73%     172.065us     172.065us             1  
+                                            torch_eager         8.84%     192.611us        99.34%       2.164ms       2.164ms       0.000us         0.00%      14.977us      14.977us             1  
+                                             aten::silu         2.51%      54.611us        85.85%       1.870ms     623.473us       6.496us        51.26%       8.800us       2.933us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.496us        51.26%       6.496us       2.165us             3  
+                                              aten::mul         1.45%      31.541us         2.42%      52.781us      17.594us       6.177us        48.74%       6.177us       2.059us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.177us        48.74%       6.177us       2.059us             3  
+                                Activity Buffer Request        81.30%       1.771ms        81.30%       1.771ms       1.771ms       2.304us        18.18%       2.304us       2.304us             1  
+                                            aten::slice         1.79%      39.021us         2.23%      48.532us       8.089us       0.000us         0.00%       0.000us       0.000us             6  
+                                       aten::as_strided         0.44%       9.511us         0.44%       9.511us       1.585us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         3.01%      65.621us         3.01%      65.621us      10.937us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.66%      14.470us         0.66%      14.470us      14.470us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.895ms
-Self CUDA time total: 12.770us
+Self CPU time total: 2.179ms
+Self CUDA time total: 12.673us
 
 
 
@@ -4228,20 +4010,20 @@ PROFILE TRACE: torch_eager | cuda_T128_D1024
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     152.926us      1234.87%     152.926us     152.926us             1  
-                                            torch_eager         6.55%     113.093us        99.67%       1.721ms       1.721ms       0.000us         0.00%      14.560us      14.560us             1  
-                                             aten::silu         2.40%      41.391us        88.69%       1.532ms     510.609us       6.400us        51.68%       8.576us       2.859us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.400us        51.68%       6.400us       2.133us             3  
-                                              aten::mul         1.50%      25.830us         2.63%      45.361us      15.120us       5.984us        48.32%       5.984us       1.995us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.984us        48.32%       5.984us       1.995us             3  
-                                Activity Buffer Request        84.72%       1.463ms        84.72%       1.463ms       1.463ms       2.176us        17.57%       2.176us       2.176us             1  
-                                            aten::slice         1.43%      24.741us         1.80%      31.062us       5.177us       0.000us         0.00%       0.000us       0.000us             6  
-                                       aten::as_strided         0.37%       6.321us         0.37%       6.321us       1.054us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel         2.71%      46.721us         2.71%      46.721us       7.787us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.33%       5.741us         0.33%       5.741us       5.741us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     147.935us      1197.66%     147.935us     147.935us             1  
+                                            torch_eager         6.19%     128.671us        99.72%       2.072ms       2.072ms       0.000us         0.00%      14.528us      14.528us             1  
+                                             aten::silu         1.99%      41.241us        90.00%       1.870ms     623.253us       6.432us        52.07%       8.608us       2.869us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.432us        52.07%       6.432us       2.144us             3  
+                                              aten::mul         1.21%      25.191us         2.13%      44.341us      14.780us       5.920us        47.93%       5.920us       1.973us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.920us        47.93%       5.920us       1.973us             3  
+                                Activity Buffer Request        86.71%       1.801ms        86.71%       1.801ms       1.801ms       2.176us        17.62%       2.176us       2.176us             1  
+                                            aten::slice         1.12%      23.301us         1.40%      28.981us       4.830us       0.000us         0.00%       0.000us       0.000us             6  
+                                       aten::as_strided         0.27%       5.680us         0.27%       5.680us       0.947us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         2.23%      46.310us         2.23%      46.310us       7.718us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.28%       5.721us         0.28%       5.721us       5.721us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.727ms
-Self CUDA time total: 12.384us
+Self CPU time total: 2.077ms
+Self CUDA time total: 12.352us
 
 
 
@@ -4251,20 +4033,20 @@ PROFILE TRACE: torch_eager | cuda_T128_D2048
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     152.413us      1147.86%     152.413us     152.413us             1  
-                                            torch_eager         6.17%     105.134us        99.68%       1.699ms       1.699ms       0.000us         0.00%      15.581us      15.581us             1  
-                                             aten::silu         2.58%      43.990us        88.96%       1.517ms     505.533us       6.814us        51.32%       9.117us       3.039us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.814us        51.32%       6.814us       2.271us             3  
-                                              aten::mul         1.63%      27.711us         2.72%      46.371us      15.457us       6.464us        48.68%       6.464us       2.155us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.464us        48.68%       6.464us       2.155us             3  
-                                Activity Buffer Request        84.84%       1.446ms        84.84%       1.446ms       1.446ms       2.303us        17.34%       2.303us       2.303us             1  
-                                            aten::slice         1.47%      24.990us         1.83%      31.250us       5.208us       0.000us         0.00%       0.000us       0.000us             6  
-                                       aten::as_strided         0.37%       6.260us         0.37%       6.260us       1.043us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel         2.63%      44.871us         2.63%      44.871us       7.478us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.32%       5.431us         0.32%       5.431us       5.431us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     147.135us      1116.10%     147.135us     147.135us             1  
+                                            torch_eager         6.76%     134.342us        99.73%       1.980ms       1.980ms       0.000us         0.00%      15.455us      15.455us             1  
+                                             aten::silu         1.89%      37.461us        89.35%       1.774ms     591.479us       6.784us        51.46%       9.056us       3.019us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.784us        51.46%       6.784us       2.261us             3  
+                                              aten::mul         1.28%      25.422us         2.19%      43.411us      14.470us       6.399us        48.54%       6.399us       2.133us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.399us        48.54%       6.399us       2.133us             3  
+                                Activity Buffer Request        86.14%       1.711ms        86.14%       1.711ms       1.711ms       2.272us        17.23%       2.272us       2.272us             1  
+                                            aten::slice         1.16%      23.079us         1.42%      28.280us       4.713us       0.000us         0.00%       0.000us       0.000us             6  
+                                       aten::as_strided         0.26%       5.201us         0.26%       5.201us       0.867us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         2.23%      44.359us         2.23%      44.359us       7.393us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.27%       5.441us         0.27%       5.441us       5.441us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.705ms
-Self CUDA time total: 13.278us
+Self CPU time total: 1.986ms
+Self CUDA time total: 13.183us
 
 
 
@@ -4274,20 +4056,20 @@ PROFILE TRACE: torch_eager | cuda_T256_D768
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     155.359us      1219.84%     155.359us     155.359us             1  
-                                            torch_eager         6.31%     109.593us        99.71%       1.733ms       1.733ms       0.000us         0.00%      14.944us      14.944us             1  
-                                             aten::silu         2.48%      43.021us        88.93%       1.545ms     515.160us       6.560us        51.51%       8.768us       2.923us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.560us        51.51%       6.560us       2.187us             3  
-                                              aten::mul         1.62%      28.091us         2.66%      46.261us      15.420us       6.176us        48.49%       6.176us       2.059us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     152.034us      1193.64%     152.034us     152.034us             1  
+                                            torch_eager         5.57%     123.804us        99.78%       2.219ms       2.219ms       0.000us         0.00%      14.945us      14.945us             1  
+                                             aten::silu         1.71%      38.060us        90.80%       2.019ms     672.957us       6.561us        51.51%       8.769us       2.923us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.561us        51.51%       6.561us       2.187us             3  
+                                              aten::mul         1.26%      28.020us         2.11%      46.890us      15.630us       6.176us        48.49%       6.176us       2.059us             3  
 void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.176us        48.49%       6.176us       2.059us             3  
-                                Activity Buffer Request        74.70%       1.298ms        74.70%       1.298ms       1.298ms       2.208us        17.34%       2.208us       2.208us             1  
-                                            aten::slice         1.46%      25.370us         1.82%      31.631us       5.272us       0.000us         0.00%       0.000us       0.000us             6  
-                                       aten::as_strided         0.36%       6.261us         0.36%       6.261us       1.043us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        12.80%     222.405us        12.80%     222.405us      37.068us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.29%       4.960us         0.29%       4.960us       4.960us       0.000us         0.00%       0.000us       0.000us             1  
+                                Activity Buffer Request        81.46%       1.811ms        81.46%       1.811ms       1.811ms       2.208us        17.34%       2.208us       2.208us             1  
+                                            aten::slice         1.06%      23.629us         1.31%      29.120us       4.853us       0.000us         0.00%       0.000us       0.000us             6  
+                                       aten::as_strided         0.25%       5.491us         0.25%       5.491us       0.915us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         8.48%     188.472us         8.48%     188.472us      31.412us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.22%       4.841us         0.22%       4.841us       4.841us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.738ms
-Self CUDA time total: 12.736us
+Self CPU time total: 2.224ms
+Self CUDA time total: 12.737us
 
 
 
@@ -4297,20 +4079,20 @@ PROFILE TRACE: torch_eager | cuda_T256_D1024
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     153.122us      1152.94%     153.122us     153.122us             1  
-                                            torch_eager         5.95%     108.905us        99.72%       1.827ms       1.827ms       0.000us         0.00%      15.585us      15.585us             1  
-                                             aten::silu         2.26%      41.441us        89.57%       1.641ms     546.874us       6.816us        51.32%       9.120us       3.040us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.816us        51.32%       6.816us       2.272us             3  
-                                              aten::mul         1.45%      26.581us         2.47%      45.261us      15.087us       6.465us        48.68%       6.465us       2.155us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.465us        48.68%       6.465us       2.155us             3  
-                                Activity Buffer Request        78.54%       1.439ms        78.54%       1.439ms       1.439ms       2.304us        17.35%       2.304us       2.304us             1  
-                                            aten::slice         1.41%      25.869us         1.74%      31.870us       5.312us       0.000us         0.00%       0.000us       0.000us             6  
-                                       aten::as_strided         0.33%       6.001us         0.33%       6.001us       1.000us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel         9.78%     179.164us         9.78%     179.164us      29.861us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.28%       5.090us         0.28%       5.090us       5.090us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     152.353us      1152.70%     152.353us     152.353us             1  
+                                            torch_eager         6.19%     135.991us        99.76%       2.192ms       2.192ms       0.000us         0.00%      15.489us      15.489us             1  
+                                             aten::silu         1.77%      38.889us        90.16%       1.981ms     660.320us       6.752us        51.09%       9.024us       3.008us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.752us        51.09%       6.752us       2.251us             3  
+                                              aten::mul         1.20%      26.341us         2.10%      46.211us      15.404us       6.465us        48.91%       6.465us       2.155us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.465us        48.91%       6.465us       2.155us             3  
+                                Activity Buffer Request        80.60%       1.771ms        80.60%       1.771ms       1.771ms       2.272us        17.19%       2.272us       2.272us             1  
+                                            aten::slice         1.06%      23.362us         1.31%      28.762us       4.794us       0.000us         0.00%       0.000us       0.000us             6  
+                                       aten::as_strided         0.25%       5.400us         0.25%       5.400us       0.900us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         8.70%     191.103us         8.70%     191.103us      31.851us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.24%       5.211us         0.24%       5.211us       5.211us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.832ms
-Self CUDA time total: 13.281us
+Self CPU time total: 2.197ms
+Self CUDA time total: 13.217us
 
 
 
@@ -4320,20 +4102,20 @@ PROFILE TRACE: torch_eager | cuda_T256_D2048
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     150.877us       970.08%     150.877us     150.877us             1  
-                                            torch_eager        20.61%     104.763us        99.03%     503.283us     503.283us       0.000us         0.00%      18.241us      18.241us             1  
-                                             aten::silu         8.60%      43.701us        63.19%     321.148us     107.049us       7.969us        51.24%      10.657us       3.552us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.969us        51.24%       7.969us       2.656us             3  
-                                              aten::mul         5.45%      27.720us         8.99%      45.690us      15.230us       7.584us        48.76%       7.584us       2.528us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.584us        48.76%       7.584us       2.528us             3  
-                                Activity Buffer Request        24.24%     123.213us        24.24%     123.213us     123.213us       2.688us        17.28%       2.688us       2.688us             1  
-                                            aten::slice         5.04%      25.603us         6.23%      31.682us       5.280us       0.000us         0.00%       0.000us       0.000us             6  
-                                       aten::as_strided         1.20%       6.079us         1.20%       6.079us       1.013us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        33.88%     172.204us        33.88%     172.204us      28.701us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.97%       4.940us         0.97%       4.940us       4.940us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     153.216us       991.30%     153.216us     153.216us             1  
+                                            torch_eager         5.88%     135.461us        99.78%       2.300ms       2.300ms       0.000us         0.00%      18.144us      18.144us             1  
+                                             aten::silu         1.72%      39.670us        90.62%       2.089ms     696.338us       7.936us        51.35%      10.624us       3.541us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.936us        51.35%       7.936us       2.645us             3  
+                                              aten::mul         1.19%      27.391us         2.02%      46.461us      15.487us       7.520us        48.65%       7.520us       2.507us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.520us        48.65%       7.520us       2.507us             3  
+                                Activity Buffer Request        81.58%       1.881ms        81.58%       1.881ms       1.881ms       2.688us        17.39%       2.688us       2.688us             1  
+                                            aten::slice         1.04%      24.071us         1.27%      29.261us       4.877us       0.000us         0.00%       0.000us       0.000us             6  
+                                       aten::as_strided         0.23%       5.190us         0.23%       5.190us       0.865us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         8.15%     187.833us         8.15%     187.833us      31.305us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.22%       5.060us         0.22%       5.060us       5.060us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 508.223us
-Self CUDA time total: 15.553us
+Self CPU time total: 2.305ms
+Self CUDA time total: 15.456us
 
 
 
@@ -4343,19 +4125,19 @@ PROFILE TRACE: torch_eager | cuda_T512_D768
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     156.541us      1089.44%     156.541us     156.541us             1  
-                                            torch_eager         6.81%     125.673us        99.72%       1.840ms       1.840ms       0.000us         0.00%      16.866us      16.866us             1  
-                                             aten::silu         2.28%      42.101us        88.57%       1.634ms     544.654us       7.361us        51.23%       9.858us       3.286us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     148.287us      1031.99%     148.287us     148.287us             1  
+                                            torch_eager         4.89%     105.043us        99.76%       2.144ms       2.144ms       0.000us         0.00%      16.833us      16.833us             1  
+                                             aten::silu         1.85%      39.730us        91.47%       1.966ms     655.253us       7.361us        51.23%       9.825us       3.275us             3  
 void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.361us        51.23%       7.361us       2.454us             3  
-                                              aten::mul         1.53%      28.200us         2.53%      46.622us      15.541us       7.008us        48.77%       7.008us       2.336us             3  
+                                              aten::mul         1.23%      26.350us         2.09%      44.980us      14.993us       7.008us        48.77%       7.008us       2.336us             3  
 void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.008us        48.77%       7.008us       2.336us             3  
-                                Activity Buffer Request        77.96%       1.438ms        77.96%       1.438ms       1.438ms       2.497us        17.38%       2.497us       2.497us             1  
-                                            aten::slice         1.46%      26.979us         1.81%      33.310us       5.552us       0.000us         0.00%       0.000us       0.000us             6  
-                                       aten::as_strided         0.34%       6.331us         0.34%       6.331us       1.055us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel         9.33%     172.076us         9.33%     172.076us      28.679us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.28%       5.210us         0.28%       5.210us       5.210us       0.000us         0.00%       0.000us       0.000us             1  
+                                Activity Buffer Request        81.83%       1.759ms        81.83%       1.759ms       1.759ms       2.464us        17.15%       2.464us       2.464us             1  
+                                            aten::slice         1.07%      23.090us         1.31%      28.260us       4.710us       0.000us         0.00%       0.000us       0.000us             6  
+                                       aten::as_strided         0.24%       5.170us         0.24%       5.170us       0.862us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         8.65%     185.993us         8.65%     185.993us      30.999us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.24%       5.111us         0.24%       5.111us       5.111us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.845ms
+Self CPU time total: 2.149ms
 Self CUDA time total: 14.369us
 
 
@@ -4366,20 +4148,20 @@ PROFILE TRACE: torch_eager | cuda_T512_D1024
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     149.754us       962.92%     149.754us     149.754us             1  
-                                            torch_eager        21.77%     106.163us        98.85%     481.952us     481.952us       0.000us         0.00%      18.240us      18.240us             1  
-                                             aten::silu         8.65%      42.151us        61.90%     301.788us     100.596us       7.968us        51.23%      10.656us       3.552us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.968us        51.23%       7.968us       2.656us             3  
-                                              aten::mul         5.09%      24.801us         8.77%      42.752us      14.251us       7.584us        48.77%       7.584us       2.528us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.584us        48.77%       7.584us       2.528us             3  
-                                Activity Buffer Request        21.73%     105.953us        21.73%     105.953us     105.953us       2.688us        17.28%       2.688us       2.688us             1  
-                                            aten::slice         5.14%      25.050us         6.41%      31.249us       5.208us       0.000us         0.00%       0.000us       0.000us             6  
-                                       aten::as_strided         1.27%       6.199us         1.27%       6.199us       1.033us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        35.20%     171.635us        35.20%     171.635us      28.606us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         1.15%       5.600us         1.15%       5.600us       5.600us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     152.095us       983.92%     152.095us     152.095us             1  
+                                            torch_eager        10.87%     257.253us        99.76%       2.361ms       2.361ms       0.000us         0.00%      18.146us      18.146us             1  
+                                             aten::silu         1.67%      39.540us        85.73%       2.029ms     676.344us       7.905us        51.14%      10.593us       3.531us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.905us        51.14%       7.905us       2.635us             3  
+                                              aten::mul         1.20%      28.421us         1.97%      46.561us      15.520us       7.553us        48.86%       7.553us       2.518us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.553us        48.86%       7.553us       2.518us             3  
+                                Activity Buffer Request        76.39%       1.808ms        76.39%       1.808ms       1.808ms       2.688us        17.39%       2.688us       2.688us             1  
+                                            aten::slice         0.98%      23.079us         1.19%      28.100us       4.683us       0.000us         0.00%       0.000us       0.000us             6  
+                                       aten::as_strided         0.21%       5.021us         0.21%       5.021us       0.837us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         8.43%     199.594us         8.43%     199.594us      33.266us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.24%       5.780us         0.24%       5.780us       5.780us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 487.552us
-Self CUDA time total: 15.552us
+Self CPU time total: 2.367ms
+Self CUDA time total: 15.458us
 
 
 
@@ -4389,20 +4171,20 @@ PROFILE TRACE: torch_eager | cuda_T512_D2048
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     187.357us       834.00%     187.357us     187.357us             1  
-                                            torch_eager         6.93%     128.860us        99.74%       1.856ms       1.856ms       0.000us         0.00%      26.369us      26.369us             1  
-                                             aten::silu         2.32%      43.123us        88.23%       1.642ms     547.175us      11.616us        51.71%      15.520us       5.173us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      11.616us        51.71%      11.616us       3.872us             3  
-                                              aten::mul         1.63%      30.312us         2.74%      50.922us      16.974us      10.849us        48.29%      10.849us       3.616us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      10.849us        48.29%      10.849us       3.616us             3  
-                                Activity Buffer Request        77.79%       1.447ms        77.79%       1.447ms       1.447ms       3.904us        17.38%       3.904us       3.904us             1  
-                                            aten::slice         1.49%      27.691us         1.84%      34.251us       5.708us       0.000us         0.00%       0.000us       0.000us             6  
-                                       aten::as_strided         0.35%       6.560us         0.35%       6.560us       1.093us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel         9.23%     171.734us         9.23%     171.734us      28.622us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.26%       4.930us         0.26%       4.930us       4.930us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     145.313us       647.79%     145.313us     145.313us             1  
+                                            torch_eager        16.48%      98.469us        99.14%     592.319us     592.319us       0.000us         0.00%      26.336us      26.336us             1  
+                                             aten::silu         6.71%      40.110us        70.79%     422.906us     140.969us      11.520us        51.36%      15.424us       5.141us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      11.520us        51.36%      11.520us       3.840us             3  
+                                              aten::mul         4.29%      25.642us         7.38%      44.092us      14.697us      10.912us        48.64%      10.912us       3.637us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      10.912us        48.64%      10.912us       3.637us             3  
+                                Activity Buffer Request        36.05%     215.374us        36.05%     215.374us     215.374us       3.904us        17.40%       3.904us       3.904us             1  
+                                            aten::slice         3.67%      21.912us         4.49%      26.852us       4.475us       0.000us         0.00%       0.000us       0.000us             6  
+                                       aten::as_strided         0.83%       4.940us         0.83%       4.940us       0.823us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel        31.11%     185.872us        31.11%     185.872us      30.979us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.86%       5.130us         0.86%       5.130us       5.130us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.860ms
-Self CUDA time total: 22.465us
+Self CPU time total: 597.449us
+Self CUDA time total: 22.432us
 
 
 impl                     wl                  p50(ms)  ok
@@ -4416,12 +4198,6 @@ torch_eager              cuda_T512_D1024        0.05  True
 torch_eager              cuda_T512_D2048        0.05  True
 torch_eager              cuda_T512_D768         0.05  True
 
-
-
▶ UV Install Logs
- -

Artifacts:

activation.jsonl diff --git a/activation/results/artifacts/combine/latency.svg b/activation/results/artifacts/combine/latency.svg index c90094a9212ed4b3ea466620aa29c029e98de04f..19ece38766cec489de50d8370af88b3cf43790f1 100644 --- a/activation/results/artifacts/combine/latency.svg +++ b/activation/results/artifacts/combine/latency.svg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:085b4a64bddea2955d6d074836121ec2e120fb1ca9140f3ccb75e8358e4526b3 -size 20644 +oid sha256:e3876e00c4cce265206e6202ea531c8de65cca8fefa80010473a6b76e6f54cc4 +size 20642 diff --git a/activation/results/combined_results.html b/activation/results/combined_results.html index aefcf7c048ef413bda722db3be44aa8b9b9cef43..2f6bca83651184e430ebcca39d73090005e897c2 100644 --- a/activation/results/combined_results.html +++ b/activation/results/combined_results.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; 
--border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - 
-:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - 
:root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ 
:root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] 
.menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: 
auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ 
+ cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: 
transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 
0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { 
background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid 
var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: 
var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight 
.cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ 
+[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ 
+[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { 
color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* 
Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] 
.highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: 
#AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { 
border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 
0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4107,7 +3889,7 @@ body[data-tool="eraser"] .main-content { - 2025-10-31T20:14:01.265668 + 2025-11-10T22:12:14.776732 image/svg+xml @@ -4256,83 +4038,83 @@ body[data-tool="eraser"] .main-content { - + - + - 0.025 + 0.025 - + - + - 0.030 + 0.030 - + - + - 0.035 + 0.035 - + - + - 0.040 + 0.040 - + - + - 0.045 + 0.045 - + - + - 0.050 + 0.050 @@ -4340,37 +4122,37 @@ body[data-tool="eraser"] .main-content { - + - - - - - - - - + + + + + + + + - + - + - - - - - - - + + + + + + + @@ -4428,7 +4210,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: combine | 4.32s +Cell: combine | 4.55s | Raw @@ -4554,7 +4336,7 @@ Implementations included:
▶ UV Install Logs
@@ -4567,7 +4349,7 @@ Installed 37 packages in 213ms - 2025-10-31T20:14:01.265668 + 2025-11-10T22:12:14.776732 image/svg+xml @@ -4716,83 +4498,83 @@ Installed 37 packages in 213ms - + - + - 0.025 + 0.025 - + - + - 0.030 + 0.030 - + - + - 0.035 + 0.035 - + - + - 0.040 + 0.040 - + - + - 0.045 + 0.045 - + - + - 0.050 + 0.050 @@ -4800,37 +4582,37 @@ Installed 37 packages in 213ms - + - - - - - - - - + + + + + + + + - + - + - - - - - - - + + + + + + + diff --git a/causal_conv1d/impls/artifacts/benchmark/causal_conv1d.jsonl b/causal_conv1d/impls/artifacts/benchmark/causal_conv1d.jsonl index 7bfddcfb2c66ba429fccc98758725309b85f6780..689a2bc0ebe8e5aec5d6e1bf4b2e7df78242d051 100644 --- a/causal_conv1d/impls/artifacts/benchmark/causal_conv1d.jsonl +++ b/causal_conv1d/impls/artifacts/benchmark/causal_conv1d.jsonl @@ -1,24 +1,24 @@ -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D64_S128_W2", "batch": 2, "dim": 64, "seqlen": 128, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.06906199996592477, "p50": 0.07093199997143529, "p90": 0.07169200000589626, "mean": 0.07107379998387842, "iqr": 0.0011000000199601345, "raw_times": [0.07093199997143529, 0.07309099999019963, 0.07059199998593613, 0.07169200000589626, 0.06906199996592477], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.07642200000645971, "peak_bytes": 295936, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": 
"cuda_B2_D64_S128_W4", "batch": 2, "dim": 64, "seqlen": 128, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08730199999718025, "p50": 0.08879199998546028, "p90": 0.08886199998414668, "mean": 0.0890762000040013, "iqr": 0.00037899997096246807, "raw_times": [0.08730199999718025, 0.08879199998546028, 0.08848300001318421, 0.08886199998414668, 0.09194200004003505], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.091862999965997, "peak_bytes": 296448, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D64_S512_W2", "batch": 2, "dim": 64, "seqlen": 512, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08465199999818651, "p50": 0.08821300002637145, "p90": 0.08871199997884105, "mean": 0.08770840000806857, "iqr": 0.0007599999776175537, "raw_times": [0.08465199999818651, 0.0879520000012235, 0.08821300002637145, 0.08901300003572032, 0.08871199997884105], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09156300001222917, "peak_bytes": 1180672, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": 
"cuda_B2_D64_S512_W4", "batch": 2, "dim": 64, "seqlen": 512, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08501199999955134, "p50": 0.08710200000905388, "p90": 0.08719199996676252, "mean": 0.08665020000080403, "iqr": 0.001349999934063817, "raw_times": [0.08501199999955134, 0.08710200000905388, 0.08719199996676252, 0.0858420000326987, 0.08810299999595372], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09103200000026845, "peak_bytes": 1181184, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D64_S2048_W2", "batch": 2, "dim": 64, "seqlen": 2048, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08575200001814665, "p50": 0.08690200002092752, "p90": 0.08706200003416598, "mean": 0.08684220001669019, "iqr": 0.00029900002118665725, "raw_times": [0.08773199999723147, 0.08676300001297932, 0.08690200002092752, 0.08706200003416598, 0.08575200001814665], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09036199998035954, "peak_bytes": 4719616, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": 
"cuda_B2_D64_S2048_W4", "batch": 2, "dim": 64, "seqlen": 2048, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08490200002597703, "p50": 0.08731200000511308, "p90": 0.0877829999694768, "mean": 0.08806820000017979, "iqr": 0.001451000002816727, "raw_times": [0.09401200003367194, 0.08731200000511308, 0.08633199996666008, 0.08490200002597703, 0.0877829999694768], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0907329999790818, "peak_bytes": 4720128, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D2048_S128_W2", "batch": 2, "dim": 2048, "seqlen": 128, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.0847820000444699, "p50": 0.08513199998105847, "p90": 0.08660200001031626, "mean": 0.08566600000676772, "iqr": 0.0016600000094513234, "raw_times": [0.08494200000086494, 0.0847820000444699, 0.08687199999712902, 0.08660200001031626, 0.08513199998105847], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0911219999579771, "peak_bytes": 9461760, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": 
"cuda_B2_D2048_S128_W4", "batch": 2, "dim": 2048, "seqlen": 128, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08356199998615921, "p50": 0.0846430000365217, "p90": 0.08576199996923606, "mean": 0.08508039999242101, "iqr": 0.0011189999895577785, "raw_times": [0.08356199998615921, 0.0867919999905098, 0.08464299997967828, 0.08576199996923606, 0.0846430000365217], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08955300000934585, "peak_bytes": 9478144, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D2048_S512_W2", "batch": 2, "dim": 2048, "seqlen": 512, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08469199997307442, "p50": 0.08614199998646654, "p90": 0.08723299998791845, "mean": 0.08654439999418173, "iqr": 0.0011309999763398082, "raw_times": [0.08469199997307442, 0.08610200001157864, 0.08614199998646654, 0.08855300001187061, 0.08723299998791845], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09115300002804361, "peak_bytes": 37773312, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": 
{"name": "cuda_B2_D2048_S512_W4", "batch": 2, "dim": 2048, "seqlen": 512, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08576300001550408, "p50": 0.08703200001036748, "p90": 0.08823299998539369, "mean": 0.09075460000076419, "iqr": 0.0015310000094359566, "raw_times": [0.10604300001659794, 0.08823299998539369, 0.08703200001036748, 0.08670199997595773, 0.08576300001550408], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08985199997368909, "peak_bytes": 37789696, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D2048_S2048_W2", "batch": 2, "dim": 2048, "seqlen": 2048, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.14525299997103502, "p50": 0.1457439999512644, "p90": 0.1459139999724357, "mean": 0.1457395999750588, "iqr": 0.00044099999740865314, "raw_times": [0.14525299997103502, 0.14547299997502705, 0.1457439999512644, 0.14631400000553185, 0.1459139999724357], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.1472430000148961, "peak_bytes": 151019520, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, 
"wl": {"name": "cuda_B2_D2048_S2048_W4", "batch": 2, "dim": 2048, "seqlen": 2048, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.16037399996093882, "p50": 0.16231400002197915, "p90": 0.16309400001546237, "mean": 0.1622881999992387, "iqr": 0.0012190000120426703, "raw_times": [0.16309400001546237, 0.16231400002197915, 0.16378399999439353, 0.1618750000034197, 0.16037399996093882], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.16341399998509587, "peak_bytes": 151035904, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D64_S128_W2", "batch": 4, "dim": 64, "seqlen": 128, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08445299999948475, "p50": 0.08518200002072263, "p90": 0.08666200000106983, "mean": 0.08572240001285536, "iqr": 0.0017899999988912896, "raw_times": [0.08445299999948475, 0.08744300004082106, 0.08518200002072263, 0.08666200000106983, 0.08487200000217854], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0890119999894523, "peak_bytes": 33727488, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": 
"eager"}, "wl": {"name": "cuda_B4_D64_S128_W4", "batch": 4, "dim": 64, "seqlen": 128, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08437200000344092, "p50": 0.08463200003916427, "p90": 0.08609200000364581, "mean": 0.08522400000856578, "iqr": 0.0015900000107649248, "raw_times": [0.08463200003916427, 0.08609200000364581, 0.08652200000369703, 0.08437200000344092, 0.08450199999288088], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08977199996706986, "peak_bytes": 591360, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D64_S512_W2", "batch": 4, "dim": 64, "seqlen": 512, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08375199996635274, "p50": 0.08519199997181204, "p90": 0.08627200003274993, "mean": 0.08607399998936671, "iqr": 0.0020100000597267353, "raw_times": [0.08375199996635274, 0.0842619999730232, 0.08627200003274993, 0.08519199997181204, 0.09089200000289566], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08821199998010343, "peak_bytes": 2360320, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": 
"eager"}, "wl": {"name": "cuda_B4_D64_S512_W4", "batch": 4, "dim": 64, "seqlen": 512, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08470200003785067, "p50": 0.08566200000359458, "p90": 0.08573299999170558, "mean": 0.08566220001284819, "iqr": 0.0006109999617365247, "raw_times": [0.08470200003785067, 0.08709200000112105, 0.08512200002996906, 0.08566200000359458, 0.08573299999170558], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08864200003699807, "peak_bytes": 2360832, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D64_S2048_W2", "batch": 4, "dim": 64, "seqlen": 2048, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08451200000081371, "p50": 0.08525300000883362, "p90": 0.08580199994412396, "mean": 0.08525219999455658, "iqr": 0.0009299999419454252, "raw_times": [0.08580199994412396, 0.08525300000883362, 0.08451200000081371, 0.08487200000217854, 0.08582200001683304], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08942300001990588, "peak_bytes": 9438208, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": 
"eager"}, "wl": {"name": "cuda_B4_D64_S2048_W4", "batch": 4, "dim": 64, "seqlen": 2048, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08533199996918484, "p50": 0.08693199998788259, "p90": 0.09015199998430035, "mean": 0.08883799998784525, "iqr": 0.0043200000163778896, "raw_times": [0.08533199996918484, 0.09015199998430035, 0.08583199996792246, 0.08693199998788259, 0.09594200002993603], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09176200001093093, "peak_bytes": 9438720, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S128_W2", "batch": 4, "dim": 2048, "seqlen": 128, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08384200003774822, "p50": 0.08611200001951147, "p90": 0.08663199997727133, "mean": 0.08570400000280642, "iqr": 0.001730000008137722, "raw_times": [0.08384200003774822, 0.08611200001951147, 0.08703200001036748, 0.08663199997727133, 0.08490199996913361], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08941200002254845, "peak_bytes": 18931712, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", 
"backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S128_W4", "batch": 4, "dim": 2048, "seqlen": 128, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08507300003657292, "p50": 0.0865819999944506, "p90": 0.08741199997075455, "mean": 0.09195439998848087, "iqr": 0.0020300000187489786, "raw_times": [0.11532299998862072, 0.0865819999944506, 0.08741199997075455, 0.08538199995200557, 0.08507300003657292], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08733200002097874, "peak_bytes": 18948096, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S512_W2", "batch": 4, "dim": 2048, "seqlen": 512, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09419299999535724, "p50": 0.09539199999153425, "p90": 0.09730299996135727, "mean": 0.09678459998667677, "iqr": 0.002380999944762152, "raw_times": [0.10211299996853995, 0.09730299996135727, 0.09492200001659512, 0.09539199999153425, 0.09419299999535724], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09651299995994123, "peak_bytes": 75522048, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": 
"pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S512_W4", "batch": 4, "dim": 2048, "seqlen": 512, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.10080199996309602, "p50": 0.10192199999892182, "p90": 0.1026219999857858, "mean": 0.10294419998899684, "iqr": 0.0008999999749903509, "raw_times": [0.10765299998638511, 0.10172200001079545, 0.1026219999857858, 0.10192199999892182, 0.10080199996309602], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.10299199999508346, "peak_bytes": 75538432, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S2048_W2", "batch": 4, "dim": 2048, "seqlen": 2048, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.4861929999719905, "p50": 0.4890019999947981, "p90": 0.48961200002395344, "mean": 0.48862639999924795, "iqr": 0.001079000014669873, "raw_times": [0.48979199999621414, 0.4861929999719905, 0.48961200002395344, 0.4890019999947981, 0.48853300000928357], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.48705300002893637, "peak_bytes": 302014464, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} -{"ts": "2025-10-31T20:01:11Z", "run": "a7ca8117e1294b1ba730e0240038ddbc", "impl": "torch_eager", "tags": 
{"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S2048_W4", "batch": 4, "dim": 2048, "seqlen": 2048, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.49736299996538946, "p50": 0.49848299994437184, "p90": 0.49918199999865465, "mean": 0.4987367999774506, "iqr": 0.0007590000450363732, "raw_times": [0.4984229999536183, 0.49848299994437184, 0.49918199999865465, 0.5002330000252186, 0.49736299996538946], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.4985730000157673, "peak_bytes": 302030848, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D64_S128_W2", "batch": 2, "dim": 64, "seqlen": 128, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.0682910000477932, "p50": 0.0693509999791786, "p90": 0.06985099997791622, "mean": 0.0695532000008825, "iqr": 0.0006490000146186503, "raw_times": [0.0710710000362269, 0.06985099997791622, 0.0693509999791786, 0.0682910000477932, 0.06920199996329757], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.07755100000395032, "peak_bytes": 295936, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": 
{"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D64_S128_W4", "batch": 2, "dim": 64, "seqlen": 128, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08078100000830091, "p50": 0.08191099999521612, "p90": 0.08361100003639876, "mean": 0.08277140000245709, "iqr": 0.0022690000491820683, "raw_times": [0.08191099999521612, 0.08361100003639876, 0.08078100000830091, 0.0813419999872167, 0.08621199998515294], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08947200001330202, "peak_bytes": 296448, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D64_S512_W2", "batch": 2, "dim": 64, "seqlen": 512, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07914100001471525, "p50": 0.0805310000373538, "p90": 0.08094200001096397, "mean": 0.08066520001648314, "iqr": 0.0007310000000870787, "raw_times": [0.0805310000373538, 0.0825010000085058, 0.08094200001096397, 0.08021100001087689, 0.07914100001471525], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0867210000023988, "peak_bytes": 1180672, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": 
{"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D64_S512_W4", "batch": 2, "dim": 64, "seqlen": 512, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08032099998445119, "p50": 0.08104099998718084, "p90": 0.08252100002437146, "mean": 0.08145320000494394, "iqr": 0.0017790000015338592, "raw_times": [0.08104099998718084, 0.0826410000058786, 0.08252100002437146, 0.0807420000228376, 0.08032099998445119], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08690100003150292, "peak_bytes": 1181184, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D64_S2048_W2", "batch": 2, "dim": 64, "seqlen": 2048, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08014200000161509, "p50": 0.08045200002015918, "p90": 0.08049199999504708, "mean": 0.08065180001040062, "iqr": 9.099994713324122e-05, "raw_times": [0.08049199999504708, 0.08177199998726792, 0.08045200002015918, 0.08014200000161509, 0.08040100004791384], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08574200001021381, "peak_bytes": 4719616, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": 
{"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D64_S2048_W4", "batch": 2, "dim": 64, "seqlen": 2048, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08046099998182399, "p50": 0.08075099998450241, "p90": 0.08139099998061283, "mean": 0.0811031999774059, "iqr": 0.0006690000304843124, "raw_times": [0.08075099998450241, 0.08046099998182399, 0.08072199995012852, 0.08139099998061283, 0.08219099998996171], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08660099996404824, "peak_bytes": 4720128, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D2048_S128_W2", "batch": 2, "dim": 2048, "seqlen": 128, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07871100001466402, "p50": 0.0798610000174449, "p90": 0.08112200004006809, "mean": 0.07994760002247858, "iqr": 0.0023900000201138027, "raw_times": [0.07873200001995428, 0.07871100001466402, 0.08112200004006809, 0.08131200002026162, 0.0798610000174449], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0850010000021939, "peak_bytes": 9461760, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": 
{"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D2048_S128_W4", "batch": 2, "dim": 2048, "seqlen": 128, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08012099999632483, "p50": 0.08051100002148814, "p90": 0.08112199998322467, "mean": 0.08078719999957684, "iqr": 0.0007209999921542476, "raw_times": [0.08051100002148814, 0.08040099999107042, 0.08012099999632483, 0.08112199998322467, 0.08178100000577615], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08605099998248988, "peak_bytes": 9478144, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D2048_S512_W2", "batch": 2, "dim": 2048, "seqlen": 512, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07783099999869592, "p50": 0.07915100002264808, "p90": 0.07922200001075907, "mean": 0.0788234000083321, "iqr": 0.0006800000278417428, "raw_times": [0.07915100002264808, 0.0793710000266401, 0.07854199998291733, 0.07922200001075907, 0.07783099999869592], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08282100003498272, "peak_bytes": 37773312, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", 
"tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D2048_S512_W4", "batch": 2, "dim": 2048, "seqlen": 512, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07959199996321331, "p50": 0.08125099998324004, "p90": 0.08163100000047052, "mean": 0.08102919999828373, "iqr": 0.0016999999843392288, "raw_times": [0.08163100000047052, 0.08274100002836349, 0.07959199996321331, 0.07993100001613129, 0.08125099998324004], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08671099999446596, "peak_bytes": 37789696, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D2048_S2048_W2", "batch": 2, "dim": 2048, "seqlen": 2048, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.15852199999244476, "p50": 0.15985300001375435, "p90": 0.15988199999128483, "mean": 0.15945239999837213, "iqr": 0.0009890000001178123, "raw_times": [0.15988199999128483, 0.15985300001375435, 0.1601120000032097, 0.15889299999116702, 0.15852199999244476], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.16028200002438098, "peak_bytes": 151019520, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": 
"torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_D2048_S2048_W4", "batch": 2, "dim": 2048, "seqlen": 2048, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.16347299998642484, "p50": 0.1640229999679832, "p90": 0.16425199999048345, "mean": 0.1702108000017688, "iqr": 0.00036999995245423634, "raw_times": [0.16388200003802922, 0.1954240000259233, 0.16347299998642484, 0.1640229999679832, 0.16425199999048345], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.1656729999695017, "peak_bytes": 151035904, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D64_S128_W2", "batch": 4, "dim": 64, "seqlen": 128, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07862100000011196, "p50": 0.07903099998429752, "p90": 0.08047100004660024, "mean": 0.08115100000622988, "iqr": 0.001730000064981141, "raw_times": [0.07862100000011196, 0.0787409999816191, 0.08047100004660024, 0.08889100001852057, 0.07903099998429752], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08358100001260027, "peak_bytes": 33727488, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", 
"impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D64_S128_W4", "batch": 4, "dim": 64, "seqlen": 128, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08011200003466001, "p50": 0.0809910000043601, "p90": 0.08145100002820982, "mean": 0.0812654000128532, "iqr": 0.0010090000159834744, "raw_times": [0.08044200001222634, 0.08333099998480975, 0.08145100002820982, 0.0809910000043601, 0.08011200003466001], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08477100004711247, "peak_bytes": 591360, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D64_S512_W2", "batch": 4, "dim": 64, "seqlen": 512, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07822099996701581, "p50": 0.07905100000016319, "p90": 0.07909100003189451, "mean": 0.07904699999699005, "iqr": 0.00011000003041772288, "raw_times": [0.07822099996701581, 0.07989099998439997, 0.07898100000147679, 0.07905100000016319, 0.07909100003189451], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08219099998996171, "peak_bytes": 2360320, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", 
"impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D64_S512_W4", "batch": 4, "dim": 64, "seqlen": 512, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07998199998837663, "p50": 0.08066199995937495, "p90": 0.08123099996737437, "mean": 0.08075179998741078, "iqr": 0.0008489999459015962, "raw_times": [0.07998199998837663, 0.08150200000045515, 0.08123099996737437, 0.08066199995937495, 0.08038200002147278], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08594100000891558, "peak_bytes": 2360832, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D64_S2048_W2", "batch": 4, "dim": 64, "seqlen": 2048, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07915100002264808, "p50": 0.0798309999936464, "p90": 0.07988099997646714, "mean": 0.07979119999390605, "iqr": 0.0006789999815737247, "raw_times": [0.07915100002264808, 0.08089099998187521, 0.07988099997646714, 0.07920199999489341, 0.0798309999936464], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08425199996509036, "peak_bytes": 9438208, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", 
"impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D64_S2048_W4", "batch": 4, "dim": 64, "seqlen": 2048, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08051100002148814, "p50": 0.08109199995942618, "p90": 0.08230200000980403, "mean": 0.08190759998569774, "iqr": 0.0016110000160551863, "raw_times": [0.08109199995942618, 0.08494199994402152, 0.08069099999374885, 0.08051100002148814, 0.08230200000980403], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09682099999963611, "peak_bytes": 9438720, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S128_W2", "batch": 4, "dim": 2048, "seqlen": 128, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07942100000946084, "p50": 0.08037099996727193, "p90": 0.08038100003204818, "mean": 0.08024699999396034, "iqr": 0.0001800000291041215, "raw_times": [0.07942100000946084, 0.08038100003204818, 0.08020100000294406, 0.08037099996727193, 0.08086099995807672], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0851419999889913, "peak_bytes": 18931712, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": 
"2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S128_W4", "batch": 4, "dim": 2048, "seqlen": 128, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07963100000552004, "p50": 0.08073099996863675, "p90": 0.08166100002426901, "mean": 0.08793740000783146, "iqr": 0.0012990000186619, "raw_times": [0.07963100000552004, 0.08073099996863675, 0.11730200003512437, 0.08166100002426901, 0.08036200000560711], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08495099996252975, "peak_bytes": 18948096, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S512_W2", "batch": 4, "dim": 2048, "seqlen": 512, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.0912609999659253, "p50": 0.09163199996464755, "p90": 0.09213200002022859, "mean": 0.09177579999004593, "iqr": 0.0006200000370881753, "raw_times": [0.09151199998314041, 0.09163199996464755, 0.0912609999659253, 0.09213200002022859, 0.09234200001628778], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0949509999941256, "peak_bytes": 75522048, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", 
"run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S512_W4", "batch": 4, "dim": 2048, "seqlen": 512, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09837099997866972, "p50": 0.09879099997078811, "p90": 0.09935200000654731, "mean": 0.09890359998507847, "iqr": 0.0009400000067216752, "raw_times": [0.09841199999982564, 0.09837099997866972, 0.09879099997078811, 0.09959199996956158, 0.09935200000654731], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.10164200000417623, "peak_bytes": 75538432, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": "2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S2048_W2", "batch": 4, "dim": 2048, "seqlen": 2048, "width": 2, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.47857700002396086, "p50": 0.48315699996237527, "p90": 0.4835080000020753, "mean": 0.48229559999981575, "iqr": 0.0009899999895424116, "raw_times": [0.48251800001253287, 0.4837179999981345, 0.4835080000020753, 0.47857700002396086, 0.48315699996237527], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.48609800001031545, "peak_bytes": 302014464, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} +{"ts": 
"2025-11-10T21:59:05Z", "run": "2e4d4658589243d8bcde88068971c4df", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_D2048_S2048_W4", "batch": 4, "dim": 2048, "seqlen": 2048, "width": 4, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.49741800000902003, "p50": 0.5014380000147867, "p90": 0.5020579999950314, "mean": 0.501099999996768, "iqr": 0.0024500000108673703, "raw_times": [0.5020579999950314, 0.5014380000147867, 0.5049779999808379, 0.49960799998416405, 0.49741800000902003], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.5005179999670872, "peak_bytes": 302030848, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 0.003, "atol": 0.005, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "causal_conv1d_fp32"}, "err": null} diff --git a/causal_conv1d/impls/hf_kernels_causal_conv1d.html b/causal_conv1d/impls/hf_kernels_causal_conv1d.html index cb1bde40be01c47bdde38e8da86912f92e3be9c0..3cc070018eda60505b2989113197f0c0370d6f7a 100644 --- a/causal_conv1d/impls/hf_kernels_causal_conv1d.html +++ b/causal_conv1d/impls/hf_kernels_causal_conv1d.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; 
--text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] 
.menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { 
background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; 
background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } 
:root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 
80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } 
+.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: 
var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line 
breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; 
} - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: 
var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: 
none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ 
+[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: 
#008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ 
+[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { 
color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: 
#A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight 
.py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ 
+[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - ; -} - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - - { - % endif % -} - - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] 
.main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 
75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4106,7 +3888,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: nv | 0.21s +Cell: nv | 0.22s | Raw @@ -4123,16 +3905,16 @@ Cell: nv | 0.21s
-
Fri Oct 31 20:00:25 2025       
+
Mon Nov 10 21:57:49 2025       
 +-----------------------------------------------------------------------------------------+
-| NVIDIA-SMI 570.195.03             Driver Version: 570.195.03     CUDA Version: 12.8     |
-|-----------------------------------------+------------------------+----------------------+
+| NVIDIA-SMI 580.95.05              Driver Version: 580.95.05      CUDA Version: 13.0     |
++-----------------------------------------+------------------------+----------------------+
 | GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
 | Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
 |                                         |                        |               MIG M. |
 |=========================================+========================+======================|
 |   0  NVIDIA L40S                    On  |   00000000:4D:00.0 Off |                    0 |
-| N/A   33C    P0             79W /  350W |       0MiB /  46068MiB |     11%      Default |
+| N/A   27C    P0             77W /  350W |       0MiB /  46068MiB |     18%      Default |
 |                                         |                        |                  N/A |
 +-----------------------------------------+------------------------+----------------------+
 
@@ -4156,7 +3938,7 @@ Cell: nv | 0.21s
 ▼ output
  ▶ uv-logs
  | 
-Cell: benchmark | 9.11s
+Cell: benchmark | 10.37s
  | 
 
 Raw
@@ -4210,19 +3992,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B2_D64_S128_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     180.703us      4446.43%     180.703us     180.703us             1  
-                               hf_kernels_causal_conv1d         8.48%     160.534us        99.62%       1.886ms       1.886ms       0.000us         0.00%       5.504us       5.504us             1  
-                                         CausalConv1dFn         6.47%     122.423us        91.15%       1.726ms     575.261us       0.000us         0.00%       5.504us       1.835us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.51%      28.612us        80.84%       1.531ms     510.207us       4.064us       100.00%       5.504us       1.835us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       4.064us       100.00%       4.064us       1.355us             3  
-                                Activity Buffer Request        76.71%       1.452ms        76.71%       1.452ms       1.452ms       1.440us        35.43%       1.440us       1.440us             1  
-                                       aten::empty_like         1.07%      20.220us         3.84%      72.741us      24.247us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         2.77%      52.521us         2.77%      52.521us      17.507us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel         2.62%      49.571us         2.62%      49.571us      16.524us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.38%       7.101us         0.38%       7.101us       7.101us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     156.321us      3758.62%     156.321us     156.321us             1  
+                               hf_kernels_causal_conv1d         6.87%     159.072us        99.36%       2.300ms       2.300ms       0.000us         0.00%       5.599us       5.599us             1  
+                                         CausalConv1dFn         4.82%     111.622us        92.49%       2.141ms     713.785us       0.000us         0.00%       5.599us       1.866us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.19%      27.462us        84.76%       1.962ms     654.127us       4.159us       100.00%       5.599us       1.866us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       4.159us       100.00%       4.159us       1.386us             3  
+                                Activity Buffer Request        81.39%       1.884ms        81.39%       1.884ms       1.884ms       1.440us        34.62%       1.440us       1.440us             1  
+                                       aten::empty_like         0.94%      21.650us         2.91%      67.351us      22.450us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         1.97%      45.701us         1.97%      45.701us      15.234us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         2.18%      50.500us         2.18%      50.500us      16.833us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.64%      14.811us         0.64%      14.811us      14.811us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.893ms
-Self CUDA time total: 4.064us
+Self CPU time total: 2.315ms
+Self CUDA time total: 4.159us
 
 
 
@@ -4232,19 +4014,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B2_D64_S128_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     125.791us      3331.33%     125.791us     125.791us             1  
-                               hf_kernels_causal_conv1d         5.58%      96.392us        99.64%       1.721ms       1.721ms       0.000us         0.00%       5.056us       5.056us             1  
-                                         CausalConv1dFn         4.40%      76.074us        94.06%       1.625ms     541.671us       0.000us         0.00%       5.056us       1.685us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.52%      26.231us        87.95%       1.519ms     506.473us       3.776us       100.00%       5.056us       1.685us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       3.776us       100.00%       3.776us       1.259us             3  
-                                Activity Buffer Request        84.56%       1.461ms        84.56%       1.461ms       1.461ms       1.280us        33.90%       1.280us       1.280us             1  
-                                       aten::empty_like         0.44%       7.590us         1.71%      29.520us       9.840us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         1.27%      21.930us         1.27%      21.930us       7.310us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel         1.87%      32.290us         1.87%      32.290us      10.763us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.36%       6.200us         0.36%       6.200us       6.200us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     123.455us      3297.41%     123.455us     123.455us             1  
+                               hf_kernels_causal_conv1d         4.13%      83.101us        99.73%       2.009ms       2.009ms       0.000us         0.00%       4.992us       4.992us             1  
+                                         CausalConv1dFn         3.66%      73.760us        95.61%       1.926ms     641.917us       0.000us         0.00%       4.992us       1.664us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.15%      23.071us        90.47%       1.822ms     607.420us       3.744us       100.00%       4.992us       1.664us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       3.744us       100.00%       3.744us       1.248us             3  
+                                Activity Buffer Request        87.83%       1.769ms        87.83%       1.769ms       1.769ms       1.248us        33.33%       1.248us       1.248us             1  
+                                       aten::empty_like         0.39%       7.860us         1.48%      29.730us       9.910us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         1.09%      21.870us         1.09%      21.870us       7.290us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         1.49%      30.082us         1.49%      30.082us      10.027us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.27%       5.421us         0.27%       5.421us       5.421us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.728ms
-Self CUDA time total: 3.776us
+Self CPU time total: 2.014ms
+Self CUDA time total: 3.744us
 
 
 
@@ -4254,19 +4036,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B2_D64_S512_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     125.758us      3330.46%     125.758us     125.758us             1  
-                               hf_kernels_causal_conv1d         5.23%      90.742us        99.66%       1.729ms       1.729ms       0.000us         0.00%       5.056us       5.056us             1  
-                                         CausalConv1dFn         4.39%      76.092us        94.43%       1.638ms     546.081us       0.000us         0.00%       5.056us       1.685us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.50%      26.031us        88.31%       1.532ms     510.660us       3.776us       100.00%       5.056us       1.685us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       3.776us       100.00%       3.776us       1.259us             3  
-                                Activity Buffer Request        84.98%       1.474ms        84.98%       1.474ms       1.474ms       1.280us        33.90%       1.280us       1.280us             1  
-                                       aten::empty_like         0.47%       8.201us         1.74%      30.171us      10.057us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         1.27%      21.970us         1.27%      21.970us       7.323us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel         1.83%      31.671us         1.83%      31.671us      10.557us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.34%       5.850us         0.34%       5.850us       5.850us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     119.263us      3185.44%     119.263us     119.263us             1  
+                               hf_kernels_causal_conv1d         3.91%      78.640us        99.72%       2.003ms       2.003ms       0.000us         0.00%       4.992us       4.992us             1  
+                                         CausalConv1dFn         3.57%      71.661us        95.80%       1.925ms     641.537us       0.000us         0.00%       4.992us       1.664us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.13%      22.781us        90.75%       1.823ms     607.693us       3.744us       100.00%       4.992us       1.664us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       3.744us       100.00%       3.744us       1.248us             3  
+                                Activity Buffer Request        88.14%       1.771ms        88.14%       1.771ms       1.771ms       1.248us        33.33%       1.248us       1.248us             1  
+                                       aten::empty_like         0.41%       8.160us         1.49%      29.872us       9.957us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         1.08%      21.712us         1.08%      21.712us       7.237us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         1.48%      29.670us         1.48%      29.670us       9.890us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.28%       5.669us         0.28%       5.669us       5.669us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.735ms
-Self CUDA time total: 3.776us
+Self CPU time total: 2.009ms
+Self CUDA time total: 3.744us
 
 
 
@@ -4276,19 +4058,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B2_D64_S512_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     127.584us      3350.42%     127.584us     127.584us             1  
-                               hf_kernels_causal_conv1d         4.53%      88.983us        99.75%       1.962ms       1.962ms       0.000us         0.00%       5.088us       5.088us             1  
-                                         CausalConv1dFn         3.93%      77.252us        95.23%       1.873ms     624.219us       0.000us         0.00%       5.088us       1.696us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.36%      26.710us        89.83%       1.766ms     588.805us       3.808us       100.00%       5.088us       1.696us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       3.808us       100.00%       3.808us       1.269us             3  
-                                Activity Buffer Request        74.34%       1.462ms        74.34%       1.462ms       1.462ms       1.280us        33.61%       1.280us       1.280us             1  
-                                       aten::empty_like         0.41%       8.060us         1.47%      28.990us       9.663us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         1.06%      20.930us         1.06%      20.930us       6.977us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        14.13%     277.777us        14.13%     277.777us      92.592us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.25%       4.831us         0.25%       4.831us       4.831us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     121.790us      3253.81%     121.790us     121.790us             1  
+                               hf_kernels_causal_conv1d         3.48%      76.970us        99.77%       2.208ms       2.208ms       0.000us         0.00%       4.991us       4.991us             1  
+                                         CausalConv1dFn         3.33%      73.753us        96.30%       2.131ms     710.368us       0.000us         0.00%       4.991us       1.664us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.03%      22.770us        91.66%       2.029ms     676.184us       3.743us       100.00%       4.991us       1.664us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       3.743us       100.00%       3.743us       1.248us             3  
+                                Activity Buffer Request        81.47%       1.803ms        81.47%       1.803ms       1.803ms       1.248us        33.34%       1.248us       1.248us             1  
+                                       aten::empty_like         0.36%       7.858us         1.30%      28.800us       9.600us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         0.95%      20.942us         0.95%      20.942us       6.981us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         9.17%     202.863us         9.17%     202.863us      67.621us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.23%       4.991us         0.23%       4.991us       4.991us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.966ms
-Self CUDA time total: 3.808us
+Self CPU time total: 2.213ms
+Self CUDA time total: 3.743us
 
 
 
@@ -4298,19 +4080,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B2_D64_S2048_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     126.686us      2639.84%     126.686us     126.686us             1  
-                               hf_kernels_causal_conv1d         4.55%      87.622us        99.73%       1.920ms       1.920ms       0.000us         0.00%       6.430us       6.430us             1  
-                                         CausalConv1dFn         3.92%      75.482us        95.18%       1.832ms     610.789us       0.000us         0.00%       6.430us       2.143us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.44%      27.663us        89.66%       1.726ms     575.372us       4.799us       100.00%       6.430us       2.143us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       4.799us       100.00%       4.799us       1.600us             3  
-                                Activity Buffer Request        74.49%       1.434ms        74.49%       1.434ms       1.434ms       1.631us        33.99%       1.631us       1.631us             1  
-                                       aten::empty_like         0.42%       8.140us         1.60%      30.770us      10.257us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         1.18%      22.630us         1.18%      22.630us       7.543us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        13.74%     264.526us        13.74%     264.526us      88.175us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.27%       5.120us         0.27%       5.120us       5.120us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     123.073us      2547.57%     123.073us     123.073us             1  
+                               hf_kernels_causal_conv1d         3.82%      79.680us        99.75%       2.083ms       2.083ms       0.000us         0.00%       6.463us       6.463us             1  
+                                         CausalConv1dFn         3.53%      73.692us        95.93%       2.003ms     667.744us       0.000us         0.00%       6.463us       2.154us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.17%      24.371us        90.98%       1.900ms     633.257us       4.831us       100.00%       6.463us       2.154us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       4.831us       100.00%       4.831us       1.610us             3  
+                                Activity Buffer Request        81.73%       1.707ms        81.73%       1.707ms       1.707ms       1.632us        33.78%       1.632us       1.632us             1  
+                                       aten::empty_like         0.42%       8.791us         1.43%      29.771us       9.924us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         1.00%      20.980us         1.00%      20.980us       6.993us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         8.08%     168.682us         8.08%     168.682us      56.227us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.25%       5.250us         0.25%       5.250us       5.250us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.925ms
-Self CUDA time total: 4.799us
+Self CPU time total: 2.088ms
+Self CUDA time total: 4.831us
 
 
 
@@ -4320,19 +4102,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B2_D64_S2048_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     117.083us      2423.58%     117.083us     117.083us             1  
-                               hf_kernels_causal_conv1d        12.24%      83.203us        99.28%     674.957us     674.957us       0.000us         0.00%       6.463us       6.463us             1  
-                                         CausalConv1dFn        10.43%      70.911us        87.04%     591.754us     197.251us       0.000us         0.00%       6.463us       2.154us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         3.93%      26.710us        72.18%     490.682us     163.561us       4.831us       100.00%       6.463us       2.154us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       4.831us       100.00%       4.831us       1.610us             3  
-                                Activity Buffer Request        32.42%     220.416us        32.42%     220.416us     220.416us       1.632us        33.78%       1.632us       1.632us             1  
-                                       aten::empty_like         1.07%       7.270us         4.44%      30.161us      10.054us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         3.37%      22.891us         3.37%      22.891us       7.630us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        35.83%     243.556us        35.83%     243.556us      81.185us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.72%       4.870us         0.72%       4.870us       4.870us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     113.883us      2373.55%     113.883us     113.883us             1  
+                               hf_kernels_causal_conv1d        15.03%      75.250us        99.01%     495.717us     495.717us       0.000us         0.00%       6.430us       6.430us             1  
+                                         CausalConv1dFn        13.70%      68.601us        83.98%     420.467us     140.156us       0.000us         0.00%       6.430us       2.143us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         5.03%      25.190us        64.69%     323.874us     107.958us       4.798us       100.00%       6.430us       2.143us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       4.798us       100.00%       4.798us       1.599us             3  
+                                Activity Buffer Request        28.01%     140.222us        28.01%     140.222us     140.222us       1.632us        34.01%       1.632us       1.632us             1  
+                                       aten::empty_like         1.45%       7.260us         5.59%      27.992us       9.331us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         4.14%      20.732us         4.14%      20.732us       6.911us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel        31.65%     158.462us        31.65%     158.462us      52.821us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.99%       4.940us         0.99%       4.940us       4.940us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 679.827us
-Self CUDA time total: 4.831us
+Self CPU time total: 500.657us
+Self CUDA time total: 4.798us
 
 
 
@@ -4342,19 +4124,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B2_D2048_S128_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     124.381us      1167.35%     124.381us     124.381us             1  
-                               hf_kernels_causal_conv1d         4.48%      85.542us        99.75%       1.904ms       1.904ms       0.000us         0.00%      14.271us      14.271us             1  
-                                         CausalConv1dFn         3.83%      73.182us        95.27%       1.819ms     606.282us       0.000us         0.00%      14.271us       4.757us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.41%      26.960us        89.88%       1.716ms     571.988us      10.655us       100.00%      14.271us       4.757us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      10.655us       100.00%      10.655us       3.552us             3  
-                                Activity Buffer Request        76.01%       1.451ms        76.01%       1.451ms       1.451ms       3.616us        33.94%       3.616us       3.616us             1  
-                                       aten::empty_like         0.43%       8.120us         1.56%      29.700us       9.900us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         1.13%      21.580us         1.13%      21.580us       7.193us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        12.45%     237.787us        12.45%     237.787us      79.262us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.25%       4.860us         0.25%       4.860us       4.860us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     122.365us      1148.32%     122.365us     122.365us             1  
+                               hf_kernels_causal_conv1d         3.51%      76.530us        99.77%       2.176ms       2.176ms       0.000us         0.00%      14.208us      14.208us             1  
+                                         CausalConv1dFn         3.29%      71.713us        96.26%       2.099ms     699.771us       0.000us         0.00%      14.208us       4.736us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.11%      24.170us        91.65%       1.999ms     666.274us      10.656us       100.00%      14.208us       4.736us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      10.656us       100.00%      10.656us       3.552us             3  
+                                Activity Buffer Request        82.90%       1.808ms        82.90%       1.808ms       1.808ms       3.552us        33.33%       3.552us       3.552us             1  
+                                       aten::empty_like         0.37%       8.070us         1.32%      28.780us       9.593us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         0.95%      20.710us         0.95%      20.710us       6.903us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         7.64%     166.713us         7.64%     166.713us      55.571us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.23%       5.051us         0.23%       5.051us       5.051us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.909ms
-Self CUDA time total: 10.655us
+Self CPU time total: 2.181ms
+Self CUDA time total: 10.656us
 
 
 
@@ -4364,19 +4146,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B2_D2048_S128_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     122.652us      1120.72%     122.652us     122.652us             1  
-                               hf_kernels_causal_conv1d        12.91%      86.303us        99.27%     663.588us     663.588us       0.000us         0.00%      14.624us      14.624us             1  
-                                         CausalConv1dFn        10.74%      71.821us        86.36%     577.285us     192.428us       0.000us         0.00%      14.624us       4.875us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         3.81%      25.480us        71.21%     476.023us     158.674us      10.944us       100.00%      14.624us       4.875us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      10.944us       100.00%      10.944us       3.648us             3  
-                                Activity Buffer Request        32.82%     219.426us        32.82%     219.426us     219.426us       3.680us        33.63%       3.680us       3.680us             1  
-                                       aten::empty_like         1.14%       7.591us         4.40%      29.441us       9.814us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         3.27%      21.850us         3.27%      21.850us       7.283us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        34.57%     231.117us        34.57%     231.117us      77.039us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.73%       4.900us         0.73%       4.900us       4.900us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     115.676us      1057.08%     115.676us     115.676us             1  
+                               hf_kernels_causal_conv1d        15.90%      75.141us        98.97%     467.777us     467.777us       0.000us         0.00%      14.654us      14.654us             1  
+                                         CausalConv1dFn        14.89%      70.359us        83.07%     392.636us     130.879us       0.000us         0.00%      14.654us       4.885us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         4.95%      23.391us        62.24%     294.186us      98.062us      10.943us       100.00%      14.654us       4.885us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      10.943us       100.00%      10.943us       3.648us             3  
+                                Activity Buffer Request        23.54%     111.281us        23.54%     111.281us     111.281us       3.711us        33.91%       3.711us       3.711us             1  
+                                       aten::empty_like         1.66%       7.830us         5.94%      28.091us       9.364us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         4.29%      20.261us         4.29%      20.261us       6.754us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel        33.75%     159.514us        33.75%     159.514us      53.171us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         1.03%       4.890us         1.03%       4.890us       4.890us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 668.488us
-Self CUDA time total: 10.944us
+Self CPU time total: 472.667us
+Self CUDA time total: 10.943us
 
 
 
@@ -4386,19 +4168,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B2_D2048_S512_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     130.430us      1181.43%     130.430us     130.430us             1  
-                               hf_kernels_causal_conv1d         4.23%      79.341us        99.73%       1.871ms       1.871ms       0.000us         0.00%      14.784us      14.784us             1  
-                                         CausalConv1dFn         4.03%      75.521us        95.50%       1.792ms     597.206us       0.000us         0.00%      14.784us       4.928us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.43%      26.810us        89.82%       1.685ms     561.675us      11.040us       100.00%      14.784us       4.928us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      11.040us       100.00%      11.040us       3.680us             3  
-                                Activity Buffer Request        77.07%       1.446ms        77.07%       1.446ms       1.446ms       3.744us        33.91%       3.744us       3.744us             1  
-                                       aten::empty_like         0.44%       8.272us         1.66%      31.072us      10.357us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         1.22%      22.800us         1.22%      22.800us       7.600us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        11.32%     212.286us        11.32%     212.286us      70.762us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.27%       5.130us         0.27%       5.130us       5.130us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     123.422us      1124.47%     123.422us     123.422us             1  
+                               hf_kernels_causal_conv1d         3.69%      77.100us        99.75%       2.084ms       2.084ms       0.000us         0.00%      14.656us      14.656us             1  
+                                         CausalConv1dFn         3.52%      73.471us        96.06%       2.007ms     668.988us       0.000us         0.00%      14.656us       4.885us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.13%      23.660us        90.70%       1.895ms     631.647us      10.976us       100.00%      14.656us       4.885us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      10.976us       100.00%      10.976us       3.659us             3  
+                                Activity Buffer Request        81.81%       1.709ms        81.81%       1.709ms       1.709ms       3.680us        33.53%       3.680us       3.680us             1  
+                                       aten::empty_like         0.81%      17.020us         1.85%      38.551us      12.850us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         1.03%      21.531us         1.03%      21.531us       7.177us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         7.76%     162.104us         7.76%     162.104us      54.035us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.25%       5.260us         0.25%       5.260us       5.260us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.876ms
-Self CUDA time total: 11.040us
+Self CPU time total: 2.089ms
+Self CUDA time total: 10.976us
 
 
 
@@ -4408,19 +4190,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B2_D2048_S512_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     120.097us      1060.18%     120.097us     120.097us             1  
-                               hf_kernels_causal_conv1d        13.35%      76.301us        99.17%     566.674us     566.674us       0.000us         0.00%      15.168us      15.168us             1  
-                                         CausalConv1dFn        12.80%      73.153us        85.81%     490.373us     163.458us       0.000us         0.00%      15.168us       5.056us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         4.71%      26.911us        68.00%     388.569us     129.523us      11.328us       100.00%      15.168us       5.056us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      11.328us       100.00%      11.328us       3.776us             3  
-                                Activity Buffer Request        34.49%     197.075us        34.49%     197.075us     197.075us       3.840us        33.90%       3.840us       3.840us             1  
-                                       aten::empty_like         1.29%       7.379us         5.01%      28.651us       9.550us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         3.72%      21.272us         3.72%      21.272us       7.091us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        28.80%     164.583us        28.80%     164.583us      54.861us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.83%       4.760us         0.83%       4.760us       4.760us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     117.952us      1044.29%     117.952us     117.952us             1  
+                               hf_kernels_causal_conv1d        16.01%      73.960us        98.90%     456.837us     456.837us       0.000us         0.00%      15.071us      15.071us             1  
+                                         CausalConv1dFn        15.53%      71.741us        82.89%     382.877us     127.626us       0.000us         0.00%      15.071us       5.024us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         4.93%      22.791us        61.20%     282.685us      94.228us      11.295us       100.00%      15.071us       5.024us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      11.295us       100.00%      11.295us       3.765us             3  
+                                Activity Buffer Request        21.70%     100.232us        21.70%     100.232us     100.232us       3.776us        33.43%       3.776us       3.776us             1  
+                                       aten::empty_like         1.73%       7.970us         6.16%      28.451us       9.484us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         4.43%      20.481us         4.43%      20.481us       6.827us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel        34.57%     159.662us        34.57%     159.662us      53.221us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         1.10%       5.060us         1.10%       5.060us       5.060us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 571.434us
-Self CUDA time total: 11.328us
+Self CPU time total: 461.897us
+Self CUDA time total: 11.295us
 
 
 
@@ -4430,19 +4212,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B2_D2048_S2048_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     133.919us       265.71%     133.919us     133.919us             1  
-                               hf_kernels_causal_conv1d         4.38%      80.552us        99.73%       1.836ms       1.836ms       0.000us         0.00%      83.873us      83.873us             1  
-                                         CausalConv1dFn         4.09%      75.353us        95.35%       1.755ms     585.145us       0.000us         0.00%      83.873us      27.958us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.33%      24.410us        89.50%       1.648ms     549.264us      50.401us       100.00%      83.873us      27.958us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      50.401us       100.00%      50.401us      16.800us             3  
-                                Activity Buffer Request        79.01%       1.455ms        79.01%       1.455ms       1.455ms      33.472us        66.41%      33.472us      33.472us             1  
-                                       aten::empty_like         0.45%       8.369us         1.75%      32.290us      10.763us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         1.30%      23.921us         1.30%      23.921us       7.974us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel         9.17%     168.764us         9.17%     168.764us      56.255us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.27%       5.020us         0.27%       5.020us       5.020us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     128.158us       256.57%     128.158us     128.158us             1  
+                               hf_kernels_causal_conv1d         3.51%      75.280us        99.75%       2.140ms       2.140ms       0.000us         0.00%      83.102us      83.102us             1  
+                                         CausalConv1dFn         3.36%      72.172us        96.24%       2.065ms     688.218us       0.000us         0.00%      83.102us      27.701us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.14%      24.540us        91.55%       1.964ms     654.657us      49.951us       100.00%      83.102us      27.701us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      49.951us       100.00%      49.951us      16.650us             3  
+                                Activity Buffer Request        82.86%       1.778ms        82.86%       1.778ms       1.778ms      33.151us        66.37%      33.151us      33.151us             1  
+                                       aten::empty_like         0.37%       7.920us         1.33%      28.510us       9.503us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         0.96%      20.590us         0.96%      20.590us       6.863us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         7.54%     161.824us         7.54%     161.824us      53.941us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.25%       5.290us         0.25%       5.290us       5.290us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.841ms
-Self CUDA time total: 50.401us
+Self CPU time total: 2.145ms
+Self CUDA time total: 49.951us
 
 
 
@@ -4452,19 +4234,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B2_D2048_S2048_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     131.005us       256.03%     131.005us     131.005us             1  
-                               hf_kernels_causal_conv1d        11.69%      77.241us        99.25%     655.717us     655.717us       0.000us         0.00%      85.534us      85.534us             1  
-                                         CausalConv1dFn        10.97%      72.503us        87.56%     578.476us     192.825us       0.000us         0.00%      85.534us      28.511us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         3.89%      25.692us        71.76%     474.103us     158.034us      51.167us       100.00%      85.534us      28.511us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      51.167us       100.00%      51.167us      17.056us             3  
-                                Activity Buffer Request        43.08%     284.587us        43.08%     284.587us     284.587us      34.367us        67.17%      34.367us      34.367us             1  
-                                       aten::empty_like         1.14%       7.549us         4.82%      31.870us      10.623us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         3.68%      24.321us         3.68%      24.321us       8.107us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        24.80%     163.824us        24.80%     163.824us      54.608us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.75%       4.929us         0.75%       4.929us       4.929us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     121.310us       261.10%     121.310us     121.310us             1  
+                               hf_kernels_causal_conv1d        16.42%      74.560us        98.88%     448.987us     448.987us       0.000us         0.00%      75.933us      75.933us             1  
+                                         CausalConv1dFn        15.28%      69.392us        82.46%     374.427us     124.809us       0.000us         0.00%      75.933us      25.311us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         5.01%      22.740us        60.80%     276.074us      92.025us      46.462us       100.00%      75.933us      25.311us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      46.462us       100.00%      46.462us      15.487us             3  
+                                Activity Buffer Request        21.27%      96.581us        21.27%      96.581us      96.581us      29.471us        63.43%      29.471us      29.471us             1  
+                                       aten::empty_like         1.63%       7.411us         6.38%      28.961us       9.654us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         4.75%      21.550us         4.75%      21.550us       7.183us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel        34.52%     156.753us        34.52%     156.753us      52.251us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         1.12%       5.090us         1.12%       5.090us       5.090us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 660.646us
-Self CUDA time total: 51.167us
+Self CPU time total: 454.077us
+Self CUDA time total: 46.462us
 
 
 
@@ -4474,19 +4256,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B4_D64_S128_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     118.686us      3040.89%     118.686us     118.686us             1  
-                               hf_kernels_causal_conv1d        11.60%      73.750us        99.24%     631.216us     631.216us       0.000us         0.00%       5.183us       5.183us             1  
-                                         CausalConv1dFn        11.30%      71.845us        87.65%     557.466us     185.822us       0.000us         0.00%       5.183us       1.728us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         4.22%      26.861us        71.87%     457.101us     152.367us       3.903us       100.00%       5.183us       1.728us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       3.903us       100.00%       3.903us       1.301us             3  
-                                Activity Buffer Request        42.38%     269.577us        42.38%     269.577us     269.577us       1.280us        32.80%       1.280us       1.280us             1  
-                                       aten::empty_like         1.23%       7.810us         4.48%      28.520us       9.507us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         3.26%      20.710us         3.26%      20.710us       6.903us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        25.26%     160.663us        25.26%     160.663us      53.554us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.76%       4.821us         0.76%       4.821us       4.821us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     128.254us      3312.35%     128.254us     128.254us             1  
+                               hf_kernels_causal_conv1d         3.31%      74.540us        99.77%       2.245ms       2.245ms       0.000us         0.00%       5.120us       5.120us             1  
+                                         CausalConv1dFn         3.41%      76.802us        96.46%       2.170ms     723.418us       0.000us         0.00%       5.120us       1.707us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.08%      24.209us        91.78%       2.065ms     688.374us       3.872us       100.00%       5.120us       1.707us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       3.872us       100.00%       3.872us       1.291us             3  
+                                Activity Buffer Request        83.69%       1.883ms        83.69%       1.883ms       1.883ms       1.248us        32.23%       1.248us       1.248us             1  
+                                       aten::empty_like         0.34%       7.679us         1.26%      28.331us       9.444us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         0.92%      20.652us         0.92%      20.652us       6.884us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         7.01%     157.803us         7.01%     157.803us      52.601us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.23%       5.180us         0.23%       5.180us       5.180us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 636.037us
-Self CUDA time total: 3.903us
+Self CPU time total: 2.250ms
+Self CUDA time total: 3.872us
 
 
 
@@ -4496,19 +4278,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B4_D64_S128_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     120.221us      3029.76%     120.221us     120.221us             1  
-                               hf_kernels_causal_conv1d        13.01%      75.082us        99.09%     571.775us     571.775us       0.000us         0.00%       5.248us       5.248us             1  
-                                         CausalConv1dFn        12.35%      71.241us        86.08%     496.693us     165.564us       0.000us         0.00%       5.248us       1.749us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         4.88%      28.181us        68.58%     395.720us     131.907us       3.968us       100.00%       5.248us       1.749us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       3.968us       100.00%       3.968us       1.323us             3  
-                                Activity Buffer Request        36.26%     209.246us        36.26%     209.246us     209.246us       1.280us        32.26%       1.280us       1.280us             1  
-                                       aten::empty_like         1.42%       8.172us         5.15%      29.732us       9.911us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         3.74%      21.560us         3.74%      21.560us       7.187us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        27.43%     158.293us        27.43%     158.293us      52.764us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.91%       5.270us         0.91%       5.270us       5.270us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     117.470us      3059.11%     117.470us     117.470us             1  
+                               hf_kernels_causal_conv1d        16.52%      75.490us        98.91%     451.907us     451.907us       0.000us         0.00%       5.056us       5.056us             1  
+                                         CausalConv1dFn        15.55%      71.061us        82.39%     376.417us     125.472us       0.000us         0.00%       5.056us       1.685us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         5.27%      24.090us        60.40%     275.984us      91.995us       3.840us       100.00%       5.056us       1.685us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       3.840us       100.00%       3.840us       1.280us             3  
+                                Activity Buffer Request        20.75%      94.821us        20.75%      94.821us      94.821us       1.216us        31.67%       1.216us       1.216us             1  
+                                       aten::empty_like         1.80%       8.242us         6.43%      29.372us       9.791us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         4.62%      21.130us         4.62%      21.130us       7.043us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel        34.38%     157.073us        34.38%     157.073us      52.358us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         1.09%       4.990us         1.09%       4.990us       4.990us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 577.045us
-Self CUDA time total: 3.968us
+Self CPU time total: 456.897us
+Self CUDA time total: 3.840us
 
 
 
@@ -4518,19 +4300,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B4_D64_S512_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     117.374us      2843.36%     117.374us     117.374us             1  
-                               hf_kernels_causal_conv1d        14.38%      74.792us        98.97%     514.843us     514.843us       0.000us         0.00%       5.504us       5.504us             1  
-                                         CausalConv1dFn        13.25%      68.940us        84.59%     440.051us     146.684us       0.000us         0.00%       5.504us       1.835us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         4.99%      25.981us        65.51%     340.779us     113.593us       4.128us       100.00%       5.504us       1.835us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       4.128us       100.00%       4.128us       1.376us             3  
-                                Activity Buffer Request        29.84%     155.214us        29.84%     155.214us     155.214us       1.376us        33.33%       1.376us       1.376us             1  
-                                       aten::empty_like         1.55%       8.080us         5.83%      30.332us      10.111us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         4.28%      22.252us         4.28%      22.252us       7.417us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        30.68%     159.584us        30.68%     159.584us      53.195us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         1.03%       5.380us         1.03%       5.380us       5.380us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     120.191us      2958.18%     120.191us     120.191us             1  
+                               hf_kernels_causal_conv1d         3.64%      78.360us        99.76%       2.149ms       2.149ms       0.000us         0.00%       5.406us       5.406us             1  
+                                         CausalConv1dFn         3.37%      72.531us        96.13%       2.071ms     690.275us       0.000us         0.00%       5.406us       1.802us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.05%      22.591us        91.41%       1.969ms     656.417us       4.063us       100.00%       5.406us       1.802us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       4.063us       100.00%       4.063us       1.354us             3  
+                                Activity Buffer Request        83.09%       1.790ms        83.09%       1.790ms       1.790ms       1.343us        33.05%       1.343us       1.343us             1  
+                                       aten::empty_like         0.37%       8.020us         1.35%      29.041us       9.680us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         0.98%      21.021us         0.98%      21.021us       7.007us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         7.27%     156.703us         7.27%     156.703us      52.234us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.24%       5.100us         0.24%       5.100us       5.100us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 520.223us
-Self CUDA time total: 4.128us
+Self CPU time total: 2.154ms
+Self CUDA time total: 4.063us
 
 
 
@@ -4540,19 +4322,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B4_D64_S512_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     116.831us      2875.49%     116.831us     116.831us             1  
-                               hf_kernels_causal_conv1d        13.78%      75.282us        99.09%     541.484us     541.484us       0.000us         0.00%       5.439us       5.439us             1  
-                                         CausalConv1dFn        12.58%      68.741us        85.32%     466.202us     155.401us       0.000us         0.00%       5.439us       1.813us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         4.76%      26.021us        67.34%     367.980us     122.660us       4.063us       100.00%       5.439us       1.813us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       4.063us       100.00%       4.063us       1.354us             3  
-                                Activity Buffer Request        33.52%     183.175us        33.52%     183.175us     183.175us       1.376us        33.87%       1.376us       1.376us             1  
-                                       aten::empty_like         1.37%       7.489us         5.40%      29.481us       9.827us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         4.02%      21.992us         4.02%      21.992us       7.331us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        29.06%     158.784us        29.06%     158.784us      52.928us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.91%       4.951us         0.91%       4.951us       4.951us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     120.509us      2988.81%     120.509us     120.509us             1  
+                               hf_kernels_causal_conv1d        16.24%      73.950us        98.87%     450.317us     450.317us       0.000us         0.00%       5.376us       5.376us             1  
+                                         CausalConv1dFn        17.23%      78.473us        82.64%     376.367us     125.456us       0.000us         0.00%       5.376us       1.792us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         5.08%      23.119us        59.28%     269.974us      89.991us       4.032us       100.00%       5.376us       1.792us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       4.032us       100.00%       4.032us       1.344us             3  
+                                Activity Buffer Request        19.95%      90.851us        19.95%      90.851us      90.851us       1.344us        33.33%       1.344us       1.344us             1  
+                                       aten::empty_like         1.73%       7.890us         6.13%      27.920us       9.307us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         4.40%      20.030us         4.40%      20.030us       6.677us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel        34.25%     156.004us        34.25%     156.004us      52.001us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         1.13%       5.130us         1.13%       5.130us       5.130us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 546.435us
-Self CUDA time total: 4.063us
+Self CPU time total: 455.447us
+Self CUDA time total: 4.032us
 
 
 
@@ -4562,19 +4344,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B4_D64_S2048_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     119.806us      2228.53%     119.806us     119.806us             1  
-                               hf_kernels_causal_conv1d        11.93%      76.073us        99.21%     632.507us     632.507us       0.000us         0.00%       7.200us       7.200us             1  
-                                         CausalConv1dFn        11.21%      71.480us        87.28%     556.434us     185.478us       0.000us         0.00%       7.200us       2.400us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         4.13%      26.361us        71.46%     455.612us     151.871us       5.376us       100.00%       7.200us       2.400us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       5.376us       100.00%       5.376us       1.792us             3  
-                                Activity Buffer Request        42.49%     270.867us        42.49%     270.867us     270.867us       1.824us        33.93%       1.824us       1.824us             1  
-                                       aten::empty_like         1.24%       7.892us         4.60%      29.342us       9.781us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         3.36%      21.450us         3.36%      21.450us       7.150us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        24.84%     158.384us        24.84%     158.384us      52.795us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.79%       5.050us         0.79%       5.050us       5.050us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     124.767us      2334.71%     124.767us     124.767us             1  
+                               hf_kernels_causal_conv1d         3.64%      76.791us        99.75%       2.102ms       2.102ms       0.000us         0.00%       7.168us       7.168us             1  
+                                         CausalConv1dFn         3.46%      72.920us        96.11%       2.025ms     674.997us       0.000us         0.00%       7.168us       2.389us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.08%      22.730us        91.24%       1.923ms     640.840us       5.344us       100.00%       7.168us       2.389us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       5.344us       100.00%       5.344us       1.781us             3  
+                                Activity Buffer Request        82.66%       1.742ms        82.66%       1.742ms       1.742ms       1.824us        34.13%       1.824us       1.824us             1  
+                                       aten::empty_like         0.40%       8.480us         1.40%      29.552us       9.851us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         1.00%      21.072us         1.00%      21.072us       7.024us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         7.51%     158.242us         7.51%     158.242us      52.747us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.25%       5.220us         0.25%       5.220us       5.220us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 637.557us
-Self CUDA time total: 5.376us
+Self CPU time total: 2.107ms
+Self CUDA time total: 5.344us
 
 
 
@@ -4584,19 +4366,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B4_D64_S2048_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     119.676us      2174.35%     119.676us     119.676us             1  
-                               hf_kernels_causal_conv1d        14.25%      74.352us        99.01%     516.513us     516.513us       0.000us         0.00%       7.392us       7.392us             1  
-                                         CausalConv1dFn        14.02%      73.122us        84.76%     442.161us     147.387us       0.000us         0.00%       7.392us       2.464us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         5.04%      26.281us        65.18%     340.038us     113.346us       5.504us       100.00%       7.392us       2.464us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       5.504us       100.00%       5.504us       1.835us             3  
-                                Activity Buffer Request        30.19%     157.524us        30.19%     157.524us     157.524us       1.888us        34.30%       1.888us       1.888us             1  
-                                       aten::empty_like         1.50%       7.800us         5.56%      29.001us       9.667us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         4.06%      21.201us         4.06%      21.201us       7.067us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        29.95%     156.233us        29.95%     156.233us      52.078us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.99%       5.180us         0.99%       5.180us       5.180us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     114.399us      2127.96%     114.399us     114.399us             1  
+                               hf_kernels_causal_conv1d        16.62%      75.320us        98.88%     448.097us     448.097us       0.000us         0.00%       7.200us       7.200us             1  
+                                         CausalConv1dFn        15.04%      68.172us        82.26%     372.777us     124.259us       0.000us         0.00%       7.200us       2.400us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         5.05%      22.881us        60.95%     276.214us      92.071us       5.376us       100.00%       7.200us       2.400us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us       5.376us       100.00%       5.376us       1.792us             3  
+                                Activity Buffer Request        20.71%      93.851us        20.71%      93.851us      93.851us       1.824us        33.93%       1.824us       1.824us             1  
+                                       aten::empty_like         1.68%       7.630us         6.27%      28.391us       9.464us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         4.58%      20.761us         4.58%      20.761us       6.920us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel        35.19%     159.482us        35.19%     159.482us      53.161us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         1.12%       5.070us         1.12%       5.070us       5.070us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 521.693us
-Self CUDA time total: 5.504us
+Self CPU time total: 453.167us
+Self CUDA time total: 5.376us
 
 
 
@@ -4606,19 +4388,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B4_D2048_S128_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     124.798us       715.63%     124.798us     124.798us             1  
-                               hf_kernels_causal_conv1d        11.85%      75.293us        99.15%     630.167us     630.167us       0.000us         0.00%      23.295us      23.295us             1  
-                                         CausalConv1dFn        11.06%      70.310us        87.30%     554.874us     184.958us       0.000us         0.00%      23.295us       7.765us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         4.18%      26.540us        71.39%     453.732us     151.244us      17.439us       100.00%      23.295us       7.765us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      17.439us       100.00%      17.439us       5.813us             3  
-                                Activity Buffer Request        42.20%     268.237us        42.20%     268.237us     268.237us       5.856us        33.58%       5.856us       5.856us             1  
-                                       aten::empty_like         1.25%       7.951us         4.85%      30.832us      10.277us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         3.60%      22.881us         3.60%      22.881us       7.627us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        25.01%     158.955us        25.01%     158.955us      52.985us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.85%       5.410us         0.85%       5.410us       5.410us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     121.887us       696.30%     121.887us     121.887us             1  
+                               hf_kernels_causal_conv1d         3.44%      74.640us        99.77%       2.162ms       2.162ms       0.000us         0.00%      23.361us      23.361us             1  
+                                         CausalConv1dFn         3.19%      69.031us        96.32%       2.087ms     695.668us       0.000us         0.00%      23.361us       7.787us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.10%      23.730us        91.78%       1.989ms     662.904us      17.505us       100.00%      23.361us       7.787us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      17.505us       100.00%      17.505us       5.835us             3  
+                                Activity Buffer Request        82.75%       1.793ms        82.75%       1.793ms       1.793ms       5.856us        33.45%       5.856us       5.856us             1  
+                                       aten::empty_like         0.40%       8.582us         1.35%      29.262us       9.754us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         0.95%      20.680us         0.95%      20.680us       6.893us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         7.94%     172.113us         7.94%     172.113us      57.371us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.23%       5.069us         0.23%       5.069us       5.069us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 635.577us
-Self CUDA time total: 17.439us
+Self CPU time total: 2.167ms
+Self CUDA time total: 17.505us
 
 
 
@@ -4628,19 +4410,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B4_D2048_S128_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     124.252us       695.89%     124.252us     124.252us             1  
-                               hf_kernels_causal_conv1d        15.28%      76.213us        99.04%     494.053us     494.053us       0.000us         0.00%      23.839us      23.839us             1  
-                                         CausalConv1dFn        14.60%      72.841us        83.76%     417.840us     139.280us       0.000us         0.00%      23.839us       7.946us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         5.38%      26.851us        63.27%     315.607us     105.202us      17.855us       100.00%      23.839us       7.946us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      17.855us       100.00%      17.855us       5.952us             3  
-                                Activity Buffer Request        26.40%     131.703us        26.40%     131.703us     131.703us       5.984us        33.51%       5.984us       5.984us             1  
-                                       aten::empty_like         1.62%       8.090us         5.89%      29.392us       9.797us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         4.27%      21.302us         4.27%      21.302us       7.101us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        31.48%     157.053us        31.48%     157.053us      52.351us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.96%       4.810us         0.96%       4.810us       4.810us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     119.997us       664.91%     119.997us     119.997us             1  
+                               hf_kernels_causal_conv1d        16.46%      76.510us        98.91%     459.857us     459.857us       0.000us         0.00%      24.063us      24.063us             1  
+                                         CausalConv1dFn        14.99%      69.691us        82.45%     383.347us     127.782us       0.000us         0.00%      24.063us       8.021us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         5.12%      23.810us        61.53%     286.094us      95.365us      18.047us       100.00%      24.063us       8.021us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      18.047us       100.00%      18.047us       6.016us             3  
+                                Activity Buffer Request        22.64%     105.271us        22.64%     105.271us     105.271us       6.016us        33.34%       6.016us       6.016us             1  
+                                       aten::empty_like         1.59%       7.411us         5.93%      27.562us       9.187us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         4.33%      20.151us         4.33%      20.151us       6.717us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel        33.77%     157.013us        33.77%     157.013us      52.338us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         1.09%       5.080us         1.09%       5.080us       5.080us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 498.863us
-Self CUDA time total: 17.855us
+Self CPU time total: 464.937us
+Self CUDA time total: 18.047us
 
 
 
@@ -4650,19 +4432,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B4_D2048_S512_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     124.253us       695.94%     124.253us     124.253us             1  
-                               hf_kernels_causal_conv1d        14.09%      92.581us        99.22%     652.096us     652.096us       0.000us         0.00%      23.838us      23.838us             1  
-                                         CausalConv1dFn        11.45%      75.254us        85.13%     559.515us     186.505us       0.000us         0.00%      23.838us       7.946us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         3.84%      25.251us        69.30%     455.481us     151.827us      17.854us       100.00%      23.838us       7.946us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      17.854us       100.00%      17.854us       5.951us             3  
-                                Activity Buffer Request        41.42%     272.247us        41.42%     272.247us     272.247us       5.984us        33.52%       5.984us       5.984us             1  
-                                       aten::empty_like         1.19%       7.849us         4.38%      28.780us       9.593us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         3.18%      20.931us         3.18%      20.931us       6.977us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        24.04%     157.983us        24.04%     157.983us      52.661us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.78%       5.140us         0.78%       5.140us       5.140us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     125.983us       701.78%     125.983us     125.983us             1  
+                               hf_kernels_causal_conv1d         3.62%      75.400us        99.76%       2.076ms       2.076ms       0.000us         0.00%      23.968us      23.968us             1  
+                                         CausalConv1dFn         3.51%      72.963us        96.14%       2.001ms     667.008us       0.000us         0.00%      23.968us       7.989us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.17%      24.320us        91.19%       1.898ms     632.703us      17.952us       100.00%      23.968us       7.989us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      17.952us       100.00%      17.952us       5.984us             3  
+                                Activity Buffer Request        82.20%       1.711ms        82.20%       1.711ms       1.711ms       6.016us        33.51%       6.016us       6.016us             1  
+                                       aten::empty_like         0.41%       8.499us         1.44%      29.950us       9.983us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         1.03%      21.451us         1.03%      21.451us       7.150us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         7.83%     162.893us         7.83%     162.893us      54.298us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.24%       4.969us         0.24%       4.969us       4.969us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 657.236us
-Self CUDA time total: 17.854us
+Self CPU time total: 2.081ms
+Self CUDA time total: 17.952us
 
 
 
@@ -4672,19 +4454,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B4_D2048_S512_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     121.982us       651.61%     121.982us     121.982us             1  
-                               hf_kernels_causal_conv1d        16.26%      76.273us        99.00%     464.343us     464.343us       0.000us         0.00%      25.088us      25.088us             1  
-                                         CausalConv1dFn        15.20%      71.302us        82.74%     388.070us     129.357us       0.000us         0.00%      25.088us       8.363us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         5.49%      25.750us        61.15%     286.808us      95.603us      18.720us       100.00%      25.088us       8.363us             3  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      18.720us       100.00%      18.720us       6.240us             3  
-                                Activity Buffer Request        22.13%     103.813us        22.13%     103.813us     103.813us       6.368us        34.02%       6.368us       6.368us             1  
-                                       aten::empty_like         1.75%       8.210us         6.39%      29.960us       9.987us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         4.64%      21.750us         4.64%      21.750us       7.250us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        33.53%     157.245us        33.53%     157.245us      52.415us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         1.00%       4.680us         1.00%       4.680us       4.680us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     119.901us       639.40%     119.901us     119.901us             1  
+                               hf_kernels_causal_conv1d        11.47%      73.600us        99.21%     636.820us     636.820us       0.000us         0.00%      25.088us      25.088us             1  
+                                         CausalConv1dFn        11.28%      72.380us        87.74%     563.220us     187.740us       0.000us         0.00%      25.088us       8.363us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         3.65%      23.431us        72.11%     462.887us     154.296us      18.752us       100.00%      25.088us       8.363us             3  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      18.752us       100.00%      18.752us       6.251us             3  
+                                Activity Buffer Request        43.62%     280.014us        43.62%     280.014us     280.014us       6.336us        33.79%       6.336us       6.336us             1  
+                                       aten::empty_like         1.22%       7.832us         4.35%      27.953us       9.318us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         3.13%      20.121us         3.13%      20.121us       6.707us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel        24.84%     159.442us        24.84%     159.442us      53.147us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.79%       5.080us         0.79%       5.080us       5.080us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 469.023us
-Self CUDA time total: 18.720us
+Self CPU time total: 641.900us
+Self CUDA time total: 18.752us
 
 
 
@@ -4694,19 +4476,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B4_D2048_S2048_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d         4.40%      80.973us        99.73%       1.837ms       1.837ms       0.000us         0.00%     162.749us     162.749us             1  
-                                         CausalConv1dFn         4.14%      76.301us        95.33%       1.756ms     585.285us       0.000us         0.00%     162.749us      54.250us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         1.45%      26.730us        89.50%       1.648ms     549.474us      97.918us       100.00%     162.749us      54.250us             3  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     141.950us       144.97%     141.950us     141.950us             1  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      97.918us       100.00%      97.918us      32.639us             3  
-                                Activity Buffer Request        78.99%       1.455ms        78.99%       1.455ms       1.455ms      64.831us        66.21%      64.831us      64.831us             1  
-                                       aten::empty_like         0.45%       8.340us         1.69%      31.131us      10.377us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         1.24%      22.791us         1.24%      22.791us       7.597us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel         9.06%     166.885us         9.06%     166.885us      55.628us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.27%       4.980us         0.27%       4.980us       4.980us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d        11.42%      73.310us        99.16%     636.780us     636.780us       0.000us         0.00%     162.591us     162.591us             1  
+                                         CausalConv1dFn        11.12%      71.382us        87.74%     563.470us     187.823us       0.000us         0.00%     162.591us      54.197us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         3.58%      22.989us        72.14%     463.287us     154.429us      97.631us       100.00%     162.591us      54.197us             3  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     130.208us       133.37%     130.208us     130.208us             1  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      97.631us       100.00%      97.631us      32.544us             3  
+                                Activity Buffer Request        43.38%     278.604us        43.38%     278.604us     278.604us      64.960us        66.54%      64.960us      64.960us             1  
+                                       aten::empty_like         1.24%       7.950us         4.48%      28.801us       9.600us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         3.25%      20.851us         3.25%      20.851us       6.950us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel        25.18%     161.694us        25.18%     161.694us      53.898us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.84%       5.420us         0.84%       5.420us       5.420us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.842ms
-Self CUDA time total: 97.918us
+Self CPU time total: 642.200us
+Self CUDA time total: 97.631us
 
 
 
@@ -4716,19 +4498,19 @@ PROFILE TRACE: hf_kernels_causal_conv1d | cuda_B4_D2048_S2048_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                               hf_kernels_causal_conv1d        16.07%      76.871us        98.94%     473.172us     473.172us       0.000us         0.00%     163.803us     163.803us             1  
-                                         CausalConv1dFn        14.96%      71.532us        82.87%     396.301us     132.100us       0.000us         0.00%     163.803us      54.601us             3  
-              _causal_conv1d_90f5a60::causal_conv1d_fwd         5.75%      27.501us        61.56%     294.418us      98.139us      98.685us       100.00%     163.803us      54.601us             3  
-                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     133.180us       134.95%     133.180us     133.180us             1  
-void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      98.685us       100.00%      98.685us      32.895us             3  
-                                Activity Buffer Request        21.65%     103.543us        21.65%     103.543us     103.543us      65.118us        65.99%      65.118us      65.118us             1  
-                                       aten::empty_like         1.52%       7.251us         6.35%      30.351us      10.117us       0.000us         0.00%       0.000us       0.000us             3  
-                                    aten::empty_strided         4.83%      23.100us         4.83%      23.100us       7.700us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        34.16%     163.374us        34.16%     163.374us      54.458us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         1.06%       5.061us         1.06%       5.061us       5.061us       0.000us         0.00%       0.000us       0.000us             1  
+                               hf_kernels_causal_conv1d        13.89%      72.060us        98.98%     513.378us     513.378us       0.000us         0.00%     163.263us     163.263us             1  
+                                         CausalConv1dFn        13.96%      72.421us        85.08%     441.318us     147.106us       0.000us         0.00%     163.263us      54.421us             3  
+              _causal_conv1d_90f5a60::causal_conv1d_fwd         4.45%      23.099us        65.49%     339.676us     113.225us      98.623us       100.00%     163.263us      54.421us             3  
+                               hf_kernels_causal_conv1d         0.00%       0.000us         0.00%       0.000us       0.000us     130.111us       131.93%     130.111us     130.111us             1  
+void causal_conv1d_fwd_kernel<Causal_conv1d_fwd_kern...         0.00%       0.000us         0.00%       0.000us       0.000us      98.623us       100.00%      98.623us      32.874us             3  
+                                Activity Buffer Request        30.19%     156.612us        30.19%     156.612us     156.612us      64.640us        65.54%      64.640us      64.640us             1  
+                                       aten::empty_like         1.62%       8.391us         5.63%      29.221us       9.740us       0.000us         0.00%       0.000us       0.000us             3  
+                                    aten::empty_strided         4.02%      20.830us         4.02%      20.830us       6.943us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel        30.84%     159.965us        30.84%     159.965us      53.322us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         1.02%       5.310us         1.02%       5.310us       5.310us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 478.233us
-Self CUDA time total: 98.685us
+Self CPU time total: 518.688us
+Self CUDA time total: 98.623us
 
 
 impl                     wl                  p50(ms)  ok
@@ -4738,7 +4520,7 @@ hf_kernels_causal_conv1d cuda_B2_D2048_S2048_W2     0.05  True
 hf_kernels_causal_conv1d cuda_B2_D2048_S2048_W4     0.05  True
 hf_kernels_causal_conv1d cuda_B2_D2048_S512_W2     0.05  True
 hf_kernels_causal_conv1d cuda_B2_D2048_S512_W4     0.05  True
-hf_kernels_causal_conv1d cuda_B2_D64_S128_W2     0.05  True
+hf_kernels_causal_conv1d cuda_B2_D64_S128_W2     0.04  True
 hf_kernels_causal_conv1d cuda_B2_D64_S128_W4     0.05  True
 hf_kernels_causal_conv1d cuda_B2_D64_S2048_W2     0.05  True
 hf_kernels_causal_conv1d cuda_B2_D64_S2048_W4     0.05  True
@@ -4760,13 +4542,14 @@ hf_kernels_causal_conv1d cuda_B4_D64_S512_W4     0.05  True
 
▶ UV Install Logs
Fetching 11 files: 0%| | 0/11 [00:00<?, ?it/s] -Fetching 11 files: 9%|▉ | 1/11 [00:00<00:01, 9.42it/s] -Fetching 11 files: 64%|██████▎ | 7/11 [00:01<00:00, 4.98it/s] -Fetching 11 files: 100%|██████████| 11/11 [00:01<00:00, 7.98it/s]
+Fetching 11 files: 64%|██████▎ | 7/11 [00:01<00:01, 3.51it/s] +Fetching 11 files: 100%|██████████| 11/11 [00:01<00:00, 5.51it/s]

Artifacts:

causal_conv1d.jsonl diff --git a/causal_conv1d/impls/torch_causal_conv1d.html b/causal_conv1d/impls/torch_causal_conv1d.html index 6358d2b943cf22bb9f31aeb2e669932f13397132..123e4df6574a5fdcd6eea76825c04ba391a38dc3 100644 --- a/causal_conv1d/impls/torch_causal_conv1d.html +++ b/causal_conv1d/impls/torch_causal_conv1d.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ 
--shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: 
var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } 
+:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: 
flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: 
var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { 
color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: 
var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ 
transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s 
ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { 
background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { 
margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { 
.cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; 
font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception 
*/ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: 
#BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* 
Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] 
.highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] 
.highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: 
var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; 
margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4106,7 +3888,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: nv | 0.21s +Cell: nv | 0.22s | Raw @@ -4122,16 +3904,16 @@ Cell: nv | 0.21s
-
Fri Oct 31 20:00:25 2025       
+
Mon Nov 10 21:57:49 2025       
 +-----------------------------------------------------------------------------------------+
-| NVIDIA-SMI 570.195.03             Driver Version: 570.195.03     CUDA Version: 12.8     |
-|-----------------------------------------+------------------------+----------------------+
+| NVIDIA-SMI 580.95.05              Driver Version: 580.95.05      CUDA Version: 13.0     |
++-----------------------------------------+------------------------+----------------------+
 | GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
 | Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
 |                                         |                        |               MIG M. |
 |=========================================+========================+======================|
 |   0  NVIDIA L40S                    On  |   00000000:4D:00.0 Off |                    0 |
-| N/A   33C    P0             79W /  350W |       0MiB /  46068MiB |     11%      Default |
+| N/A   27C    P0             77W /  350W |       0MiB /  46068MiB |     18%      Default |
 |                                         |                        |                  N/A |
 +-----------------------------------------+------------------------+----------------------+
 
@@ -4155,7 +3937,7 @@ Cell: nv | 0.21s
 ▼ output
  ▶ uv-logs
  | 
-Cell: benchmark | 3.68s
+Cell: benchmark | 3.89s
  | 
 
 Raw
@@ -4217,29 +3999,29 @@ PROFILE TRACE: torch_eager | cuda_B2_D64_S128_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     439.324us      2269.12%     439.324us     439.324us             1  
-                                            torch_eager        10.31%     220.478us        99.69%       2.131ms       2.131ms       0.000us         0.00%      21.729us      21.729us             1  
-                                               aten::to         0.50%      10.770us        79.87%       1.707ms     284.530us       0.000us         0.00%      14.369us       2.395us             6  
-                                         aten::_to_copy         1.71%      36.499us        79.36%       1.696ms     282.735us       0.000us         0.00%      14.369us       2.395us             6  
-                                            aten::copy_         2.77%      59.234us        75.21%       1.608ms     267.930us      12.001us        61.99%      14.369us       2.395us             6  
-                                           aten::conv1d         0.36%       7.590us         7.34%     156.883us      52.294us       0.000us         0.00%       7.360us       2.453us             3  
-                                      aten::convolution         0.66%      14.070us         6.98%     149.293us      49.764us       0.000us         0.00%       7.360us       2.453us             3  
-                                     aten::_convolution         1.51%      32.210us         6.33%     135.223us      45.074us       0.000us         0.00%       7.360us       2.453us             3  
-                                aten::_conv_depthwise2d         1.61%      34.371us         4.00%      85.463us      28.488us       7.360us        38.01%       7.360us       2.453us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us       7.360us        38.01%       7.360us       2.453us             3  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.337us        32.73%       6.337us       2.112us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.664us        29.25%       5.664us       1.888us             3  
-                                Activity Buffer Request        69.37%       1.483ms        69.37%       1.483ms       1.483ms       2.368us        12.23%       2.368us       2.368us             1  
-                                    aten::empty_strided         2.45%      52.331us         2.45%      52.331us       8.722us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel         4.26%      91.032us         4.26%      91.032us      10.115us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         1.32%      28.311us         1.71%      36.491us       4.055us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         0.64%      13.700us         0.64%      13.700us       0.913us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         0.60%      12.790us         0.60%      12.790us       4.263us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         0.59%      12.710us         0.59%      12.710us       4.237us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.31%       6.640us         0.38%       8.090us       2.697us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     411.136us      2127.15%     411.136us     411.136us             1  
+                                            torch_eager         8.60%     205.173us        99.40%       2.372ms       2.372ms       0.000us         0.00%      21.632us      21.632us             1  
+                                               aten::to         0.40%       9.649us        83.06%       1.982ms     330.358us       0.000us         0.00%      14.272us       2.379us             6  
+                                         aten::_to_copy         1.47%      35.141us        82.65%       1.973ms     328.750us       0.000us         0.00%      14.272us       2.379us             6  
+                                            aten::copy_         2.42%      57.830us        79.13%       1.889ms     314.753us      11.968us        61.92%      14.272us       2.379us             6  
+                                           aten::conv1d         0.32%       7.640us         6.22%     148.384us      49.461us       0.000us         0.00%       7.360us       2.453us             3  
+                                      aten::convolution         0.55%      13.222us         5.90%     140.744us      46.915us       0.000us         0.00%       7.360us       2.453us             3  
+                                     aten::_convolution         1.23%      29.427us         5.34%     127.522us      42.507us       0.000us         0.00%       7.360us       2.453us             3  
+                                aten::_conv_depthwise2d         1.41%      33.690us         3.44%      82.073us      27.358us       7.360us        38.08%       7.360us       2.453us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us       7.360us        38.08%       7.360us       2.453us             3  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.304us        32.62%       6.304us       2.101us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.664us        29.30%       5.664us       1.888us             3  
+                                Activity Buffer Request        73.85%       1.762ms        73.85%       1.762ms       1.762ms       2.304us        11.92%       2.304us       2.304us             1  
+                                    aten::empty_strided         2.05%      48.841us         2.05%      48.841us       8.140us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         3.88%      92.484us         3.88%      92.484us      10.276us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.94%      22.551us         1.23%      29.352us       3.261us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.46%      10.991us         0.46%      10.991us       0.733us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.53%      12.660us         0.53%      12.660us       4.220us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.49%      11.631us         0.49%      11.631us       3.877us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.27%       6.340us         0.32%       7.570us       2.523us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.138ms
-Self CUDA time total: 19.361us
+Self CPU time total: 2.386ms
+Self CUDA time total: 19.328us
 
 
 
@@ -4249,29 +4031,29 @@ PROFILE TRACE: torch_eager | cuda_B2_D64_S128_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     341.789us      1742.49%     341.789us     341.789us             1  
-                                            torch_eager         7.86%     151.082us        99.71%       1.916ms       1.916ms       0.000us         0.00%      21.695us      21.695us             1  
-                                               aten::to         0.35%       6.661us        83.96%       1.614ms     268.966us       0.000us         0.00%      13.695us       2.282us             6  
-                                         aten::_to_copy         1.29%      24.781us        83.61%       1.607ms     267.856us       0.000us         0.00%      13.695us       2.282us             6  
-                                            aten::copy_         2.59%      49.784us        80.72%       1.552ms     258.589us      11.615us        59.21%      13.695us       2.282us             6  
-                                           aten::conv1d         0.32%       6.220us         6.35%     122.113us      40.704us       0.000us         0.00%       8.000us       2.667us             3  
-                                      aten::convolution         0.53%      10.120us         6.03%     115.893us      38.631us       0.000us         0.00%       8.000us       2.667us             3  
-                                     aten::_convolution         1.20%      23.080us         5.50%     105.773us      35.258us       0.000us         0.00%       8.000us       2.667us             3  
-                                aten::_conv_depthwise2d         1.19%      22.952us         3.39%      65.123us      21.708us       8.000us        40.79%       8.000us       2.667us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us       8.000us        40.79%       8.000us       2.667us             3  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.047us        30.83%       6.047us       2.016us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.568us        28.39%       5.568us       1.856us             3  
-                                Activity Buffer Request        75.54%       1.452ms        75.54%       1.452ms       1.452ms       2.080us        10.60%       2.080us       2.080us             1  
-                                    aten::empty_strided         1.60%      30.820us         1.60%      30.820us       5.137us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel         3.74%      71.953us         3.74%      71.953us       7.995us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         0.98%      18.881us         1.29%      24.750us       2.750us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         0.50%       9.609us         0.50%       9.609us       0.641us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         0.56%      10.750us         0.56%      10.750us       3.583us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         0.49%       9.339us         0.49%       9.339us       3.113us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.34%       6.630us         0.42%       8.000us       2.667us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     320.094us      1629.14%     320.094us     320.094us             1  
+                                            torch_eager         6.61%     147.267us        99.75%       2.222ms       2.222ms       0.000us         0.00%      21.856us      21.856us             1  
+                                               aten::to         0.28%       6.328us        86.86%       1.935ms     322.525us       0.000us         0.00%      13.888us       2.315us             6  
+                                         aten::_to_copy         0.99%      22.058us        86.58%       1.929ms     321.470us       0.000us         0.00%      13.888us       2.315us             6  
+                                            aten::copy_         2.09%      46.581us        84.13%       1.874ms     312.384us      11.680us        59.45%      13.888us       2.315us             6  
+                                           aten::conv1d         0.26%       5.880us         5.20%     115.901us      38.634us       0.000us         0.00%       7.968us       2.656us             3  
+                                      aten::convolution         0.41%       9.201us         4.94%     110.021us      36.674us       0.000us         0.00%       7.968us       2.656us             3  
+                                     aten::_convolution         0.99%      22.029us         4.53%     100.820us      33.607us       0.000us         0.00%       7.968us       2.656us             3  
+                                aten::_conv_depthwise2d         0.98%      21.809us         2.84%      63.210us      21.070us       7.968us        40.55%       7.968us       2.656us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us       7.968us        40.55%       7.968us       2.656us             3  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.112us        31.11%       6.112us       2.037us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.568us        28.34%       5.568us       1.856us             3  
+                                Activity Buffer Request        79.89%       1.780ms        79.89%       1.780ms       1.780ms       2.208us        11.24%       2.208us       2.208us             1  
+                                    aten::empty_strided         1.46%      32.461us         1.46%      32.461us       5.410us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         3.22%      71.802us         3.22%      71.802us       7.978us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.71%      15.809us         0.93%      20.750us       2.306us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.38%       8.492us         0.38%       8.492us       0.566us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.41%       9.081us         0.41%       9.081us       3.027us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.38%       8.530us         0.38%       8.530us       2.843us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.26%       5.730us         0.32%       7.140us       2.380us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.922ms
-Self CUDA time total: 19.615us
+Self CPU time total: 2.228ms
+Self CUDA time total: 19.648us
 
 
 
@@ -4281,29 +4063,29 @@ PROFILE TRACE: torch_eager | cuda_B2_D64_S512_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     343.328us      1837.45%     343.328us     343.328us             1  
-                                            torch_eager         7.88%     151.015us        99.69%       1.911ms       1.911ms       0.000us         0.00%      20.605us      20.605us             1  
-                                               aten::to         0.33%       6.409us        84.02%       1.611ms     268.468us       0.000us         0.00%      13.662us       2.277us             6  
-                                         aten::_to_copy         1.32%      25.354us        83.68%       1.604ms     267.400us       0.000us         0.00%      13.662us       2.277us             6  
-                                            aten::copy_         2.65%      50.770us        80.80%       1.549ms     258.170us      11.742us        62.84%      13.662us       2.277us             6  
-                                           aten::conv1d         0.33%       6.290us         6.34%     121.483us      40.494us       0.000us         0.00%       6.943us       2.314us             3  
-                                      aten::convolution         0.54%      10.430us         6.01%     115.193us      38.398us       0.000us         0.00%       6.943us       2.314us             3  
-                                     aten::_convolution         1.17%      22.439us         5.46%     104.763us      34.921us       0.000us         0.00%       6.943us       2.314us             3  
-                                aten::_conv_depthwise2d         1.17%      22.412us         3.43%      65.843us      21.948us       6.943us        37.16%       6.943us       2.314us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us       6.943us        37.16%       6.943us       2.314us             3  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       5.982us        32.01%       5.982us       1.994us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.760us        30.83%       5.760us       1.920us             3  
-                                Activity Buffer Request        75.50%       1.448ms        75.50%       1.448ms       1.448ms       1.920us        10.28%       1.920us       1.920us             1  
-                                    aten::empty_strided         1.57%      30.029us         1.57%      30.029us       5.005us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel         3.90%      74.680us         3.90%      74.680us       8.298us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         0.93%      17.782us         1.21%      23.252us       2.584us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         0.48%       9.281us         0.48%       9.281us       0.619us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         0.57%      10.910us         0.57%      10.910us       3.637us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         0.44%       8.531us         0.44%       8.531us       2.844us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.32%       6.170us         0.39%       7.570us       2.523us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     322.750us      1724.09%     322.750us     322.750us             1  
+                                            torch_eager         6.97%     154.353us        99.74%       2.208ms       2.208ms       0.000us         0.00%      20.736us      20.736us             1  
+                                               aten::to         0.30%       6.580us        86.44%       1.913ms     318.849us       0.000us         0.00%      13.791us       2.299us             6  
+                                         aten::_to_copy         1.09%      24.161us        86.14%       1.907ms     317.752us       0.000us         0.00%      13.791us       2.299us             6  
+                                            aten::copy_         2.12%      46.909us        83.64%       1.851ms     308.533us      11.775us        62.90%      13.791us       2.299us             6  
+                                           aten::conv1d         0.30%       6.591us         5.18%     114.662us      38.221us       0.000us         0.00%       6.945us       2.315us             3  
+                                      aten::convolution         0.40%       8.811us         4.88%     108.071us      36.024us       0.000us         0.00%       6.945us       2.315us             3  
+                                     aten::_convolution         0.96%      21.188us         4.48%      99.260us      33.087us       0.000us         0.00%       6.945us       2.315us             3  
+                                aten::_conv_depthwise2d         0.97%      21.520us         2.82%      62.461us      20.820us       6.945us        37.10%       6.945us       2.315us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us       6.945us        37.10%       6.945us       2.315us             3  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.047us        32.30%       6.047us       2.016us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.728us        30.60%       5.728us       1.909us             3  
+                                Activity Buffer Request        79.41%       1.758ms        79.41%       1.758ms       1.758ms       2.016us        10.77%       2.016us       2.016us             1  
+                                    aten::empty_strided         1.41%      31.151us         1.41%      31.151us       5.192us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         3.17%      70.153us         3.17%      70.153us       7.795us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.77%      17.060us         1.01%      22.310us       2.479us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.39%       8.641us         0.39%       8.641us       0.576us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.42%       9.380us         0.42%       9.380us       3.127us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.37%       8.090us         0.37%       8.090us       2.697us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.25%       5.450us         0.31%       6.801us       2.267us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.917ms
-Self CUDA time total: 18.685us
+Self CPU time total: 2.213ms
+Self CUDA time total: 18.720us
 
 
 
@@ -4313,29 +4095,29 @@ PROFILE TRACE: torch_eager | cuda_B2_D64_S512_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     340.280us      1734.88%     340.280us     340.280us             1  
-                                            torch_eager         6.89%     141.563us        99.72%       2.049ms       2.049ms       0.000us         0.00%      21.726us      21.726us             1  
-                                               aten::to         0.30%       6.132us        85.38%       1.755ms     292.424us       0.000us         0.00%      13.982us       2.330us             6  
-                                         aten::_to_copy         1.19%      24.439us        85.08%       1.748ms     291.402us       0.000us         0.00%      13.982us       2.330us             6  
-                                            aten::copy_         2.50%      51.302us        82.39%       1.693ms     282.182us      11.870us        60.52%      13.982us       2.330us             6  
-                                           aten::conv1d         0.29%       5.930us         5.97%     122.723us      40.908us       0.000us         0.00%       7.744us       2.581us             3  
-                                      aten::convolution         0.50%      10.300us         5.68%     116.793us      38.931us       0.000us         0.00%       7.744us       2.581us             3  
-                                     aten::_convolution         1.17%      23.960us         5.18%     106.493us      35.498us       0.000us         0.00%       7.744us       2.581us             3  
-                                aten::_conv_depthwise2d         1.08%      22.141us         3.19%      65.452us      21.817us       7.744us        39.48%       7.744us       2.581us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us       7.744us        39.48%       7.744us       2.581us             3  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.143us        31.32%       6.143us       2.048us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.727us        29.20%       5.727us       1.909us             3  
-                                Activity Buffer Request        70.00%       1.438ms        70.00%       1.438ms       1.438ms       2.112us        10.77%       2.112us       2.112us             1  
-                                    aten::empty_strided         1.50%      30.881us         1.50%      30.881us       5.147us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        11.01%     226.194us        11.01%     226.194us      25.133us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         0.89%      18.302us         1.19%      24.432us       2.715us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         0.49%       9.981us         0.49%       9.981us       0.665us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         0.55%      11.260us         0.55%      11.260us       3.753us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         0.45%       9.171us         0.45%       9.171us       3.057us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.32%       6.620us         0.39%       8.030us       2.677us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     328.254us      1673.31%     328.254us     328.254us             1  
+                                            torch_eager         6.02%     146.742us        99.79%       2.431ms       2.431ms       0.000us         0.00%      21.729us      21.729us             1  
+                                               aten::to         0.25%       6.201us        87.89%       2.141ms     356.794us       0.000us         0.00%      14.048us       2.341us             6  
+                                         aten::_to_copy         0.95%      23.051us        87.64%       2.135ms     355.761us       0.000us         0.00%      14.048us       2.341us             6  
+                                            aten::copy_         1.93%      46.899us        85.39%       2.080ms     346.662us      11.936us        60.85%      14.048us       2.341us             6  
+                                           aten::conv1d         0.28%       6.941us         4.83%     117.552us      39.184us       0.000us         0.00%       7.681us       2.560us             3  
+                                      aten::convolution         0.38%       9.320us         4.54%     110.611us      36.870us       0.000us         0.00%       7.681us       2.560us             3  
+                                     aten::_convolution         0.86%      20.861us         4.16%     101.291us      33.764us       0.000us         0.00%       7.681us       2.560us             3  
+                                aten::_conv_depthwise2d         0.93%      22.752us         2.67%      64.991us      21.664us       7.681us        39.15%       7.681us       2.560us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us       7.681us        39.15%       7.681us       2.560us             3  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.208us        31.65%       6.208us       2.069us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.728us        29.20%       5.728us       1.909us             3  
+                                Activity Buffer Request        75.50%       1.839ms        75.50%       1.839ms       1.839ms       2.112us        10.77%       2.112us       2.112us             1  
+                                    aten::empty_strided         1.29%      31.540us         1.29%      31.540us       5.257us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         8.87%     216.103us         8.87%     216.103us      24.011us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.70%      16.989us         0.90%      21.970us       2.441us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.35%       8.601us         0.35%       8.601us       0.573us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.43%      10.359us         0.43%      10.359us       3.453us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.40%       9.840us         0.40%       9.840us       3.280us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.22%       5.410us         0.28%       6.920us       2.307us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.055ms
-Self CUDA time total: 19.614us
+Self CPU time total: 2.436ms
+Self CUDA time total: 19.617us
 
 
 
@@ -4345,29 +4127,29 @@ PROFILE TRACE: torch_eager | cuda_B2_D64_S2048_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     379.964us      1548.03%     379.964us     379.964us             1  
-                                            torch_eager         7.69%     160.944us        99.76%       2.089ms       2.089ms       0.000us         0.00%      26.817us      26.817us             1  
-                                               aten::to         0.33%       7.000us        83.76%       1.754ms     292.349us       0.000us         0.00%      15.265us       2.544us             6  
-                                         aten::_to_copy         1.23%      25.779us        83.43%       1.747ms     291.183us       0.000us         0.00%      15.265us       2.544us             6  
-                                            aten::copy_         2.49%      52.100us        80.65%       1.689ms     281.484us      12.993us        52.94%      15.265us       2.544us             6  
-                                           aten::conv1d         0.31%       6.410us         6.85%     143.364us      47.788us       0.000us         0.00%      11.552us       3.851us             3  
-                                      aten::convolution         1.48%      31.021us         6.54%     136.954us      45.651us       0.000us         0.00%      11.552us       3.851us             3  
-                                     aten::_convolution         1.13%      23.621us         5.06%     105.933us      35.311us       0.000us         0.00%      11.552us       3.851us             3  
-                                aten::_conv_depthwise2d         1.06%      22.209us         3.13%      65.632us      21.877us      11.552us        47.06%      11.552us       3.851us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      11.552us        47.06%      11.552us       3.851us             3  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.625us        26.99%       6.625us       2.208us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.368us        25.94%       6.368us       2.123us             3  
-                                Activity Buffer Request        68.76%       1.440ms        68.76%       1.440ms       1.440ms       2.272us         9.26%       2.272us       2.272us             1  
-                                    aten::empty_strided         1.55%      32.413us         1.55%      32.413us       5.402us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        10.50%     219.817us        10.50%     219.817us      24.424us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         0.87%      18.301us         1.15%      24.061us       2.673us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         0.50%      10.530us         0.50%      10.530us       0.702us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         0.50%      10.490us         0.50%      10.490us       3.497us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         0.47%       9.872us         0.47%       9.872us       3.291us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.30%       6.220us         0.37%       7.740us       2.580us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     325.374us      1318.69%     325.374us     325.374us             1  
+                                            torch_eager         6.23%     145.210us        99.78%       2.326ms       2.326ms       0.000us         0.00%      26.978us      26.978us             1  
+                                               aten::to         0.28%       6.471us        87.58%       2.041ms     340.232us       0.000us         0.00%      15.298us       2.550us             6  
+                                         aten::_to_copy         1.01%      23.559us        87.30%       2.035ms     339.154us       0.000us         0.00%      15.298us       2.550us             6  
+                                            aten::copy_         2.04%      47.563us        85.03%       1.982ms     330.320us      12.994us        52.66%      15.298us       2.550us             6  
+                                           aten::conv1d         0.26%       6.060us         4.91%     114.341us      38.114us       0.000us         0.00%      11.680us       3.893us             3  
+                                      aten::convolution         0.40%       9.250us         4.65%     108.281us      36.094us       0.000us         0.00%      11.680us       3.893us             3  
+                                     aten::_convolution         0.89%      20.669us         4.25%      99.031us      33.010us       0.000us         0.00%      11.680us       3.893us             3  
+                                aten::_conv_depthwise2d         0.95%      22.039us         2.73%      63.550us      21.183us      11.680us        47.34%      11.680us       3.893us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      11.680us        47.34%      11.680us       3.893us             3  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.657us        26.98%       6.657us       2.219us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.337us        25.68%       6.337us       2.112us             3  
+                                Activity Buffer Request        74.59%       1.739ms        74.59%       1.739ms       1.739ms       2.304us         9.34%       2.304us       2.304us             1  
+                                    aten::empty_strided         1.26%      29.442us         1.26%      29.442us       4.907us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         9.39%     218.802us         9.39%     218.802us      24.311us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.69%      16.041us         0.91%      21.173us       2.353us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.37%       8.602us         0.37%       8.602us       0.573us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.40%       9.341us         0.40%       9.341us       3.114us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.39%       8.990us         0.39%       8.990us       2.997us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.23%       5.290us         0.28%       6.580us       2.193us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.094ms
-Self CUDA time total: 24.545us
+Self CPU time total: 2.331ms
+Self CUDA time total: 24.674us
 
 
 
@@ -4377,29 +4159,29 @@ PROFILE TRACE: torch_eager | cuda_B2_D64_S2048_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     351.133us      1341.43%     351.133us     351.133us             1  
-                                            torch_eager         7.55%     157.812us        99.73%       2.084ms       2.084ms       0.000us         0.00%      28.416us      28.416us             1  
-                                               aten::to         0.31%       6.571us        84.80%       1.772ms     295.318us       0.000us         0.00%      15.264us       2.544us             6  
-                                         aten::_to_copy         1.22%      25.450us        84.49%       1.765ms     294.223us       0.000us         0.00%      15.264us       2.544us             6  
-                                            aten::copy_         2.31%      48.301us        81.82%       1.710ms     284.947us      13.024us        49.76%      15.264us       2.544us             6  
-                                           aten::conv1d         0.32%       6.640us         5.96%     124.543us      41.514us       0.000us         0.00%      13.152us       4.384us             3  
-                                      aten::convolution         0.50%      10.360us         5.64%     117.903us      39.301us       0.000us         0.00%      13.152us       4.384us             3  
-                                     aten::_convolution         1.16%      24.330us         5.15%     107.543us      35.848us       0.000us         0.00%      13.152us       4.384us             3  
-                                aten::_conv_depthwise2d         1.06%      22.241us         3.14%      65.623us      21.874us      13.152us        50.24%      13.152us       4.384us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      13.152us        50.24%      13.152us       4.384us             3  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.656us        25.43%       6.656us       2.219us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.368us        24.33%       6.368us       2.123us             3  
-                                Activity Buffer Request        70.10%       1.465ms        70.10%       1.465ms       1.465ms       2.240us         8.56%       2.240us       2.240us             1  
-                                    aten::empty_strided         1.45%      30.202us         1.45%      30.202us       5.034us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        10.51%     219.677us        10.51%     219.677us      24.409us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         0.90%      18.881us         1.17%      24.421us       2.713us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         0.46%       9.580us         0.46%       9.580us       0.639us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         0.55%      11.471us         0.55%      11.471us       3.824us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         0.43%       8.890us         0.43%       8.890us       2.963us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.33%       6.950us         0.40%       8.400us       2.800us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     325.853us      1241.91%     325.853us     325.853us             1  
+                                            torch_eager         6.02%     142.382us        99.78%       2.359ms       2.359ms       0.000us         0.00%      28.510us      28.510us             1  
+                                               aten::to         0.27%       6.279us        87.80%       2.076ms     345.959us       0.000us         0.00%      15.262us       2.544us             6  
+                                         aten::_to_copy         0.97%      22.980us        87.54%       2.069ms     344.912us       0.000us         0.00%      15.262us       2.544us             6  
+                                            aten::copy_         2.02%      47.672us        85.33%       2.017ms     336.189us      12.990us        49.51%      15.262us       2.544us             6  
+                                           aten::conv1d         0.27%       6.391us         4.88%     115.262us      38.421us       0.000us         0.00%      13.248us       4.416us             3  
+                                      aten::convolution         0.41%       9.629us         4.61%     108.871us      36.290us       0.000us         0.00%      13.248us       4.416us             3  
+                                     aten::_convolution         0.88%      20.800us         4.20%      99.242us      33.081us       0.000us         0.00%      13.248us       4.416us             3  
+                                aten::_conv_depthwise2d         0.93%      21.882us         2.62%      62.041us      20.680us      13.248us        50.49%      13.248us       4.416us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      13.248us        50.49%      13.248us       4.416us             3  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.622us        25.24%       6.622us       2.207us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       6.368us        24.27%       6.368us       2.123us             3  
+                                Activity Buffer Request        75.21%       1.778ms        75.21%       1.778ms       1.778ms       2.272us         8.66%       2.272us       2.272us             1  
+                                    aten::empty_strided         1.24%      29.361us         1.24%      29.361us       4.893us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         8.97%     212.032us         8.97%     212.032us      23.559us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.75%      17.821us         0.98%      23.130us       2.570us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.37%       8.699us         0.37%       8.699us       0.580us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.38%       9.090us         0.38%       9.090us       3.030us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.44%      10.480us         0.44%      10.480us       3.493us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.24%       5.631us         0.30%       7.011us       2.337us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.089ms
-Self CUDA time total: 26.176us
+Self CPU time total: 2.364ms
+Self CUDA time total: 26.238us
 
 
 
@@ -4409,29 +4191,29 @@ PROFILE TRACE: torch_eager | cuda_B2_D2048_S128_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     349.627us       908.24%     349.627us     349.627us             1  
-                                            torch_eager         7.45%     152.992us        99.76%       2.049ms       2.049ms       0.000us         0.00%      41.086us      41.086us             1  
-                                           aten::conv1d         0.32%       6.640us         6.06%     124.413us      41.471us       0.000us         0.00%      22.561us       7.520us             3  
-                                      aten::convolution         0.50%      10.370us         5.73%     117.773us      39.258us       0.000us         0.00%      22.561us       7.520us             3  
-                                     aten::_convolution         1.14%      23.411us         5.23%     107.403us      35.801us       0.000us         0.00%      22.561us       7.520us             3  
-                                aten::_conv_depthwise2d         1.15%      23.650us         3.29%      67.532us      22.511us      22.561us        58.61%      22.561us       7.520us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      22.561us        58.61%      22.561us       7.520us             3  
-                                               aten::to         0.33%       6.780us        84.82%       1.743ms     290.446us       0.000us         0.00%      18.525us       3.087us             6  
-                                         aten::_to_copy         1.29%      26.502us        84.49%       1.736ms     289.316us       0.000us         0.00%      18.525us       3.087us             6  
-                                            aten::copy_         2.40%      49.251us        81.74%       1.679ms     279.869us      15.934us        41.39%      18.525us       3.087us             6  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       8.543us        22.19%       8.543us       2.848us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.391us        19.20%       7.391us       2.464us             3  
-                                Activity Buffer Request        69.84%       1.435ms        69.84%       1.435ms       1.435ms       2.591us         6.73%       2.591us       2.591us             1  
-                                    aten::empty_strided         1.47%      30.182us         1.47%      30.182us       5.030us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        10.64%     218.664us        10.64%     218.664us      24.296us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         0.89%      18.281us         1.17%      24.011us       2.668us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         0.47%       9.739us         0.47%       9.739us       0.649us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         0.53%      10.991us         0.53%      10.991us       3.664us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         0.46%       9.421us         0.46%       9.421us       3.140us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.29%       5.970us         0.36%       7.320us       2.440us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     331.328us       858.50%     331.328us     331.328us             1  
+                                            torch_eager         5.97%     146.471us        99.79%       2.446ms       2.446ms       0.000us         0.00%      41.186us      41.186us             1  
+                                           aten::conv1d         0.25%       6.210us         4.77%     116.961us      38.987us       0.000us         0.00%      22.849us       7.616us             3  
+                                      aten::convolution         0.40%       9.740us         4.52%     110.751us      36.917us       0.000us         0.00%      22.849us       7.616us             3  
+                                     aten::_convolution         0.89%      21.911us         4.12%     101.011us      33.670us       0.000us         0.00%      22.849us       7.616us             3  
+                                aten::_conv_depthwise2d         0.92%      22.550us         2.59%      63.530us      21.177us      22.849us        59.20%      22.849us       7.616us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      22.849us        59.20%      22.849us       7.616us             3  
+                                               aten::to         0.25%       6.228us        88.01%       2.158ms     359.617us       0.000us         0.00%      18.337us       3.056us             6  
+                                         aten::_to_copy         1.00%      24.602us        87.76%       2.151ms     358.579us       0.000us         0.00%      18.337us       3.056us             6  
+                                            aten::copy_         1.98%      48.619us        85.49%       2.096ms     349.334us      15.745us        40.80%      18.337us       3.056us             6  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       8.385us        21.73%       8.385us       2.795us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.360us        19.07%       7.360us       2.453us             3  
+                                Activity Buffer Request        75.73%       1.857ms        75.73%       1.857ms       1.857ms       2.592us         6.72%       2.592us       2.592us             1  
+                                    aten::empty_strided         1.26%      30.871us         1.26%      30.871us       5.145us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         8.69%     213.074us         8.69%     213.074us      23.675us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.69%      16.899us         0.91%      22.302us       2.478us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.35%       8.674us         0.35%       8.674us       0.578us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.39%       9.670us         0.39%       9.670us       3.223us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.37%       9.000us         0.37%       9.000us       3.000us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.23%       5.570us         0.28%       6.790us       2.263us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.054ms
-Self CUDA time total: 38.495us
+Self CPU time total: 2.452ms
+Self CUDA time total: 38.594us
 
 
 
@@ -4441,29 +4223,29 @@ PROFILE TRACE: torch_eager | cuda_B2_D2048_S128_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     345.054us       837.81%     345.054us     345.054us             1  
-                                            torch_eager         7.39%     151.695us        99.75%       2.049ms       2.049ms       0.000us         0.00%      43.810us      43.810us             1  
-                                           aten::conv1d         0.32%       6.620us         6.03%     123.883us      41.294us       0.000us         0.00%      25.375us       8.458us             3  
-                                      aten::convolution         0.50%      10.320us         5.71%     117.263us      39.088us       0.000us         0.00%      25.375us       8.458us             3  
-                                     aten::_convolution         1.20%      24.592us         5.21%     106.943us      35.648us       0.000us         0.00%      25.375us       8.458us             3  
-                                aten::_conv_depthwise2d         1.13%      23.150us         3.19%      65.451us      21.817us      25.375us        61.61%      25.375us       8.458us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      25.375us        61.61%      25.375us       8.458us             3  
-                                               aten::to         0.31%       6.440us        84.93%       1.744ms     290.716us       0.000us         0.00%      18.435us       3.072us             6  
-                                         aten::_to_copy         1.24%      25.501us        84.61%       1.738ms     289.642us       0.000us         0.00%      18.435us       3.072us             6  
-                                            aten::copy_         2.41%      49.431us        81.91%       1.682ms     280.380us      15.810us        38.39%      18.435us       3.072us             6  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       8.386us        20.36%       8.386us       2.795us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.424us        18.03%       7.424us       2.475us             3  
-                                Activity Buffer Request        70.32%       1.444ms        70.32%       1.444ms       1.444ms       2.625us         6.37%       2.625us       2.625us             1  
-                                    aten::empty_strided         1.46%      30.070us         1.46%      30.070us       5.012us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        10.28%     211.144us        10.28%     211.144us      23.460us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         0.92%      18.949us         1.19%      24.411us       2.712us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         0.45%       9.313us         0.45%       9.313us       0.621us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         0.52%      10.601us         0.52%      10.601us       3.534us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         0.44%       9.110us         0.44%       9.110us       3.037us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.29%       5.930us         0.36%       7.410us       2.470us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     324.382us       781.00%     324.382us     324.382us             1  
+                                            torch_eager         6.15%     143.693us        99.76%       2.329ms       2.329ms       0.000us         0.00%      44.158us      44.158us             1  
+                                           aten::conv1d         0.25%       5.870us         4.90%     114.381us      38.127us       0.000us         0.00%      25.694us       8.565us             3  
+                                      aten::convolution         0.39%       9.129us         4.65%     108.511us      36.170us       0.000us         0.00%      25.694us       8.565us             3  
+                                     aten::_convolution         0.92%      21.560us         4.26%      99.382us      33.127us       0.000us         0.00%      25.694us       8.565us             3  
+                                aten::_conv_depthwise2d         0.91%      21.251us         2.67%      62.331us      20.777us      25.694us        61.86%      25.694us       8.565us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      25.694us        61.86%      25.694us       8.565us             3  
+                                               aten::to         0.26%       6.051us        87.64%       2.046ms     341.007us       0.000us         0.00%      18.464us       3.077us             6  
+                                         aten::_to_copy         0.99%      23.033us        87.38%       2.040ms     339.999us       0.000us         0.00%      18.464us       3.077us             6  
+                                            aten::copy_         2.09%      48.709us        85.05%       1.985ms     330.910us      15.840us        38.14%      18.464us       3.077us             6  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       8.448us        20.34%       8.448us       2.816us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.392us        17.80%       7.392us       2.464us             3  
+                                Activity Buffer Request        74.80%       1.746ms        74.80%       1.746ms       1.746ms       2.624us         6.32%       2.624us       2.624us             1  
+                                    aten::empty_strided         1.35%      31.498us         1.35%      31.498us       5.250us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         9.10%     212.334us         9.10%     212.334us      23.593us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.70%      16.311us         0.92%      21.550us       2.394us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.38%       8.780us         0.38%       8.780us       0.585us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.39%       9.170us         0.39%       9.170us       3.057us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.44%      10.170us         0.44%      10.170us       3.390us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.24%       5.530us         0.30%       6.891us       2.297us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.054ms
-Self CUDA time total: 41.185us
+Self CPU time total: 2.335ms
+Self CUDA time total: 41.534us
 
 
 
@@ -4473,29 +4255,29 @@ PROFILE TRACE: torch_eager | cuda_B2_D2048_S512_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     348.348us       338.39%     348.348us     348.348us             1  
-                                            torch_eager         7.21%     148.863us        99.73%       2.059ms       2.059ms       0.000us         0.00%     108.926us     108.926us             1  
-                                           aten::conv1d         0.31%       6.430us         5.95%     122.893us      40.964us       0.000us         0.00%      70.592us      23.531us             3  
-                                      aten::convolution         0.50%      10.290us         5.64%     116.463us      38.821us       0.000us         0.00%      70.592us      23.531us             3  
-                                     aten::_convolution         1.17%      24.211us         5.14%     106.173us      35.391us       0.000us         0.00%      70.592us      23.531us             3  
-                                aten::_conv_depthwise2d         1.12%      23.052us         3.16%      65.282us      21.761us      70.592us        68.57%      70.592us      23.531us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      70.592us        68.57%      70.592us      23.531us             3  
-                                               aten::to         0.31%       6.372us        85.15%       1.758ms     292.949us       0.000us         0.00%      38.334us       6.389us             6  
-                                         aten::_to_copy         1.20%      24.680us        84.84%       1.751ms     291.887us       0.000us         0.00%      38.334us       6.389us             6  
-                                            aten::copy_         2.47%      51.072us        82.20%       1.697ms     282.787us      32.350us        31.43%      38.334us       6.389us             6  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      17.695us        17.19%      17.695us       5.898us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      14.655us        14.24%      14.655us       4.885us             3  
-                                Activity Buffer Request        70.59%       1.457ms        70.59%       1.457ms       1.457ms       5.984us         5.81%       5.984us       5.984us             1  
-                                    aten::empty_strided         1.45%      29.921us         1.45%      29.921us       4.987us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        10.23%     211.264us        10.23%     211.264us      23.474us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         0.89%      18.462us         1.17%      24.111us       2.679us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         0.47%       9.709us         0.47%       9.709us       0.647us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         0.47%       9.780us         0.47%       9.780us       3.260us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         0.47%       9.740us         0.47%       9.740us       3.247us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.28%       5.880us         0.35%       7.260us       2.420us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     319.038us       307.34%     319.038us     319.038us             1  
+                                            torch_eager         4.95%     115.620us        99.75%       2.329ms       2.329ms       0.000us         0.00%     109.886us     109.886us             1  
+                                           aten::conv1d         0.24%       5.500us         4.79%     111.722us      37.241us       0.000us         0.00%      71.360us      23.787us             3  
+                                      aten::convolution         0.38%       8.820us         4.55%     106.222us      35.407us       0.000us         0.00%      71.360us      23.787us             3  
+                                     aten::_convolution         0.86%      20.169us         4.17%      97.402us      32.467us       0.000us         0.00%      71.360us      23.787us             3  
+                                aten::_conv_depthwise2d         0.88%      20.499us         2.70%      62.992us      20.997us      71.360us        68.74%      71.360us      23.787us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      71.360us        68.74%      71.360us      23.787us             3  
+                                               aten::to         0.25%       5.942us        88.99%       2.078ms     346.257us       0.000us         0.00%      38.526us       6.421us             6  
+                                         aten::_to_copy         0.97%      22.531us        88.74%       2.072ms     345.267us       0.000us         0.00%      38.526us       6.421us             6  
+                                            aten::copy_         1.95%      45.459us        86.50%       2.019ms     336.557us      32.447us        31.26%      38.526us       6.421us             6  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      17.791us        17.14%      17.791us       5.930us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      14.656us        14.12%      14.656us       4.885us             3  
+                                Activity Buffer Request        76.44%       1.784ms        76.44%       1.784ms       1.784ms       6.079us         5.86%       6.079us       6.079us             1  
+                                    aten::empty_strided         1.27%      29.730us         1.27%      29.730us       4.955us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         9.13%     213.066us         9.13%     213.066us      23.674us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.66%      15.410us         0.85%      19.870us       2.208us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.33%       7.790us         0.33%       7.790us       0.519us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.44%      10.351us         0.44%      10.351us       3.450us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.36%       8.461us         0.36%       8.461us       2.820us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.23%       5.401us         0.29%       6.691us       2.230us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.064ms
-Self CUDA time total: 102.942us
+Self CPU time total: 2.335ms
+Self CUDA time total: 103.807us
 
 
 
@@ -4505,29 +4287,29 @@ PROFILE TRACE: torch_eager | cuda_B2_D2048_S512_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     344.181us       304.53%     344.181us     344.181us             1  
-                                            torch_eager        14.98%     124.863us        99.35%     828.302us     828.302us       0.000us         0.00%     119.036us     119.036us             1  
-                                           aten::conv1d         0.70%       5.870us        14.55%     121.343us      40.448us       0.000us         0.00%      80.669us      26.890us             3  
-                                      aten::convolution         1.17%       9.720us        13.85%     115.473us      38.491us       0.000us         0.00%      80.669us      26.890us             3  
-                                     aten::_convolution         2.96%      24.691us        12.68%     105.753us      35.251us       0.000us         0.00%      80.669us      26.890us             3  
-                                aten::_conv_depthwise2d         2.65%      22.121us         7.65%      63.762us      21.254us      80.669us        71.38%      80.669us      26.890us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      80.669us        71.38%      80.669us      26.890us             3  
-                                               aten::to         0.77%       6.429us        66.53%     554.705us      92.451us       0.000us         0.00%      38.367us       6.394us             6  
-                                         aten::_to_copy         3.01%      25.101us        65.76%     548.276us      91.379us       0.000us         0.00%      38.367us       6.394us             6  
-                                            aten::copy_         6.16%      51.352us        59.05%     492.343us      82.057us      32.351us        28.62%      38.367us       6.394us             6  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      17.696us        15.66%      17.696us       5.899us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      14.655us        12.97%      14.655us       4.885us             3  
-                                Activity Buffer Request        28.81%     240.197us        28.81%     240.197us     240.197us       6.016us         5.32%       6.016us       6.016us             1  
-                                    aten::empty_strided         3.70%      30.832us         3.70%      30.832us       5.139us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        26.65%     222.174us        26.65%     222.174us      24.686us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         2.09%      17.401us         2.70%      22.541us       2.505us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         1.05%       8.790us         1.05%       8.790us       0.586us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.34%      11.151us         1.34%      11.151us       3.717us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         1.09%       9.110us         1.09%       9.110us       3.037us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.89%       7.450us         1.05%       8.790us       2.930us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     320.032us       281.56%     320.032us     320.032us             1  
+                                            torch_eager         4.89%     112.502us        99.77%       2.297ms       2.297ms       0.000us         0.00%     119.649us     119.649us             1  
+                                           aten::conv1d         0.24%       5.540us         4.86%     111.980us      37.327us       0.000us         0.00%      81.407us      27.136us             3  
+                                      aten::convolution         0.38%       8.839us         4.62%     106.440us      35.480us       0.000us         0.00%      81.407us      27.136us             3  
+                                     aten::_convolution         0.90%      20.821us         4.24%      97.601us      32.534us       0.000us         0.00%      81.407us      27.136us             3  
+                                aten::_conv_depthwise2d         0.94%      21.639us         2.69%      61.990us      20.663us      81.407us        71.62%      81.407us      27.136us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      81.407us        71.62%      81.407us      27.136us             3  
+                                               aten::to         0.26%       5.912us        88.93%       2.047ms     341.211us       0.000us         0.00%      38.242us       6.374us             6  
+                                         aten::_to_copy         0.96%      22.099us        88.68%       2.041ms     340.225us       0.000us         0.00%      38.242us       6.374us             6  
+                                            aten::copy_         2.13%      49.062us        86.51%       1.991ms     331.902us      32.257us        28.38%      38.242us       6.374us             6  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      17.665us        15.54%      17.665us       5.888us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      14.592us        12.84%      14.592us       4.864us             3  
+                                Activity Buffer Request        76.05%       1.751ms        76.05%       1.751ms       1.751ms       5.985us         5.27%       5.985us       5.985us             1  
+                                    aten::empty_strided         1.21%      27.841us         1.21%      27.841us       4.640us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         9.26%     213.213us         9.26%     213.213us      23.690us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.70%      16.150us         0.91%      21.061us       2.340us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.36%       8.381us         0.36%       8.381us       0.559us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.40%       9.130us         0.40%       9.130us       3.043us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.42%       9.600us         0.42%       9.600us       3.200us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.24%       5.419us         0.29%       6.669us       2.223us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 833.752us
-Self CUDA time total: 113.020us
+Self CPU time total: 2.302ms
+Self CUDA time total: 113.664us
 
 
 
@@ -4537,29 +4319,29 @@ PROFILE TRACE: torch_eager | cuda_B2_D2048_S2048_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager        14.21%     122.455us        95.83%     825.681us     825.681us       0.000us         0.00%     433.339us     433.339us             1  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     419.771us       106.59%     419.771us     419.771us             1  
-                                           aten::conv1d         0.75%       6.429us        14.10%     121.522us      40.507us       0.000us         0.00%     251.453us      83.818us             3  
-                                      aten::convolution         1.15%       9.929us        13.36%     115.093us      38.364us       0.000us         0.00%     251.453us      83.818us             3  
-                                     aten::_convolution         2.67%      23.042us        12.21%     105.164us      35.055us       0.000us         0.00%     251.453us      83.818us             3  
-                                aten::_conv_depthwise2d         2.60%      22.440us         7.52%      64.810us      21.603us     251.453us        63.85%     251.453us      83.818us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us     251.453us        63.85%     251.453us      83.818us             3  
-                                               aten::to         0.70%       6.001us        64.14%     552.672us      92.112us       0.000us         0.00%     181.886us      30.314us             6  
-                                         aten::_to_copy         2.73%      23.540us        63.45%     546.671us      91.112us       0.000us         0.00%     181.886us      30.314us             6  
-                                            aten::copy_         5.94%      51.140us        57.36%     494.211us      82.368us     142.367us        36.15%     181.886us      30.314us             6  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us     102.367us        25.99%     102.367us      34.122us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      40.000us        10.16%      40.000us      13.333us             3  
-                                Activity Buffer Request        29.04%     250.247us        29.04%     250.247us     250.247us      39.519us        10.03%      39.519us      39.519us             1  
-                                    aten::empty_strided         3.36%      28.920us         3.36%      28.920us       4.820us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        24.89%     214.494us        24.89%     214.494us      23.833us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         1.98%      17.062us         2.59%      22.273us       2.475us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         1.09%       9.391us         1.09%       9.391us       0.626us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.24%      10.660us         1.24%      10.660us       3.553us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         1.17%      10.040us         1.17%      10.040us       3.347us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.86%       7.370us         1.02%       8.800us       2.933us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         4.70%     113.641us        96.03%       2.320ms       2.320ms       0.000us         0.00%     464.763us     464.763us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     453.786us       106.62%     453.786us     453.786us             1  
+                                           aten::conv1d         0.23%       5.630us         4.62%     111.673us      37.224us       0.000us         0.00%     278.940us      92.980us             3  
+                                      aten::convolution         0.36%       8.651us         4.39%     106.043us      35.348us       0.000us         0.00%     278.940us      92.980us             3  
+                                     aten::_convolution         0.86%      20.739us         4.03%      97.392us      32.464us       0.000us         0.00%     278.940us      92.980us             3  
+                                aten::_conv_depthwise2d         0.90%      21.710us         2.57%      62.062us      20.687us     278.940us        65.54%     278.940us      92.980us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us     278.940us        65.54%     278.940us      92.980us             3  
+                                               aten::to         0.24%       5.880us        85.69%       2.071ms     345.102us       0.000us         0.00%     185.823us      30.970us             6  
+                                         aten::_to_copy         0.90%      21.820us        85.45%       2.065ms     344.122us       0.000us         0.00%     185.823us      30.970us             6  
+                                            aten::copy_         1.99%      48.071us        83.40%       2.015ms     335.882us     146.655us        34.46%     185.823us      30.970us             6  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us     105.919us        24.89%     105.919us      35.306us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      40.736us         9.57%      40.736us      13.579us             3  
+                                Activity Buffer Request        72.26%       1.746ms        72.26%       1.746ms       1.746ms      39.168us         9.20%      39.168us      39.168us             1  
+                                    aten::empty_strided         1.14%      27.621us         1.14%      27.621us       4.604us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel        10.07%     243.344us        10.07%     243.344us      27.038us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.66%      15.908us         0.86%      20.760us       2.307us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.34%       8.262us         0.34%       8.262us       0.551us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.37%       8.921us         0.37%       8.921us       2.974us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.38%       9.260us         0.38%       9.260us       3.087us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.22%       5.361us         0.27%       6.641us       2.214us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 861.602us
-Self CUDA time total: 393.820us
+Self CPU time total: 2.416ms
+Self CUDA time total: 425.595us
 
 
 
@@ -4569,29 +4351,29 @@ PROFILE TRACE: torch_eager | cuda_B2_D2048_S2048_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager        15.32%     134.312us        91.67%     803.971us     803.971us       0.000us         0.00%     487.924us     487.924us             1  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     476.501us       106.34%     476.501us     476.501us             1  
-                                           aten::conv1d         0.67%       5.860us        13.82%     121.173us      40.391us       0.000us         0.00%     299.161us      99.720us             3  
-                                      aten::convolution         1.17%      10.220us        13.15%     115.313us      38.438us       0.000us         0.00%     299.161us      99.720us             3  
-                                     aten::_convolution         2.67%      23.450us        11.98%     105.093us      35.031us       0.000us         0.00%     299.161us      99.720us             3  
-                                aten::_conv_depthwise2d         2.56%      22.451us         7.48%      65.623us      21.874us     299.161us        66.76%     299.161us      99.720us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us     299.161us        66.76%     299.161us      99.720us             3  
-                                               aten::to         0.69%       6.051us        59.17%     518.906us      86.484us       0.000us         0.00%     188.763us      31.460us             6  
-                                         aten::_to_copy         2.71%      23.771us        58.48%     512.855us      85.476us       0.000us         0.00%     188.763us      31.460us             6  
-                                            aten::copy_         5.69%      49.880us        52.31%     458.742us      76.457us     148.924us        33.24%     188.763us      31.460us             6  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us     108.861us        24.29%     108.861us      36.287us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      40.063us         8.94%      40.063us      13.354us             3  
-                                Activity Buffer Request        25.01%     219.366us        25.01%     219.366us     219.366us      39.839us         8.89%      39.839us      39.839us             1  
-                                    aten::empty_strided         3.46%      30.342us         3.46%      30.342us       5.057us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        24.34%     213.439us        24.34%     213.439us      23.715us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         1.98%      17.400us         2.59%      22.720us       2.524us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         1.09%       9.540us         1.09%       9.540us       0.636us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.14%      10.010us         1.14%      10.010us       3.337us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         1.05%       9.219us         1.05%       9.219us       3.073us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.66%       5.750us         0.82%       7.210us       2.403us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         4.81%     115.230us        95.51%       2.289ms       2.289ms       0.000us         0.00%     473.560us     473.560us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     466.268us       106.59%     466.268us     466.268us             1  
+                                           aten::conv1d         0.23%       5.540us         4.63%     111.002us      37.001us       0.000us         0.00%     298.430us      99.477us             3  
+                                      aten::convolution         0.37%       8.900us         4.40%     105.462us      35.154us       0.000us         0.00%     298.430us      99.477us             3  
+                                     aten::_convolution         0.85%      20.430us         4.03%      96.562us      32.187us       0.000us         0.00%     298.430us      99.477us             3  
+                                aten::_conv_depthwise2d         0.86%      20.562us         2.57%      61.592us      20.531us     298.430us        68.22%     298.430us      99.477us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us     298.430us        68.22%     298.430us      99.477us             3  
+                                               aten::to         0.24%       5.669us        85.05%       2.039ms     339.802us       0.000us         0.00%     175.130us      29.188us             6  
+                                         aten::_to_copy         0.96%      22.942us        84.82%       2.033ms     338.857us       0.000us         0.00%     175.130us      29.188us             6  
+                                            aten::copy_         2.01%      48.190us        82.64%       1.981ms     330.170us     139.003us        31.78%     175.130us      29.188us             6  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      98.430us        22.50%      98.430us      32.810us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      40.573us         9.28%      40.573us      13.524us             3  
+                                Activity Buffer Request        72.81%       1.745ms        72.81%       1.745ms       1.745ms      36.127us         8.26%      36.127us      36.127us             1  
+                                    aten::empty_strided         1.22%      29.180us         1.22%      29.180us       4.863us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         8.73%     209.224us         8.73%     209.224us      23.247us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.66%      15.770us         0.87%      20.750us       2.306us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.35%       8.340us         0.35%       8.340us       0.556us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.43%      10.290us         0.43%      10.290us       3.430us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.37%       8.960us         0.37%       8.960us       2.987us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.22%       5.340us         0.28%       6.610us       2.203us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 876.983us
-Self CUDA time total: 448.085us
+Self CPU time total: 2.397ms
+Self CUDA time total: 437.433us
 
 
 
@@ -4601,29 +4383,29 @@ PROFILE TRACE: torch_eager | cuda_B4_D64_S128_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     338.392us      1804.85%     338.392us     338.392us             1  
-                                            torch_eager        18.33%     161.236us        99.35%     873.703us     873.703us       0.000us         0.00%      20.637us      20.637us             1  
-                                               aten::to         0.69%       6.070us        63.71%     560.224us      93.371us       0.000us         0.00%      13.406us       2.234us             6  
-                                         aten::_to_copy         2.78%      24.471us        63.02%     554.154us      92.359us       0.000us         0.00%      13.406us       2.234us             6  
-                                            aten::copy_         5.94%      52.212us        56.85%     499.953us      83.325us      11.518us        61.43%      13.406us       2.234us             6  
-                                           aten::conv1d         0.64%       5.659us        14.02%     123.282us      41.094us       0.000us         0.00%       7.231us       2.410us             3  
-                                      aten::convolution         1.14%       9.999us        13.38%     117.623us      39.208us       0.000us         0.00%       7.231us       2.410us             3  
-                                     aten::_convolution         2.72%      23.952us        12.24%     107.624us      35.875us       0.000us         0.00%       7.231us       2.410us             3  
-                                aten::_conv_depthwise2d         2.67%      23.519us         7.63%      67.130us      22.377us       7.231us        38.57%       7.231us       2.410us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us       7.231us        38.57%       7.231us       2.410us             3  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       5.854us        31.22%       5.854us       1.951us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.664us        30.21%       5.664us       1.888us             3  
-                                Activity Buffer Request        29.52%     259.596us        29.52%     259.596us     259.596us       1.888us        10.07%       1.888us       1.888us             1  
-                                    aten::empty_strided         3.38%      29.730us         3.38%      29.730us       4.955us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        23.99%     210.946us        23.99%     210.946us      23.438us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         2.07%      18.190us         2.71%      23.871us       2.652us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         1.11%       9.761us         1.11%       9.761us       0.651us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.24%      10.890us         1.24%      10.890us       3.630us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         1.13%       9.920us         1.13%       9.920us       3.307us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.68%       5.972us         0.85%       7.452us       2.484us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     325.149us      1725.02%     325.149us     325.149us             1  
+                                            torch_eager         4.86%     112.628us        99.78%       2.311ms       2.311ms       0.000us         0.00%      20.769us      20.769us             1  
+                                               aten::to         0.26%       5.932us        88.67%       2.054ms     342.251us       0.000us         0.00%      13.536us       2.256us             6  
+                                         aten::_to_copy         1.00%      23.270us        88.41%       2.048ms     341.262us       0.000us         0.00%      13.536us       2.256us             6  
+                                            aten::copy_         2.14%      49.511us        86.15%       1.995ms     332.552us      11.616us        61.63%      13.536us       2.256us             6  
+                                           aten::conv1d         0.24%       5.480us         5.19%     120.221us      40.074us       0.000us         0.00%       7.233us       2.411us             3  
+                                      aten::convolution         0.37%       8.641us         4.95%     114.741us      38.247us       0.000us         0.00%       7.233us       2.411us             3  
+                                     aten::_convolution         0.88%      20.361us         4.58%     106.100us      35.367us       0.000us         0.00%       7.233us       2.411us             3  
+                                aten::_conv_depthwise2d         0.96%      22.180us         3.05%      70.680us      23.560us       7.233us        38.37%       7.233us       2.411us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us       7.233us        38.37%       7.233us       2.411us             3  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       5.920us        31.41%       5.920us       1.973us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.696us        30.22%       5.696us       1.899us             3  
+                                Activity Buffer Request        75.90%       1.758ms        75.90%       1.758ms       1.758ms       1.920us        10.19%       1.920us       1.920us             1  
+                                    aten::empty_strided         1.25%      28.990us         1.25%      28.990us       4.832us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         9.42%     218.162us         9.42%     218.162us      24.240us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.68%      15.833us         0.90%      20.731us       2.303us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.37%       8.468us         0.37%       8.468us       0.565us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.40%       9.220us         0.40%       9.220us       3.073us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.39%       8.980us         0.39%       8.980us       2.993us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.24%       5.550us         0.30%       7.000us       2.333us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 879.393us
-Self CUDA time total: 18.749us
+Self CPU time total: 2.316ms
+Self CUDA time total: 18.849us
 
 
 
@@ -4633,29 +4415,29 @@ PROFILE TRACE: torch_eager | cuda_B4_D64_S128_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     338.934us      1741.87%     338.934us     338.934us             1  
-                                            torch_eager        16.71%     145.362us        99.29%     863.592us     863.592us       0.000us         0.00%      21.314us      21.314us             1  
-                                               aten::to         0.71%       6.200us        65.36%     568.524us      94.754us       0.000us         0.00%      13.282us       2.214us             6  
-                                         aten::_to_copy         2.85%      24.831us        64.65%     562.324us      93.721us       0.000us         0.00%      13.282us       2.214us             6  
-                                            aten::copy_         5.81%      50.550us        58.39%     507.883us      84.647us      11.426us        58.72%      13.282us       2.214us             6  
-                                           aten::conv1d         0.78%       6.753us        14.06%     122.315us      40.772us       0.000us         0.00%       8.032us       2.677us             3  
-                                      aten::convolution         1.19%      10.380us        13.29%     115.562us      38.521us       0.000us         0.00%       8.032us       2.677us             3  
-                                     aten::_convolution         2.63%      22.841us        12.09%     105.182us      35.061us       0.000us         0.00%       8.032us       2.677us             3  
-                                aten::_conv_depthwise2d         2.65%      23.042us         7.65%      66.512us      22.171us       8.032us        41.28%       8.032us       2.677us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us       8.032us        41.28%       8.032us       2.677us             3  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       5.825us        29.94%       5.825us       1.942us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.601us        28.79%       5.601us       1.867us             3  
-                                Activity Buffer Request        30.62%     266.307us        30.62%     266.307us     266.307us       1.856us         9.54%       1.856us       1.856us             1  
-                                    aten::empty_strided         3.40%      29.610us         3.40%      29.610us       4.935us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        24.61%     214.076us        24.61%     214.076us      23.786us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         2.02%      17.612us         2.63%      22.841us       2.538us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         1.02%       8.840us         1.02%       8.840us       0.589us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.22%      10.630us         1.22%      10.630us       3.543us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         1.13%       9.790us         1.13%       9.790us       3.263us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.67%       5.798us         0.82%       7.109us       2.370us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     320.511us      1636.76%     320.511us     320.511us             1  
+                                            torch_eager         5.91%     139.372us        99.79%       2.353ms       2.353ms       0.000us         0.00%      21.598us      21.598us             1  
+                                               aten::to         0.25%       6.010us        87.93%       2.073ms     345.496us       0.000us         0.00%      13.663us       2.277us             6  
+                                         aten::_to_copy         0.96%      22.549us        87.67%       2.067ms     344.494us       0.000us         0.00%      13.663us       2.277us             6  
+                                            aten::copy_         2.09%      49.251us        85.51%       2.016ms     335.977us      11.647us        59.48%      13.663us       2.277us             6  
+                                           aten::conv1d         0.26%       6.081us         4.89%     115.321us      38.440us       0.000us         0.00%       7.935us       2.645us             3  
+                                      aten::convolution         0.40%       9.450us         4.63%     109.240us      36.413us       0.000us         0.00%       7.935us       2.645us             3  
+                                     aten::_convolution         0.90%      21.168us         4.23%      99.790us      33.263us       0.000us         0.00%       7.935us       2.645us             3  
+                                aten::_conv_depthwise2d         0.87%      20.610us         2.67%      62.871us      20.957us       7.935us        40.52%       7.935us       2.645us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us       7.935us        40.52%       7.935us       2.645us             3  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       5.983us        30.55%       5.983us       1.994us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.664us        28.92%       5.664us       1.888us             3  
+                                Activity Buffer Request        75.47%       1.779ms        75.47%       1.779ms       1.779ms       2.016us        10.30%       2.016us       2.016us             1  
+                                    aten::empty_strided         1.21%      28.551us         1.21%      28.551us       4.759us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         8.91%     210.105us         8.91%     210.105us      23.345us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.72%      16.961us         0.93%      21.872us       2.430us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.36%       8.422us         0.36%       8.422us       0.561us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.46%      10.910us         0.46%      10.910us       3.637us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.37%       8.650us         0.37%       8.650us       2.883us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.24%       5.579us         0.30%       6.970us       2.323us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 869.783us
-Self CUDA time total: 19.458us
+Self CPU time total: 2.358ms
+Self CUDA time total: 19.582us
 
 
 
@@ -4665,29 +4447,29 @@ PROFILE TRACE: torch_eager | cuda_B4_D64_S512_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     340.862us      1751.78%     340.862us     340.862us             1  
-                                            torch_eager         8.44%     173.073us        99.74%       2.045ms       2.045ms       0.000us         0.00%      21.635us      21.635us             1  
-                                               aten::to         0.33%       6.670us        84.06%       1.723ms     287.196us       0.000us         0.00%      14.307us       2.385us             6  
-                                         aten::_to_copy         1.21%      24.883us        83.74%       1.717ms     286.084us       0.000us         0.00%      14.307us       2.385us             6  
-                                            aten::copy_         2.36%      48.471us        81.06%       1.662ms     276.949us      12.130us        62.34%      14.307us       2.385us             6  
-                                           aten::conv1d         0.29%       5.970us         5.84%     119.613us      39.871us       0.000us         0.00%       7.328us       2.443us             3  
-                                      aten::convolution         0.48%       9.780us         5.54%     113.643us      37.881us       0.000us         0.00%       7.328us       2.443us             3  
-                                     aten::_convolution         1.14%      23.420us         5.07%     103.863us      34.621us       0.000us         0.00%       7.328us       2.443us             3  
-                                aten::_conv_depthwise2d         1.10%      22.512us         3.15%      64.503us      21.501us       7.328us        37.66%       7.328us       2.443us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us       7.328us        37.66%       7.328us       2.443us             3  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.241us        32.07%       6.241us       2.080us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.889us        30.27%       5.889us       1.963us             3  
-                                Activity Buffer Request        69.34%       1.421ms        69.34%       1.421ms       1.421ms       2.177us        11.19%       2.177us       2.177us             1  
-                                    aten::empty_strided         1.46%      29.930us         1.46%      29.930us       4.988us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        10.50%     215.256us        10.50%     215.256us      23.917us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         0.86%      17.669us         1.13%      23.180us       2.576us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         0.47%       9.581us         0.47%       9.581us       0.639us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         0.48%       9.759us         0.48%       9.759us       3.253us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         0.43%       8.742us         0.43%       8.742us       2.914us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.28%       5.760us         0.35%       7.110us       2.370us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     310.009us      1591.01%     310.009us     310.009us             1  
+                                            torch_eager        14.85%     113.881us        99.35%     762.102us     762.102us       0.000us         0.00%      21.693us      21.693us             1  
+                                               aten::to         0.75%       5.742us        67.36%     516.710us      86.118us       0.000us         0.00%      14.398us       2.400us             6  
+                                         aten::_to_copy         2.84%      21.798us        66.61%     510.968us      85.161us       0.000us         0.00%      14.398us       2.400us             6  
+                                            aten::copy_         6.26%      48.021us        59.81%     458.808us      76.468us      12.190us        62.56%      14.398us       2.400us             6  
+                                           aten::conv1d         0.69%       5.290us        14.07%     107.951us      35.984us       0.000us         0.00%       7.295us       2.432us             3  
+                                      aten::convolution         1.14%       8.770us        13.38%     102.661us      34.220us       0.000us         0.00%       7.295us       2.432us             3  
+                                     aten::_convolution         2.56%      19.629us        12.24%      93.891us      31.297us       0.000us         0.00%       7.295us       2.432us             3  
+                                aten::_conv_depthwise2d         2.72%      20.851us         7.84%      60.152us      20.051us       7.295us        37.44%       7.295us       2.432us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us       7.295us        37.44%       7.295us       2.432us             3  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.271us        32.18%       6.271us       2.090us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.919us        30.38%       5.919us       1.973us             3  
+                                Activity Buffer Request        29.70%     227.833us        29.70%     227.833us     227.833us       2.208us        11.33%       2.208us       2.208us             1  
+                                    aten::empty_strided         3.96%      30.362us         3.96%      30.362us       5.060us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel        26.62%     204.185us        26.62%     204.185us      22.687us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         2.01%      15.431us         2.57%      19.700us       2.189us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.98%       7.520us         0.98%       7.520us       0.501us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         1.29%       9.930us         1.29%       9.930us       3.310us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         1.06%       8.140us         1.06%       8.140us       2.713us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.67%       5.119us         0.83%       6.400us       2.133us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.050ms
-Self CUDA time total: 19.458us
+Self CPU time total: 767.122us
+Self CUDA time total: 19.485us
 
 
 
@@ -4697,29 +4479,29 @@ PROFILE TRACE: torch_eager | cuda_B4_D64_S512_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     367.067us      1820.95%     367.067us     367.067us             1  
-                                            torch_eager        17.50%     145.595us        99.30%     826.111us     826.111us       0.000us         0.00%      22.366us      22.366us             1  
-                                               aten::to         0.75%       6.199us        63.72%     530.082us      88.347us       0.000us         0.00%      14.431us       2.405us             6  
-                                         aten::_to_copy         2.95%      24.573us        62.97%     523.883us      87.314us       0.000us         0.00%      14.431us       2.405us             6  
-                                            aten::copy_         6.31%      52.521us        56.15%     467.170us      77.862us      12.223us        60.64%      14.431us       2.405us             6  
-                                           aten::conv1d         0.69%       5.760us        14.59%     121.354us      40.451us       0.000us         0.00%       7.935us       2.645us             3  
-                                      aten::convolution         1.24%      10.281us        13.89%     115.594us      38.531us       0.000us         0.00%       7.935us       2.645us             3  
-                                     aten::_convolution         2.68%      22.269us        12.66%     105.313us      35.104us       0.000us         0.00%       7.935us       2.645us             3  
-                                aten::_conv_depthwise2d         2.73%      22.701us         8.02%      66.711us      22.237us       7.935us        39.36%       7.935us       2.645us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us       7.935us        39.36%       7.935us       2.645us             3  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.304us        31.27%       6.304us       2.101us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.919us        29.36%       5.919us       1.973us             3  
-                                Activity Buffer Request        27.00%     224.665us        27.00%     224.665us     224.665us       2.208us        10.95%       2.208us       2.208us             1  
-                                    aten::empty_strided         3.86%      32.140us         3.86%      32.140us       5.357us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        25.71%     213.894us        25.71%     213.894us      23.766us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         2.05%      17.041us         2.71%      22.553us       2.506us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         1.14%       9.503us         1.14%       9.503us       0.634us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.31%      10.920us         1.31%      10.920us       3.640us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         1.10%       9.180us         1.10%       9.180us       3.060us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.81%       6.740us         0.98%       8.160us       2.720us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     312.058us      1547.83%     312.058us     312.058us             1  
+                                            torch_eager        19.84%     167.701us        99.34%     839.603us     839.603us       0.000us         0.00%      22.369us      22.369us             1  
+                                               aten::to         0.69%       5.791us        63.55%     537.169us      89.528us       0.000us         0.00%      14.400us       2.400us             6  
+                                         aten::_to_copy         2.59%      21.910us        62.87%     531.378us      88.563us       0.000us         0.00%      14.400us       2.400us             6  
+                                            aten::copy_         5.79%      48.970us        56.91%     481.028us      80.171us      12.192us        60.47%      14.400us       2.400us             6  
+                                           aten::conv1d         0.65%       5.520us        13.10%     110.752us      36.917us       0.000us         0.00%       7.969us       2.656us             3  
+                                      aten::convolution         1.03%       8.700us        12.45%     105.232us      35.077us       0.000us         0.00%       7.969us       2.656us             3  
+                                     aten::_convolution         2.40%      20.311us        11.42%      96.532us      32.177us       0.000us         0.00%       7.969us       2.656us             3  
+                                aten::_conv_depthwise2d         2.39%      20.240us         7.28%      61.521us      20.507us       7.969us        39.53%       7.969us       2.656us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us       7.969us        39.53%       7.969us       2.656us             3  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.272us        31.11%       6.272us       2.091us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       5.920us        29.36%       5.920us       1.973us             3  
+                                Activity Buffer Request        29.19%     246.714us        29.19%     246.714us     246.714us       2.208us        10.95%       2.208us       2.208us             1  
+                                    aten::empty_strided         3.36%      28.440us         3.36%      28.440us       4.740us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel        24.70%     208.775us        24.70%     208.775us      23.197us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         1.84%      15.580us         2.41%      20.350us       2.261us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.95%       8.049us         0.95%       8.049us       0.537us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         1.07%       9.050us         1.07%       9.050us       3.017us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         1.04%       8.800us         1.04%       8.800us       2.933us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.63%       5.361us         0.79%       6.650us       2.217us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 831.951us
-Self CUDA time total: 20.158us
+Self CPU time total: 845.213us
+Self CUDA time total: 20.161us
 
 
 
@@ -4729,29 +4511,29 @@ PROFILE TRACE: torch_eager | cuda_B4_D64_S2048_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     363.100us      1005.93%     363.100us     363.100us             1  
-                                            torch_eager        14.77%     122.163us        99.35%     821.971us     821.971us       0.000us         0.00%      38.688us      38.688us             1  
-                                           aten::conv1d         0.72%       5.951us        17.29%     143.024us      47.675us       0.000us         0.00%      20.160us       6.720us             3  
-                                      aten::convolution         1.22%      10.110us        16.57%     137.073us      45.691us       0.000us         0.00%      20.160us       6.720us             3  
-                                     aten::_convolution         3.04%      25.151us        15.35%     126.963us      42.321us       0.000us         0.00%      20.160us       6.720us             3  
-                                aten::_conv_depthwise2d         4.80%      39.711us        10.31%      85.271us      28.424us      20.160us        55.85%      20.160us       6.720us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      20.160us        55.85%      20.160us       6.720us             3  
-                                               aten::to         0.75%       6.172us        63.79%     527.804us      87.967us       0.000us         0.00%      18.528us       3.088us             6  
-                                         aten::_to_copy         2.99%      24.751us        63.05%     521.632us      86.939us       0.000us         0.00%      18.528us       3.088us             6  
-                                            aten::copy_         6.14%      50.790us        56.45%     467.021us      77.837us      15.936us        44.15%      18.528us       3.088us             6  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       8.512us        23.58%       8.512us       2.837us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.424us        20.57%       7.424us       2.475us             3  
-                                Activity Buffer Request        27.93%     231.066us        27.93%     231.066us     231.066us       2.592us         7.18%       2.592us       2.592us             1  
-                                    aten::empty_strided         3.61%      29.860us         3.61%      29.860us       4.977us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        25.33%     209.585us        25.33%     209.585us      23.287us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         2.11%      17.441us         2.75%      22.791us       2.532us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         1.15%       9.501us         1.15%       9.501us       0.633us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.26%      10.400us         1.26%      10.400us       3.467us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         1.30%      10.740us         1.30%      10.740us       3.580us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.76%       6.269us         0.93%       7.730us       2.577us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     312.867us       859.95%     312.867us     312.867us             1  
+                                            torch_eager        14.44%     112.752us        99.36%     776.042us     776.042us       0.000us         0.00%      39.006us      39.006us             1  
+                                           aten::conv1d         0.71%       5.580us        13.99%     109.252us      36.417us       0.000us         0.00%      20.512us       6.837us             3  
+                                      aten::convolution         1.09%       8.531us        13.27%     103.672us      34.557us       0.000us         0.00%      20.512us       6.837us             3  
+                                     aten::_convolution         2.62%      20.459us        12.18%      95.141us      31.714us       0.000us         0.00%      20.512us       6.837us             3  
+                                aten::_conv_depthwise2d         2.59%      20.222us         7.70%      60.162us      20.054us      20.512us        56.38%      20.512us       6.837us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      20.512us        56.38%      20.512us       6.837us             3  
+                                               aten::to         0.75%       5.821us        67.81%     529.608us      88.268us       0.000us         0.00%      18.494us       3.082us             6  
+                                         aten::_to_copy         2.86%      22.338us        67.06%     523.787us      87.298us       0.000us         0.00%      18.494us       3.082us             6  
+                                            aten::copy_         6.02%      47.020us        60.45%     472.148us      78.691us      15.870us        43.62%      18.494us       3.082us             6  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       8.447us        23.22%       8.447us       2.816us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.423us        20.40%       7.423us       2.474us             3  
+                                Activity Buffer Request        30.80%     240.594us        30.80%     240.594us     240.594us       2.624us         7.21%       2.624us       2.624us             1  
+                                    aten::empty_strided         3.75%      29.301us         3.75%      29.301us       4.884us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel        26.46%     206.633us        26.46%     206.633us      22.959us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         2.01%      15.720us         2.61%      20.410us       2.268us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         1.02%       7.981us         1.02%       7.981us       0.532us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         1.13%       8.841us         1.13%       8.841us       2.947us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         1.15%       9.000us         1.15%       9.000us       3.000us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.68%       5.329us         0.84%       6.560us       2.187us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 827.381us
-Self CUDA time total: 36.096us
+Self CPU time total: 781.073us
+Self CUDA time total: 36.382us
 
 
 
@@ -4761,29 +4543,29 @@ PROFILE TRACE: torch_eager | cuda_B4_D64_S2048_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     336.025us       883.88%     336.025us     336.025us             1  
-                                            torch_eager        14.70%     120.902us        99.36%     817.351us     817.351us       0.000us         0.00%      40.610us      40.610us             1  
-                                           aten::conv1d         0.71%       5.820us        14.44%     118.823us      39.608us       0.000us         0.00%      22.304us       7.435us             3  
-                                      aten::convolution         1.12%       9.190us        13.74%     113.003us      37.668us       0.000us         0.00%      22.304us       7.435us             3  
-                                     aten::_convolution         2.83%      23.270us        12.62%     103.813us      34.604us       0.000us         0.00%      22.304us       7.435us             3  
-                                aten::_conv_depthwise2d         2.83%      23.309us         7.79%      64.072us      21.357us      22.304us        58.67%      22.304us       7.435us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      22.304us        58.67%      22.304us       7.435us             3  
-                                               aten::to         0.73%       5.990us        66.75%     549.075us      91.513us       0.000us         0.00%      18.306us       3.051us             6  
-                                         aten::_to_copy         2.91%      23.953us        66.02%     543.085us      90.514us       0.000us         0.00%      18.306us       3.051us             6  
-                                            aten::copy_         6.07%      49.902us        59.57%     490.042us      81.674us      15.713us        41.33%      18.306us       3.051us             6  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       8.353us        21.97%       8.353us       2.784us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.360us        19.36%       7.360us       2.453us             3  
-                                Activity Buffer Request        30.85%     253.806us        30.85%     253.806us     253.806us       2.593us         6.82%       2.593us       2.593us             1  
-                                    aten::empty_strided         3.54%      29.090us         3.54%      29.090us       4.848us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        25.29%     208.074us        25.29%     208.074us      23.119us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         2.19%      18.051us         2.84%      23.371us       2.597us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         1.11%       9.160us         1.11%       9.160us       0.611us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.21%       9.961us         1.21%       9.961us       3.320us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         1.10%       9.062us         1.10%       9.062us       3.021us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.80%       6.580us         0.96%       7.920us       2.640us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     353.311us       916.31%     353.311us     353.311us             1  
+                                            torch_eager        17.31%     144.171us        99.40%     827.943us     827.943us       0.000us         0.00%      41.150us      41.150us             1  
+                                           aten::conv1d         0.66%       5.470us        14.12%     117.601us      39.200us       0.000us         0.00%      22.624us       7.541us             3  
+                                      aten::convolution         1.09%       9.120us        13.46%     112.131us      37.377us       0.000us         0.00%      22.624us       7.541us             3  
+                                     aten::_convolution         2.77%      23.100us        12.37%     103.011us      34.337us       0.000us         0.00%      22.624us       7.541us             3  
+                                aten::_conv_depthwise2d         2.63%      21.901us         7.78%      64.791us      21.597us      22.624us        58.68%      22.624us       7.541us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      22.624us        58.68%      22.624us       7.541us             3  
+                                               aten::to         0.71%       5.920us        64.88%     540.450us      90.075us       0.000us         0.00%      18.526us       3.088us             6  
+                                         aten::_to_copy         2.59%      21.613us        64.17%     534.530us      89.088us       0.000us         0.00%      18.526us       3.088us             6  
+                                            aten::copy_         5.88%      48.990us        58.06%     483.646us      80.608us      15.934us        41.32%      18.526us       3.088us             6  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       8.575us        22.24%       8.575us       2.858us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       7.359us        19.09%       7.359us       2.453us             3  
+                                Activity Buffer Request        29.91%     249.164us        29.91%     249.164us     249.164us       2.592us         6.72%       2.592us       2.592us             1  
+                                    aten::empty_strided         3.51%      29.271us         3.51%      29.271us       4.879us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel        25.18%     209.712us        25.18%     209.712us      23.301us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         1.99%      16.542us         2.59%      21.611us       2.401us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         1.04%       8.638us         1.04%       8.638us       0.576us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         1.16%       9.650us         1.16%       9.650us       3.217us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         1.08%       9.020us         1.08%       9.020us       3.007us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.68%       5.681us         0.85%       7.060us       2.353us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 822.611us
-Self CUDA time total: 38.017us
+Self CPU time total: 832.973us
+Self CUDA time total: 38.558us
 
 
 
@@ -4793,29 +4575,29 @@ PROFILE TRACE: torch_eager | cuda_B4_D2048_S128_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     335.486us       522.89%     335.486us     335.486us             1  
-                                            torch_eager        15.29%     123.163us        99.38%     800.491us     800.491us       0.000us         0.00%      68.256us      68.256us             1  
-                                           aten::conv1d         0.73%       5.840us        14.87%     119.763us      39.921us       0.000us         0.00%      41.760us      13.920us             3  
-                                      aten::convolution         1.21%       9.761us        14.14%     113.923us      37.974us       0.000us         0.00%      41.760us      13.920us             3  
-                                     aten::_convolution         2.84%      22.911us        12.93%     104.162us      34.721us       0.000us         0.00%      41.760us      13.920us             3  
-                                aten::_conv_depthwise2d         2.80%      22.570us         8.02%      64.572us      21.524us      41.760us        65.09%      41.760us      13.920us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      41.760us        65.09%      41.760us      13.920us             3  
-                                               aten::to         0.73%       5.842us        65.67%     528.904us      88.151us       0.000us         0.00%      26.496us       4.416us             6  
-                                         aten::_to_copy         2.94%      23.712us        64.94%     523.062us      87.177us       0.000us         0.00%      26.496us       4.416us             6  
-                                            aten::copy_         6.02%      48.492us        58.29%     469.521us      78.253us      22.400us        34.91%      26.496us       4.416us             6  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      11.968us        18.65%      11.968us       3.989us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      10.432us        16.26%      10.432us       3.477us             3  
-                                Activity Buffer Request        29.33%     236.206us        29.33%     236.206us     236.206us       4.096us         6.38%       4.096us       4.096us             1  
-                                    aten::empty_strided         3.70%      29.829us         3.70%      29.829us       4.971us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        25.91%     208.693us        25.91%     208.693us      23.188us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         2.18%      17.569us         2.86%      23.069us       2.563us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         1.14%       9.222us         1.14%       9.222us       0.615us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.20%       9.631us         1.20%       9.631us       3.210us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         1.06%       8.501us         1.06%       8.501us       2.834us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.83%       6.660us         0.99%       7.990us       2.663us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     316.829us       488.45%     316.829us     316.829us             1  
+                                            torch_eager        14.19%     114.002us        99.33%     798.183us     798.183us       0.000us         0.00%      68.991us      68.991us             1  
+                                           aten::conv1d         0.68%       5.460us        13.80%     110.892us      36.964us       0.000us         0.00%      42.304us      14.101us             3  
+                                      aten::convolution         1.10%       8.859us        13.12%     105.432us      35.144us       0.000us         0.00%      42.304us      14.101us             3  
+                                     aten::_convolution         2.59%      20.821us        12.02%      96.573us      32.191us       0.000us         0.00%      42.304us      14.101us             3  
+                                aten::_conv_depthwise2d         2.64%      21.190us         7.50%      60.251us      20.084us      42.304us        65.22%      42.304us      14.101us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      42.304us        65.22%      42.304us      14.101us             3  
+                                               aten::to         0.75%       6.059us        68.35%     549.177us      91.530us       0.000us         0.00%      26.687us       4.448us             6  
+                                         aten::_to_copy         2.76%      22.169us        67.59%     543.118us      90.520us       0.000us         0.00%      26.687us       4.448us             6  
+                                            aten::copy_         6.74%      54.161us        61.27%     492.308us      82.051us      22.560us        34.78%      26.687us       4.448us             6  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      12.095us        18.65%      12.095us       4.032us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      10.465us        16.13%      10.465us       3.488us             3  
+                                Activity Buffer Request        31.75%     255.134us        31.75%     255.134us     255.134us       4.127us         6.36%       4.127us       4.127us             1  
+                                    aten::empty_strided         3.56%      28.641us         3.56%      28.641us       4.773us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel        25.49%     204.843us        25.49%     204.843us      22.760us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         2.06%      16.521us         2.65%      21.322us       2.369us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         1.02%       8.171us         1.02%       8.171us       0.545us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         1.14%       9.170us         1.14%       9.170us       3.057us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         1.00%       8.061us         1.00%       8.061us       2.687us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.66%       5.330us         0.81%       6.520us       2.173us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 805.451us
-Self CUDA time total: 64.160us
+Self CPU time total: 803.533us
+Self CUDA time total: 64.864us
 
 
 
@@ -4825,29 +4607,29 @@ PROFILE TRACE: torch_eager | cuda_B4_D2048_S128_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     340.218us       487.48%     340.218us     340.218us             1  
-                                            torch_eager        15.18%     124.853us        99.38%     817.682us     817.682us       0.000us         0.00%      73.887us      73.887us             1  
-                                           aten::conv1d         0.72%       5.910us        14.57%     119.903us      39.968us       0.000us         0.00%      47.328us      15.776us             3  
-                                      aten::convolution         1.21%       9.960us        13.86%     113.993us      37.998us       0.000us         0.00%      47.328us      15.776us             3  
-                                     aten::_convolution         2.81%      23.101us        12.64%     104.033us      34.678us       0.000us         0.00%      47.328us      15.776us             3  
-                                aten::_conv_depthwise2d         2.62%      21.561us         7.83%      64.432us      21.477us      47.328us        67.81%      47.328us      15.776us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      47.328us        67.81%      47.328us      15.776us             3  
-                                               aten::to         0.75%       6.180us        66.30%     545.475us      90.913us       0.000us         0.00%      26.559us       4.426us             6  
-                                         aten::_to_copy         2.97%      24.459us        65.55%     539.295us      89.882us       0.000us         0.00%      26.559us       4.426us             6  
-                                            aten::copy_         6.14%      50.491us        58.93%     484.862us      80.810us      22.463us        32.19%      26.559us       4.426us             6  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      12.032us        17.24%      12.032us       4.011us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      10.431us        14.95%      10.431us       3.477us             3  
-                                Activity Buffer Request        30.21%     248.576us        30.21%     248.576us     248.576us       4.096us         5.87%       4.096us       4.096us             1  
-                                    aten::empty_strided         3.64%      29.974us         3.64%      29.974us       4.996us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        25.32%     208.345us        25.32%     208.345us      23.149us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         2.09%      17.201us         2.72%      22.401us       2.489us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         1.11%       9.120us         1.11%       9.120us       0.608us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.32%      10.899us         1.32%      10.899us       3.633us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         1.15%       9.422us         1.15%       9.422us       3.141us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.80%       6.580us         0.98%       8.070us       2.690us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     328.383us       466.25%     328.383us     328.383us             1  
+                                            torch_eager         5.82%     138.672us        99.78%       2.376ms       2.376ms       0.000us         0.00%      74.527us      74.527us             1  
+                                           aten::conv1d         0.24%       5.689us         4.87%     115.970us      38.657us       0.000us         0.00%      47.969us      15.990us             3  
+                                      aten::convolution         0.43%      10.191us         4.63%     110.281us      36.760us       0.000us         0.00%      47.969us      15.990us             3  
+                                     aten::_convolution         0.91%      21.579us         4.20%     100.090us      33.363us       0.000us         0.00%      47.969us      15.990us             3  
+                                aten::_conv_depthwise2d         0.87%      20.670us         2.63%      62.670us      20.890us      47.969us        68.11%      47.969us      15.990us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us      47.969us        68.11%      47.969us      15.990us             3  
+                                               aten::to         0.27%       6.430us        88.04%       2.097ms     349.464us       0.000us         0.00%      26.558us       4.426us             6  
+                                         aten::_to_copy         0.99%      23.642us        87.77%       2.090ms     348.392us       0.000us         0.00%      26.558us       4.426us             6  
+                                            aten::copy_         2.06%      49.120us        85.54%       2.037ms     339.525us      22.462us        31.89%      26.558us       4.426us             6  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      11.999us        17.04%      11.999us       4.000us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      10.463us        14.86%      10.463us       3.488us             3  
+                                Activity Buffer Request        75.66%       1.802ms        75.66%       1.802ms       1.802ms       4.096us         5.82%       4.096us       4.096us             1  
+                                    aten::empty_strided         1.24%      29.560us         1.24%      29.560us       4.927us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         8.75%     208.373us         8.75%     208.373us      23.153us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.70%      16.782us         0.92%      21.972us       2.441us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.36%       8.520us         0.36%       8.520us       0.568us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.38%       9.160us         0.38%       9.160us       3.053us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.44%      10.580us         0.44%      10.580us       3.527us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.24%       5.730us         0.29%       7.020us       2.340us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 822.752us
-Self CUDA time total: 69.791us
+Self CPU time total: 2.382ms
+Self CUDA time total: 70.431us
 
 
 
@@ -4857,29 +4639,29 @@ PROFILE TRACE: torch_eager | cuda_B4_D2048_S512_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     357.276us       192.10%     357.276us     357.276us             1  
-                                            torch_eager         7.25%     148.445us        99.75%       2.043ms       2.043ms       0.000us         0.00%     196.063us     196.063us             1  
-                                           aten::conv1d         0.28%       5.714us         6.04%     123.725us      41.242us       0.000us         0.00%     133.535us      44.512us             3  
-                                      aten::convolution         0.50%      10.209us         5.76%     118.011us      39.337us       0.000us         0.00%     133.535us      44.512us             3  
-                                     aten::_convolution         1.22%      24.922us         5.26%     107.802us      35.934us       0.000us         0.00%     133.535us      44.512us             3  
-                                aten::_conv_depthwise2d         1.06%      21.740us         3.25%      66.540us      22.180us     133.535us        71.80%     133.535us      44.512us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us     133.535us        71.80%     133.535us      44.512us             3  
-                                               aten::to         0.32%       6.558us        85.01%       1.741ms     290.215us       0.000us         0.00%      62.528us      10.421us             6  
-                                         aten::_to_copy         1.28%      26.242us        84.69%       1.735ms     289.122us       0.000us         0.00%      62.528us      10.421us             6  
-                                            aten::copy_         2.37%      48.539us        81.91%       1.678ms     279.634us      52.448us        28.20%      62.528us      10.421us             6  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      29.536us        15.88%      29.536us       9.845us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      22.912us        12.32%      22.912us       7.637us             3  
-                                Activity Buffer Request        70.45%       1.443ms        70.45%       1.443ms       1.443ms      10.080us         5.42%      10.080us      10.080us             1  
-                                    aten::empty_strided         1.50%      30.691us         1.50%      30.691us       5.115us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        10.22%     209.265us        10.22%     209.265us      23.252us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         0.93%      19.072us         1.20%      24.640us       2.738us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         0.45%       9.247us         0.45%       9.247us       0.616us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         0.55%      11.270us         0.55%      11.270us       3.757us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         0.51%      10.520us         0.51%      10.520us       3.507us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.29%       5.931us         0.35%       7.230us       2.410us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     336.351us       179.68%     336.351us     336.351us             1  
+                                            torch_eager         5.85%     142.571us        99.79%       2.430ms       2.430ms       0.000us         0.00%     197.311us     197.311us             1  
+                                           aten::conv1d         0.28%       6.741us         4.71%     114.731us      38.244us       0.000us         0.00%     134.368us      44.789us             3  
+                                      aten::convolution         0.38%       9.350us         4.43%     107.990us      35.997us       0.000us         0.00%     134.368us      44.789us             3  
+                                     aten::_convolution         0.88%      21.488us         4.05%      98.640us      32.880us       0.000us         0.00%     134.368us      44.789us             3  
+                                aten::_conv_depthwise2d         0.83%      20.301us         2.51%      61.091us      20.364us     134.368us        71.78%     134.368us      44.789us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us     134.368us        71.78%     134.368us      44.789us             3  
+                                               aten::to         0.26%       6.379us        88.22%       2.148ms     358.072us       0.000us         0.00%      62.943us      10.491us             6  
+                                         aten::_to_copy         0.93%      22.632us        87.96%       2.142ms     357.009us       0.000us         0.00%      62.943us      10.491us             6  
+                                            aten::copy_         2.03%      49.489us        85.76%       2.089ms     348.110us      52.831us        28.22%      62.943us      10.491us             6  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      29.727us        15.88%      29.727us       9.909us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      23.104us        12.34%      23.104us       7.701us             3  
+                                Activity Buffer Request        76.11%       1.853ms        76.11%       1.853ms       1.853ms      10.112us         5.40%      10.112us      10.112us             1  
+                                    aten::empty_strided         1.26%      30.760us         1.26%      30.760us       5.127us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel         8.55%     208.274us         8.55%     208.274us      23.142us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.71%      17.184us         0.91%      22.223us       2.469us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.34%       8.338us         0.34%       8.338us       0.556us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.38%       9.180us         0.38%       9.180us       3.060us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.37%       9.020us         0.37%       9.020us       3.007us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.22%       5.460us         0.27%       6.690us       2.230us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.048ms
-Self CUDA time total: 185.983us
+Self CPU time total: 2.435ms
+Self CUDA time total: 187.199us
 
 
 
@@ -4889,29 +4671,29 @@ PROFILE TRACE: torch_eager | cuda_B4_D2048_S512_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     358.235us       170.21%     358.235us     358.235us             1  
-                                            torch_eager        15.50%     124.275us        99.34%     796.461us     796.461us       0.000us         0.00%     224.253us     224.253us             1  
-                                           aten::conv1d         0.70%       5.590us        14.78%     118.483us      39.494us       0.000us         0.00%     154.174us      51.391us             3  
-                                      aten::convolution         1.24%       9.921us        14.08%     112.893us      37.631us       0.000us         0.00%     154.174us      51.391us             3  
-                                     aten::_convolution         2.81%      22.549us        12.84%     102.972us      34.324us       0.000us         0.00%     154.174us      51.391us             3  
-                                aten::_conv_depthwise2d         2.82%      22.632us         8.11%      65.062us      21.687us     154.174us        73.26%     154.174us      51.391us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us     154.174us        73.26%     154.174us      51.391us             3  
-                                               aten::to         0.74%       5.971us        65.46%     524.833us      87.472us       0.000us         0.00%      70.079us      11.680us             6  
-                                         aten::_to_copy         3.23%      25.880us        64.72%     518.862us      86.477us       0.000us         0.00%      70.079us      11.680us             6  
-                                            aten::copy_         6.33%      50.713us        57.67%     462.401us      77.067us      56.287us        26.74%      70.079us      11.680us             6  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      33.248us        15.80%      33.248us      11.083us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      23.039us        10.95%      23.039us       7.680us             3  
-                                Activity Buffer Request        28.19%     225.995us        28.19%     225.995us     225.995us      13.792us         6.55%      13.792us      13.792us             1  
-                                    aten::empty_strided         3.81%      30.581us         3.81%      30.581us       5.097us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        25.98%     208.263us        25.98%     208.263us      23.140us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         2.24%      17.992us         2.91%      23.301us       2.589us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         1.16%       9.309us         1.16%       9.309us       0.621us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.31%      10.480us         1.31%      10.480us       3.493us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         1.17%       9.380us         1.17%       9.380us       3.127us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.74%       5.910us         0.92%       7.370us       2.457us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     335.323us       159.21%     335.323us     335.323us             1  
+                                            torch_eager        14.44%     115.471us        99.40%     794.842us     794.842us       0.000us         0.00%     223.709us     223.709us             1  
+                                           aten::conv1d         0.70%       5.561us        13.80%     110.362us      36.787us       0.000us         0.00%     154.845us      51.615us             3  
+                                      aten::convolution         1.15%       9.189us        13.11%     104.801us      34.934us       0.000us         0.00%     154.845us      51.615us             3  
+                                     aten::_convolution         2.52%      20.182us        11.96%      95.612us      31.871us       0.000us         0.00%     154.845us      51.615us             3  
+                                aten::_conv_depthwise2d         2.51%      20.101us         7.60%      60.741us      20.247us     154.845us        73.52%     154.845us      51.615us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us     154.845us        73.52%     154.845us      51.615us             3  
+                                               aten::to         0.72%       5.750us        68.18%     545.179us      90.863us       0.000us         0.00%      68.864us      11.477us             6  
+                                         aten::_to_copy         2.77%      22.130us        67.46%     539.429us      89.905us       0.000us         0.00%      68.864us      11.477us             6  
+                                            aten::copy_         5.86%      46.830us        60.79%     486.078us      81.013us      55.776us        26.48%      68.864us      11.477us             6  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      32.416us        15.39%      32.416us      10.805us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      23.360us        11.09%      23.360us       7.787us             3  
+                                Activity Buffer Request        31.66%     253.204us        31.66%     253.204us     253.204us      13.088us         6.21%      13.088us      13.088us             1  
+                                    aten::empty_strided         3.90%      31.221us         3.90%      31.221us       5.203us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel        26.02%     208.054us        26.02%     208.054us      23.117us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         1.93%      15.399us         2.47%      19.760us       2.196us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.98%       7.800us         0.98%       7.800us       0.520us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         1.23%       9.810us         1.23%       9.810us       3.270us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         1.10%       8.820us         1.10%       8.820us       2.940us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.69%       5.519us         0.86%       6.899us       2.300us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 801.751us
-Self CUDA time total: 210.461us
+Self CPU time total: 799.662us
+Self CUDA time total: 210.621us
 
 
 
@@ -4921,29 +4703,29 @@ PROFILE TRACE: torch_eager | cuda_B4_D2048_S2048_W2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         7.15%     131.473us        52.77%     970.085us     970.085us       0.000us         0.00%       1.521ms       1.521ms             1  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us       1.421ms       100.40%       1.421ms       1.421ms             1  
-                                               aten::to         0.36%       6.571us        37.17%     683.219us     113.870us       0.000us         0.00%     824.180us     137.363us             6  
-                                         aten::_to_copy         1.61%      29.612us        36.81%     676.648us     112.775us       0.000us         0.00%     824.180us     137.363us             6  
-                                            aten::copy_         2.81%      51.569us        25.14%     462.051us      77.009us     718.613us        50.76%     824.180us     137.363us             6  
-                                           aten::conv1d         0.36%       6.680us         6.82%     125.423us      41.808us       0.000us         0.00%     696.981us     232.327us             3  
-                                      aten::convolution         0.57%      10.460us         6.46%     118.743us      39.581us       0.000us         0.00%     696.981us     232.327us             3  
-                                     aten::_convolution         1.31%      24.040us         5.89%     108.283us      36.094us       0.000us         0.00%     696.981us     232.327us             3  
-                                aten::_conv_depthwise2d         1.25%      22.981us         3.69%      67.913us      22.638us     696.981us        49.24%     696.981us     232.327us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us     696.981us        49.24%     696.981us     232.327us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     410.458us        29.00%     410.458us     136.819us             3  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us     308.155us        21.77%     308.155us     102.718us             3  
-                                Activity Buffer Request        11.91%     218.936us        11.91%     218.936us     218.936us     105.567us         7.46%     105.567us     105.567us             1  
-                                    aten::empty_strided         2.01%      37.011us        10.06%     184.985us      30.831us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        11.74%     215.777us        11.74%     215.777us      23.975us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         0.99%      18.200us         1.31%      24.000us       2.667us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         0.53%       9.740us         0.53%       9.740us       0.649us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         0.59%      10.839us         0.59%      10.839us       3.613us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         0.54%       9.862us         0.54%       9.862us       3.287us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.34%       6.240us         0.42%       7.700us       2.567us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         6.62%     120.362us        52.56%     956.135us     956.135us       0.000us         0.00%       1.509ms       1.509ms             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us       1.411ms       100.41%       1.411ms       1.411ms             1  
+                                               aten::to         0.34%       6.140us        38.13%     693.750us     115.625us       0.000us         0.00%     815.515us     135.919us             6  
+                                         aten::_to_copy         1.53%      27.810us        37.80%     687.610us     114.602us       0.000us         0.00%     815.515us     135.919us             6  
+                                            aten::copy_         2.83%      51.570us        25.68%     467.247us      77.874us     711.740us        50.66%     815.515us     135.919us             6  
+                                           aten::conv1d         0.32%       5.781us         6.36%     115.702us      38.567us       0.000us         0.00%     693.278us     231.093us             3  
+                                      aten::convolution         0.51%       9.289us         6.04%     109.921us      36.640us       0.000us         0.00%     693.278us     231.093us             3  
+                                     aten::_convolution         1.19%      21.630us         5.53%     100.632us      33.544us       0.000us         0.00%     693.278us     231.093us             3  
+                                aten::_conv_depthwise2d         1.16%      21.108us         3.52%      63.951us      21.317us     693.278us        49.34%     693.278us     231.093us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us     693.278us        49.34%     693.278us     231.093us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     405.439us        28.86%     405.439us     135.146us             3  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us     306.301us        21.80%     306.301us     102.100us             3  
+                                Activity Buffer Request        12.14%     220.924us        12.14%     220.924us     220.924us     103.775us         7.39%     103.775us     103.775us             1  
+                                    aten::empty_strided         1.98%      36.051us        10.58%     192.553us      32.092us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel        12.05%     219.204us        12.05%     219.204us      24.356us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.93%      16.940us         1.22%      22.200us       2.467us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.48%       8.651us         0.48%       8.651us       0.577us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.51%       9.201us         0.51%       9.201us       3.067us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.51%       9.191us         0.51%       9.191us       3.064us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.31%       5.621us         0.38%       6.871us       2.290us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.838ms
-Self CUDA time total: 1.416ms
+Self CPU time total: 1.819ms
+Self CUDA time total: 1.405ms
 
 
 
@@ -4953,56 +4735,56 @@ PROFILE TRACE: torch_eager | cuda_B4_D2048_S2048_W4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         6.74%     124.615us        43.66%     806.720us     806.720us       0.000us         0.00%       1.502ms       1.502ms             1  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us       1.433ms       100.41%       1.433ms       1.433ms             1  
-                                               aten::to         0.34%       6.269us        28.35%     523.751us      87.292us       0.000us         0.00%     764.786us     127.464us             6  
-                                         aten::_to_copy         1.27%      23.480us        28.01%     517.482us      86.247us       0.000us         0.00%     764.786us     127.464us             6  
-                                            aten::copy_         2.74%      50.661us        25.15%     464.712us      77.452us     690.099us        48.36%     764.786us     127.464us             6  
-                                           aten::conv1d         0.32%       5.870us         7.00%     129.374us      43.125us       0.000us         0.00%     737.040us     245.680us             3  
-                                      aten::convolution         0.54%       9.999us         6.68%     123.504us      41.168us       0.000us         0.00%     737.040us     245.680us             3  
-                                     aten::_convolution         1.31%      24.293us         6.14%     113.505us      37.835us       0.000us         0.00%     737.040us     245.680us             3  
-                                aten::_conv_depthwise2d         1.62%      30.010us         3.95%      73.060us      24.353us     737.040us        51.64%     737.040us     245.680us             3  
-void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us     737.040us        51.64%     737.040us     245.680us             3  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     399.673us        28.01%     399.673us     133.224us             3  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us     290.426us        20.35%     290.426us      96.809us             3  
-                                Activity Buffer Request        12.15%     224.466us        12.15%     224.466us     224.466us      74.687us         5.23%      74.687us      74.687us             1  
-                                    aten::empty_strided         1.59%      29.290us         1.59%      29.290us       4.882us       0.000us         0.00%       0.000us       0.000us             6  
-                                       cudaLaunchKernel        11.52%     212.785us        11.52%     212.785us      23.643us       0.000us         0.00%       0.000us       0.000us             9  
-                                        aten::unsqueeze         0.94%      17.281us         1.23%      22.771us       2.530us       0.000us         0.00%       0.000us       0.000us             9  
-                                       aten::as_strided         0.55%      10.081us         0.55%      10.081us       0.672us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         0.57%      10.440us         0.57%      10.440us       3.480us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::resize_         0.51%       9.410us         0.51%       9.410us       3.137us       0.000us         0.00%       0.000us       0.000us             3  
-                                          aten::squeeze         0.33%       6.150us         0.41%       7.641us       2.547us       0.000us         0.00%       0.000us       0.000us             3  
+                                            torch_eager         6.07%     112.213us        42.26%     781.792us     781.792us       0.000us         0.00%       1.498ms       1.498ms             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us       1.428ms       100.39%       1.428ms       1.428ms             1  
+                                               aten::to         0.33%       6.130us        28.74%     531.749us      88.625us       0.000us         0.00%     757.569us     126.261us             6  
+                                         aten::_to_copy         1.23%      22.780us        28.41%     525.619us      87.603us       0.000us         0.00%     757.569us     126.261us             6  
+                                            aten::copy_         2.64%      48.852us        25.56%     472.969us      78.828us     682.049us        47.95%     757.569us     126.261us             6  
+                                           aten::conv1d         0.33%       6.130us         6.13%     113.361us      37.787us       0.000us         0.00%     740.449us     246.816us             3  
+                                      aten::convolution         0.48%       8.889us         5.80%     107.231us      35.744us       0.000us         0.00%     740.449us     246.816us             3  
+                                     aten::_convolution         1.13%      20.931us         5.32%      98.342us      32.781us       0.000us         0.00%     740.449us     246.816us             3  
+                                aten::_conv_depthwise2d         1.15%      21.330us         3.38%      62.491us      20.830us     740.449us        52.05%     740.449us     246.816us             3  
+void at::native::(anonymous namespace)::conv_depthwi...         0.00%       0.000us         0.00%       0.000us       0.000us     740.449us        52.05%     740.449us     246.816us             3  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     397.857us        27.97%     397.857us     132.619us             3  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us     284.192us        19.98%     284.192us      94.731us             3  
+                                Activity Buffer Request        12.95%     239.644us        12.95%     239.644us     239.644us      75.520us         5.31%      75.520us      75.520us             1  
+                                    aten::empty_strided         1.61%      29.870us         1.61%      29.870us       4.978us       0.000us         0.00%       0.000us       0.000us             6  
+                                       cudaLaunchKernel        11.17%     206.574us        11.17%     206.574us      22.953us       0.000us         0.00%       0.000us       0.000us             9  
+                                        aten::unsqueeze         0.85%      15.779us         1.12%      20.809us       2.312us       0.000us         0.00%       0.000us       0.000us             9  
+                                       aten::as_strided         0.45%       8.409us         0.45%       8.409us       0.561us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         0.49%       9.120us         0.49%       9.120us       3.040us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::resize_         0.54%       9.940us         0.54%       9.940us       3.313us       0.000us         0.00%       0.000us       0.000us             3  
+                                          aten::squeeze         0.29%       5.381us         0.36%       6.700us       2.233us       0.000us         0.00%       0.000us       0.000us             3  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.848ms
-Self CUDA time total: 1.427ms
+Self CPU time total: 1.850ms
+Self CUDA time total: 1.422ms
 
 
 impl                     wl                  p50(ms)  ok
-torch_eager              cuda_B2_D2048_S128_W2     0.09  True
+torch_eager              cuda_B2_D2048_S128_W2     0.08  True
 torch_eager              cuda_B2_D2048_S128_W4     0.08  True
-torch_eager              cuda_B2_D2048_S2048_W2     0.15  True
+torch_eager              cuda_B2_D2048_S2048_W2     0.16  True
 torch_eager              cuda_B2_D2048_S2048_W4     0.16  True
-torch_eager              cuda_B2_D2048_S512_W2     0.09  True
-torch_eager              cuda_B2_D2048_S512_W4     0.09  True
+torch_eager              cuda_B2_D2048_S512_W2     0.08  True
+torch_eager              cuda_B2_D2048_S512_W4     0.08  True
 torch_eager              cuda_B2_D64_S128_W2     0.07  True
-torch_eager              cuda_B2_D64_S128_W4     0.09  True
-torch_eager              cuda_B2_D64_S2048_W2     0.09  True
-torch_eager              cuda_B2_D64_S2048_W4     0.09  True
-torch_eager              cuda_B2_D64_S512_W2     0.09  True
-torch_eager              cuda_B2_D64_S512_W4     0.09  True
-torch_eager              cuda_B4_D2048_S128_W2     0.09  True
-torch_eager              cuda_B4_D2048_S128_W4     0.09  True
-torch_eager              cuda_B4_D2048_S2048_W2     0.49  True
+torch_eager              cuda_B2_D64_S128_W4     0.08  True
+torch_eager              cuda_B2_D64_S2048_W2     0.08  True
+torch_eager              cuda_B2_D64_S2048_W4     0.08  True
+torch_eager              cuda_B2_D64_S512_W2     0.08  True
+torch_eager              cuda_B2_D64_S512_W4     0.08  True
+torch_eager              cuda_B4_D2048_S128_W2     0.08  True
+torch_eager              cuda_B4_D2048_S128_W4     0.08  True
+torch_eager              cuda_B4_D2048_S2048_W2     0.48  True
 torch_eager              cuda_B4_D2048_S2048_W4     0.50  True
-torch_eager              cuda_B4_D2048_S512_W2     0.10  True
+torch_eager              cuda_B4_D2048_S512_W2     0.09  True
 torch_eager              cuda_B4_D2048_S512_W4     0.10  True
-torch_eager              cuda_B4_D64_S128_W2     0.09  True
+torch_eager              cuda_B4_D64_S128_W2     0.08  True
 torch_eager              cuda_B4_D64_S128_W4     0.08  True
-torch_eager              cuda_B4_D64_S2048_W2     0.09  True
-torch_eager              cuda_B4_D64_S2048_W4     0.09  True
-torch_eager              cuda_B4_D64_S512_W2     0.09  True
-torch_eager              cuda_B4_D64_S512_W4     0.09  True
+torch_eager              cuda_B4_D64_S2048_W2     0.08  True
+torch_eager              cuda_B4_D64_S2048_W4     0.08  True
+torch_eager              cuda_B4_D64_S512_W2     0.08  True
+torch_eager              cuda_B4_D64_S512_W4     0.08  True
 

Artifacts:

diff --git a/causal_conv1d/results/artifacts/combine/latency.svg b/causal_conv1d/results/artifacts/combine/latency.svg index 1051764b171c27ddd8f8651b286d107eb666bd69..77bd33b0b9c7eed09d079a42de7733d3434f87b7 100644 --- a/causal_conv1d/results/artifacts/combine/latency.svg +++ b/causal_conv1d/results/artifacts/combine/latency.svg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6fdf61512b0add92f3d8e4a284ecb814f7a3b11b2db0fe3af610896a05d7072f -size 35426 +oid sha256:62198a37ec11e9842df4a67d55c5b1bec2c5617a8dd04d029f52a460eb48ca2f +size 35428 diff --git a/causal_conv1d/results/combined_results.html b/causal_conv1d/results/combined_results.html index 6a99b42f98995858e618176be6ad4beb1b59c2c4..c8da75e6e0c9a84b90dae7b0b11ce3dd609a1739 100644 --- a/causal_conv1d/results/combined_results.html +++ b/causal_conv1d/results/combined_results.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; 
--border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - 
-:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - 
:root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ 
:root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] 
.menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: 
auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ 
+ cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: 
transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 
0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { 
background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid 
var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: 
var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight 
.cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ 
+[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ 
+[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { 
color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* 
Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] 
.highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: 
#AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { 
border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 
0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4107,7 +3889,7 @@ body[data-tool="eraser"] .main-content { - 2025-10-31T20:14:05.716143 + 2025-11-10T22:12:01.020731 image/svg+xml @@ -4451,70 +4233,70 @@ body[data-tool="eraser"] .main-content { - + - + - 0.1 + 0.1 - + - + - 0.2 + 0.2 - + - + - 0.3 + 0.3 - + - + - 0.4 + 0.4 - + - + - 0.5 + 0.5 @@ -4522,66 +4304,66 @@ body[data-tool="eraser"] .main-content { - + - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + - + - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + @@ -4640,7 +4422,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: combine | 4.43s +Cell: combine | 4.73s | Raw @@ -4733,7 +4515,7 @@ hf_kernels_causal_conv1d cuda_B2_D2048_S2048_W2 0.05 True hf_kernels_causal_conv1d cuda_B2_D2048_S2048_W4 0.05 True hf_kernels_causal_conv1d cuda_B2_D2048_S512_W2 0.05 True hf_kernels_causal_conv1d cuda_B2_D2048_S512_W4 0.05 True -hf_kernels_causal_conv1d cuda_B2_D64_S128_W2 0.05 True +hf_kernels_causal_conv1d cuda_B2_D64_S128_W2 0.04 True hf_kernels_causal_conv1d cuda_B2_D64_S128_W4 0.05 True hf_kernels_causal_conv1d cuda_B2_D64_S2048_W2 0.05 True hf_kernels_causal_conv1d cuda_B2_D64_S2048_W4 0.05 True @@ -4751,30 +4533,30 @@ hf_kernels_causal_conv1d cuda_B4_D64_S2048_W2 0.05 True hf_kernels_causal_conv1d cuda_B4_D64_S2048_W4 0.05 True hf_kernels_causal_conv1d cuda_B4_D64_S512_W2 0.05 True hf_kernels_causal_conv1d cuda_B4_D64_S512_W4 0.05 True -torch_eager cuda_B2_D2048_S128_W2 0.09 True +torch_eager cuda_B2_D2048_S128_W2 0.08 True torch_eager cuda_B2_D2048_S128_W4 0.08 True -torch_eager cuda_B2_D2048_S2048_W2 0.15 True +torch_eager cuda_B2_D2048_S2048_W2 0.16 True torch_eager cuda_B2_D2048_S2048_W4 0.16 True -torch_eager cuda_B2_D2048_S512_W2 0.09 True -torch_eager cuda_B2_D2048_S512_W4 0.09 True +torch_eager cuda_B2_D2048_S512_W2 0.08 True +torch_eager cuda_B2_D2048_S512_W4 0.08 True torch_eager cuda_B2_D64_S128_W2 0.07 True -torch_eager cuda_B2_D64_S128_W4 
0.09 True -torch_eager cuda_B2_D64_S2048_W2 0.09 True -torch_eager cuda_B2_D64_S2048_W4 0.09 True -torch_eager cuda_B2_D64_S512_W2 0.09 True -torch_eager cuda_B2_D64_S512_W4 0.09 True -torch_eager cuda_B4_D2048_S128_W2 0.09 True -torch_eager cuda_B4_D2048_S128_W4 0.09 True -torch_eager cuda_B4_D2048_S2048_W2 0.49 True +torch_eager cuda_B2_D64_S128_W4 0.08 True +torch_eager cuda_B2_D64_S2048_W2 0.08 True +torch_eager cuda_B2_D64_S2048_W4 0.08 True +torch_eager cuda_B2_D64_S512_W2 0.08 True +torch_eager cuda_B2_D64_S512_W4 0.08 True +torch_eager cuda_B4_D2048_S128_W2 0.08 True +torch_eager cuda_B4_D2048_S128_W4 0.08 True +torch_eager cuda_B4_D2048_S2048_W2 0.48 True torch_eager cuda_B4_D2048_S2048_W4 0.50 True -torch_eager cuda_B4_D2048_S512_W2 0.10 True +torch_eager cuda_B4_D2048_S512_W2 0.09 True torch_eager cuda_B4_D2048_S512_W4 0.10 True -torch_eager cuda_B4_D64_S128_W2 0.09 True +torch_eager cuda_B4_D64_S128_W2 0.08 True torch_eager cuda_B4_D64_S128_W4 0.08 True -torch_eager cuda_B4_D64_S2048_W2 0.09 True -torch_eager cuda_B4_D64_S2048_W4 0.09 True -torch_eager cuda_B4_D64_S512_W2 0.09 True -torch_eager cuda_B4_D64_S512_W4 0.09 True +torch_eager cuda_B4_D64_S2048_W2 0.08 True +torch_eager cuda_B4_D64_S2048_W4 0.08 True +torch_eager cuda_B4_D64_S512_W2 0.08 True +torch_eager cuda_B4_D64_S512_W4 0.08 True GENERATING COMBINED VISUALIZATION @@ -4794,7 +4576,7 @@ Implementations included:
▶ UV Install Logs
@@ -4807,7 +4589,7 @@ Installed 37 packages in 238ms - 2025-10-31T20:14:05.716143 + 2025-11-10T22:12:01.020731 image/svg+xml @@ -5151,70 +4933,70 @@ Installed 37 packages in 238ms - + - + - 0.1 + 0.1 - + - + - 0.2 + 0.2 - + - + - 0.3 + 0.3 - + - + - 0.4 + 0.4 - + - + - 0.5 + 0.5 @@ -5222,66 +5004,66 @@ Installed 37 packages in 238ms - + - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + - + - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + diff --git a/deformable_detr/impls/artifacts/benchmark/deformable_detr.jsonl b/deformable_detr/impls/artifacts/benchmark/deformable_detr.jsonl index 52c7930d88f40dd4da2a4cc2aa3b8068bb350deb..677a7c6d6ad24e77e848df2c0120e0c29c50a39d 100644 --- a/deformable_detr/impls/artifacts/benchmark/deformable_detr.jsonl +++ b/deformable_detr/impls/artifacts/benchmark/deformable_detr.jsonl @@ -1,4 +1,4 @@ -{"ts": "2025-10-31T20:13:50Z", "run": "c1c013d99d9f4c1199d0a550b8476fb2", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B1_Q100_H8_E256_L4_P4", "batch_size": 1, "num_queries": 100, "num_heads": 8, "embed_dim": 256, "num_levels": 4, "num_points": 4, "spatial_shapes": [[32, 32], [16, 16], [8, 8], [4, 4]], "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 3.3733269999629556, "p50": 3.3932979999917734, "p90": 3.4002180000243243, "mean": 3.393551400040451, "iqr": 0.010580999969533877, "raw_times": [3.3896370000547904, 3.4002180000243243, 3.3932979999917734, 3.3733269999629556, 3.411277000168411], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 3.4049870000671945, "peak_bytes": 5929472, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": 
"deformable_detr_torch"}, "err": null} -{"ts": "2025-10-31T20:13:51Z", "run": "c1c013d99d9f4c1199d0a550b8476fb2", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B1_Q300_H8_E256_L4_P4", "batch_size": 1, "num_queries": 300, "num_heads": 8, "embed_dim": 256, "num_levels": 4, "num_points": 4, "spatial_shapes": [[32, 32], [16, 16], [8, 8], [4, 4]], "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 3.99112300010529, "p50": 4.007804000139004, "p90": 4.020502999992459, "mean": 4.014501400024528, "iqr": 0.017490000118414173, "raw_times": [4.050064000011844, 4.020502999992459, 4.007804000139004, 4.003012999874045, 3.99112300010529], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 4.017783999870517, "peak_bytes": 15161856, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "deformable_detr_torch"}, "err": null} -{"ts": "2025-10-31T20:13:51Z", "run": "c1c013d99d9f4c1199d0a550b8476fb2", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_Q100_H8_E256_L4_P4", "batch_size": 2, "num_queries": 100, "num_heads": 8, "embed_dim": 256, "num_levels": 4, "num_points": 4, "spatial_shapes": [[32, 32], [16, 16], [8, 8], [4, 4]], "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 4.004662999932407, "p50": 4.020202999981848, "p90": 4.030714000009539, "mean": 4.022331200030749, "iqr": 0.011850999953821884, "raw_times": [4.018863000055717, 4.004662999932407, 4.0372130001742335, 4.020202999981848, 4.030714000009539], "has_warnings": false, "reps": 5, 
"warmup": 2}, "compile_ms": 4.032904000041526, "peak_bytes": 11958784, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "deformable_detr_torch"}, "err": null} -{"ts": "2025-10-31T20:13:52Z", "run": "c1c013d99d9f4c1199d0a550b8476fb2", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_Q300_H8_E256_L4_P4", "batch_size": 2, "num_queries": 300, "num_heads": 8, "embed_dim": 256, "num_levels": 4, "num_points": 4, "spatial_shapes": [[32, 32], [16, 16], [8, 8], [4, 4]], "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 4.005022999990615, "p50": 4.020072999992408, "p90": 4.0240040000298904, "mean": 4.01746140000796, "iqr": 0.009850999958871398, "raw_times": [4.014153000071019, 4.005022999990615, 4.024053999955868, 4.0240040000298904, 4.020072999992408], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 4.024974000003567, "peak_bytes": 30977024, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "deformable_detr_torch"}, "err": null} +{"ts": "2025-11-10T21:59:00Z", "run": "af78c748d8aa44afbf8c01edaace0f7f", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B1_Q100_H8_E256_L4_P4", "batch_size": 1, "num_queries": 100, "num_heads": 8, "embed_dim": 256, "num_levels": 4, "num_points": 4, "spatial_shapes": [[32, 32], [16, 16], [8, 8], [4, 4]], "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 3.269501999966451, "p50": 3.2801430000404252, "p90": 3.3024029999637605, 
"mean": 3.289842799995313, "iqr": 0.02382099995656972, "raw_times": [3.3185839999987365, 3.2801430000404252, 3.269501999966451, 3.278582000007191, 3.3024029999637605], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 3.341974000022674, "peak_bytes": 5929472, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "deformable_detr_torch"}, "err": null} +{"ts": "2025-11-10T21:59:00Z", "run": "af78c748d8aa44afbf8c01edaace0f7f", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B1_Q300_H8_E256_L4_P4", "batch_size": 1, "num_queries": 300, "num_heads": 8, "embed_dim": 256, "num_levels": 4, "num_points": 4, "spatial_shapes": [[32, 32], [16, 16], [8, 8], [4, 4]], "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 3.9856040000358917, "p50": 4.010704999984682, "p90": 4.045005000023139, "mean": 4.019770599995809, "iqr": 0.05120100007616202, "raw_times": [4.045005000023139, 4.063734999988355, 4.010704999984682, 3.9856040000358917, 3.9938039999469765], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 4.026463999991847, "peak_bytes": 15161856, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "deformable_detr_torch"}, "err": null} +{"ts": "2025-11-10T21:59:01Z", "run": "af78c748d8aa44afbf8c01edaace0f7f", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_Q100_H8_E256_L4_P4", "batch_size": 2, "num_queries": 100, "num_heads": 8, "embed_dim": 256, "num_levels": 4, "num_points": 4, "spatial_shapes": [[32, 32], [16, 16], [8, 8], [4, 4]], "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": 
"NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 4.021324000007098, "p50": 4.030125000042517, "p90": 4.037073999995755, "mean": 4.049654600009944, "iqr": 0.01115999998546613, "raw_times": [4.037073999995755, 4.021324000007098, 4.133835999994062, 4.030125000042517, 4.025914000010289], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 4.049624999993284, "peak_bytes": 11958784, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "deformable_detr_torch"}, "err": null} +{"ts": "2025-11-10T21:59:01Z", "run": "af78c748d8aa44afbf8c01edaace0f7f", "impl": "torch_eager", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B2_Q300_H8_E256_L4_P4", "batch_size": 2, "num_queries": 300, "num_heads": 8, "embed_dim": 256, "num_levels": 4, "num_points": 4, "spatial_shapes": [[32, 32], [16, 16], [8, 8], [4, 4]], "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 4.128727000022536, "p50": 4.140276000043741, "p90": 4.142176000016207, "mean": 4.14041620002763, "iqr": 0.006619999965096213, "raw_times": [4.155346000004556, 4.140276000043741, 4.1355560000511105, 4.128727000022536, 4.142176000016207], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 4.139206999980161, "peak_bytes": 30977024, "ok": true, "absmax": 0.0, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax": 0.0, "mae": 0.0, "mse": 0.0, "ref": "deformable_detr_torch"}, "err": null} diff --git a/deformable_detr/impls/hf_kernels_deformable_detr.html b/deformable_detr/impls/hf_kernels_deformable_detr.html index 8203846442acfc0a17b0a7372d2971964aac9caf..b7268f57876c7d412631f5cb1017691850c16183 100644 --- 
a/deformable_detr/impls/hf_kernels_deformable_detr.html +++ b/deformable_detr/impls/hf_kernels_deformable_detr.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] 
.theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: 
var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } 
:root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets 
are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: 
var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: 
var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - 
-:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - 
.cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: 
auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: 
var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: 
auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid 
var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 
+[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold 
} /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ 
+[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { 
color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ 
+[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* 
Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is 
not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - 
.thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4106,7 +3888,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: nv | 0.23s +Cell: nv | 0.22s | Raw @@ -4123,16 +3905,16 @@ Cell: nv | 0.23s
-
Fri Oct 31 20:13:34 2025       
+
Mon Nov 10 21:58:17 2025       
 +-----------------------------------------------------------------------------------------+
-| NVIDIA-SMI 570.195.03             Driver Version: 570.195.03     CUDA Version: 12.8     |
-|-----------------------------------------+------------------------+----------------------+
+| NVIDIA-SMI 580.95.05              Driver Version: 580.95.05      CUDA Version: 13.0     |
++-----------------------------------------+------------------------+----------------------+
 | GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
 | Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
 |                                         |                        |               MIG M. |
 |=========================================+========================+======================|
 |   0  NVIDIA L40S                    On  |   00000000:4D:00.0 Off |                    0 |
-| N/A   43C    P0             83W /  350W |       0MiB /  46068MiB |     60%      Default |
+| N/A   28C    P0             79W /  350W |       0MiB /  46068MiB |     11%      Default |
 |                                         |                        |                  N/A |
 +-----------------------------------------+------------------------+----------------------+
 
@@ -4156,7 +3938,7 @@ Cell: nv | 0.23s
 ▼ output
  ▶ uv-logs
  | 
-Cell: benchmark | 8.30s
+Cell: benchmark | 8.74s
  | 
 
 Raw
@@ -4221,24 +4003,24 @@ PROFILE TRACE: hf_kernels_deformable_detr | cuda_B1_Q100_H8_E256_L4_P4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                             hf_kernels_deformable_detr         0.00%       0.000us         0.00%       0.000us       0.000us     195.201us       770.15%     195.201us     195.201us             1  
-                             hf_kernels_deformable_detr         7.43%     141.524us        99.61%       1.898ms       1.898ms       0.000us         0.00%      26.403us      26.403us             1  
-       _deformable_detr_57c3d32::ms_deform_attn_forward         3.93%      74.960us        92.19%       1.756ms     585.455us      22.464us        88.63%      26.403us       8.801us             3  
-void ms_deformable_im2col_gpu_kernel<float>(int, flo...         0.00%       0.000us         0.00%       0.000us       0.000us      22.464us        88.63%      22.464us       7.488us             3  
-                                            aten::zeros         1.20%      22.800us        85.08%       1.621ms     540.337us       0.000us         0.00%       3.939us       1.313us             3  
-                                            aten::zero_         0.89%      16.910us        82.13%       1.565ms     521.590us       0.000us         0.00%       3.939us       1.313us             3  
-                                            aten::fill_         1.72%      32.820us        81.24%       1.548ms     515.953us       2.882us        11.37%       3.939us       1.313us             3  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       2.882us        11.37%       2.882us       0.961us             3  
-                                Activity Buffer Request        77.24%       1.472ms        77.24%       1.472ms       1.472ms       1.057us         4.17%       1.057us       1.057us             1  
-                                            aten::empty         1.76%      33.441us         1.76%      33.441us      11.147us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel         3.19%      60.842us         3.19%      60.842us      10.140us       0.000us         0.00%       0.000us       0.000us             6  
-                                             aten::view         0.89%      16.922us         0.89%      16.922us       2.820us       0.000us         0.00%       0.000us       0.000us             6  
-                                           aten::select         1.13%      21.591us         1.37%      26.081us       8.694us       0.000us         0.00%       0.000us       0.000us             3  
-                                       aten::as_strided         0.24%       4.490us         0.24%       4.490us       1.497us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.39%       7.340us         0.39%       7.340us       7.340us       0.000us         0.00%       0.000us       0.000us             1  
+                             hf_kernels_deformable_detr         0.00%       0.000us         0.00%       0.000us       0.000us     189.823us       748.99%     189.823us     189.823us             1  
+                             hf_kernels_deformable_detr         6.28%     137.822us        99.65%       2.188ms       2.188ms       0.000us         0.00%      26.400us      26.400us             1  
+       _deformable_detr_57c3d32::ms_deform_attn_forward         3.04%      66.841us        93.38%       2.051ms     683.551us      22.496us        88.76%      26.400us       8.800us             3  
+void ms_deformable_im2col_gpu_kernel<float>(int, flo...         0.00%       0.000us         0.00%       0.000us       0.000us      22.496us        88.76%      22.496us       7.499us             3  
+                                            aten::zeros         0.83%      18.191us        87.50%       1.922ms     640.537us       0.000us         0.00%       3.904us       1.301us             3  
+                                            aten::zero_         0.64%      14.160us        85.08%       1.868ms     622.823us       0.000us         0.00%       3.904us       1.301us             3  
+                                            aten::fill_         1.45%      31.860us        84.44%       1.854ms     618.103us       2.848us        11.24%       3.904us       1.301us             3  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       2.848us        11.24%       2.848us       0.949us             3  
+                                Activity Buffer Request        80.97%       1.778ms        80.97%       1.778ms       1.778ms       1.056us         4.17%       1.056us       1.056us             1  
+                                            aten::empty         1.59%      34.950us         1.59%      34.950us      11.650us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         2.83%      62.083us         2.83%      62.083us      10.347us       0.000us         0.00%       0.000us       0.000us             6  
+                                             aten::view         0.81%      17.870us         0.81%      17.870us       2.978us       0.000us         0.00%       0.000us       0.000us             6  
+                                           aten::select         1.01%      22.200us         1.21%      26.600us       8.867us       0.000us         0.00%       0.000us       0.000us             3  
+                                       aten::as_strided         0.20%       4.400us         0.20%       4.400us       1.467us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.35%       7.640us         0.35%       7.640us       7.640us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.905ms
-Self CUDA time total: 25.346us
+Self CPU time total: 2.196ms
+Self CUDA time total: 25.344us
 
 
 
@@ -4248,24 +4030,24 @@ PROFILE TRACE: hf_kernels_deformable_detr | cuda_B1_Q300_H8_E256_L4_P4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                             hf_kernels_deformable_detr         0.00%       0.000us         0.00%       0.000us       0.000us     144.191us       546.22%     144.191us     144.191us             1  
-                             hf_kernels_deformable_detr         4.39%      75.912us        99.67%       1.722ms       1.722ms       0.000us         0.00%      27.358us      27.358us             1  
-       _deformable_detr_57c3d32::ms_deform_attn_forward         2.01%      34.700us        95.28%       1.646ms     548.647us      23.550us        89.21%      27.358us       9.119us             3  
-void ms_deformable_im2col_gpu_kernel<float>(int, flo...         0.00%       0.000us         0.00%       0.000us       0.000us      23.550us        89.21%      23.550us       7.850us             3  
-                                            aten::zeros         0.49%       8.451us        91.07%       1.573ms     524.424us       0.000us         0.00%       3.808us       1.269us             3  
-                                            aten::zero_         0.50%       8.669us        89.54%       1.547ms     515.616us       0.000us         0.00%       3.808us       1.269us             3  
-                                            aten::fill_         1.60%      27.701us        89.04%       1.538ms     512.727us       2.848us        10.79%       3.808us       1.269us             3  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       2.848us        10.79%       2.848us       0.949us             3  
-                                Activity Buffer Request        85.90%       1.484ms        85.90%       1.484ms       1.484ms       0.960us         3.64%       0.960us       0.960us             1  
-                                            aten::empty         1.04%      17.971us         1.04%      17.971us       5.990us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel         2.40%      41.442us         2.40%      41.442us       6.907us       0.000us         0.00%       0.000us       0.000us             6  
-                                             aten::view         0.54%       9.400us         0.54%       9.400us       1.567us       0.000us         0.00%       0.000us       0.000us             6  
-                                           aten::select         0.66%      11.329us         0.79%      13.720us       4.573us       0.000us         0.00%       0.000us       0.000us             3  
-                                       aten::as_strided         0.14%       2.391us         0.14%       2.391us       0.797us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.33%       5.680us         0.33%       5.680us       5.680us       0.000us         0.00%       0.000us       0.000us             1  
+                             hf_kernels_deformable_detr         0.00%       0.000us         0.00%       0.000us       0.000us     134.592us       507.36%     134.592us     134.592us             1  
+                             hf_kernels_deformable_detr         3.69%      73.590us        99.72%       1.986ms       1.986ms       0.000us         0.00%      27.456us      27.456us             1  
+       _deformable_detr_57c3d32::ms_deform_attn_forward         1.62%      32.200us        96.02%       1.913ms     637.550us      23.712us        89.38%      27.456us       9.152us             3  
+void ms_deformable_im2col_gpu_kernel<float>(int, flo...         0.00%       0.000us         0.00%       0.000us       0.000us      23.712us        89.38%      23.712us       7.904us             3  
+                                            aten::zeros         0.41%       8.111us        92.57%       1.844ms     614.623us       0.000us         0.00%       3.744us       1.248us             3  
+                                            aten::zero_         0.44%       8.741us        91.34%       1.819ms     606.446us       0.000us         0.00%       3.744us       1.248us             3  
+                                            aten::fill_         1.32%      26.360us        90.90%       1.811ms     603.533us       2.816us        10.62%       3.744us       1.248us             3  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       2.816us        10.62%       2.816us       0.939us             3  
+                                Activity Buffer Request        88.30%       1.759ms        88.30%       1.759ms       1.759ms       0.928us         3.50%       0.928us       0.928us             1  
+                                            aten::empty         0.82%      16.420us         0.82%      16.420us       5.473us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         2.00%      39.862us         2.00%      39.862us       6.644us       0.000us         0.00%       0.000us       0.000us             6  
+                                             aten::view         0.45%       9.050us         0.45%       9.050us       1.508us       0.000us         0.00%       0.000us       0.000us             6  
+                                           aten::select         0.54%      10.840us         0.66%      13.190us       4.397us       0.000us         0.00%       0.000us       0.000us             3  
+                                       aten::as_strided         0.12%       2.350us         0.12%       2.350us       0.783us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.28%       5.611us         0.28%       5.611us       5.611us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.728ms
-Self CUDA time total: 26.398us
+Self CPU time total: 1.992ms
+Self CUDA time total: 26.528us
 
 
 
@@ -4275,24 +4057,24 @@ PROFILE TRACE: hf_kernels_deformable_detr | cuda_B2_Q100_H8_E256_L4_P4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                             hf_kernels_deformable_detr         0.00%       0.000us         0.00%       0.000us       0.000us     140.288us       549.37%     140.288us     140.288us             1  
-                             hf_kernels_deformable_detr         4.34%      74.492us        99.67%       1.709ms       1.709ms       0.000us         0.00%      26.464us      26.464us             1  
-       _deformable_detr_57c3d32::ms_deform_attn_forward         1.96%      33.680us        95.32%       1.635ms     544.984us      22.752us        89.10%      26.464us       8.821us             3  
-void ms_deformable_im2col_gpu_kernel<float>(int, flo...         0.00%       0.000us         0.00%       0.000us       0.000us      22.752us        89.10%      22.752us       7.584us             3  
-                                            aten::zeros         0.50%       8.650us        91.19%       1.564ms     521.367us       0.000us         0.00%       3.712us       1.237us             3  
-                                            aten::zero_         0.47%       8.130us        89.69%       1.538ms     512.773us       0.000us         0.00%       3.712us       1.237us             3  
-                                            aten::fill_         1.63%      27.881us        89.21%       1.530ms     510.063us       2.784us        10.90%       3.712us       1.237us             3  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       2.784us        10.90%       2.784us       0.928us             3  
-                                Activity Buffer Request        86.04%       1.476ms        86.04%       1.476ms       1.476ms       0.928us         3.63%       0.928us       0.928us             1  
-                                            aten::empty         1.00%      17.131us         1.00%      17.131us       5.710us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel         2.42%      41.510us         2.42%      41.510us       6.918us       0.000us         0.00%       0.000us       0.000us             6  
-                                             aten::view         0.52%       8.991us         0.52%       8.991us       1.498us       0.000us         0.00%       0.000us       0.000us             6  
-                                           aten::select         0.62%      10.681us         0.77%      13.291us       4.430us       0.000us         0.00%       0.000us       0.000us             3  
-                                       aten::as_strided         0.15%       2.610us         0.15%       2.610us       0.870us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.33%       5.730us         0.33%       5.730us       5.730us       0.000us         0.00%       0.000us       0.000us             1  
+                             hf_kernels_deformable_detr         0.00%       0.000us         0.00%       0.000us       0.000us     138.240us       537.98%     138.240us     138.240us             1  
+                             hf_kernels_deformable_detr         3.56%      70.651us        99.71%       1.981ms       1.981ms       0.000us         0.00%      26.624us      26.624us             1  
+       _deformable_detr_57c3d32::ms_deform_attn_forward         1.67%      33.240us        96.15%       1.910ms     636.753us      22.912us        89.17%      26.624us       8.875us             3  
+void ms_deformable_im2col_gpu_kernel<float>(int, flo...         0.00%       0.000us         0.00%       0.000us       0.000us      22.912us        89.17%      22.912us       7.637us             3  
+                                            aten::zeros         0.41%       8.110us        92.55%       1.839ms     612.899us       0.000us         0.00%       3.712us       1.237us             3  
+                                            aten::zero_         0.40%       7.959us        91.32%       1.814ms     604.749us       0.000us         0.00%       3.712us       1.237us             3  
+                                            aten::fill_         1.22%      24.170us        90.92%       1.806ms     602.096us       2.784us        10.83%       3.712us       1.237us             3  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       2.784us        10.83%       2.784us       0.928us             3  
+                                Activity Buffer Request        88.35%       1.755ms        88.35%       1.755ms       1.755ms       0.928us         3.61%       0.928us       0.928us             1  
+                                            aten::empty         0.82%      16.340us         0.82%      16.340us       5.447us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel         2.09%      41.501us         2.09%      41.501us       6.917us       0.000us         0.00%       0.000us       0.000us             6  
+                                             aten::view         0.44%       8.661us         0.44%       8.661us       1.444us       0.000us         0.00%       0.000us       0.000us             6  
+                                           aten::select         0.62%      12.301us         0.75%      14.971us       4.990us       0.000us         0.00%       0.000us       0.000us             3  
+                                       aten::as_strided         0.13%       2.670us         0.13%       2.670us       0.890us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.29%       5.820us         0.29%       5.820us       5.820us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.715ms
-Self CUDA time total: 25.536us
+Self CPU time total: 1.987ms
+Self CUDA time total: 25.696us
 
 
 
@@ -4302,42 +4084,41 @@ PROFILE TRACE: hf_kernels_deformable_detr | cuda_B2_Q300_H8_E256_L4_P4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                             hf_kernels_deformable_detr         0.00%       0.000us         0.00%       0.000us       0.000us     151.934us       322.76%     151.934us     151.934us             1  
-                             hf_kernels_deformable_detr         3.86%      74.313us        99.75%       1.919ms       1.919ms       0.000us         0.00%      48.129us      48.129us             1  
-       _deformable_detr_57c3d32::ms_deform_attn_forward         1.79%      34.420us        95.88%       1.844ms     614.769us      43.968us        93.40%      48.129us      16.043us             3  
-void ms_deformable_im2col_gpu_kernel<float>(int, flo...         0.00%       0.000us         0.00%       0.000us       0.000us      43.968us        93.40%      43.968us      14.656us             3  
-                                            aten::zeros         0.45%       8.600us        92.03%       1.770ms     590.092us       0.000us         0.00%       4.161us       1.387us             3  
-                                            aten::zero_         0.45%       8.690us        90.72%       1.745ms     581.642us       0.000us         0.00%       4.161us       1.387us             3  
-                                            aten::fill_         1.44%      27.641us        90.26%       1.736ms     578.745us       3.105us         6.60%       4.161us       1.387us             3  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       3.105us         6.60%       3.105us       1.035us             3  
-                                Activity Buffer Request        76.84%       1.478ms        76.84%       1.478ms       1.478ms       1.056us         2.24%       1.056us       1.056us             1  
-                                            aten::empty         0.87%      16.750us         0.87%      16.750us       5.583us       0.000us         0.00%       0.000us       0.000us             3  
-                                       cudaLaunchKernel        12.74%     245.037us        12.74%     245.037us      40.839us       0.000us         0.00%       0.000us       0.000us             6  
-                                             aten::view         0.49%       9.420us         0.49%       9.420us       1.570us       0.000us         0.00%       0.000us       0.000us             6  
-                                           aten::select         0.66%      12.781us         0.82%      15.781us       5.260us       0.000us         0.00%       0.000us       0.000us             3  
-                                       aten::as_strided         0.16%       3.000us         0.16%       3.000us       1.000us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize         0.25%       4.890us         0.25%       4.890us       4.890us       0.000us         0.00%       0.000us       0.000us             1  
+                             hf_kernels_deformable_detr         0.00%       0.000us         0.00%       0.000us       0.000us     151.169us       321.37%     151.169us     151.169us             1  
+                             hf_kernels_deformable_detr         3.15%      71.770us        99.78%       2.275ms       2.275ms       0.000us         0.00%      48.031us      48.031us             1  
+       _deformable_detr_57c3d32::ms_deform_attn_forward         1.55%      35.341us        96.63%       2.204ms     734.529us      44.000us        93.54%      48.031us      16.010us             3  
+void ms_deformable_im2col_gpu_kernel<float>(int, flo...         0.00%       0.000us         0.00%       0.000us       0.000us      44.000us        93.54%      44.000us      14.667us             3  
+                                            aten::zeros         0.38%       8.571us        93.48%       2.132ms     710.555us       0.000us         0.00%       4.031us       1.344us             3  
+                                            aten::zero_         0.42%       9.580us        92.38%       2.107ms     702.221us       0.000us         0.00%       4.031us       1.344us             3  
+                                            aten::fill_         1.16%      26.560us        91.96%       2.097ms     699.028us       3.039us         6.46%       4.031us       1.344us             3  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       3.039us         6.46%       3.039us       1.013us             3  
+                                Activity Buffer Request        80.85%       1.844ms        80.85%       1.844ms       1.844ms       0.992us         2.11%       0.992us       0.992us             1  
+                                            aten::empty         0.72%      16.430us         0.72%      16.430us       5.477us       0.000us         0.00%       0.000us       0.000us             3  
+                                       cudaLaunchKernel        10.56%     240.915us        10.56%     240.915us      40.153us       0.000us         0.00%       0.000us       0.000us             6  
+                                             aten::view         0.41%       9.238us         0.41%       9.238us       1.540us       0.000us         0.00%       0.000us       0.000us             6  
+                                           aten::select         0.48%      10.832us         0.58%      13.262us       4.421us       0.000us         0.00%       0.000us       0.000us             3  
+                                       aten::as_strided         0.11%       2.430us         0.11%       2.430us       0.810us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize         0.22%       4.990us         0.22%       4.990us       4.990us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.924ms
-Self CUDA time total: 47.073us
+Self CPU time total: 2.280ms
+Self CUDA time total: 47.039us
 
 
 impl                     wl                  p50(ms)  ok
-hf_kernels_deformable_detr cuda_B1_Q100_H8_E256_L4_P4     0.04  True
-hf_kernels_deformable_detr cuda_B1_Q300_H8_E256_L4_P4     0.05  True
-hf_kernels_deformable_detr cuda_B2_Q100_H8_E256_L4_P4     0.05  True
+hf_kernels_deformable_detr cuda_B1_Q100_H8_E256_L4_P4     0.03  True
+hf_kernels_deformable_detr cuda_B1_Q300_H8_E256_L4_P4     0.04  True
+hf_kernels_deformable_detr cuda_B2_Q100_H8_E256_L4_P4     0.04  True
 hf_kernels_deformable_detr cuda_B2_Q300_H8_E256_L4_P4     0.05  True
 
▶ UV Install Logs
Fetching 7 files: 0%| | 0/7 [00:00<?, ?it/s] -Fetching 7 files: 14%|█▍ | 1/7 [00:00<00:00, 6.20it/s] -Fetching 7 files: 71%|███████▏ | 5/7 [00:00<00:00, 9.26it/s] -Fetching 7 files: 100%|██████████| 7/7 [00:00<00:00, 12.59it/s]
+Fetching 7 files: 71%|███████▏ | 5/7 [00:00<00:00, 9.96it/s] +Fetching 7 files: 100%|██████████| 7/7 [00:00<00:00, 13.94it/s]

Artifacts:

deformable_detr.jsonl diff --git a/deformable_detr/impls/torch_deformable_detr.html b/deformable_detr/impls/torch_deformable_detr.html index 1d330b066f83130623802310ab8c5a5ceec69b71..540bf4b2800b0db3464f9bb9942f043405677bee 100644 --- a/deformable_detr/impls/torch_deformable_detr.html +++ b/deformable_detr/impls/torch_deformable_detr.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 
+163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: 
var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } 
+:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: 
flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: 
var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { 
color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: 
var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ 
transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s 
ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { 
background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { 
margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { 
.cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; 
font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception 
*/ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: 
#BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* 
Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] 
.highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] 
.highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: 
var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; 
margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4106,7 +3888,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: nv | 0.23s +Cell: nv | 0.22s | Raw @@ -4122,16 +3904,16 @@ Cell: nv | 0.23s
-
Fri Oct 31 20:13:34 2025       
+
Mon Nov 10 21:58:17 2025       
 +-----------------------------------------------------------------------------------------+
-| NVIDIA-SMI 570.195.03             Driver Version: 570.195.03     CUDA Version: 12.8     |
-|-----------------------------------------+------------------------+----------------------+
+| NVIDIA-SMI 580.95.05              Driver Version: 580.95.05      CUDA Version: 13.0     |
++-----------------------------------------+------------------------+----------------------+
 | GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
 | Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
 |                                         |                        |               MIG M. |
 |=========================================+========================+======================|
 |   0  NVIDIA L40S                    On  |   00000000:4D:00.0 Off |                    0 |
-| N/A   43C    P0             83W /  350W |       0MiB /  46068MiB |     60%      Default |
+| N/A   28C    P0             79W /  350W |       0MiB /  46068MiB |     11%      Default |
 |                                         |                        |                  N/A |
 +-----------------------------------------+------------------------+----------------------+
 
@@ -4155,7 +3937,7 @@ Cell: nv | 0.23s
 ▼ output
  ▶ uv-logs
  | 
-Cell: benchmark | 5.33s
+Cell: benchmark | 5.50s
  | 
 
 Raw
@@ -4295,29 +4077,29 @@ PROFILE TRACE: torch_eager | cuda_B1_Q100_H8_E256_L4_P4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us      20.095ms      1353.99%      20.095ms      20.095ms             1  
-                                            torch_eager        21.57%       4.703ms        99.97%      21.796ms      21.796ms       0.000us         0.00%       1.485ms       1.485ms             1  
-                                            aten::index         4.62%       1.006ms        16.78%       3.660ms      76.241us     237.342us        15.99%     371.712us       7.744us            48  
-                                            aten::copy_         4.87%       1.061ms        11.32%       2.469ms      11.275us     365.385us        24.62%     365.385us       1.668us           219  
-                                              aten::mul         5.80%       1.265ms         9.92%       2.163ms      11.267us     294.264us        19.83%     294.264us       1.533us           192  
-void at::native::index_elementwise_kernel<128, 4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     237.342us        15.99%     237.342us       4.945us            48  
-                                               aten::to         0.67%     145.268us        11.20%       2.441ms      14.275us       0.000us         0.00%     231.015us       1.351us           171  
-                                         aten::_to_copy         2.25%     489.538us        10.53%       2.296ms      18.665us       0.000us         0.00%     231.015us       1.878us           123  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     202.558us        13.65%     202.558us       1.688us           120  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us     167.074us        11.26%     167.074us       1.989us            84  
-                                       aten::contiguous         0.40%      86.639us         8.70%       1.898ms      19.769us       0.000us         0.00%     134.370us       1.400us            96  
-                                            aten::clone         0.85%     185.683us         8.31%       1.811ms      18.866us       0.000us         0.00%     134.370us       1.400us            96  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     134.370us         9.05%     134.370us       1.400us            96  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     115.390us         7.77%     115.390us       1.202us            96  
-                                          aten::__and__         0.63%     137.184us         4.49%     979.904us      11.666us       0.000us         0.00%     100.670us       1.198us            84  
-                                      aten::bitwise_and         2.39%     521.552us         3.87%     842.720us      10.032us     100.670us         6.78%     100.670us       1.198us            84  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     100.670us         6.78%     100.670us       1.198us            84  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      85.858us         5.78%      85.858us       1.192us            72  
-                                              aten::sub         2.24%     488.685us         3.68%     801.476us      11.132us      78.884us         5.32%      78.884us       1.096us            72  
-                                              aten::add         1.55%     338.597us         2.59%     564.753us       9.413us      74.082us         4.99%      74.082us       1.235us            60  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us      20.386ms      1374.87%      20.386ms      20.386ms             1  
+                                            torch_eager        20.04%       4.485ms        99.97%      22.369ms      22.369ms       0.000us         0.00%       1.484ms       1.484ms             1  
+                                            aten::index         4.49%       1.004ms        16.23%       3.633ms      75.679us     237.283us        16.00%     370.795us       7.725us            48  
+                                            aten::copy_         4.62%       1.034ms        11.24%       2.516ms      11.489us     365.611us        24.66%     365.611us       1.669us           219  
+                                              aten::mul         5.81%       1.299ms        10.43%       2.335ms      12.160us     293.820us        19.82%     293.820us       1.530us           192  
+void at::native::index_elementwise_kernel<128, 4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     237.283us        16.00%     237.283us       4.943us            48  
+                                               aten::to         0.57%     127.097us        11.08%       2.479ms      14.499us       0.000us         0.00%     232.099us       1.357us           171  
+                                         aten::_to_copy         2.30%     514.876us        10.51%       2.352ms      19.124us       0.000us         0.00%     232.099us       1.887us           123  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     202.015us        13.62%     202.015us       1.683us           120  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us     167.684us        11.31%     167.684us       1.996us            84  
+                                       aten::contiguous         0.35%      77.804us         8.37%       1.873ms      19.513us       0.000us         0.00%     133.512us       1.391us            96  
+                                            aten::clone         0.74%     165.226us         8.02%       1.795ms      18.702us       0.000us         0.00%     133.512us       1.391us            96  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     133.512us         9.00%     133.512us       1.391us            96  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     115.524us         7.79%     115.524us       1.203us            96  
+                                          aten::__and__         1.20%     268.284us         4.94%       1.105ms      13.160us       0.000us         0.00%      99.070us       1.179us            84  
+                                      aten::bitwise_and         2.22%     496.516us         3.74%     837.149us       9.966us      99.070us         6.68%      99.070us       1.179us            84  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      99.070us         6.68%      99.070us       1.179us            84  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      86.210us         5.81%      86.210us       1.197us            72  
+                                              aten::sub         2.17%     485.693us         3.77%     844.019us      11.722us      79.300us         5.35%      79.300us       1.101us            72  
+                                              aten::add         1.71%     382.016us         2.87%     642.388us      10.706us      74.367us         5.02%      74.367us       1.239us            60  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 21.803ms
-Self CUDA time total: 1.484ms
+Self CPU time total: 22.377ms
+Self CUDA time total: 1.483ms
 
 
 
@@ -4327,29 +4109,29 @@ PROFILE TRACE: torch_eager | cuda_B1_Q300_H8_E256_L4_P4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us      18.852ms      1182.31%      18.852ms      18.852ms             1  
-                                            torch_eager        20.99%       4.304ms        99.97%      20.495ms      20.495ms       0.000us         0.00%       1.595ms       1.595ms             1  
-                                            aten::index         4.61%     945.020us        16.80%       3.444ms      71.750us     251.167us        15.75%     382.850us       7.976us            48  
-                                            aten::copy_         5.04%       1.033ms        11.78%       2.414ms      11.023us     364.991us        22.89%     364.991us       1.667us           219  
-                                              aten::mul         5.94%       1.218ms        10.22%       2.095ms      10.911us     359.138us        22.52%     359.138us       1.871us           192  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     267.618us        16.78%     267.618us       2.230us           120  
-void at::native::index_elementwise_kernel<128, 4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     251.167us        15.75%     251.167us       5.233us            48  
-                                               aten::to         0.59%     120.975us        11.17%       2.290ms      13.390us       0.000us         0.00%     233.308us       1.364us           171  
-                                         aten::_to_copy         2.01%     411.895us        10.58%       2.169ms      17.632us       0.000us         0.00%     233.308us       1.897us           123  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us     168.797us        10.59%     168.797us       2.009us            84  
-                                       aten::contiguous         0.41%      84.261us         8.87%       1.818ms      18.936us       0.000us         0.00%     131.683us       1.372us            96  
-                                            aten::clone         0.84%     172.318us         8.46%       1.734ms      18.058us       0.000us         0.00%     131.683us       1.372us            96  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     131.683us         8.26%     131.683us       1.372us            96  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     118.123us         7.41%     118.123us       1.230us            96  
-                                          aten::__and__         0.40%      81.276us         4.41%     903.196us      10.752us       0.000us         0.00%     104.833us       1.248us            84  
-                                      aten::bitwise_and         2.46%     504.088us         4.01%     821.920us       9.785us     104.833us         6.57%     104.833us       1.248us            84  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     104.833us         6.57%     104.833us       1.248us            84  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     104.190us         6.53%     104.190us       1.447us            72  
-                                              aten::add         1.62%     331.582us         2.72%     557.857us       9.298us      91.491us         5.74%      91.491us       1.525us            60  
-                                              aten::sub         2.17%     445.533us         3.70%     758.959us      10.541us      80.509us         5.05%      80.509us       1.118us            72  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us      18.901ms      1183.82%      18.901ms      18.901ms             1  
+                                            torch_eager        19.58%       4.093ms        99.97%      20.894ms      20.894ms       0.000us         0.00%       1.598ms       1.598ms             1  
+                                            aten::index         4.47%     934.204us        16.39%       3.425ms      71.358us     251.679us        15.76%     384.126us       8.003us            48  
+                                            aten::copy_         4.82%       1.008ms        11.62%       2.429ms      11.090us     366.752us        22.97%     366.752us       1.675us           219  
+                                              aten::mul         6.02%       1.258ms        10.56%       2.208ms      11.499us     358.660us        22.46%     358.660us       1.868us           192  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     266.913us        16.72%     266.913us       2.224us           120  
+void at::native::index_elementwise_kernel<128, 4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     251.679us        15.76%     251.679us       5.243us            48  
+                                               aten::to         0.53%     111.534us        10.80%       2.257ms      13.199us       0.000us         0.00%     234.305us       1.370us           171  
+                                         aten::_to_copy         1.86%     389.526us        10.27%       2.146ms      17.443us       0.000us         0.00%     234.305us       1.905us           123  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us     169.699us        10.63%     169.699us       2.020us            84  
+                                       aten::contiguous         0.36%      76.248us         8.65%       1.808ms      18.835us       0.000us         0.00%     132.447us       1.380us            96  
+                                            aten::clone         0.75%     157.022us         8.29%       1.732ms      18.040us       0.000us         0.00%     132.447us       1.380us            96  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     132.447us         8.30%     132.447us       1.380us            96  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     117.700us         7.37%     117.700us       1.226us            96  
+                                          aten::__and__         0.39%      80.574us         4.34%     907.528us      10.804us       0.000us         0.00%     104.931us       1.249us            84  
+                                      aten::bitwise_and         2.39%     499.734us         3.96%     826.954us       9.845us     104.931us         6.57%     104.931us       1.249us            84  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     104.931us         6.57%     104.931us       1.249us            84  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     104.254us         6.53%     104.254us       1.448us            72  
+                                              aten::add         1.76%     366.940us         2.98%     622.302us      10.372us      91.679us         5.74%      91.679us       1.528us            60  
+                                              aten::sub         2.26%     472.751us         3.91%     817.040us      11.348us      80.412us         5.04%      80.412us       1.117us            72  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 20.501ms
-Self CUDA time total: 1.595ms
+Self CPU time total: 20.900ms
+Self CUDA time total: 1.597ms
 
 
 
@@ -4359,29 +4141,29 @@ PROFILE TRACE: torch_eager | cuda_B2_Q100_H8_E256_L4_P4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us      18.792ms      1222.95%      18.792ms      18.792ms             1  
-                                            torch_eager        21.02%       4.299ms        99.97%      20.449ms      20.449ms       0.000us         0.00%       1.538ms       1.538ms             1  
-                                            aten::index         4.62%     944.347us        16.78%       3.432ms      71.497us     243.904us        15.87%     378.785us       7.891us            48  
-                                            aten::copy_         5.14%       1.051ms        11.72%       2.396ms      10.942us     368.961us        24.01%     368.961us       1.685us           219  
-                                              aten::mul         5.96%       1.219ms        10.23%       2.092ms      10.898us     325.334us        21.17%     325.334us       1.694us           192  
-void at::native::index_elementwise_kernel<128, 4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     243.904us        15.87%     243.904us       5.081us            48  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     234.457us        15.26%     234.457us       1.954us           120  
-                                               aten::to         0.61%     125.558us        11.02%       2.255ms      13.184us       0.000us         0.00%     234.080us       1.369us           171  
-                                         aten::_to_copy         1.92%     392.900us        10.41%       2.129ms      17.309us       0.000us         0.00%     234.080us       1.903us           123  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us     169.246us        11.01%     169.246us       2.015us            84  
-                                       aten::contiguous         0.42%      85.559us         8.81%       1.802ms      18.772us       0.000us         0.00%     134.881us       1.405us            96  
-                                            aten::clone         0.80%     164.449us         8.39%       1.717ms      17.880us       0.000us         0.00%     134.881us       1.405us            96  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     134.881us         8.78%     134.881us       1.405us            96  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     115.650us         7.53%     115.650us       1.205us            96  
-                                          aten::__and__         0.39%      78.814us         4.36%     891.116us      10.609us       0.000us         0.00%     101.539us       1.209us            84  
-                                      aten::bitwise_and         2.44%     499.687us         3.97%     812.302us       9.670us     101.539us         6.61%     101.539us       1.209us            84  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     101.539us         6.61%     101.539us       1.209us            84  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      96.065us         6.25%      96.065us       1.334us            72  
-                                              aten::add         1.62%     331.717us         2.71%     554.333us       9.239us      83.900us         5.46%      83.900us       1.398us            60  
-                                              aten::sub         2.21%     451.413us         3.69%     755.537us      10.494us      79.361us         5.16%      79.361us       1.102us            72  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us      19.237ms      1248.03%      19.237ms      19.237ms             1  
+                                            torch_eager        19.69%       4.158ms        99.97%      21.112ms      21.112ms       0.000us         0.00%       1.542ms       1.542ms             1  
+                                            aten::index         4.41%     930.777us        16.28%       3.439ms      71.641us     244.707us        15.88%     379.074us       7.897us            48  
+                                            aten::copy_         4.79%       1.012ms        11.88%       2.509ms      11.455us     367.613us        23.85%     367.613us       1.679us           219  
+                                              aten::mul         6.03%       1.274ms        10.79%       2.279ms      11.869us     324.897us        21.08%     324.897us       1.692us           192  
+void at::native::index_elementwise_kernel<128, 4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     244.707us        15.88%     244.707us       5.098us            48  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     233.822us        15.17%     233.822us       1.949us           120  
+                                               aten::to         0.53%     111.710us        11.01%       2.324ms      13.591us       0.000us         0.00%     233.246us       1.364us           171  
+                                         aten::_to_copy         1.89%     399.701us        10.48%       2.212ms      17.986us       0.000us         0.00%     233.246us       1.896us           123  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us     168.798us        10.95%     168.798us       2.010us            84  
+                                       aten::contiguous         0.36%      76.215us         8.56%       1.808ms      18.834us       0.000us         0.00%     134.367us       1.400us            96  
+                                            aten::clone         0.70%     147.727us         8.20%       1.732ms      18.040us       0.000us         0.00%     134.367us       1.400us            96  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     134.367us         8.72%     134.367us       1.400us            96  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     116.097us         7.53%     116.097us       1.209us            96  
+                                          aten::__and__         0.38%      80.351us         4.40%     929.654us      11.067us       0.000us         0.00%     104.257us       1.241us            84  
+                                      aten::bitwise_and         2.34%     493.964us         4.02%     849.303us      10.111us     104.257us         6.76%     104.257us       1.241us            84  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     104.257us         6.76%     104.257us       1.241us            84  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      96.124us         6.24%      96.124us       1.335us            72  
+                                              aten::add         1.63%     344.862us         2.97%     627.717us      10.462us      83.898us         5.44%      83.898us       1.398us            60  
+                                              aten::sub         2.25%     476.045us         3.91%     826.060us      11.473us      79.295us         5.14%      79.295us       1.101us            72  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 20.454ms
-Self CUDA time total: 1.537ms
+Self CPU time total: 21.118ms
+Self CUDA time total: 1.541ms
 
 
 
@@ -4391,36 +4173,36 @@ PROFILE TRACE: torch_eager | cuda_B2_Q300_H8_E256_L4_P4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us      19.115ms      1086.36%      19.115ms      19.115ms             1  
-                                            torch_eager        21.90%       4.346ms        99.98%      19.842ms      19.842ms       0.000us         0.00%       1.761ms       1.761ms             1  
-                                              aten::mul         6.18%       1.226ms        10.60%       2.104ms      10.960us     450.887us        25.63%     450.887us       2.348us           192  
-                                            aten::index         4.92%     977.403us        17.78%       3.530ms      73.537us     282.433us        16.05%     420.451us       8.759us            48  
-                                            aten::copy_         5.20%       1.031ms        12.05%       2.392ms      10.922us     372.637us        21.18%     372.637us       1.702us           219  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     357.955us        20.34%     357.955us       2.983us           120  
-void at::native::index_elementwise_kernel<128, 4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     282.433us        16.05%     282.433us       5.884us            48  
-                                               aten::to         0.65%     128.684us        11.66%       2.315ms      13.536us       0.000us         0.00%     234.619us       1.372us           171  
-                                         aten::_to_copy         2.23%     442.466us        11.01%       2.186ms      17.772us       0.000us         0.00%     234.619us       1.907us           123  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us     170.397us         9.68%     170.397us       2.029us            84  
-                                       aten::contiguous         0.44%      87.582us         9.26%       1.837ms      19.140us       0.000us         0.00%     138.018us       1.438us            96  
-                                            aten::clone         0.85%     168.452us         8.82%       1.750ms      18.228us       0.000us         0.00%     138.018us       1.438us            96  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     138.018us         7.84%     138.018us       1.438us            96  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     129.055us         7.33%     129.055us       1.792us            72  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     117.244us         6.66%     117.244us       1.221us            96  
-                                              aten::add         1.68%     334.180us         2.81%     557.305us       9.288us     113.660us         6.46%     113.660us       1.894us            60  
-                                          aten::__and__         0.41%      80.800us         4.55%     902.601us      10.745us       0.000us         0.00%     105.726us       1.259us            84  
-                                      aten::bitwise_and         2.56%     508.561us         4.14%     821.801us       9.783us     105.726us         6.01%     105.726us       1.259us            84  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     105.726us         6.01%     105.726us       1.259us            84  
-                                              aten::sub         2.25%     446.108us         3.80%     754.277us      10.476us      82.273us         4.68%      82.273us       1.143us            72  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us      19.519ms      1100.37%      19.519ms      19.519ms             1  
+                                            torch_eager        20.47%       4.142ms        99.97%      20.229ms      20.229ms       0.000us         0.00%       1.775ms       1.775ms             1  
+                                              aten::mul         6.23%       1.261ms        11.26%       2.279ms      11.871us     452.223us        25.49%     452.223us       2.355us           192  
+                                            aten::index         5.19%       1.050ms        17.90%       3.622ms      75.460us     284.479us        16.04%     422.205us       8.796us            48  
+                                            aten::copy_         4.94%       1.000ms        12.35%       2.500ms      11.414us     371.807us        20.96%     371.807us       1.698us           219  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     357.379us        20.15%     357.379us       2.978us           120  
+void at::native::index_elementwise_kernel<128, 4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     284.479us        16.04%     284.479us       5.927us            48  
+                                               aten::to         0.55%     111.602us        11.50%       2.327ms      13.611us       0.000us         0.00%     234.081us       1.369us           171  
+                                         aten::_to_copy         2.05%     415.176us        10.95%       2.216ms      18.015us       0.000us         0.00%     234.081us       1.903us           123  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us     168.127us         9.48%     168.127us       2.002us            84  
+                                       aten::contiguous         0.39%      79.104us         9.03%       1.827ms      19.029us       0.000us         0.00%     137.726us       1.435us            96  
+                                            aten::clone         0.75%     151.809us         8.64%       1.748ms      18.205us       0.000us         0.00%     137.726us       1.435us            96  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     137.726us         7.76%     137.726us       1.435us            96  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     129.254us         7.29%     129.254us       1.795us            72  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     120.034us         6.77%     120.034us       1.250us            96  
+                                              aten::add         1.70%     344.853us         3.02%     611.127us      10.185us     113.603us         6.40%     113.603us       1.893us            60  
+                                          aten::__and__         0.42%      84.251us         4.73%     957.185us      11.395us       0.000us         0.00%     108.833us       1.296us            84  
+                                      aten::bitwise_and         2.53%     511.745us         4.31%     872.934us      10.392us     108.833us         6.14%     108.833us       1.296us            84  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     108.833us         6.14%     108.833us       1.296us            84  
+                                              aten::sub         2.33%     472.119us         4.10%     828.789us      11.511us      84.547us         4.77%      84.547us       1.174us            72  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 19.847ms
-Self CUDA time total: 1.760ms
+Self CPU time total: 20.235ms
+Self CUDA time total: 1.774ms
 
 
 impl                     wl                  p50(ms)  ok
-torch_eager              cuda_B1_Q100_H8_E256_L4_P4     3.39  True
+torch_eager              cuda_B1_Q100_H8_E256_L4_P4     3.28  True
 torch_eager              cuda_B1_Q300_H8_E256_L4_P4     4.01  True
-torch_eager              cuda_B2_Q100_H8_E256_L4_P4     4.02  True
-torch_eager              cuda_B2_Q300_H8_E256_L4_P4     4.02  True
+torch_eager              cuda_B2_Q100_H8_E256_L4_P4     4.03  True
+torch_eager              cuda_B2_Q300_H8_E256_L4_P4     4.14  True
 

Artifacts:

diff --git a/deformable_detr/results/artifacts/combine/latency.svg b/deformable_detr/results/artifacts/combine/latency.svg index cfe61b52935bc93cabc302ceb7b7fc02981aa5f7..7ecdecb8f18da4b88afe110c2e3a302d7ba3b552 100644 --- a/deformable_detr/results/artifacts/combine/latency.svg +++ b/deformable_detr/results/artifacts/combine/latency.svg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b38828b5c85834f31812d3f314ebdc3cc2e8481610a6d31b84a4f9b0ad78c0f2 -size 17800 +oid sha256:695c4932ff6f8af1541de47b9136d11cce1234f978728ac618aa9e13e86e4875 +size 14867 diff --git a/deformable_detr/results/combined_results.html b/deformable_detr/results/combined_results.html index a985624a2d9079877fe0cd1dcdefc5494402713c..ec99a4cbffe2279f2769dc3e53fbff9647757bd5 100644 --- a/deformable_detr/results/combined_results.html +++ b/deformable_detr/results/combined_results.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; 
--border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: 
var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } 
- :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ 
:root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] 
.menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: 
auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ 
+ cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: 
transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 
0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { 
background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid 
var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: 
var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight 
.cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ 
+[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ 
+[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { 
color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* 
Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] 
.highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: 
#AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - - ; -} - - { - % else % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - - px; -} - { - % endif % -} - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { 
border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 
0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4107,7 +3889,7 @@ body[data-tool="eraser"] .main-content { - 2025-10-31T20:14:23.345627 + 2025-11-10T22:11:56.387573 image/svg+xml @@ -4126,260 +3908,208 @@ body[data-tool="eraser"] .main-content { - + - + - + - cuda_B1_Q100_H8_E256_L4_P4 + cuda_B1_Q100_H8_E256_L4_P4 - + - + - cuda_B1_Q300_H8_E256_L4_P4 + cuda_B1_Q300_H8_E256_L4_P4 - + - + - cuda_B2_Q100_H8_E256_L4_P4 + cuda_B2_Q100_H8_E256_L4_P4 - + - + - cuda_B2_Q300_H8_E256_L4_P4 + cuda_B2_Q300_H8_E256_L4_P4 - Workload + Workload - + - + - 0.0 + 0 - + - + - 0.5 + 1 - + - + - 1.0 + 2 - + - + - 1.5 + 3 - + - + - 2.0 - - - - - - - - - - - - - 2.5 - - - - - - - - - - - - - 3.0 - - - - - - - - - - - - - 3.5 - - - - - - - - - - - - - 4.0 + 4 - Latency P50 (ms) + Latency P50 (ms) - + - - - - - + + + + + - + - - - - - + + + + + - + - + - + - - Attention Implementation Latency + + Attention Implementation Latency - + - - + + - + - hf_kernels_deformable_detr + hf_kernels_deformable_detr - - + + - + - torch_eager + torch_eager - - + + @@ -4392,7 +4122,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: combine | 4.34s +Cell: combine | 4.45s | Raw @@ -4479,14 +4209,14 @@ Summary: 2 found, 0 skipped, 0 missing COMBINED BENCHMARK SUMMARY impl wl p50(ms) ok -hf_kernels_deformable_detr cuda_B1_Q100_H8_E256_L4_P4 0.04 True -hf_kernels_deformable_detr cuda_B1_Q300_H8_E256_L4_P4 0.05 True -hf_kernels_deformable_detr cuda_B2_Q100_H8_E256_L4_P4 0.05 True +hf_kernels_deformable_detr cuda_B1_Q100_H8_E256_L4_P4 0.03 True +hf_kernels_deformable_detr cuda_B1_Q300_H8_E256_L4_P4 0.04 True +hf_kernels_deformable_detr cuda_B2_Q100_H8_E256_L4_P4 0.04 True hf_kernels_deformable_detr cuda_B2_Q300_H8_E256_L4_P4 0.05 True -torch_eager cuda_B1_Q100_H8_E256_L4_P4 3.39 True +torch_eager cuda_B1_Q100_H8_E256_L4_P4 3.28 True torch_eager cuda_B1_Q300_H8_E256_L4_P4 4.01 True -torch_eager cuda_B2_Q100_H8_E256_L4_P4 4.02 True -torch_eager cuda_B2_Q300_H8_E256_L4_P4 4.02 True +torch_eager cuda_B2_Q100_H8_E256_L4_P4 4.03 True 
+torch_eager cuda_B2_Q300_H8_E256_L4_P4 4.14 True GENERATING COMBINED VISUALIZATION @@ -4506,7 +4236,7 @@ Implementations included:
▶ UV Install Logs
@@ -4519,7 +4249,7 @@ Installed 37 packages in 216ms - 2025-10-31T20:14:23.345627 + 2025-11-10T22:11:56.387573 image/svg+xml @@ -4538,260 +4268,208 @@ Installed 37 packages in 216ms - + - + - + - cuda_B1_Q100_H8_E256_L4_P4 + cuda_B1_Q100_H8_E256_L4_P4 - + - + - cuda_B1_Q300_H8_E256_L4_P4 + cuda_B1_Q300_H8_E256_L4_P4 - + - + - cuda_B2_Q100_H8_E256_L4_P4 + cuda_B2_Q100_H8_E256_L4_P4 - + - + - cuda_B2_Q300_H8_E256_L4_P4 + cuda_B2_Q300_H8_E256_L4_P4 - Workload + Workload - + - + - 0.0 + 0 - + - + - 0.5 + 1 - + - + - 1.0 + 2 - + - + - 1.5 + 3 - + - + - 2.0 - - - - - - - - - - - - - 2.5 - - - - - - - - - - - - - 3.0 - - - - - - - - - - - - - 3.5 - - - - - - - - - - - - - 4.0 + 4 - Latency P50 (ms) + Latency P50 (ms) - + - - - - - + + + + + - + - - - - - + + + + + - + - + - + - - Attention Implementation Latency + + Attention Implementation Latency - + - - + + - + - hf_kernels_deformable_detr + hf_kernels_deformable_detr - - + + - + - torch_eager + torch_eager - - + + diff --git a/flash_attn/impls/artifacts/benchmark/attention.jsonl b/flash_attn/impls/artifacts/benchmark/attention.jsonl index d381f496ddfa4abddae090de1e302f3856ab3fc4..995c497b8b178232e826548029cfd2170a075a08 100644 --- a/flash_attn/impls/artifacts/benchmark/attention.jsonl +++ b/flash_attn/impls/artifacts/benchmark/attention.jsonl @@ -1,6 +1,6 @@ -{"ts": "2025-10-31T20:13:46Z", "run": "0cb1af490a594cbca21d4dd4012a3c10", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "cuda_attn_L128_bfloat16", "batch": 1, "seq_len": 4224, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.208432000112225, "p50": 1.215130999980829, "p90": 1.2198710001030122, "mean": 1.215487200033749, "iqr": 0.006680000069536618, "raw_times": 
[1.2208109999392036, 1.208432000112225, 1.2198710001030122, 1.2131910000334756, 1.215130999980829], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.2240119999660237, "peak_bytes": 295567360, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.000354766845703125, "mse": 2.7567148208618164e-06, "ref": "sdpa_math_fp32"}, "err": null} -{"ts": "2025-10-31T20:13:46Z", "run": "0cb1af490a594cbca21d4dd4012a3c10", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "cuda_attn_L256_bfloat16", "batch": 1, "seq_len": 4352, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.26713200006634, "p50": 1.2766830000146001, "p90": 1.277253000125711, "mean": 1.2749268000789016, "iqr": 0.004750000016429112, "raw_times": [1.277253000125711, 1.26713200006634, 1.2766830000146001, 1.281063000078575, 1.2725030001092819], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.2717629999769997, "peak_bytes": 304742400, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.000354766845703125, "mse": 2.7418136596679688e-06, "ref": "sdpa_math_fp32"}, "err": null} -{"ts": "2025-10-31T20:13:46Z", "run": "0cb1af490a594cbca21d4dd4012a3c10", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "cuda_attn_L320_bfloat16", "batch": 1, "seq_len": 4416, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 
1.2928539999847999, "p50": 1.3003640001443273, "p90": 1.3163240000721999, "mean": 1.3067478000721167, "iqr": 0.01689100008661626, "raw_times": [1.3003640001443273, 1.2928539999847999, 1.2994329999855836, 1.3163240000721999, 1.3247640001736727], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.3026630001604644, "peak_bytes": 307494912, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.000354766845703125, "mse": 2.7567148208618164e-06, "ref": "sdpa_math_fp32"}, "err": null} -{"ts": "2025-10-31T20:13:46Z", "run": "0cb1af490a594cbca21d4dd4012a3c10", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "cuda_attn_L384_bfloat16", "batch": 1, "seq_len": 4480, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.3232850001259067, "p50": 1.3295650001055037, "p90": 1.3361950000216893, "mean": 1.332684600038192, "iqr": 0.007890999995652237, "raw_times": [1.328304000026037, 1.3361950000216893, 1.3295650001055037, 1.3232850001259067, 1.3460739999118232], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.3245140000890387, "peak_bytes": 311296000, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.000354766845703125, "mse": 2.7418136596679688e-06, "ref": "sdpa_math_fp32"}, "err": null} -{"ts": "2025-10-31T20:13:46Z", "run": "0cb1af490a594cbca21d4dd4012a3c10", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "cuda_attn_L448_bfloat16", "batch": 1, "seq_len": 4544, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", 
"gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.4790479999646777, "p50": 1.4950690001569455, "p90": 1.4989779999723396, "mean": 1.4914904000306706, "iqr": 0.017840000055002747, "raw_times": [1.5032190001420531, 1.4950690001569455, 1.4790479999646777, 1.4811379999173369, 1.4989779999723396], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.5107090000583412, "peak_bytes": 315621376, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.000354766845703125, "mse": 2.7418136596679688e-06, "ref": "sdpa_math_fp32"}, "err": null} -{"ts": "2025-10-31T20:13:46Z", "run": "0cb1af490a594cbca21d4dd4012a3c10", "impl": "torch_flash_ma", "tags": {"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, "wl": {"name": "cuda_attn_L512_bfloat16", "batch": 1, "seq_len": 4608, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.511368999899787, "p50": 1.5117090001695033, "p90": 1.512698999931672, "mean": 1.516499199988175, "iqr": 0.00113999999484804, "raw_times": [1.511368999899787, 1.512698999931672, 1.5117090001695033, 1.511558999936824, 1.5351600000030885], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.5183190000698232, "peak_bytes": 319946752, "ok": true, "absmax": 0.125, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.125, "mae": 0.0003566741943359375, "mse": 2.7567148208618164e-06, "ref": "sdpa_math_fp32"}, "err": null} +{"ts": "2025-11-10T22:11:46Z", "run": "8d69ef94c7594eb581f8f1e4fd6b3eef", "impl": "hf_kernels_flash_attn3", "tags": {"family": "hf-kernels", "backend": "flash-attn3", "compile": "none"}, "wl": {"name": "cuda_attn_L128_bfloat16", "batch": 1, 
"seq_len": 4224, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.9336540001640969, "p50": 0.938484000016615, "p90": 0.9400730000379554, "mean": 0.9383200000229408, "iqr": 0.00204800016945228, "raw_times": [0.9413640000275336, 0.938484000016615, 0.9380249998685031, 0.9400730000379554, 0.9336540001640969], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.9436739999273414, "peak_bytes": 295567360, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.0003604888916015625, "mse": 2.8014183044433594e-06, "ref": "sdpa_math_fp32"}, "err": null} +{"ts": "2025-11-10T22:11:46Z", "run": "8d69ef94c7594eb581f8f1e4fd6b3eef", "impl": "hf_kernels_flash_attn3", "tags": {"family": "hf-kernels", "backend": "flash-attn3", "compile": "none"}, "wl": {"name": "cuda_attn_L256_bfloat16", "batch": 1, "seq_len": 4352, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.9720749999360123, "p50": 0.9796540000479581, "p90": 0.9886739999274141, "mean": 0.9813904000111506, "iqr": 0.011588999768719077, "raw_times": [0.9894639999856736, 0.9720749999360123, 0.9886739999274141, 0.977085000158695, 0.9796540000479581], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.9749249998094456, "peak_bytes": 304742400, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.00035858154296875, "mse": 2.7865171432495117e-06, "ref": "sdpa_math_fp32"}, "err": null} +{"ts": "2025-11-10T22:11:46Z", "run": "8d69ef94c7594eb581f8f1e4fd6b3eef", "impl": 
"hf_kernels_flash_attn3", "tags": {"family": "hf-kernels", "backend": "flash-attn3", "compile": "none"}, "wl": {"name": "cuda_attn_L320_bfloat16", "batch": 1, "seq_len": 4416, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.026366000132839, "p50": 1.0466650001035305, "p90": 1.048316000151317, "mean": 1.0439156000302319, "iqr": 0.012310000329307513, "raw_times": [1.0622249999414635, 1.0466650001035305, 1.026366000132839, 1.048316000151317, 1.0360059998220095], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.0340549999909854, "peak_bytes": 307494912, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.00035858154296875, "mse": 2.7865171432495117e-06, "ref": "sdpa_math_fp32"}, "err": null} +{"ts": "2025-11-10T22:11:46Z", "run": "8d69ef94c7594eb581f8f1e4fd6b3eef", "impl": "hf_kernels_flash_attn3", "tags": {"family": "hf-kernels", "backend": "flash-attn3", "compile": "none"}, "wl": {"name": "cuda_attn_L384_bfloat16", "batch": 1, "seq_len": 4480, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.031215000011798, "p50": 1.0415550000288931, "p90": 1.0416950001399528, "mean": 1.0391672000423569, "iqr": 0.009410000075149583, "raw_times": [1.031215000011798, 1.0322850000648032, 1.0416950001399528, 1.0415550000288931, 1.0490859999663371], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.0389750000285858, "peak_bytes": 311296000, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.00035858154296875, 
"mse": 2.7865171432495117e-06, "ref": "sdpa_math_fp32"}, "err": null} +{"ts": "2025-11-10T22:11:46Z", "run": "8d69ef94c7594eb581f8f1e4fd6b3eef", "impl": "hf_kernels_flash_attn3", "tags": {"family": "hf-kernels", "backend": "flash-attn3", "compile": "none"}, "wl": {"name": "cuda_attn_L448_bfloat16", "batch": 1, "seq_len": 4544, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.2092779998056358, "p50": 1.2178079998648172, "p90": 1.2180080000234739, "mean": 1.21775799993884, "iqr": 0.002989999984492897, "raw_times": [1.2286779999612918, 1.2180080000234739, 1.2178079998648172, 1.215018000038981, 1.2092779998056358], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.2385289999201632, "peak_bytes": 315621376, "ok": true, "absmax": 0.0625, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.0625, "mae": 0.00035858154296875, "mse": 2.7865171432495117e-06, "ref": "sdpa_math_fp32"}, "err": null} +{"ts": "2025-11-10T22:11:47Z", "run": "8d69ef94c7594eb581f8f1e4fd6b3eef", "impl": "hf_kernels_flash_attn3", "tags": {"family": "hf-kernels", "backend": "flash-attn3", "compile": "none"}, "wl": {"name": "cuda_attn_L512_bfloat16", "batch": 1, "seq_len": 4608, "heads": 24, "head_dim": 128, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.203338000095755, "p50": 1.2106680001124914, "p90": 1.218707999896651, "mean": 1.2165860000095563, "iqr": 0.014340000006995979, "raw_times": [1.204367999889655, 1.218707999896651, 1.203338000095755, 1.2106680001124914, 1.245848000053229], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.2345879999884346, 
"peak_bytes": 319946752, "ok": true, "absmax": 0.125, "corr": {"ok": true, "rtol": 0.02, "atol": 0.02, "absmax": 0.125, "mae": 0.000362396240234375, "mse": 2.8014183044433594e-06, "ref": "sdpa_math_fp32"}, "err": null} diff --git a/flash_attn/impls/cells/benchmark.py b/flash_attn/impls/cells/benchmark.py index 8f163bdd918898ced9e858cd4197a85572d7ec8e..15f02e2ed444e10eba9708f3f69247414b6c962b 100644 --- a/flash_attn/impls/cells/benchmark.py +++ b/flash_attn/impls/cells/benchmark.py @@ -4,6 +4,7 @@ # "numpy", # "torch==2.8.0", # "kernels-benchmark-tools", +# "kernels", # ] # # [tool.uv.sources] @@ -12,18 +13,19 @@ import torch import sys from kernels_benchmark_tools import KernelTypeEnum, run_benchmark +from kernels import get_kernel +# Load the flash attention 3 kernel +hf_kernels_flash_attn3 = get_kernel("kernels-community/flash-attn3") -def torch_flash(q, k, v): - qt, kt, vt = (x.transpose(1, 2).contiguous() for x in (q, k, v)) - with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.FLASH_ATTENTION): - o = torch.nn.functional.scaled_dot_product_attention(qt, kt, vt) - return o.transpose(1, 2).contiguous() + +def hf_flash_attention3(query, key, value): + return hf_kernels_flash_attn3.flash_attn_func(query, key, value, causal=False)[0] run_benchmark( kernel_type=KernelTypeEnum.ATTENTION, - impl_name="torch_flash_ma", - impl_tags={"family": "torch-sdpa", "backend": "FLASH", "compile": "max-autotune"}, - impl_func=torch_flash, + impl_name="hf_kernels_flash_attn3", + impl_tags={"family": "hf-kernels", "backend": "flash-attn3", "compile": "none"}, + impl_func=hf_flash_attention3, ) \ No newline at end of file diff --git a/flash_attn/impls/flash_attention.html b/flash_attn/impls/flash_attention.html index 1852a8c0fb83365b1e619b7e38354ebd1d45d747..0416b41e5df5afb978df23654a2c3b661c23a59b 100644 --- a/flash_attn/impls/flash_attention.html +++ b/flash_attn/impls/flash_attention.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if 
(fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: 
var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } 
+:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { 
color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ 
body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide 
widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ 
body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } 
+:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ 
- overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: 
background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ 
-1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', 
monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; 
padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] 
.highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* 
Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] 
.highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* 
Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: 
#66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ 
+[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - - ; -} - - { - % else % -} -:root { - 
--code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; 
cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4106,7 +3888,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: nv | 0.21s +Cell: nv | 0.26s | Raw @@ -4123,16 +3905,16 @@ Cell: nv | 0.21s
-
Fri Oct 31 20:13:43 2025       
+
Mon Nov 10 21:58:51 2025       
 +-----------------------------------------------------------------------------------------+
-| NVIDIA-SMI 570.195.03             Driver Version: 570.195.03     CUDA Version: 12.8     |
-|-----------------------------------------+------------------------+----------------------+
+| NVIDIA-SMI 580.95.05              Driver Version: 580.95.05      CUDA Version: 13.0     |
++-----------------------------------------+------------------------+----------------------+
 | GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
 | Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
 |                                         |                        |               MIG M. |
 |=========================================+========================+======================|
 |   0  NVIDIA L40S                    On  |   00000000:4D:00.0 Off |                    0 |
-| N/A   43C    P0             83W /  350W |       0MiB /  46068MiB |     11%      Default |
+| N/A   32C    P0            139W /  350W |       0MiB /  46068MiB |     83%      Default |
 |                                         |                        |                  N/A |
 +-----------------------------------------+------------------------+----------------------+
 
@@ -4156,7 +3938,7 @@ Cell: nv | 0.21s
 ▼ output
  ▶ uv-logs
  | 
-Cell: benchmark | 3.87s
+Cell: benchmark | 4.03s
  | 
 
 Raw
@@ -4207,29 +3989,29 @@ PROFILE TRACE: torch_flash_ma | cuda_attn_L128_bfloat16
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                         torch_flash_ma         0.00%       0.000us         0.00%       0.000us       0.000us       3.600ms       101.99%       3.600ms       3.600ms             1  
-                                         torch_flash_ma         6.70%     350.157us        46.68%       2.439ms       2.439ms       0.000us         0.00%       3.570ms       3.570ms             1  
-                     aten::scaled_dot_product_attention         0.81%      42.281us         4.26%     222.626us      74.209us       0.000us         0.00%       2.816ms     938.781us             3  
-              aten::_scaled_dot_product_flash_attention         0.52%      27.002us         3.45%     180.345us      60.115us       0.000us         0.00%       2.816ms     938.781us             3  
-                         aten::_flash_attention_forward         0.79%      41.210us         2.54%     132.453us      44.151us       2.816ms        79.78%       2.816ms     938.781us             3  
-void pytorch_flash::flash_fwd_kernel<Flash_fwd_kerne...         0.00%       0.000us         0.00%       0.000us       0.000us       2.816ms        79.78%       2.816ms     938.781us             3  
-                                       aten::contiguous         0.29%      15.041us        34.44%       1.800ms     149.962us       0.000us         0.00%     753.884us      62.824us            12  
-                                            aten::clone         0.75%      38.969us        34.15%       1.785ms     148.709us       0.000us         0.00%     753.884us      62.824us            12  
-                                            aten::copy_         1.73%      90.324us        31.78%       1.661ms     138.388us     713.788us        20.22%     753.884us      62.824us            12  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     713.788us        20.22%     713.788us      59.482us            12  
-                                Activity Buffer Request        28.08%       1.467ms        28.08%       1.467ms       1.467ms      40.096us         1.14%      40.096us      40.096us             1  
-                                        aten::transpose         1.25%      65.371us         1.68%      87.543us       3.648us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.42%      22.172us         0.42%      22.172us       0.924us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::empty_like         0.53%      27.463us         2.06%     107.524us       7.168us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.78%      93.220us         1.78%      93.220us       3.884us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         2.49%     130.035us         2.49%     130.035us       8.669us       0.000us         0.00%       0.000us       0.000us            15  
-                                    aten::empty_strided         0.32%      16.730us         0.32%      16.730us       5.577us       0.000us         0.00%       0.000us       0.000us             3  
-                                 cudaDeviceGetAttribute         0.05%       2.690us         0.05%       2.690us       0.448us       0.000us         0.00%       0.000us       0.000us             6  
-                                   cudaFuncSetAttribute         0.17%       9.000us         0.17%       9.000us       3.000us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize        53.32%       2.786ms        53.32%       2.786ms       2.786ms       0.000us         0.00%       0.000us       0.000us             1  
+                                         torch_flash_ma         0.00%       0.000us         0.00%       0.000us       0.000us       3.628ms       101.57%       3.628ms       3.628ms             1  
+                                         torch_flash_ma         5.67%     314.697us        48.49%       2.689ms       2.689ms       0.000us         0.00%       3.612ms       3.612ms             1  
+                     aten::scaled_dot_product_attention         0.72%      39.870us         3.84%     213.234us      71.078us       0.000us         0.00%       2.845ms     948.416us             3  
+              aten::_scaled_dot_product_flash_attention         0.43%      24.020us         3.13%     173.364us      57.788us       0.000us         0.00%       2.845ms     948.416us             3  
+                         aten::_flash_attention_forward         0.70%      39.034us         2.33%     129.042us      43.014us       2.845ms        79.65%       2.845ms     948.416us             3  
+void pytorch_flash::flash_fwd_kernel<Flash_fwd_kerne...         0.00%       0.000us         0.00%       0.000us       0.000us       2.845ms        79.65%       2.845ms     948.416us             3  
+                                       aten::contiguous         0.22%      12.191us        37.88%       2.101ms     175.086us       0.000us         0.00%     766.879us      63.907us            12  
+                                            aten::clone         0.59%      32.480us        37.66%       2.089ms     174.070us       0.000us         0.00%     766.879us      63.907us            12  
+                                            aten::copy_         1.56%      86.776us        35.66%       1.978ms     164.799us     726.879us        20.35%     766.879us      63.907us            12  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     726.879us        20.35%     726.879us      60.573us            12  
+                                Activity Buffer Request        32.26%       1.789ms        32.26%       1.789ms       1.789ms      40.000us         1.12%      40.000us      40.000us             1  
+                                        aten::transpose         1.07%      59.612us         1.46%      80.772us       3.365us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.38%      21.160us         0.38%      21.160us       0.882us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::empty_like         0.40%      22.459us         1.80%      99.659us       6.644us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         1.66%      92.037us         1.66%      92.037us       3.835us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         2.29%     126.900us         2.29%     126.900us       8.460us       0.000us         0.00%       0.000us       0.000us            15  
+                                    aten::empty_strided         0.28%      15.620us         0.28%      15.620us       5.207us       0.000us         0.00%       0.000us       0.000us             3  
+                                 cudaDeviceGetAttribute         0.04%       2.280us         0.04%       2.280us       0.380us       0.000us         0.00%       0.000us       0.000us             6  
+                                   cudaFuncSetAttribute         0.20%      11.200us         0.20%      11.200us       3.733us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize        51.51%       2.857ms        51.51%       2.857ms       2.857ms       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 5.225ms
-Self CUDA time total: 3.530ms
+Self CPU time total: 5.546ms
+Self CUDA time total: 3.572ms
 
 
 
@@ -4239,29 +4021,29 @@ PROFILE TRACE: torch_flash_ma | cuda_attn_L256_bfloat16
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                         torch_flash_ma         4.88%     260.255us        42.26%       2.252ms       2.252ms       0.000us         0.00%       3.798ms       3.798ms             1  
-                                         torch_flash_ma         0.00%       0.000us         0.00%       0.000us       0.000us       3.753ms       100.28%       3.753ms       3.753ms             1  
-                     aten::scaled_dot_product_attention         0.49%      25.890us         3.50%     186.735us      62.245us       0.000us         0.00%       2.976ms     991.858us             3  
-              aten::_scaled_dot_product_flash_attention         0.33%      17.842us         3.02%     160.845us      53.615us       0.000us         0.00%       2.976ms     991.858us             3  
-                         aten::_flash_attention_forward         0.74%      39.289us         2.26%     120.363us      40.121us       2.976ms        79.51%       2.976ms     991.858us             3  
-void pytorch_flash::flash_fwd_kernel<Flash_fwd_kerne...         0.00%       0.000us         0.00%       0.000us       0.000us       2.976ms        79.51%       2.976ms     991.858us             3  
-                                       aten::contiguous         0.20%      10.403us        33.03%       1.760ms     146.680us       0.000us         0.00%     822.042us      68.504us            12  
-                                            aten::clone         0.53%      28.238us        32.84%       1.750ms     145.813us       0.000us         0.00%     822.042us      68.504us            12  
-                                            aten::copy_         1.51%      80.312us        31.12%       1.659ms     138.210us     766.874us        20.49%     822.042us      68.504us            12  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     766.874us        20.49%     766.874us      63.906us            12  
-                                Activity Buffer Request        28.02%       1.493ms        28.02%       1.493ms       1.493ms      55.168us         1.47%      55.168us      55.168us             1  
-                                        aten::transpose         0.94%      50.313us         1.27%      67.673us       2.820us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.33%      17.360us         0.33%      17.360us       0.723us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::empty_like         0.40%      21.528us         1.56%      83.370us       5.558us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.43%      76.263us         1.43%      76.263us       3.178us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         2.08%     110.943us         2.08%     110.943us       7.396us       0.000us         0.00%       0.000us       0.000us            15  
-                                    aten::empty_strided         0.27%      14.621us         0.27%      14.621us       4.874us       0.000us         0.00%       0.000us       0.000us             3  
-                                 cudaDeviceGetAttribute         0.03%       1.781us         0.03%       1.781us       0.297us       0.000us         0.00%       0.000us       0.000us             6  
-                                   cudaFuncSetAttribute         0.08%       4.011us         0.08%       4.011us       1.337us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize        57.74%       3.077ms        57.74%       3.077ms       3.077ms       0.000us         0.00%       0.000us       0.000us             1  
+                                         torch_flash_ma         4.57%     259.472us        46.25%       2.626ms       2.626ms       0.000us         0.00%       3.786ms       3.786ms             1  
+                                         torch_flash_ma         0.00%       0.000us         0.00%       0.000us       0.000us       3.742ms       100.27%       3.742ms       3.742ms             1  
+                     aten::scaled_dot_product_attention         0.42%      24.011us         3.41%     193.713us      64.571us       0.000us         0.00%       2.968ms     989.492us             3  
+              aten::_scaled_dot_product_flash_attention         0.33%      18.660us         2.99%     169.702us      56.567us       0.000us         0.00%       2.968ms     989.492us             3  
+                         aten::_flash_attention_forward         0.83%      47.240us         2.21%     125.672us      41.891us       2.968ms        79.55%       2.968ms     989.492us             3  
+void pytorch_flash::flash_fwd_kernel<Flash_fwd_kerne...         0.00%       0.000us         0.00%       0.000us       0.000us       2.968ms        79.55%       2.968ms     989.492us             3  
+                                       aten::contiguous         0.19%      10.613us        37.48%       2.128ms     177.333us       0.000us         0.00%     817.342us      68.112us            12  
+                                            aten::clone         0.52%      29.369us        37.29%       2.117ms     176.448us       0.000us         0.00%     817.342us      68.112us            12  
+                                            aten::copy_         1.41%      80.272us        35.64%       2.023ms     168.619us     762.942us        20.45%     817.342us      68.112us            12  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     762.942us        20.45%     762.942us      63.579us            12  
+                                Activity Buffer Request        32.67%       1.855ms        32.67%       1.855ms       1.855ms      54.400us         1.46%      54.400us      54.400us             1  
+                                        aten::transpose         0.90%      51.353us         1.23%      69.912us       2.913us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.33%      18.559us         0.33%      18.559us       0.773us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::empty_like         0.37%      20.909us         1.47%      83.391us       5.559us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         1.39%      78.982us         1.39%      78.982us       3.291us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         1.94%     110.382us         1.94%     110.382us       7.359us       0.000us         0.00%       0.000us       0.000us            15  
+                                    aten::empty_strided         0.24%      13.461us         0.24%      13.461us       4.487us       0.000us         0.00%       0.000us       0.000us             3  
+                                 cudaDeviceGetAttribute         0.05%       2.710us         0.05%       2.710us       0.452us       0.000us         0.00%       0.000us       0.000us             6  
+                                   cudaFuncSetAttribute         0.09%       4.940us         0.09%       4.940us       1.647us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize        53.75%       3.052ms        53.75%       3.052ms       3.052ms       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 5.329ms
-Self CUDA time total: 3.742ms
+Self CPU time total: 5.678ms
+Self CUDA time total: 3.731ms
 
 
 
@@ -4271,29 +4053,29 @@ PROFILE TRACE: torch_flash_ma | cuda_attn_L320_bfloat16
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                         torch_flash_ma         4.87%     262.676us        41.62%       2.245ms       2.245ms       0.000us         0.00%       3.882ms       3.882ms             1  
-                                         torch_flash_ma         0.00%       0.000us         0.00%       0.000us       0.000us       3.834ms       100.29%       3.834ms       3.834ms             1  
-                     aten::scaled_dot_product_attention         0.50%      26.770us         3.49%     188.015us      62.672us       0.000us         0.00%       3.044ms       1.015ms             3  
-              aten::_scaled_dot_product_flash_attention         0.35%      18.803us         2.99%     161.245us      53.748us       0.000us         0.00%       3.044ms       1.015ms             3  
-                         aten::_flash_attention_forward         0.74%      39.829us         2.21%     119.102us      39.701us       3.044ms        79.61%       3.044ms       1.015ms             3  
-void pytorch_flash::flash_fwd_kernel<Flash_fwd_kerne...         0.00%       0.000us         0.00%       0.000us       0.000us       3.044ms        79.61%       3.044ms       1.015ms             3  
-                                       aten::contiguous         0.18%       9.451us        32.36%       1.746ms     145.465us       0.000us         0.00%     838.367us      69.864us            12  
-                                            aten::clone         0.54%      28.881us        32.18%       1.736ms     144.678us       0.000us         0.00%     838.367us      69.864us            12  
-                                            aten::copy_         1.51%      81.201us        30.48%       1.644ms     137.016us     779.615us        20.39%     838.367us      69.864us            12  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     779.615us        20.39%     779.615us      64.968us            12  
-                                Activity Buffer Request        27.31%       1.473ms        27.31%       1.473ms       1.473ms      58.752us         1.54%      58.752us      58.752us             1  
-                                        aten::transpose         1.01%      54.592us         1.34%      72.471us       3.020us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.33%      17.879us         0.33%      17.879us       0.745us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::empty_like         0.37%      20.117us         1.53%      82.751us       5.517us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.41%      76.295us         1.41%      76.295us       3.179us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         2.13%     114.795us         2.13%     114.795us       7.653us       0.000us         0.00%       0.000us       0.000us            15  
-                                    aten::empty_strided         0.27%      14.801us         0.27%      14.801us       4.934us       0.000us         0.00%       0.000us       0.000us             3  
-                                 cudaDeviceGetAttribute         0.04%       2.110us         0.04%       2.110us       0.352us       0.000us         0.00%       0.000us       0.000us             6  
-                                   cudaFuncSetAttribute         0.07%       3.990us         0.07%       3.990us       1.330us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize        58.38%       3.149ms        58.38%       3.149ms       3.149ms       0.000us         0.00%       0.000us       0.000us             1  
+                                         torch_flash_ma         4.60%     260.065us        44.20%       2.500ms       2.500ms       0.000us         0.00%       3.871ms       3.871ms             1  
+                                         torch_flash_ma         0.00%       0.000us         0.00%       0.000us       0.000us       3.823ms       100.27%       3.823ms       3.823ms             1  
+                     aten::scaled_dot_product_attention         0.46%      25.840us         3.28%     185.632us      61.877us       0.000us         0.00%       3.035ms       1.012ms             3  
+              aten::_scaled_dot_product_flash_attention         0.32%      17.999us         2.82%     159.792us      53.264us       0.000us         0.00%       3.035ms       1.012ms             3  
+                         aten::_flash_attention_forward         0.73%      41.121us         2.09%     118.472us      39.491us       3.035ms        79.59%       3.035ms       1.012ms             3  
+void pytorch_flash::flash_fwd_kernel<Flash_fwd_kerne...         0.00%       0.000us         0.00%       0.000us       0.000us       3.035ms        79.59%       3.035ms       1.012ms             3  
+                                       aten::contiguous         0.19%      10.499us        35.53%       2.010ms     167.521us       0.000us         0.00%     836.093us      69.674us            12  
+                                            aten::clone         0.50%      28.109us        35.35%       2.000ms     166.646us       0.000us         0.00%     836.093us      69.674us            12  
+                                            aten::copy_         1.42%      80.472us        33.72%       1.908ms     158.959us     778.333us        20.41%     836.093us      69.674us            12  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     778.333us        20.41%     778.333us      64.861us            12  
+                                Activity Buffer Request        30.89%       1.747ms        30.89%       1.747ms       1.747ms      57.760us         1.51%      57.760us      57.760us             1  
+                                        aten::transpose         0.88%      49.936us         1.20%      67.813us       2.826us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.32%      17.877us         0.32%      17.877us       0.745us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::empty_like         0.36%      20.321us         1.47%      83.262us       5.551us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         1.37%      77.333us         1.37%      77.333us       3.222us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         1.81%     102.481us         1.81%     102.481us       6.832us       0.000us         0.00%       0.000us       0.000us            15  
+                                    aten::empty_strided         0.25%      14.120us         0.25%      14.120us       4.707us       0.000us         0.00%       0.000us       0.000us             3  
+                                 cudaDeviceGetAttribute         0.03%       1.688us         0.03%       1.688us       0.281us       0.000us         0.00%       0.000us       0.000us             6  
+                                   cudaFuncSetAttribute         0.09%       5.331us         0.09%       5.331us       1.777us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize        55.80%       3.157ms        55.80%       3.157ms       3.157ms       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 5.395ms
-Self CUDA time total: 3.823ms
+Self CPU time total: 5.657ms
+Self CUDA time total: 3.813ms
 
 
 
@@ -4303,29 +4085,29 @@ PROFILE TRACE: torch_flash_ma | cuda_attn_L384_bfloat16
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                         torch_flash_ma         4.61%     261.106us        43.54%       2.469ms       2.469ms       0.000us         0.00%       3.945ms       3.945ms             1  
-                                         torch_flash_ma         0.00%       0.000us         0.00%       0.000us       0.000us       3.898ms       100.28%       3.898ms       3.898ms             1  
-                     aten::scaled_dot_product_attention         0.46%      26.241us         3.40%     192.654us      64.218us       0.000us         0.00%       3.100ms       1.033ms             3  
-              aten::_scaled_dot_product_flash_attention         0.34%      19.509us         2.94%     166.413us      55.471us       0.000us         0.00%       3.100ms       1.033ms             3  
-                         aten::_flash_attention_forward         0.74%      42.081us         2.16%     122.633us      40.878us       3.100ms        79.76%       3.100ms       1.033ms             3  
-void pytorch_flash::flash_fwd_kernel<Flash_fwd_kerne...         0.00%       0.000us         0.00%       0.000us       0.000us       3.100ms        79.76%       3.100ms       1.033ms             3  
-                                       aten::contiguous         0.20%      11.161us        34.71%       1.968ms     163.994us       0.000us         0.00%     844.704us      70.392us            12  
-                                            aten::clone         0.52%      29.682us        34.51%       1.957ms     163.064us       0.000us         0.00%     844.704us      70.392us            12  
-                                            aten::copy_         1.45%      82.261us        32.81%       1.860ms     155.026us     786.784us        20.24%     844.704us      70.392us            12  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     786.784us        20.24%     786.784us      65.565us            12  
-                                Activity Buffer Request        26.26%       1.489ms        26.26%       1.489ms       1.489ms      57.920us         1.49%      57.920us      57.920us             1  
-                                        aten::transpose         0.95%      53.820us         1.26%      71.322us       2.972us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.31%      17.502us         0.31%      17.502us       0.729us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::empty_like         0.39%      21.943us         1.53%      86.983us       5.799us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.40%      79.202us         1.40%      79.202us       3.300us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         5.55%     314.487us         5.55%     314.487us      20.966us       0.000us         0.00%       0.000us       0.000us            15  
-                                    aten::empty_strided         0.26%      14.830us         0.26%      14.830us       4.943us       0.000us         0.00%       0.000us       0.000us             3  
-                                 cudaDeviceGetAttribute         0.04%       2.010us         0.04%       2.010us       0.335us       0.000us         0.00%       0.000us       0.000us             6  
-                                   cudaFuncSetAttribute         0.07%       4.040us         0.07%       4.040us       1.347us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize        56.46%       3.201ms        56.46%       3.201ms       3.201ms       0.000us         0.00%       0.000us       0.000us             1  
+                                         torch_flash_ma         4.36%     258.876us        46.43%       2.758ms       2.758ms       0.000us         0.00%       3.960ms       3.960ms             1  
+                                         torch_flash_ma         0.00%       0.000us         0.00%       0.000us       0.000us       3.911ms       100.27%       3.911ms       3.911ms             1  
+                     aten::scaled_dot_product_attention         0.42%      24.860us         4.02%     238.593us      79.531us       0.000us         0.00%       3.109ms       1.036ms             3  
+              aten::_scaled_dot_product_flash_attention         0.32%      19.211us         3.60%     213.733us      71.244us       0.000us         0.00%       3.109ms       1.036ms             3  
+                         aten::_flash_attention_forward         0.74%      43.768us         2.88%     170.772us      56.924us       3.109ms        79.70%       3.109ms       1.036ms             3  
+void pytorch_flash::flash_fwd_kernel<Flash_fwd_kerne...         0.00%       0.000us         0.00%       0.000us       0.000us       3.109ms        79.70%       3.109ms       1.036ms             3  
+                                       aten::contiguous         0.17%      10.099us        37.27%       2.213ms     184.454us       0.000us         0.00%     850.560us      70.880us            12  
+                                            aten::clone         0.48%      28.250us        37.10%       2.203ms     183.613us       0.000us         0.00%     850.560us      70.880us            12  
+                                            aten::copy_         1.36%      80.903us        35.54%       2.111ms     175.896us     791.680us        20.30%     850.560us      70.880us            12  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     791.680us        20.30%     791.680us      65.973us            12  
+                                Activity Buffer Request        29.13%       1.730ms        29.13%       1.730ms       1.730ms      58.880us         1.51%      58.880us      58.880us             1  
+                                        aten::transpose         0.86%      50.781us         1.18%      70.362us       2.932us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.33%      19.581us         0.33%      19.581us       0.816us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::empty_like         0.35%      20.589us         1.40%      83.331us       5.555us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         1.32%      78.663us         1.32%      78.663us       3.278us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         5.47%     324.743us         5.47%     324.743us      21.650us       0.000us         0.00%       0.000us       0.000us            15  
+                                    aten::empty_strided         0.23%      13.800us         0.23%      13.800us       4.600us       0.000us         0.00%       0.000us       0.000us             3  
+                                 cudaDeviceGetAttribute         0.80%      47.662us         0.80%      47.662us       7.944us       0.000us         0.00%       0.000us       0.000us             6  
+                                   cudaFuncSetAttribute         0.10%       5.930us         0.10%       5.930us       1.977us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize        53.57%       3.181ms        53.57%       3.181ms       3.181ms       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 5.670ms
-Self CUDA time total: 3.887ms
+Self CPU time total: 5.939ms
+Self CUDA time total: 3.901ms
 
 
 
@@ -4335,29 +4117,29 @@ PROFILE TRACE: torch_flash_ma | cuda_attn_L448_bfloat16
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                         torch_flash_ma         5.12%     312.519us        40.82%       2.493ms       2.493ms       0.000us         0.00%       4.416ms       4.416ms             1  
-                                         torch_flash_ma         0.00%       0.000us         0.00%       0.000us       0.000us       4.365ms       100.24%       4.365ms       4.365ms             1  
-                     aten::scaled_dot_product_attention         0.42%      25.922us         3.20%     195.246us      65.082us       0.000us         0.00%       3.547ms       1.182ms             3  
-              aten::_scaled_dot_product_flash_attention         0.34%      20.847us         2.77%     169.324us      56.441us       0.000us         0.00%       3.547ms       1.182ms             3  
-                         aten::_flash_attention_forward         0.72%      44.243us         2.07%     126.303us      42.101us       3.547ms        81.45%       3.547ms       1.182ms             3  
-void pytorch_flash::flash_fwd_kernel<Flash_fwd_kerne...         0.00%       0.000us         0.00%       0.000us       0.000us       3.547ms        81.45%       3.547ms       1.182ms             3  
-                                       aten::contiguous         0.17%      10.559us        31.73%       1.938ms     161.473us       0.000us         0.00%     869.122us      72.427us            12  
-                                            aten::clone         0.47%      28.763us        31.56%       1.927ms     160.593us       0.000us         0.00%     869.122us      72.427us            12  
-                                            aten::copy_         1.36%      83.033us        30.01%       1.832ms     152.707us     807.906us        18.55%     869.122us      72.427us            12  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     807.906us        18.55%     807.906us      67.326us            12  
-                                Activity Buffer Request        24.51%       1.497ms        24.51%       1.497ms       1.497ms      61.216us         1.41%      61.216us      61.216us             1  
-                                        aten::transpose         0.85%      52.195us         1.14%      69.864us       2.911us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.29%      17.669us         0.29%      17.669us       0.736us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::empty_like         0.34%      20.921us         1.44%      87.791us       5.853us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.30%      79.270us         1.30%      79.270us       3.303us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         4.55%     277.575us         4.55%     277.575us      18.505us       0.000us         0.00%       0.000us       0.000us            15  
-                                    aten::empty_strided         0.27%      16.520us         0.27%      16.520us       5.507us       0.000us         0.00%       0.000us       0.000us             3  
-                                 cudaDeviceGetAttribute         0.03%       1.960us         0.03%       1.960us       0.327us       0.000us         0.00%       0.000us       0.000us             6  
-                                   cudaFuncSetAttribute         0.07%       4.040us         0.07%       4.040us       1.347us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize        59.18%       3.614ms        59.18%       3.614ms       3.614ms       0.000us         0.00%       0.000us       0.000us             1  
+                                         torch_flash_ma         4.85%     313.852us        44.01%       2.846ms       2.846ms       0.000us         0.00%       4.405ms       4.405ms             1  
+                                         torch_flash_ma         0.00%       0.000us         0.00%       0.000us       0.000us       4.356ms       100.24%       4.356ms       4.356ms             1  
+                     aten::scaled_dot_product_attention         0.40%      25.602us         2.92%     188.673us      62.891us       0.000us         0.00%       3.542ms       1.181ms             3  
+              aten::_scaled_dot_product_flash_attention         0.29%      18.450us         2.52%     163.071us      54.357us       0.000us         0.00%       3.542ms       1.181ms             3  
+                         aten::_flash_attention_forward         0.66%      42.791us         1.88%     121.422us      40.474us       3.542ms        81.52%       3.542ms       1.181ms             3  
+void pytorch_flash::flash_fwd_kernel<Flash_fwd_kerne...         0.00%       0.000us         0.00%       0.000us       0.000us       3.542ms        81.52%       3.542ms       1.181ms             3  
+                                       aten::contiguous         0.15%       9.702us        35.55%       2.299ms     191.596us       0.000us         0.00%     862.461us      71.872us            12  
+                                            aten::clone         0.45%      28.857us        35.40%       2.289ms     190.788us       0.000us         0.00%     862.461us      71.872us            12  
+                                            aten::copy_         1.23%      79.423us        33.92%       2.194ms     182.809us     803.166us        18.48%     862.461us      71.872us            12  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     803.166us        18.48%     803.166us      66.930us            12  
+                                Activity Buffer Request        28.18%       1.822ms        28.18%       1.822ms       1.822ms      59.295us         1.36%      59.295us      59.295us             1  
+                                        aten::transpose         0.77%      49.902us         1.04%      67.461us       2.811us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.27%      17.559us         0.27%      17.559us       0.732us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::empty_like         0.33%      21.611us         1.34%      86.704us       5.780us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         1.24%      80.042us         1.24%      80.042us       3.335us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         4.86%     314.554us         4.86%     314.554us      20.970us       0.000us         0.00%       0.000us       0.000us            15  
+                                    aten::empty_strided         0.23%      14.691us         0.23%      14.691us       4.897us       0.000us         0.00%       0.000us       0.000us             3  
+                                 cudaDeviceGetAttribute         0.03%       1.700us         0.03%       1.700us       0.283us       0.000us         0.00%       0.000us       0.000us             6  
+                                   cudaFuncSetAttribute         0.08%       4.940us         0.08%       4.940us       1.647us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize        55.99%       3.621ms        55.99%       3.621ms       3.621ms       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 6.107ms
-Self CUDA time total: 4.355ms
+Self CPU time total: 6.467ms
+Self CUDA time total: 4.345ms
 
 
 
@@ -4367,38 +4149,38 @@ PROFILE TRACE: torch_flash_ma | cuda_attn_L512_bfloat16
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                         torch_flash_ma         3.85%     236.256us        38.02%       2.335ms       2.335ms       0.000us         0.00%       4.535ms       4.535ms             1  
-                                         torch_flash_ma         0.00%       0.000us         0.00%       0.000us       0.000us       4.485ms       100.25%       4.485ms       4.485ms             1  
-                     aten::scaled_dot_product_attention         0.43%      26.452us         2.98%     183.275us      61.092us       0.000us         0.00%       3.655ms       1.218ms             3  
-              aten::_scaled_dot_product_flash_attention         0.30%      18.620us         2.55%     156.823us      52.274us       0.000us         0.00%       3.655ms       1.218ms             3  
-                         aten::_flash_attention_forward         0.59%      36.060us         1.88%     115.323us      38.441us       3.655ms        81.69%       3.655ms       1.218ms             3  
-void pytorch_flash::flash_fwd_kernel<Flash_fwd_kerne...         0.00%       0.000us         0.00%       0.000us       0.000us       3.655ms        81.69%       3.655ms       1.218ms             3  
-                                       aten::contiguous         0.16%       9.770us        30.40%       1.867ms     155.567us       0.000us         0.00%     880.065us      73.339us            12  
-                                            aten::clone         0.46%      28.179us        30.24%       1.857ms     154.753us       0.000us         0.00%     880.065us      73.339us            12  
-                                            aten::copy_         1.36%      83.563us        28.74%       1.765ms     147.054us     819.137us        18.31%     880.065us      73.339us            12  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     819.137us        18.31%     819.137us      68.261us            12  
-                                Activity Buffer Request        23.24%       1.427ms        23.24%       1.427ms       1.427ms      60.928us         1.36%      60.928us      60.928us             1  
-                                        aten::transpose         0.86%      52.980us         1.16%      71.060us       2.961us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.29%      18.080us         0.29%      18.080us       0.753us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::empty_like         0.34%      20.930us         1.37%      83.913us       5.594us       0.000us         0.00%       0.000us       0.000us            15  
-                                            aten::empty         1.25%      77.043us         1.25%      77.043us       3.210us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         4.54%     278.990us         4.54%     278.990us      18.599us       0.000us         0.00%       0.000us       0.000us            15  
-                                    aten::empty_strided         0.24%      14.661us         0.24%      14.661us       4.887us       0.000us         0.00%       0.000us       0.000us             3  
-                                 cudaDeviceGetAttribute         0.03%       1.978us         0.03%       1.978us       0.330us       0.000us         0.00%       0.000us       0.000us             6  
-                                   cudaFuncSetAttribute         0.06%       3.901us         0.06%       3.901us       1.300us       0.000us         0.00%       0.000us       0.000us             3  
-                                  cudaDeviceSynchronize        61.98%       3.806ms        61.98%       3.806ms       3.806ms       0.000us         0.00%       0.000us       0.000us             1  
+                                         torch_flash_ma         3.49%     226.744us        41.30%       2.682ms       2.682ms       0.000us         0.00%       4.507ms       4.507ms             1  
+                                         torch_flash_ma         0.00%       0.000us         0.00%       0.000us       0.000us       4.456ms       100.23%       4.456ms       4.456ms             1  
+                     aten::scaled_dot_product_attention         0.39%      25.000us         2.68%     173.753us      57.918us       0.000us         0.00%       3.635ms       1.212ms             3  
+              aten::_scaled_dot_product_flash_attention         0.28%      18.340us         2.29%     148.753us      49.584us       0.000us         0.00%       3.635ms       1.212ms             3  
+                         aten::_flash_attention_forward         0.53%      34.164us         1.68%     109.263us      36.421us       3.635ms        81.77%       3.635ms       1.212ms             3  
+void pytorch_flash::flash_fwd_kernel<Flash_fwd_kerne...         0.00%       0.000us         0.00%       0.000us       0.000us       3.635ms        81.77%       3.635ms       1.212ms             3  
+                                       aten::contiguous         0.14%       8.821us        34.49%       2.240ms     186.626us       0.000us         0.00%     871.422us      72.619us            12  
+                                            aten::clone         0.41%      26.612us        34.36%       2.231ms     185.890us       0.000us         0.00%     871.422us      72.619us            12  
+                                            aten::copy_         1.18%      76.909us        32.95%       2.140ms     178.308us     810.270us        18.23%     871.422us      72.619us            12  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     810.270us        18.23%     810.270us      67.523us            12  
+                                Activity Buffer Request        27.48%       1.784ms        27.48%       1.784ms       1.784ms      61.152us         1.38%      61.152us      61.152us             1  
+                                        aten::transpose         0.71%      45.940us         0.97%      63.019us       2.626us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.26%      17.079us         0.26%      17.079us       0.712us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::empty_like         0.30%      19.781us         1.27%      82.742us       5.516us       0.000us         0.00%       0.000us       0.000us            15  
+                                            aten::empty         1.21%      78.423us         1.21%      78.423us       3.268us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         4.62%     300.294us         4.62%     300.294us      20.020us       0.000us         0.00%       0.000us       0.000us            15  
+                                    aten::empty_strided         0.21%      13.430us         0.21%      13.430us       4.477us       0.000us         0.00%       0.000us       0.000us             3  
+                                 cudaDeviceGetAttribute         0.02%       1.610us         0.02%       1.610us       0.268us       0.000us         0.00%       0.000us       0.000us             6  
+                                   cudaFuncSetAttribute         0.07%       4.648us         0.07%       4.648us       1.549us       0.000us         0.00%       0.000us       0.000us             3  
+                                  cudaDeviceSynchronize        58.70%       3.811ms        58.70%       3.811ms       3.811ms       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 6.141ms
-Self CUDA time total: 4.474ms
+Self CPU time total: 6.493ms
+Self CUDA time total: 4.445ms
 
 
 impl                     wl                  p50(ms)  ok
-torch_flash_ma           cuda_attn_L128_bfloat16     1.22  True
+torch_flash_ma           cuda_attn_L128_bfloat16     1.23  True
 torch_flash_ma           cuda_attn_L256_bfloat16     1.28  True
 torch_flash_ma           cuda_attn_L320_bfloat16     1.30  True
 torch_flash_ma           cuda_attn_L384_bfloat16     1.33  True
-torch_flash_ma           cuda_attn_L448_bfloat16     1.50  True
-torch_flash_ma           cuda_attn_L512_bfloat16     1.51  True
+torch_flash_ma           cuda_attn_L448_bfloat16     1.48  True
+torch_flash_ma           cuda_attn_L512_bfloat16     1.52  True
 

Artifacts:

diff --git a/flash_attn/impls/hf_kernels_flash_attn.html b/flash_attn/impls/hf_kernels_flash_attn.html index b43f3b2c4b9504821051f29d094124c270a7e0ee..5870a369a7d98065dcc33b82820a8e2eb3bc4295 100644 --- a/flash_attn/impls/hf_kernels_flash_attn.html +++ b/flash_attn/impls/hf_kernels_flash_attn.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - 
-:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); 
border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } 
+:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: 
flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: 
var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { 
color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: 
var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ 
transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s 
ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { 
background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { 
margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { 
.cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; 
font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception 
*/ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: 
#BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* 
Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] 
.highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] 
.highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: 
var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; 
margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4104,9 +3886,9 @@ body[data-tool="eraser"] .main-content { ▼ code ▼ output - ▶ uv-logs + ▶ uv-logs | -Cell: benchmark | 5.83s +Cell: benchmark | 10.24s | Raw @@ -4133,7 +3915,7 @@ Cell: benchmark | 5.83s from kernels import get_kernel # Load the flash attention kernel -hf_kernels_flash_attn = get_kernel("kernels-community/flash-attn") +hf_kernels_flash_attn = get_kernel("kernels-community/flash-attn2") def hf_flash_attention(query, key, value): @@ -4161,21 +3943,21 @@ PROFILE TRACE: hf_kernels_flash_attn | cuda_attn_L128_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - hf_kernels_flash_attn 3.51% 153.413us 41.11% 1.797ms 1.797ms 0.000us 0.00% 3.733ms 3.733ms 1 - _flash_attn_9e27194::fwd 1.62% 70.702us 37.60% 1.644ms 547.894us 2.785ms 100.00% 3.733ms 1.244ms 3 - hf_kernels_flash_attn 0.00% 0.000us 0.00% 0.000us 0.000us 2.786ms 100.05% 2.786ms 2.786ms 1 -void flash::flash_fwd_kernel<Flash_fwd_kernel_traits... 
0.00% 0.000us 0.00% 0.000us 0.000us 2.785ms 100.00% 2.785ms 928.303us 3 - Activity Buffer Request 32.92% 1.439ms 32.92% 1.439ms 1.439ms 947.706us 34.03% 947.706us 947.706us 1 - cudaDeviceGetAttribute 0.11% 4.891us 0.11% 4.891us 0.326us 0.000us 0.00% 0.000us 0.000us 15 - aten::empty_like 0.37% 16.181us 1.17% 51.061us 17.020us 0.000us 0.00% 0.000us 0.000us 3 - aten::empty_strided 0.80% 34.880us 0.80% 34.880us 11.627us 0.000us 0.00% 0.000us 0.000us 3 - aten::empty 0.59% 25.681us 0.59% 25.681us 2.853us 0.000us 0.00% 0.000us 0.000us 9 - cudaFuncSetAttribute 0.26% 11.340us 0.26% 11.340us 3.780us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 0.93% 40.731us 0.93% 40.731us 13.577us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 58.89% 2.575ms 58.89% 2.575ms 2.575ms 0.000us 0.00% 0.000us 0.000us 1 + hf_kernels_flash_attn 3.32% 167.194us 47.96% 2.415ms 2.415ms 0.000us 0.00% 3.817ms 3.817ms 1 + _flash_attn_9e27194::fwd 1.37% 69.029us 44.64% 2.247ms 749.145us 2.847ms 100.00% 3.817ms 1.272ms 3 + hf_kernels_flash_attn 0.00% 0.000us 0.00% 0.000us 0.000us 2.849ms 100.05% 2.849ms 2.849ms 1 +void flash::flash_fwd_kernel<Flash_fwd_kernel_traits... 
0.00% 0.000us 0.00% 0.000us 0.000us 2.847ms 100.00% 2.847ms 949.099us 3 + Activity Buffer Request 39.70% 1.999ms 39.70% 1.999ms 1.999ms 970.081us 34.07% 970.081us 970.081us 1 + cudaDeviceGetAttribute 0.09% 4.410us 0.09% 4.410us 0.294us 0.000us 0.00% 0.000us 0.000us 15 + aten::empty_like 0.38% 19.301us 1.08% 54.311us 18.104us 0.000us 0.00% 0.000us 0.000us 3 + aten::empty_strided 0.70% 35.010us 0.70% 35.010us 11.670us 0.000us 0.00% 0.000us 0.000us 3 + aten::empty 0.51% 25.771us 0.51% 25.771us 2.863us 0.000us 0.00% 0.000us 0.000us 9 + cudaFuncSetAttribute 1.06% 53.231us 1.06% 53.231us 17.744us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 0.83% 41.840us 0.83% 41.840us 13.947us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 52.04% 2.620ms 52.04% 2.620ms 2.620ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 4.372ms -Self CUDA time total: 2.785ms +Self CPU time total: 5.035ms +Self CUDA time total: 2.847ms @@ -4185,21 +3967,21 @@ PROFILE TRACE: hf_kernels_flash_attn | cuda_attn_L256_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - hf_kernels_flash_attn 1.94% 86.682us 37.50% 1.676ms 1.676ms 0.000us 0.00% 3.929ms 3.929ms 1 - _flash_attn_9e27194::fwd 1.06% 47.570us 35.56% 1.589ms 529.734us 2.938ms 100.00% 3.929ms 1.310ms 3 - hf_kernels_flash_attn 0.00% 0.000us 0.00% 0.000us 0.000us 2.939ms 100.05% 2.939ms 2.939ms 1 -void 
flash::flash_fwd_kernel<Flash_fwd_kernel_traits... 0.00% 0.000us 0.00% 0.000us 0.000us 2.938ms 100.00% 2.938ms 979.209us 3 - Activity Buffer Request 32.66% 1.460ms 32.66% 1.460ms 1.460ms 991.166us 33.74% 991.166us 991.166us 1 - cudaDeviceGetAttribute 0.10% 4.450us 0.10% 4.450us 0.297us 0.000us 0.00% 0.000us 0.000us 15 - aten::empty_like 0.19% 8.440us 0.55% 24.690us 8.230us 0.000us 0.00% 0.000us 0.000us 3 - aten::empty_strided 0.36% 16.250us 0.36% 16.250us 5.417us 0.000us 0.00% 0.000us 0.000us 3 - aten::empty 0.51% 22.872us 0.51% 22.872us 2.541us 0.000us 0.00% 0.000us 0.000us 9 - cudaFuncSetAttribute 0.07% 3.350us 0.07% 3.350us 1.117us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 0.60% 26.611us 0.60% 26.611us 8.870us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 62.50% 2.794ms 62.50% 2.794ms 2.794ms 0.000us 0.00% 0.000us 0.000us 1 + hf_kernels_flash_attn 1.71% 88.920us 43.78% 2.280ms 2.280ms 0.000us 0.00% 4.110ms 4.110ms 1 + _flash_attn_9e27194::fwd 0.90% 46.653us 42.07% 2.191ms 730.229us 3.068ms 100.00% 4.110ms 1.370ms 3 + hf_kernels_flash_attn 0.00% 0.000us 0.00% 0.000us 0.000us 3.070ms 100.05% 3.070ms 3.070ms 1 +void flash::flash_fwd_kernel<Flash_fwd_kernel_traits... 
0.00% 0.000us 0.00% 0.000us 0.000us 3.068ms 100.00% 3.068ms 1.023ms 3 + Activity Buffer Request 39.69% 2.067ms 39.69% 2.067ms 2.067ms 1.041ms 33.93% 1.041ms 1.041ms 1 + cudaDeviceGetAttribute 0.07% 3.649us 0.07% 3.649us 0.243us 0.000us 0.00% 0.000us 0.000us 15 + aten::empty_like 0.14% 7.310us 0.43% 22.581us 7.527us 0.000us 0.00% 0.000us 0.000us 3 + aten::empty_strided 0.29% 15.271us 0.29% 15.271us 5.090us 0.000us 0.00% 0.000us 0.000us 3 + aten::empty 0.41% 21.500us 0.41% 21.500us 2.389us 0.000us 0.00% 0.000us 0.000us 9 + cudaFuncSetAttribute 0.07% 3.620us 0.07% 3.620us 1.207us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 0.50% 25.800us 0.50% 25.800us 8.600us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 56.22% 2.927ms 56.22% 2.927ms 2.927ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 4.469ms -Self CUDA time total: 2.938ms +Self CPU time total: 5.207ms +Self CUDA time total: 3.068ms @@ -4209,21 +3991,21 @@ PROFILE TRACE: hf_kernels_flash_attn | cuda_attn_L320_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - hf_kernels_flash_attn 2.38% 109.313us 36.70% 1.683ms 1.683ms 0.000us 0.00% 4.081ms 4.081ms 1 - _flash_attn_9e27194::fwd 1.05% 48.167us 34.31% 1.574ms 524.567us 3.048ms 100.00% 4.081ms 1.360ms 3 - hf_kernels_flash_attn 0.00% 0.000us 0.00% 0.000us 0.000us 3.049ms 100.05% 3.049ms 3.049ms 1 -void 
flash::flash_fwd_kernel<Flash_fwd_kernel_traits... 0.00% 0.000us 0.00% 0.000us 0.000us 3.048ms 100.00% 3.048ms 1.016ms 3 - Activity Buffer Request 31.46% 1.443ms 31.46% 1.443ms 1.443ms 1.033ms 33.90% 1.033ms 1.033ms 1 - cudaDeviceGetAttribute 0.09% 4.231us 0.09% 4.231us 0.282us 0.000us 0.00% 0.000us 0.000us 15 - aten::empty_like 0.16% 7.250us 0.52% 23.960us 7.987us 0.000us 0.00% 0.000us 0.000us 3 - aten::empty_strided 0.36% 16.710us 0.36% 16.710us 5.570us 0.000us 0.00% 0.000us 0.000us 3 - aten::empty 0.46% 21.300us 0.46% 21.300us 2.367us 0.000us 0.00% 0.000us 0.000us 9 - cudaFuncSetAttribute 0.08% 3.561us 0.08% 3.561us 1.187us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 0.64% 29.473us 0.64% 29.473us 9.824us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 63.30% 2.903ms 63.30% 2.903ms 2.903ms 0.000us 0.00% 0.000us 0.000us 1 + hf_kernels_flash_attn 1.71% 88.010us 40.24% 2.065ms 2.065ms 0.000us 0.00% 4.290ms 4.290ms 1 + _flash_attn_9e27194::fwd 1.03% 52.730us 38.53% 1.977ms 659.108us 3.209ms 100.00% 4.290ms 1.430ms 3 + hf_kernels_flash_attn 0.00% 0.000us 0.00% 0.000us 0.000us 3.211ms 100.05% 3.211ms 3.211ms 1 +void flash::flash_fwd_kernel<Flash_fwd_kernel_traits... 
0.00% 0.000us 0.00% 0.000us 0.000us 3.209ms 100.00% 3.209ms 1.070ms 3 + Activity Buffer Request 35.96% 1.846ms 35.96% 1.846ms 1.846ms 1.081ms 33.68% 1.081ms 1.081ms 1 + cudaDeviceGetAttribute 0.07% 3.699us 0.07% 3.699us 0.247us 0.000us 0.00% 0.000us 0.000us 15 + aten::empty_like 0.13% 6.760us 0.45% 22.961us 7.654us 0.000us 0.00% 0.000us 0.000us 3 + aten::empty_strided 0.32% 16.201us 0.32% 16.201us 5.400us 0.000us 0.00% 0.000us 0.000us 3 + aten::empty 0.41% 20.833us 0.41% 20.833us 2.315us 0.000us 0.00% 0.000us 0.000us 9 + cudaFuncSetAttribute 0.07% 3.580us 0.07% 3.580us 1.193us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 0.54% 27.851us 0.54% 27.851us 9.284us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 59.76% 3.067ms 59.76% 3.067ms 3.067ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 4.586ms -Self CUDA time total: 3.048ms +Self CPU time total: 5.132ms +Self CUDA time total: 3.209ms @@ -4233,21 +4015,21 @@ PROFILE TRACE: hf_kernels_flash_attn | cuda_attn_L384_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - hf_kernels_flash_attn 2.13% 103.094us 38.83% 1.884ms 1.884ms 0.000us 0.00% 4.165ms 4.165ms 1 - _flash_attn_9e27194::fwd 0.99% 47.838us 36.71% 1.781ms 593.521us 3.114ms 100.00% 4.165ms 1.388ms 3 - hf_kernels_flash_attn 0.00% 0.000us 0.00% 0.000us 0.000us 3.116ms 100.05% 3.116ms 3.116ms 1 -void 
flash::flash_fwd_kernel<Flash_fwd_kernel_traits... 0.00% 0.000us 0.00% 0.000us 0.000us 3.114ms 100.00% 3.114ms 1.038ms 3 - Activity Buffer Request 29.59% 1.435ms 29.59% 1.435ms 1.435ms 1.051ms 33.75% 1.051ms 1.051ms 1 - cudaDeviceGetAttribute 0.08% 3.800us 0.08% 3.800us 0.253us 0.000us 0.00% 0.000us 0.000us 15 - aten::empty_like 0.16% 7.891us 0.53% 25.811us 8.604us 0.000us 0.00% 0.000us 0.000us 3 - aten::empty_strided 0.37% 17.920us 0.37% 17.920us 5.973us 0.000us 0.00% 0.000us 0.000us 3 - aten::empty 0.45% 21.731us 0.45% 21.731us 2.415us 0.000us 0.00% 0.000us 0.000us 9 - cudaFuncSetAttribute 0.08% 3.740us 0.08% 3.740us 1.247us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 4.99% 242.187us 4.99% 242.187us 80.729us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 61.17% 2.967ms 61.17% 2.967ms 2.967ms 0.000us 0.00% 0.000us 0.000us 1 + hf_kernels_flash_attn 2.41% 90.762us 19.01% 717.141us 717.141us 0.000us 0.00% 4.279ms 4.279ms 1 + _flash_attn_9e27194::fwd 1.23% 46.533us 16.60% 626.379us 208.793us 3.197ms 100.00% 4.279ms 1.426ms 3 + hf_kernels_flash_attn 0.00% 0.000us 0.00% 0.000us 0.000us 3.199ms 100.05% 3.199ms 3.199ms 1 +void flash::flash_fwd_kernel<Flash_fwd_kernel_traits... 
0.00% 0.000us 0.00% 0.000us 0.000us 3.197ms 100.00% 3.197ms 1.066ms 3 + Activity Buffer Request 7.66% 288.965us 7.66% 288.965us 288.965us 1.082ms 33.83% 1.082ms 1.082ms 1 + cudaDeviceGetAttribute 0.10% 3.648us 0.10% 3.648us 0.243us 0.000us 0.00% 0.000us 0.000us 15 + aten::empty_like 0.18% 6.920us 0.61% 22.930us 7.643us 0.000us 0.00% 0.000us 0.000us 3 + aten::empty_strided 0.42% 16.010us 0.42% 16.010us 5.337us 0.000us 0.00% 0.000us 0.000us 3 + aten::empty 0.56% 21.260us 0.56% 21.260us 2.362us 0.000us 0.00% 0.000us 0.000us 9 + cudaFuncSetAttribute 0.10% 3.650us 0.10% 3.650us 1.217us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 6.35% 239.393us 6.35% 239.393us 79.798us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 80.99% 3.055ms 80.99% 3.055ms 3.055ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 4.851ms -Self CUDA time total: 3.114ms +Self CPU time total: 3.772ms +Self CUDA time total: 3.197ms @@ -4257,21 +4039,21 @@ PROFILE TRACE: hf_kernels_flash_attn | cuda_attn_L448_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - hf_kernels_flash_attn 2.00% 105.522us 34.61% 1.828ms 1.828ms 0.000us 0.00% 4.806ms 4.806ms 1 - _flash_attn_9e27194::fwd 0.94% 49.622us 32.62% 1.723ms 574.192us 3.597ms 100.00% 4.806ms 1.602ms 3 - hf_kernels_flash_attn 0.00% 0.000us 0.00% 0.000us 0.000us 3.599ms 100.05% 3.599ms 3.599ms 1 -void 
flash::flash_fwd_kernel<Flash_fwd_kernel_traits... 0.00% 0.000us 0.00% 0.000us 0.000us 3.597ms 100.00% 3.597ms 1.199ms 3 - Activity Buffer Request 27.37% 1.446ms 27.37% 1.446ms 1.446ms 1.209ms 33.59% 1.209ms 1.209ms 1 - cudaDeviceGetAttribute 0.08% 3.991us 0.08% 3.991us 0.266us 0.000us 0.00% 0.000us 0.000us 15 - aten::empty_like 0.14% 7.250us 0.47% 24.620us 8.207us 0.000us 0.00% 0.000us 0.000us 3 - aten::empty_strided 0.33% 17.370us 0.33% 17.370us 5.790us 0.000us 0.00% 0.000us 0.000us 3 - aten::empty 0.41% 21.681us 0.41% 21.681us 2.409us 0.000us 0.00% 0.000us 0.000us 9 - cudaFuncSetAttribute 0.07% 3.770us 0.07% 3.770us 1.257us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 3.28% 173.384us 3.28% 173.384us 57.795us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 65.39% 3.453ms 65.39% 3.453ms 3.453ms 0.000us 0.00% 0.000us 0.000us 1 + hf_kernels_flash_attn 1.57% 90.561us 37.72% 2.178ms 2.178ms 0.000us 0.00% 4.999ms 4.999ms 1 + _flash_attn_9e27194::fwd 0.83% 48.040us 36.16% 2.087ms 695.661us 3.741ms 100.00% 4.999ms 1.666ms 3 + hf_kernels_flash_attn 0.00% 0.000us 0.00% 0.000us 0.000us 3.743ms 100.05% 3.743ms 3.743ms 1 +void flash::flash_fwd_kernel<Flash_fwd_kernel_traits... 
0.00% 0.000us 0.00% 0.000us 0.000us 3.741ms 100.00% 3.741ms 1.247ms 3 + Activity Buffer Request 30.45% 1.758ms 30.45% 1.758ms 1.758ms 1.258ms 33.63% 1.258ms 1.258ms 1 + cudaDeviceGetAttribute 0.06% 3.722us 0.06% 3.722us 0.248us 0.000us 0.00% 0.000us 0.000us 15 + aten::empty_like 0.14% 7.831us 0.41% 23.771us 7.924us 0.000us 0.00% 0.000us 0.000us 3 + aten::empty_strided 0.28% 15.940us 0.28% 15.940us 5.313us 0.000us 0.00% 0.000us 0.000us 3 + aten::empty 0.36% 20.578us 0.36% 20.578us 2.286us 0.000us 0.00% 0.000us 0.000us 9 + cudaFuncSetAttribute 0.06% 3.590us 0.06% 3.590us 1.197us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 3.98% 229.604us 3.98% 229.604us 76.535us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 62.28% 3.595ms 62.28% 3.595ms 3.595ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 5.281ms -Self CUDA time total: 3.597ms +Self CPU time total: 5.772ms +Self CUDA time total: 3.741ms @@ -4281,36 +4063,40 @@ PROFILE TRACE: hf_kernels_flash_attn | cuda_attn_L512_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - hf_kernels_flash_attn 2.02% 107.892us 33.82% 1.810ms 1.810ms 0.000us 0.00% 4.930ms 4.930ms 1 - _flash_attn_9e27194::fwd 0.91% 48.918us 31.80% 1.702ms 567.268us 3.687ms 100.00% 4.930ms 1.643ms 3 - hf_kernels_flash_attn 0.00% 0.000us 0.00% 0.000us 0.000us 3.689ms 100.04% 3.689ms 3.689ms 1 -void 
flash::flash_fwd_kernel<Flash_fwd_kernel_traits... 0.00% 0.000us 0.00% 0.000us 0.000us 3.687ms 100.00% 3.687ms 1.229ms 3 - Activity Buffer Request 26.86% 1.437ms 26.86% 1.437ms 1.437ms 1.242ms 33.69% 1.242ms 1.242ms 1 - cudaDeviceGetAttribute 0.07% 3.881us 0.07% 3.881us 0.259us 0.000us 0.00% 0.000us 0.000us 15 - aten::empty_like 0.14% 7.591us 0.49% 26.111us 8.704us 0.000us 0.00% 0.000us 0.000us 3 - aten::empty_strided 0.35% 18.520us 0.35% 18.520us 6.173us 0.000us 0.00% 0.000us 0.000us 3 - aten::empty 0.39% 20.640us 0.39% 20.640us 2.293us 0.000us 0.00% 0.000us 0.000us 9 - cudaFuncSetAttribute 0.07% 3.561us 0.07% 3.561us 1.187us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 3.01% 161.306us 3.01% 161.306us 53.769us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 66.18% 3.542ms 66.18% 3.542ms 3.542ms 0.000us 0.00% 0.000us 0.000us 1 + hf_kernels_flash_attn 2.13% 89.030us 15.70% 656.370us 656.370us 0.000us 0.00% 4.900ms 4.900ms 1 + _flash_attn_9e27194::fwd 1.15% 48.015us 13.57% 567.340us 189.113us 3.667ms 100.00% 4.900ms 1.633ms 3 + hf_kernels_flash_attn 0.00% 0.000us 0.00% 0.000us 0.000us 3.669ms 100.04% 3.669ms 3.669ms 1 +void flash::flash_fwd_kernel<Flash_fwd_kernel_traits... 
0.00% 0.000us 0.00% 0.000us 0.000us 3.667ms 100.00% 3.667ms 1.222ms 3 + Activity Buffer Request 5.94% 248.154us 5.94% 248.154us 248.154us 1.233ms 33.62% 1.233ms 1.233ms 1 + cudaDeviceGetAttribute 0.08% 3.539us 0.08% 3.539us 0.236us 0.000us 0.00% 0.000us 0.000us 15 + aten::empty_like 0.16% 6.860us 0.56% 23.209us 7.736us 0.000us 0.00% 0.000us 0.000us 3 + aten::empty_strided 0.39% 16.349us 0.39% 16.349us 5.450us 0.000us 0.00% 0.000us 0.000us 3 + aten::empty 0.49% 20.571us 0.49% 20.571us 2.286us 0.000us 0.00% 0.000us 0.000us 9 + cudaFuncSetAttribute 0.09% 3.630us 0.09% 3.630us 1.210us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 5.27% 220.222us 5.27% 220.222us 73.407us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 84.30% 3.524ms 84.30% 3.524ms 3.524ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 5.351ms -Self CUDA time total: 3.687ms +Self CPU time total: 4.180ms +Self CUDA time total: 3.667ms impl wl p50(ms) ok -hf_kernels_flash_attn cuda_attn_L128_bfloat16 0.95 True -hf_kernels_flash_attn cuda_attn_L256_bfloat16 1.00 True -hf_kernels_flash_attn cuda_attn_L320_bfloat16 1.05 True -hf_kernels_flash_attn cuda_attn_L384_bfloat16 1.06 True -hf_kernels_flash_attn cuda_attn_L448_bfloat16 1.23 True -hf_kernels_flash_attn cuda_attn_L512_bfloat16 1.23 True +hf_kernels_flash_attn cuda_attn_L128_bfloat16 0.99 True +hf_kernels_flash_attn cuda_attn_L256_bfloat16 1.04 True +hf_kernels_flash_attn cuda_attn_L320_bfloat16 1.07 True +hf_kernels_flash_attn cuda_attn_L384_bfloat16 1.08 True +hf_kernels_flash_attn cuda_attn_L448_bfloat16 1.26 True +hf_kernels_flash_attn cuda_attn_L512_bfloat16 1.25 True
-
-Fetching 20 files: 0%| | 0/20 [00:00<?, ?it/s] -Fetching 20 files: 10%|█ | 2/20 [00:01<00:15, 1.19it/s] -Fetching 20 files: 100%|██████████| 20/20 [00:01<00:00, 11.87it/s] +
+
▶ UV Install Logs
+
+
Fetching 20 files: 0%| | 0/20 [00:00<?, ?it/s] +Fetching 20 files: 10%|█ | 2/20 [00:01<00:17, 1.01it/s] +Fetching 20 files: 100%|██████████| 20/20 [00:01<00:00, 10.06it/s]

Artifacts:

attention.jsonl diff --git a/flash_attn/impls/hf_kernels_flash_attn3.html b/flash_attn/impls/hf_kernels_flash_attn3.html index a1db1794336426cb37d9956eacf119e09a093fa1..45bc81a39c5977cc20f0ab022ea421a9c7a59c63 100644 --- a/flash_attn/impls/hf_kernels_flash_attn3.html +++ b/flash_attn/impls/hf_kernels_flash_attn3.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: 
none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); 
border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } 
+:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: 
flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: 
var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { 
color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: 
var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ 
transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s 
ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { 
background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { 
margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { 
.cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; 
font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception 
*/ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: 
#BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* 
Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] 
.highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] 
.highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: 
var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; 
margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4106,7 +3888,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: benchmark | 5.53s +Cell: benchmark | 5.83s | Raw @@ -4160,19 +3942,19 @@ PROFILE TRACE: hf_kernels_flash_attn3 | cuda_attn_L128_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - hf_kernels_flash_attn3 3.85% 171.193us 46.01% 2.045ms 2.045ms 0.000us 0.00% 3.614ms 3.614ms 1 - FlashAttnFunc 3.07% 136.295us 42.15% 1.874ms 624.570us 0.000us 0.00% 3.614ms 1.205ms 3 - _flash_attn3_48fe103_dirty::fwd 1.94% 86.341us 39.09% 1.737ms 579.138us 2.720ms 100.00% 3.614ms 1.205ms 3 - hf_kernels_flash_attn3 0.00% 0.000us 0.00% 0.000us 0.000us 2.722ms 100.05% 2.722ms 2.722ms 1 -void cutlass::device_kernel<flash::enable_sm80_to_sm... 
0.00% 0.000us 0.00% 0.000us 0.000us 2.720ms 100.00% 2.720ms 906.698us 3 - Activity Buffer Request 34.72% 1.543ms 34.72% 1.543ms 1.543ms 893.600us 32.85% 893.600us 893.600us 1 - aten::empty 1.07% 47.441us 1.07% 47.441us 7.907us 0.000us 0.00% 0.000us 0.000us 6 - cudaFuncSetAttribute 0.31% 13.761us 0.31% 13.761us 4.587us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 1.05% 46.772us 1.05% 46.772us 15.591us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 53.99% 2.400ms 53.99% 2.400ms 2.400ms 0.000us 0.00% 0.000us 0.000us 1 + hf_kernels_flash_attn3 3.53% 162.212us 48.20% 2.217ms 2.217ms 0.000us 0.00% 3.575ms 3.575ms 1 + FlashAttnFunc 2.60% 119.532us 44.67% 2.055ms 684.947us 0.000us 0.00% 3.575ms 1.192ms 3 + _flash_attn3_48fe103_dirty::fwd 1.56% 71.632us 42.08% 1.935ms 645.103us 2.671ms 100.00% 3.575ms 1.192ms 3 + hf_kernels_flash_attn3 0.00% 0.000us 0.00% 0.000us 0.000us 2.672ms 100.06% 2.672ms 2.672ms 1 +void cutlass::device_kernel<flash::enable_sm80_to_sm... 0.00% 0.000us 0.00% 0.000us 0.000us 2.671ms 100.00% 2.671ms 890.241us 3 + Activity Buffer Request 38.25% 1.759ms 38.25% 1.759ms 1.759ms 904.001us 33.85% 904.001us 904.001us 1 + aten::empty 0.93% 42.731us 0.93% 42.731us 7.122us 0.000us 0.00% 0.000us 0.000us 6 + cudaFuncSetAttribute 0.32% 14.640us 0.32% 14.640us 4.880us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 1.03% 47.150us 1.03% 47.150us 15.717us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 51.80% 2.383ms 51.80% 2.383ms 2.383ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 4.445ms -Self CUDA time total: 2.720ms +Self CPU time total: 4.600ms +Self CUDA time total: 2.671ms @@ -4182,19 +3964,19 @@ PROFILE TRACE: hf_kernels_flash_attn3 | cuda_attn_L256_bfloat16 ------------------------------------------------------- ------------ ------------ 
------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - hf_kernels_flash_attn3 2.41% 104.370us 41.13% 1.784ms 1.784ms 0.000us 0.00% 3.700ms 3.700ms 1 - FlashAttnFunc 2.00% 86.685us 38.73% 1.679ms 559.738us 0.000us 0.00% 3.700ms 1.233ms 3 - _flash_attn3_48fe103_dirty::fwd 1.21% 52.631us 36.73% 1.593ms 530.843us 2.768ms 100.00% 3.700ms 1.233ms 3 - hf_kernels_flash_attn3 0.00% 0.000us 0.00% 0.000us 0.000us 2.769ms 100.06% 2.769ms 2.769ms 1 -void cutlass::device_kernel<flash::enable_sm80_to_sm... 0.00% 0.000us 0.00% 0.000us 0.000us 2.768ms 100.00% 2.768ms 922.559us 3 - Activity Buffer Request 34.10% 1.479ms 34.10% 1.479ms 1.479ms 932.127us 33.68% 932.127us 932.127us 1 - aten::empty 0.60% 25.981us 0.60% 25.981us 4.330us 0.000us 0.00% 0.000us 0.000us 6 - cudaFuncSetAttribute 0.12% 5.050us 0.12% 5.050us 1.683us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 0.70% 30.140us 0.70% 30.140us 10.047us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 58.87% 2.553ms 58.87% 2.553ms 2.553ms 0.000us 0.00% 0.000us 0.000us 1 + hf_kernels_flash_attn3 2.14% 101.412us 45.76% 2.172ms 2.172ms 0.000us 0.00% 3.747ms 3.747ms 1 + FlashAttnFunc 1.91% 90.691us 43.62% 2.071ms 690.247us 0.000us 0.00% 3.747ms 1.249ms 3 + _flash_attn3_48fe103_dirty::fwd 1.11% 52.911us 41.71% 1.980ms 660.016us 2.794ms 100.00% 3.747ms 1.249ms 3 + hf_kernels_flash_attn3 0.00% 0.000us 0.00% 0.000us 0.000us 2.796ms 100.06% 2.796ms 2.796ms 1 +void cutlass::device_kernel<flash::enable_sm80_to_sm... 
0.00% 0.000us 0.00% 0.000us 0.000us 2.794ms 100.00% 2.794ms 931.376us 3 + Activity Buffer Request 39.32% 1.866ms 39.32% 1.866ms 1.866ms 953.126us 34.11% 953.126us 953.126us 1 + aten::empty 0.55% 26.341us 0.55% 26.341us 4.390us 0.000us 0.00% 0.000us 0.000us 6 + cudaFuncSetAttribute 0.11% 5.160us 0.11% 5.160us 1.720us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 0.62% 29.260us 0.62% 29.260us 9.753us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 54.24% 2.575ms 54.24% 2.575ms 2.575ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 4.336ms -Self CUDA time total: 2.768ms +Self CPU time total: 4.747ms +Self CUDA time total: 2.794ms @@ -4204,19 +3986,19 @@ PROFILE TRACE: hf_kernels_flash_attn3 | cuda_attn_L320_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - hf_kernels_flash_attn3 2.29% 102.411us 40.10% 1.791ms 1.791ms 0.000us 0.00% 3.875ms 3.875ms 1 - FlashAttnFunc 2.01% 89.903us 37.81% 1.688ms 562.801us 0.000us 0.00% 3.875ms 1.292ms 3 - _flash_attn3_48fe103_dirty::fwd 1.18% 52.613us 35.79% 1.599ms 532.834us 2.892ms 100.00% 3.875ms 1.292ms 3 - hf_kernels_flash_attn3 0.00% 0.000us 0.00% 0.000us 0.000us 2.893ms 100.05% 2.893ms 2.893ms 1 -void cutlass::device_kernel<flash::enable_sm80_to_sm... 
0.00% 0.000us 0.00% 0.000us 0.000us 2.892ms 100.00% 2.892ms 963.972us 3 - Activity Buffer Request 33.24% 1.485ms 33.24% 1.485ms 1.485ms 983.097us 33.99% 983.097us 983.097us 1 - aten::empty 0.58% 25.770us 0.58% 25.770us 4.295us 0.000us 0.00% 0.000us 0.000us 6 - cudaFuncSetAttribute 0.11% 4.820us 0.11% 4.820us 1.607us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 0.69% 30.740us 0.69% 30.740us 10.247us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 59.90% 2.675ms 59.90% 2.675ms 2.675ms 0.000us 0.00% 0.000us 0.000us 1 + hf_kernels_flash_attn3 2.17% 102.652us 42.70% 2.019ms 2.019ms 0.000us 0.00% 3.920ms 3.920ms 1 + FlashAttnFunc 1.91% 90.472us 40.53% 1.916ms 638.683us 0.000us 0.00% 3.920ms 1.307ms 3 + _flash_attn3_48fe103_dirty::fwd 0.99% 47.030us 38.62% 1.826ms 608.525us 2.928ms 100.00% 3.920ms 1.307ms 3 + hf_kernels_flash_attn3 0.00% 0.000us 0.00% 0.000us 0.000us 2.930ms 100.05% 2.930ms 2.930ms 1 +void cutlass::device_kernel<flash::enable_sm80_to_sm... 0.00% 0.000us 0.00% 0.000us 0.000us 2.928ms 100.00% 2.928ms 976.037us 3 + Activity Buffer Request 36.27% 1.715ms 36.27% 1.715ms 1.715ms 991.995us 33.88% 991.995us 991.995us 1 + aten::empty 0.57% 26.980us 0.57% 26.980us 4.497us 0.000us 0.00% 0.000us 0.000us 6 + cudaFuncSetAttribute 0.11% 4.990us 0.11% 4.990us 1.663us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 0.68% 32.070us 0.68% 32.070us 10.690us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 57.30% 2.709ms 57.30% 2.709ms 2.709ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 4.466ms -Self CUDA time total: 2.892ms +Self CPU time total: 4.728ms +Self CUDA time total: 2.928ms @@ -4226,19 +4008,19 @@ PROFILE TRACE: hf_kernels_flash_attn3 | cuda_attn_L384_bfloat16 ------------------------------------------------------- ------------ ------------ 
------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - hf_kernels_flash_attn3 2.68% 125.944us 42.11% 1.982ms 1.982ms 0.000us 0.00% 3.932ms 3.932ms 1 - FlashAttnFunc 1.98% 92.983us 39.44% 1.856ms 618.639us 0.000us 0.00% 3.932ms 1.311ms 3 - _flash_attn3_48fe103_dirty::fwd 1.14% 53.661us 37.46% 1.763ms 587.645us 2.953ms 100.00% 3.932ms 1.311ms 3 - hf_kernels_flash_attn3 0.00% 0.000us 0.00% 0.000us 0.000us 2.954ms 100.06% 2.954ms 2.954ms 1 -void cutlass::device_kernel<flash::enable_sm80_to_sm... 0.00% 0.000us 0.00% 0.000us 0.000us 2.953ms 100.00% 2.953ms 984.176us 3 - Activity Buffer Request 30.48% 1.434ms 30.48% 1.434ms 1.434ms 979.803us 33.19% 979.803us 979.803us 1 - aten::empty 0.58% 27.450us 0.58% 27.450us 4.575us 0.000us 0.00% 0.000us 0.000us 6 - cudaFuncSetAttribute 0.11% 5.150us 0.11% 5.150us 1.717us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 5.15% 242.396us 5.15% 242.396us 80.799us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 57.89% 2.724ms 57.89% 2.724ms 2.724ms 0.000us 0.00% 0.000us 0.000us 1 + hf_kernels_flash_attn3 2.33% 117.613us 45.39% 2.290ms 2.290ms 0.000us 0.00% 3.984ms 3.984ms 1 + FlashAttnFunc 1.82% 91.609us 43.06% 2.172ms 724.120us 0.000us 0.00% 3.984ms 1.328ms 3 + _flash_attn3_48fe103_dirty::fwd 0.95% 47.941us 41.24% 2.081ms 693.584us 2.967ms 100.00% 3.984ms 1.328ms 3 + hf_kernels_flash_attn3 0.00% 0.000us 0.00% 0.000us 0.000us 2.968ms 100.05% 2.968ms 2.968ms 1 +void cutlass::device_kernel<flash::enable_sm80_to_sm... 
0.00% 0.000us 0.00% 0.000us 0.000us 2.967ms 100.00% 2.967ms 988.843us 3 + Activity Buffer Request 35.42% 1.787ms 35.42% 1.787ms 1.787ms 1.017ms 34.30% 1.017ms 1.017ms 1 + aten::empty 0.56% 28.180us 0.56% 28.180us 4.697us 0.000us 0.00% 0.000us 0.000us 6 + cudaFuncSetAttribute 0.10% 5.080us 0.10% 5.080us 1.693us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 4.21% 212.544us 4.21% 212.544us 70.848us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 54.61% 2.755ms 54.61% 2.755ms 2.755ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 4.706ms -Self CUDA time total: 2.953ms +Self CPU time total: 5.045ms +Self CUDA time total: 2.967ms @@ -4248,19 +4030,19 @@ PROFILE TRACE: hf_kernels_flash_attn3 | cuda_attn_L448_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - hf_kernels_flash_attn3 2.36% 122.892us 37.59% 1.960ms 1.960ms 0.000us 0.00% 4.622ms 4.622ms 1 - FlashAttnFunc 1.74% 90.533us 35.23% 1.837ms 612.429us 0.000us 0.00% 4.622ms 1.541ms 3 - _flash_attn3_48fe103_dirty::fwd 0.97% 50.750us 33.49% 1.747ms 582.252us 3.470ms 100.00% 4.622ms 1.541ms 3 - hf_kernels_flash_attn3 0.00% 0.000us 0.00% 0.000us 0.000us 3.472ms 100.05% 3.472ms 3.472ms 1 -void cutlass::device_kernel<flash::enable_sm80_to_sm... 
0.00% 0.000us 0.00% 0.000us 0.000us 3.470ms 100.00% 3.470ms 1.157ms 3 - Activity Buffer Request 27.49% 1.433ms 27.49% 1.433ms 1.433ms 1.152ms 33.20% 1.152ms 1.152ms 1 - aten::empty 0.51% 26.592us 0.51% 26.592us 4.432us 0.000us 0.00% 0.000us 0.000us 6 - cudaFuncSetAttribute 0.10% 5.060us 0.10% 5.060us 1.687us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 4.43% 230.856us 4.43% 230.856us 76.952us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 62.41% 3.255ms 62.41% 3.255ms 3.255ms 0.000us 0.00% 0.000us 0.000us 1 + hf_kernels_flash_attn3 2.35% 128.980us 39.64% 2.179ms 2.179ms 0.000us 0.00% 4.722ms 4.722ms 1 + FlashAttnFunc 1.64% 90.214us 37.30% 2.050ms 683.484us 0.000us 0.00% 4.722ms 1.574ms 3 + _flash_attn3_48fe103_dirty::fwd 0.87% 47.980us 35.66% 1.960ms 653.413us 3.530ms 100.00% 4.722ms 1.574ms 3 + hf_kernels_flash_attn3 0.00% 0.000us 0.00% 0.000us 0.000us 3.532ms 100.04% 3.532ms 3.532ms 1 +void cutlass::device_kernel<flash::enable_sm80_to_sm... 0.00% 0.000us 0.00% 0.000us 0.000us 3.530ms 100.00% 3.530ms 1.177ms 3 + Activity Buffer Request 31.21% 1.716ms 31.21% 1.716ms 1.716ms 1.192ms 33.75% 1.192ms 1.192ms 1 + aten::empty 0.49% 26.830us 0.49% 26.830us 4.472us 0.000us 0.00% 0.000us 0.000us 6 + cudaFuncSetAttribute 0.09% 5.100us 0.09% 5.100us 1.700us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 2.99% 164.492us 2.99% 164.492us 54.831us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 60.36% 3.318ms 60.36% 3.318ms 3.318ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 5.215ms -Self CUDA time total: 3.470ms +Self CPU time total: 5.497ms +Self CUDA time total: 3.530ms @@ -4270,33 +4052,33 @@ PROFILE TRACE: hf_kernels_flash_attn3 | cuda_attn_L512_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ 
------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - hf_kernels_flash_attn3 2.32% 120.892us 37.51% 1.951ms 1.951ms 0.000us 0.00% 4.639ms 4.639ms 1 - FlashAttnFunc 1.74% 90.773us 35.18% 1.830ms 610.133us 0.000us 0.00% 4.639ms 1.546ms 3 - _flash_attn3_48fe103_dirty::fwd 0.99% 51.351us 33.44% 1.740ms 579.875us 3.468ms 100.00% 4.639ms 1.546ms 3 - hf_kernels_flash_attn3 0.00% 0.000us 0.00% 0.000us 0.000us 3.469ms 100.05% 3.469ms 3.469ms 1 -void cutlass::device_kernel<flash::enable_sm80_to_sm... 0.00% 0.000us 0.00% 0.000us 0.000us 3.468ms 100.00% 3.468ms 1.156ms 3 - Activity Buffer Request 27.26% 1.418ms 27.26% 1.418ms 1.418ms 1.172ms 33.79% 1.172ms 1.172ms 1 - aten::empty 0.51% 26.560us 0.51% 26.560us 4.427us 0.000us 0.00% 0.000us 0.000us 6 - cudaFuncSetAttribute 0.10% 5.101us 0.10% 5.101us 1.700us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 4.58% 238.367us 4.58% 238.367us 79.456us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 62.49% 3.251ms 62.49% 3.251ms 3.251ms 0.000us 0.00% 0.000us 0.000us 1 + hf_kernels_flash_attn3 2.16% 118.543us 39.14% 2.150ms 2.150ms 0.000us 0.00% 4.736ms 4.736ms 1 + FlashAttnFunc 1.66% 91.361us 36.98% 2.032ms 677.186us 0.000us 0.00% 4.736ms 1.579ms 3 + _flash_attn3_48fe103_dirty::fwd 0.85% 46.593us 35.32% 1.940ms 646.733us 3.555ms 100.00% 4.736ms 1.579ms 3 + hf_kernels_flash_attn3 0.00% 0.000us 0.00% 0.000us 0.000us 3.556ms 100.04% 3.556ms 3.556ms 1 +void cutlass::device_kernel<flash::enable_sm80_to_sm... 
0.00% 0.000us 0.00% 0.000us 0.000us 3.555ms 100.00% 3.555ms 1.185ms 3 + Activity Buffer Request 30.64% 1.683ms 30.64% 1.683ms 1.683ms 1.181ms 33.22% 1.181ms 1.181ms 1 + aten::empty 0.50% 27.560us 0.50% 27.560us 4.593us 0.000us 0.00% 0.000us 0.000us 6 + cudaFuncSetAttribute 0.09% 5.069us 0.09% 5.069us 1.690us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 3.23% 177.672us 3.23% 177.672us 59.224us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 60.86% 3.344ms 60.86% 3.344ms 3.344ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 5.202ms -Self CUDA time total: 3.468ms +Self CPU time total: 5.494ms +Self CUDA time total: 3.555ms impl wl p50(ms) ok -hf_kernels_flash_attn3 cuda_attn_L128_bfloat16 0.92 True -hf_kernels_flash_attn3 cuda_attn_L256_bfloat16 0.96 True -hf_kernels_flash_attn3 cuda_attn_L320_bfloat16 1.01 True -hf_kernels_flash_attn3 cuda_attn_L384_bfloat16 1.03 True -hf_kernels_flash_attn3 cuda_attn_L448_bfloat16 1.20 True -hf_kernels_flash_attn3 cuda_attn_L512_bfloat16 1.18 True +hf_kernels_flash_attn3 cuda_attn_L128_bfloat16 0.94 True +hf_kernels_flash_attn3 cuda_attn_L256_bfloat16 0.98 True +hf_kernels_flash_attn3 cuda_attn_L320_bfloat16 1.05 True +hf_kernels_flash_attn3 cuda_attn_L384_bfloat16 1.04 True +hf_kernels_flash_attn3 cuda_attn_L448_bfloat16 1.22 True +hf_kernels_flash_attn3 cuda_attn_L512_bfloat16 1.21 True
Fetching 4 files: 0%| | 0/4 [00:00<?, ?it/s] -Fetching 4 files: 50%|█████ | 2/4 [00:01<00:01, 1.42it/s] -Fetching 4 files: 100%|██████████| 4/4 [00:01<00:00, 2.84it/s] +Fetching 4 files: 50%|█████ | 2/4 [00:01<00:01, 1.28it/s] +Fetching 4 files: 100%|██████████| 4/4 [00:01<00:00, 2.57it/s]

Artifacts:

diff --git a/flash_attn/impls/mem_efficient_attention.html b/flash_attn/impls/mem_efficient_attention.html index e6d938b9f4ce572baa96778a2f0d11d329ead530..f5f07241a84247a93bbab4ba9cdd19d066182d82 100644 --- a/flash_attn/impls/mem_efficient_attention.html +++ b/flash_attn/impls/mem_efficient_attention.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - 
-:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); 
border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } 
+:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: 
flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: 
var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { 
color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: 
var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ 
transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s 
ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { 
background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { 
margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { 
.cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; 
font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception 
*/ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: 
#BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* 
Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] 
.highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] 
.highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: 
var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; 
margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4106,7 +3888,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: benchmark | 3.94s +Cell: benchmark | 4.18s | Raw @@ -4159,28 +3941,28 @@ PROFILE TRACE: torch_mem_eff | cuda_attn_L128_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - torch_mem_eff 5.14% 365.276us 32.53% 2.313ms 2.313ms 0.000us 0.00% 5.511ms 5.511ms 1 - torch_mem_eff 0.00% 0.000us 0.00% 0.000us 0.000us 5.492ms 100.58% 5.492ms 5.492ms 1 - aten::scaled_dot_product_attention 0.43% 30.401us 2.47% 175.534us 58.511us 0.000us 0.00% 4.841ms 1.614ms 3 - aten::_scaled_dot_product_efficient_attention 0.33% 23.489us 2.04% 145.133us 48.378us 0.000us 0.00% 4.841ms 1.614ms 3 - aten::_efficient_attention_forward 0.51% 36.572us 1.40% 99.733us 33.244us 4.841ms 88.65% 4.841ms 1.614ms 3 -fmha_cutlassF_bf16_aligned_64x128_rf_sm80(PyTorchMem... 0.00% 0.000us 0.00% 0.000us 0.000us 4.841ms 88.65% 4.841ms 1.614ms 3 - aten::contiguous 0.18% 12.851us 23.99% 1.706ms 189.523us 0.000us 0.00% 670.241us 74.471us 9 - aten::clone 0.46% 32.742us 23.80% 1.693ms 188.095us 0.000us 0.00% 670.241us 74.471us 9 - aten::copy_ 1.05% 74.801us 22.33% 1.588ms 176.415us 619.776us 11.35% 670.241us 74.471us 9 -void at::native::elementwise_kernel<128, 4, at::nati... 
0.00% 0.000us 0.00% 0.000us 0.000us 619.776us 11.35% 619.776us 68.864us 9 - Activity Buffer Request 20.17% 1.434ms 20.17% 1.434ms 1.434ms 50.465us 0.92% 50.465us 50.465us 1 - aten::transpose 0.93% 66.224us 1.25% 88.644us 3.693us 0.000us 0.00% 0.000us 0.000us 24 - aten::as_strided 0.32% 22.420us 0.32% 22.420us 0.934us 0.000us 0.00% 0.000us 0.000us 24 - aten::empty_like 0.25% 17.919us 1.02% 72.382us 8.042us 0.000us 0.00% 0.000us 0.000us 9 - aten::empty 1.14% 81.114us 1.14% 81.114us 3.863us 0.000us 0.00% 0.000us 0.000us 21 - cudaLaunchKernel 1.46% 103.973us 1.46% 103.973us 8.664us 0.000us 0.00% 0.000us 0.000us 12 - cudaStreamIsCapturing 0.04% 2.960us 0.04% 2.960us 0.987us 0.000us 0.00% 0.000us 0.000us 3 - cudaFuncSetAttribute 0.12% 8.310us 0.12% 8.310us 2.770us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 67.47% 4.798ms 67.47% 4.798ms 4.798ms 0.000us 0.00% 0.000us 0.000us 1 + torch_mem_eff 4.45% 324.566us 35.26% 2.573ms 2.573ms 0.000us 0.00% 5.439ms 5.439ms 1 + torch_mem_eff 0.00% 0.000us 0.00% 0.000us 0.000us 5.406ms 100.38% 5.406ms 5.406ms 1 + aten::scaled_dot_product_attention 0.42% 30.389us 2.31% 168.211us 56.070us 0.000us 0.00% 4.771ms 1.590ms 3 + aten::_scaled_dot_product_efficient_attention 0.30% 21.751us 1.89% 137.822us 45.941us 0.000us 0.00% 4.771ms 1.590ms 3 + aten::_efficient_attention_forward 0.46% 33.370us 1.30% 95.011us 31.670us 4.771ms 88.58% 4.771ms 1.590ms 3 +fmha_cutlassF_bf16_aligned_64x128_rf_sm80(PyTorchMem... 0.00% 0.000us 0.00% 0.000us 0.000us 4.771ms 88.58% 4.771ms 1.590ms 3 + aten::contiguous 0.14% 10.493us 27.68% 2.020ms 224.395us 0.000us 0.00% 668.482us 74.276us 9 + aten::clone 0.39% 28.130us 27.53% 2.009ms 223.229us 0.000us 0.00% 668.482us 74.276us 9 + aten::copy_ 1.01% 73.701us 26.23% 1.914ms 212.678us 614.946us 11.42% 668.482us 74.276us 9 +void at::native::elementwise_kernel<128, 4, at::nati... 
0.00% 0.000us 0.00% 0.000us 0.000us 614.946us 11.42% 614.946us 68.327us 9 + Activity Buffer Request 24.11% 1.759ms 24.11% 1.759ms 1.759ms 53.536us 0.99% 53.536us 53.536us 1 + aten::transpose 0.83% 60.400us 1.12% 81.609us 3.400us 0.000us 0.00% 0.000us 0.000us 24 + aten::as_strided 0.29% 21.209us 0.29% 21.209us 0.884us 0.000us 0.00% 0.000us 0.000us 24 + aten::empty_like 0.20% 14.439us 0.92% 66.830us 7.426us 0.000us 0.00% 0.000us 0.000us 9 + aten::empty 1.09% 79.191us 1.09% 79.191us 3.771us 0.000us 0.00% 0.000us 0.000us 21 + cudaLaunchKernel 1.43% 104.332us 1.43% 104.332us 8.694us 0.000us 0.00% 0.000us 0.000us 12 + cudaStreamIsCapturing 0.04% 3.220us 0.04% 3.220us 1.073us 0.000us 0.00% 0.000us 0.000us 3 + cudaFuncSetAttribute 0.12% 8.781us 0.12% 8.781us 2.927us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 64.74% 4.724ms 64.74% 4.724ms 4.724ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 7.111ms -Self CUDA time total: 5.460ms +Self CPU time total: 7.297ms +Self CUDA time total: 5.386ms @@ -4190,28 +3972,28 @@ PROFILE TRACE: torch_mem_eff | cuda_attn_L256_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - torch_mem_eff 3.28% 242.746us 28.00% 2.075ms 2.075ms 0.000us 0.00% 5.933ms 5.933ms 1 - torch_mem_eff 0.00% 0.000us 0.00% 0.000us 0.000us 5.886ms 100.14% 5.886ms 5.886ms 1 - aten::scaled_dot_product_attention 0.25% 18.240us 
1.89% 140.073us 46.691us 0.000us 0.00% 5.241ms 1.747ms 3 - aten::_scaled_dot_product_efficient_attention 0.25% 18.689us 1.64% 121.833us 40.611us 0.000us 0.00% 5.241ms 1.747ms 3 - aten::_efficient_attention_forward 0.38% 28.462us 1.09% 81.063us 27.021us 5.241ms 89.17% 5.241ms 1.747ms 3 -fmha_cutlassF_bf16_aligned_64x128_rf_sm80(PyTorchMem... 0.00% 0.000us 0.00% 0.000us 0.000us 5.241ms 89.17% 5.241ms 1.747ms 3 - aten::contiguous 0.10% 7.041us 22.26% 1.650ms 183.285us 0.000us 0.00% 691.103us 76.789us 9 - aten::clone 0.29% 21.342us 22.17% 1.643ms 182.503us 0.000us 0.00% 691.103us 76.789us 9 - aten::copy_ 0.86% 63.451us 21.24% 1.574ms 174.872us 636.671us 10.83% 691.103us 76.789us 9 -void at::native::elementwise_kernel<128, 4, at::nati... 0.00% 0.000us 0.00% 0.000us 0.000us 636.671us 10.83% 636.671us 70.741us 9 - Activity Buffer Request 19.50% 1.445ms 19.50% 1.445ms 1.445ms 54.432us 0.93% 54.432us 54.432us 1 - aten::transpose 0.64% 47.650us 0.87% 64.701us 2.696us 0.000us 0.00% 0.000us 0.000us 24 - aten::as_strided 0.23% 17.051us 0.23% 17.051us 0.710us 0.000us 0.00% 0.000us 0.000us 24 - aten::empty_like 0.16% 11.589us 0.64% 47.330us 5.259us 0.000us 0.00% 0.000us 0.000us 9 - aten::empty 0.82% 60.521us 0.82% 60.521us 2.882us 0.000us 0.00% 0.000us 0.000us 21 - cudaLaunchKernel 1.19% 88.044us 1.19% 88.044us 7.337us 0.000us 0.00% 0.000us 0.000us 12 - cudaStreamIsCapturing 0.03% 2.420us 0.03% 2.420us 0.807us 0.000us 0.00% 0.000us 0.000us 3 - cudaFuncSetAttribute 0.04% 3.030us 0.04% 3.030us 1.010us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 72.00% 5.335ms 72.00% 5.335ms 5.335ms 0.000us 0.00% 0.000us 0.000us 1 + torch_mem_eff 3.09% 234.954us 31.65% 2.404ms 2.404ms 0.000us 0.00% 5.782ms 5.782ms 1 + torch_mem_eff 0.00% 0.000us 0.00% 0.000us 0.000us 5.735ms 100.14% 5.735ms 5.735ms 1 + aten::scaled_dot_product_attention 0.22% 16.961us 1.81% 137.382us 45.794us 0.000us 0.00% 5.091ms 1.697ms 3 + aten::_scaled_dot_product_efficient_attention 0.25% 19.139us 1.59% 120.421us 
40.140us 0.000us 0.00% 5.091ms 1.697ms 3 + aten::_efficient_attention_forward 0.36% 27.009us 1.04% 78.740us 26.247us 5.091ms 88.89% 5.091ms 1.697ms 3 +fmha_cutlassF_bf16_aligned_64x128_rf_sm80(PyTorchMem... 0.00% 0.000us 0.00% 0.000us 0.000us 5.091ms 88.89% 5.091ms 1.697ms 3 + aten::contiguous 0.11% 8.479us 26.21% 1.991ms 221.170us 0.000us 0.00% 690.720us 76.747us 9 + aten::clone 0.29% 22.002us 26.10% 1.982ms 220.228us 0.000us 0.00% 690.720us 76.747us 9 + aten::copy_ 0.83% 62.671us 25.16% 1.911ms 212.305us 636.032us 11.11% 690.720us 76.747us 9 +void at::native::elementwise_kernel<128, 4, at::nati... 0.00% 0.000us 0.00% 0.000us 0.000us 636.032us 11.11% 636.032us 70.670us 9 + Activity Buffer Request 23.48% 1.783ms 23.48% 1.783ms 1.783ms 54.688us 0.95% 54.688us 54.688us 1 + aten::transpose 0.64% 48.410us 0.84% 63.823us 2.659us 0.000us 0.00% 0.000us 0.000us 24 + aten::as_strided 0.20% 15.413us 0.20% 15.413us 0.642us 0.000us 0.00% 0.000us 0.000us 24 + aten::empty_like 0.15% 11.729us 0.65% 49.301us 5.478us 0.000us 0.00% 0.000us 0.000us 9 + aten::empty 0.82% 62.552us 0.82% 62.552us 2.979us 0.000us 0.00% 0.000us 0.000us 21 + cudaLaunchKernel 1.14% 86.431us 1.14% 86.431us 7.203us 0.000us 0.00% 0.000us 0.000us 12 + cudaStreamIsCapturing 0.03% 2.280us 0.03% 2.280us 0.760us 0.000us 0.00% 0.000us 0.000us 3 + cudaFuncSetAttribute 0.04% 2.990us 0.04% 2.990us 0.997us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 68.35% 5.191ms 68.35% 5.191ms 5.191ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 7.410ms -Self CUDA time total: 5.878ms +Self CPU time total: 7.595ms +Self CUDA time total: 5.727ms @@ -4221,28 +4003,28 @@ PROFILE TRACE: torch_mem_eff | cuda_attn_L320_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ 
------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - torch_mem_eff 3.21% 244.055us 27.47% 2.092ms 2.092ms 0.000us 0.00% 6.130ms 6.130ms 1 - torch_mem_eff 0.00% 0.000us 0.00% 0.000us 0.000us 6.080ms 100.14% 6.080ms 6.080ms 1 - aten::scaled_dot_product_attention 0.23% 17.641us 1.86% 141.944us 47.315us 0.000us 0.00% 5.414ms 1.805ms 3 - aten::_scaled_dot_product_efficient_attention 0.25% 19.359us 1.63% 124.303us 41.434us 0.000us 0.00% 5.414ms 1.805ms 3 - aten::_efficient_attention_forward 0.37% 28.219us 1.06% 80.592us 26.864us 5.414ms 89.17% 5.414ms 1.805ms 3 -fmha_cutlassF_bf16_aligned_64x128_rf_sm80(PyTorchMem... 0.00% 0.000us 0.00% 0.000us 0.000us 5.414ms 89.17% 5.414ms 1.805ms 3 - aten::contiguous 0.11% 8.060us 21.81% 1.661ms 184.510us 0.000us 0.00% 716.192us 79.577us 9 - aten::clone 0.29% 22.431us 21.70% 1.653ms 183.615us 0.000us 0.00% 716.192us 79.577us 9 - aten::copy_ 0.81% 61.641us 20.75% 1.580ms 175.564us 657.728us 10.83% 716.192us 79.577us 9 -void at::native::elementwise_kernel<128, 4, at::nati... 
0.00% 0.000us 0.00% 0.000us 0.000us 657.728us 10.83% 657.728us 73.081us 9 - Activity Buffer Request 19.08% 1.453ms 19.08% 1.453ms 1.453ms 58.464us 0.96% 58.464us 58.464us 1 - aten::transpose 0.69% 52.203us 0.92% 69.763us 2.907us 0.000us 0.00% 0.000us 0.000us 24 - aten::as_strided 0.23% 17.560us 0.23% 17.560us 0.732us 0.000us 0.00% 0.000us 0.000us 24 - aten::empty_like 0.15% 11.581us 0.66% 50.023us 5.558us 0.000us 0.00% 0.000us 0.000us 9 - aten::empty 0.84% 63.785us 0.84% 63.785us 3.037us 0.000us 0.00% 0.000us 0.000us 21 - cudaLaunchKernel 1.14% 86.832us 1.14% 86.832us 7.236us 0.000us 0.00% 0.000us 0.000us 12 - cudaStreamIsCapturing 0.03% 2.250us 0.03% 2.250us 0.750us 0.000us 0.00% 0.000us 0.000us 3 - cudaFuncSetAttribute 0.04% 3.260us 0.04% 3.260us 1.087us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 72.53% 5.522ms 72.53% 5.522ms 5.522ms 0.000us 0.00% 0.000us 0.000us 1 + torch_mem_eff 3.06% 239.384us 30.93% 2.420ms 2.420ms 0.000us 0.00% 5.994ms 5.994ms 1 + torch_mem_eff 0.00% 0.000us 0.00% 0.000us 0.000us 5.947ms 100.14% 5.947ms 5.947ms 1 + aten::scaled_dot_product_attention 0.22% 17.549us 1.74% 135.892us 45.297us 0.000us 0.00% 5.295ms 1.765ms 3 + aten::_scaled_dot_product_efficient_attention 0.23% 18.333us 1.51% 118.343us 39.448us 0.000us 0.00% 5.295ms 1.765ms 3 + aten::_efficient_attention_forward 0.35% 27.055us 1.01% 79.012us 26.337us 5.295ms 89.16% 5.295ms 1.765ms 3 +fmha_cutlassF_bf16_aligned_64x128_rf_sm80(PyTorchMem... 0.00% 0.000us 0.00% 0.000us 0.000us 5.295ms 89.16% 5.295ms 1.765ms 3 + aten::contiguous 0.10% 7.948us 25.59% 2.002ms 222.464us 0.000us 0.00% 699.457us 77.717us 9 + aten::clone 0.26% 20.152us 25.49% 1.994ms 221.581us 0.000us 0.00% 699.457us 77.717us 9 + aten::copy_ 0.79% 62.172us 24.60% 1.924ms 213.808us 643.713us 10.84% 699.457us 77.717us 9 +void at::native::elementwise_kernel<128, 4, at::nati... 
0.00% 0.000us 0.00% 0.000us 0.000us 643.713us 10.84% 643.713us 71.524us 9 + Activity Buffer Request 22.96% 1.796ms 22.96% 1.796ms 1.796ms 55.744us 0.94% 55.744us 55.744us 1 + aten::transpose 0.61% 48.091us 0.81% 63.198us 2.633us 0.000us 0.00% 0.000us 0.000us 24 + aten::as_strided 0.19% 15.107us 0.19% 15.107us 0.629us 0.000us 0.00% 0.000us 0.000us 24 + aten::empty_like 0.14% 11.152us 0.64% 49.811us 5.535us 0.000us 0.00% 0.000us 0.000us 9 + aten::empty 0.80% 62.567us 0.80% 62.567us 2.979us 0.000us 0.00% 0.000us 0.000us 21 + cudaLaunchKernel 1.12% 87.709us 1.12% 87.709us 7.309us 0.000us 0.00% 0.000us 0.000us 12 + cudaStreamIsCapturing 0.03% 2.429us 0.03% 2.429us 0.810us 0.000us 0.00% 0.000us 0.000us 3 + cudaFuncSetAttribute 0.05% 3.800us 0.05% 3.800us 1.267us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 69.07% 5.404ms 69.07% 5.404ms 5.404ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 7.614ms -Self CUDA time total: 6.072ms +Self CPU time total: 7.823ms +Self CUDA time total: 5.939ms @@ -4252,28 +4034,28 @@ PROFILE TRACE: torch_mem_eff | cuda_attn_L384_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - torch_mem_eff 3.16% 248.365us 29.29% 2.300ms 2.300ms 0.000us 0.00% 6.163ms 6.163ms 1 - torch_mem_eff 0.00% 0.000us 0.00% 0.000us 0.000us 6.114ms 100.14% 6.114ms 6.114ms 1 - aten::scaled_dot_product_attention 0.24% 19.232us 
1.82% 142.774us 47.591us 0.000us 0.00% 5.452ms 1.817ms 3 - aten::_scaled_dot_product_efficient_attention 0.25% 19.461us 1.57% 123.542us 41.181us 0.000us 0.00% 5.452ms 1.817ms 3 - aten::_efficient_attention_forward 0.37% 29.029us 1.03% 80.672us 26.891us 5.452ms 89.29% 5.452ms 1.817ms 3 -fmha_cutlassF_bf16_aligned_64x128_rf_sm80(PyTorchMem... 0.00% 0.000us 0.00% 0.000us 0.000us 5.452ms 89.29% 5.452ms 1.817ms 3 - aten::contiguous 0.10% 7.931us 23.78% 1.867ms 207.435us 0.000us 0.00% 711.072us 79.008us 9 - aten::clone 0.30% 23.532us 23.68% 1.859ms 206.554us 0.000us 0.00% 711.072us 79.008us 9 - aten::copy_ 0.81% 63.779us 22.73% 1.785ms 198.306us 653.792us 10.71% 711.072us 79.008us 9 -void at::native::elementwise_kernel<128, 4, at::nati... 0.00% 0.000us 0.00% 0.000us 0.000us 653.792us 10.71% 653.792us 72.644us 9 - Activity Buffer Request 18.59% 1.459ms 18.59% 1.459ms 1.459ms 57.280us 0.94% 57.280us 57.280us 1 - aten::transpose 0.62% 48.610us 0.83% 65.130us 2.714us 0.000us 0.00% 0.000us 0.000us 24 - aten::as_strided 0.21% 16.520us 0.21% 16.520us 0.688us 0.000us 0.00% 0.000us 0.000us 24 - aten::empty_like 0.16% 12.281us 0.65% 50.702us 5.634us 0.000us 0.00% 0.000us 0.000us 9 - aten::empty 0.80% 62.502us 0.80% 62.502us 2.976us 0.000us 0.00% 0.000us 0.000us 21 - cudaLaunchKernel 3.60% 282.729us 3.60% 282.729us 23.561us 0.000us 0.00% 0.000us 0.000us 12 - cudaStreamIsCapturing 0.03% 2.471us 0.03% 2.471us 0.824us 0.000us 0.00% 0.000us 0.000us 3 - cudaFuncSetAttribute 0.05% 4.120us 0.05% 4.120us 1.373us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 70.71% 5.551ms 70.71% 5.551ms 5.551ms 0.000us 0.00% 0.000us 0.000us 1 + torch_mem_eff 3.00% 242.264us 30.89% 2.499ms 2.499ms 0.000us 0.00% 6.191ms 6.191ms 1 + torch_mem_eff 0.00% 0.000us 0.00% 0.000us 0.000us 6.141ms 100.14% 6.141ms 6.141ms 1 + aten::scaled_dot_product_attention 0.23% 18.320us 1.69% 136.812us 45.604us 0.000us 0.00% 5.471ms 1.824ms 3 + aten::_scaled_dot_product_efficient_attention 0.23% 18.630us 1.46% 118.492us 
39.497us 0.000us 0.00% 5.471ms 1.824ms 3 + aten::_efficient_attention_forward 0.33% 26.674us 0.96% 77.952us 25.984us 5.471ms 89.22% 5.471ms 1.824ms 3 +fmha_cutlassF_bf16_aligned_64x128_rf_sm80(PyTorchMem... 0.00% 0.000us 0.00% 0.000us 0.000us 5.471ms 89.22% 5.471ms 1.824ms 3 + aten::contiguous 0.10% 8.440us 25.67% 2.076ms 230.653us 0.000us 0.00% 719.363us 79.929us 9 + aten::clone 0.28% 22.639us 25.56% 2.067ms 229.716us 0.000us 0.00% 719.363us 79.929us 9 + aten::copy_ 0.78% 63.183us 24.67% 1.995ms 221.702us 660.931us 10.78% 719.363us 79.929us 9 +void at::native::elementwise_kernel<128, 4, at::nati... 0.00% 0.000us 0.00% 0.000us 0.000us 660.931us 10.78% 660.931us 73.437us 9 + Activity Buffer Request 21.08% 1.705ms 21.08% 1.705ms 1.705ms 58.432us 0.95% 58.432us 58.432us 1 + aten::transpose 0.61% 49.449us 0.81% 65.670us 2.736us 0.000us 0.00% 0.000us 0.000us 24 + aten::as_strided 0.20% 16.221us 0.20% 16.221us 0.676us 0.000us 0.00% 0.000us 0.000us 24 + aten::empty_like 0.15% 11.742us 0.61% 49.481us 5.498us 0.000us 0.00% 0.000us 0.000us 9 + aten::empty 0.77% 62.526us 0.77% 62.526us 2.977us 0.000us 0.00% 0.000us 0.000us 21 + cudaLaunchKernel 3.07% 248.624us 3.07% 248.624us 20.719us 0.000us 0.00% 0.000us 0.000us 12 + cudaStreamIsCapturing 0.03% 2.250us 0.03% 2.250us 0.750us 0.000us 0.00% 0.000us 0.000us 3 + cudaFuncSetAttribute 0.04% 3.020us 0.04% 3.020us 1.007us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 69.11% 5.590ms 69.11% 5.590ms 5.590ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 7.851ms -Self CUDA time total: 6.106ms +Self CPU time total: 8.088ms +Self CUDA time total: 6.132ms @@ -4283,28 +4065,28 @@ PROFILE TRACE: torch_mem_eff | cuda_attn_L448_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ 
------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - torch_mem_eff 3.01% 243.675us 28.03% 2.272ms 2.272ms 0.000us 0.00% 6.451ms 6.451ms 1 - torch_mem_eff 0.00% 0.000us 0.00% 0.000us 0.000us 6.399ms 100.13% 6.399ms 6.399ms 1 - aten::scaled_dot_product_attention 0.23% 18.671us 1.77% 143.224us 47.741us 0.000us 0.00% 5.726ms 1.909ms 3 - aten::_scaled_dot_product_efficient_attention 0.24% 19.652us 1.54% 124.553us 41.518us 0.000us 0.00% 5.726ms 1.909ms 3 - aten::_efficient_attention_forward 0.35% 28.317us 0.99% 80.642us 26.881us 5.726ms 89.60% 5.726ms 1.909ms 3 -fmha_cutlassF_bf16_aligned_64x128_rf_sm80(PyTorchMem... 0.00% 0.000us 0.00% 0.000us 0.000us 5.726ms 89.60% 5.726ms 1.909ms 3 - aten::contiguous 0.10% 7.791us 22.70% 1.840ms 204.460us 0.000us 0.00% 725.025us 80.558us 9 - aten::clone 0.29% 23.489us 22.61% 1.832ms 203.594us 0.000us 0.00% 725.025us 80.558us 9 - aten::copy_ 0.81% 65.293us 21.68% 1.757ms 195.223us 664.641us 10.40% 725.025us 80.558us 9 -void at::native::elementwise_kernel<128, 4, at::nati... 
0.00% 0.000us 0.00% 0.000us 0.000us 664.641us 10.40% 664.641us 73.849us 9 - Activity Buffer Request 17.77% 1.440ms 17.77% 1.440ms 1.440ms 60.384us 0.94% 60.384us 60.384us 1 - aten::transpose 0.63% 51.151us 0.85% 69.251us 2.885us 0.000us 0.00% 0.000us 0.000us 24 - aten::as_strided 0.22% 18.100us 0.22% 18.100us 0.754us 0.000us 0.00% 0.000us 0.000us 24 - aten::empty_like 0.15% 11.960us 0.64% 51.852us 5.761us 0.000us 0.00% 0.000us 0.000us 9 - aten::empty 0.79% 64.314us 0.79% 64.314us 3.063us 0.000us 0.00% 0.000us 0.000us 21 - cudaLaunchKernel 3.36% 272.117us 3.36% 272.117us 22.676us 0.000us 0.00% 0.000us 0.000us 12 - cudaStreamIsCapturing 0.03% 2.500us 0.03% 2.500us 0.833us 0.000us 0.00% 0.000us 0.000us 3 - cudaFuncSetAttribute 0.06% 4.532us 0.06% 4.532us 1.511us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 71.97% 5.833ms 71.97% 5.833ms 5.833ms 0.000us 0.00% 0.000us 0.000us 1 + torch_mem_eff 2.96% 243.644us 31.20% 2.571ms 2.571ms 0.000us 0.00% 6.270ms 6.270ms 1 + torch_mem_eff 0.00% 0.000us 0.00% 0.000us 0.000us 6.220ms 100.13% 6.220ms 6.220ms 1 + aten::scaled_dot_product_attention 0.22% 18.340us 1.66% 136.411us 45.470us 0.000us 0.00% 5.544ms 1.848ms 3 + aten::_scaled_dot_product_efficient_attention 0.23% 18.620us 1.43% 118.071us 39.357us 0.000us 0.00% 5.544ms 1.848ms 3 + aten::_efficient_attention_forward 0.33% 26.920us 0.94% 77.841us 25.947us 5.544ms 89.24% 5.544ms 1.848ms 3 +fmha_cutlassF_bf16_aligned_64x128_rf_sm80(PyTorchMem... 0.00% 0.000us 0.00% 0.000us 0.000us 5.544ms 89.24% 5.544ms 1.848ms 3 + aten::contiguous 0.10% 8.441us 26.08% 2.149ms 238.754us 0.000us 0.00% 726.626us 80.736us 9 + aten::clone 0.27% 22.559us 25.98% 2.140ms 237.816us 0.000us 0.00% 726.626us 80.736us 9 + aten::copy_ 0.77% 63.181us 25.09% 2.068ms 229.736us 668.130us 10.76% 726.626us 80.736us 9 +void at::native::elementwise_kernel<128, 4, at::nati... 
0.00% 0.000us 0.00% 0.000us 0.000us 668.130us 10.76% 668.130us 74.237us 9 + Activity Buffer Request 21.61% 1.780ms 21.61% 1.780ms 1.780ms 58.496us 0.94% 58.496us 58.496us 1 + aten::transpose 0.58% 47.889us 0.77% 63.801us 2.658us 0.000us 0.00% 0.000us 0.000us 24 + aten::as_strided 0.19% 15.912us 0.19% 15.912us 0.663us 0.000us 0.00% 0.000us 0.000us 24 + aten::empty_like 0.14% 11.871us 0.61% 50.162us 5.574us 0.000us 0.00% 0.000us 0.000us 9 + aten::empty 0.75% 62.051us 0.75% 62.051us 2.955us 0.000us 0.00% 0.000us 0.000us 21 + cudaLaunchKernel 2.98% 245.563us 2.98% 245.563us 20.464us 0.000us 0.00% 0.000us 0.000us 12 + cudaStreamIsCapturing 0.03% 2.280us 0.03% 2.280us 0.760us 0.000us 0.00% 0.000us 0.000us 3 + cudaFuncSetAttribute 0.04% 3.301us 0.04% 3.301us 1.100us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 68.80% 5.669ms 68.80% 5.669ms 5.669ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 8.105ms -Self CUDA time total: 6.391ms +Self CPU time total: 8.240ms +Self CUDA time total: 6.212ms @@ -4314,37 +4096,37 @@ PROFILE TRACE: torch_mem_eff | cuda_attn_L512_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - torch_mem_eff 2.88% 242.135us 27.00% 2.269ms 2.269ms 0.000us 0.00% 6.759ms 6.759ms 1 - torch_mem_eff 0.00% 0.000us 0.00% 0.000us 0.000us 6.705ms 100.12% 6.705ms 6.705ms 1 - aten::scaled_dot_product_attention 0.21% 17.851us 
1.72% 144.884us 48.295us 0.000us 0.00% 6.024ms 2.008ms 3 - aten::_scaled_dot_product_efficient_attention 0.23% 19.591us 1.51% 127.033us 42.344us 0.000us 0.00% 6.024ms 2.008ms 3 - aten::_efficient_attention_forward 0.34% 28.520us 0.97% 81.532us 27.177us 6.024ms 89.96% 6.024ms 2.008ms 3 -fmha_cutlassF_bf16_aligned_64x128_rf_sm80(PyTorchMem... 0.00% 0.000us 0.00% 0.000us 0.000us 6.024ms 89.96% 6.024ms 2.008ms 3 - aten::contiguous 0.10% 8.099us 21.87% 1.838ms 204.242us 0.000us 0.00% 734.178us 81.575us 9 - aten::clone 0.28% 23.122us 21.78% 1.830ms 203.342us 0.000us 0.00% 734.178us 81.575us 9 - aten::copy_ 0.74% 62.180us 20.86% 1.753ms 194.799us 672.322us 10.04% 734.178us 81.575us 9 -void at::native::elementwise_kernel<128, 4, at::nati... 0.00% 0.000us 0.00% 0.000us 0.000us 672.322us 10.04% 672.322us 74.702us 9 - Activity Buffer Request 17.19% 1.445ms 17.19% 1.445ms 1.445ms 61.856us 0.92% 61.856us 61.856us 1 - aten::transpose 0.62% 52.351us 0.83% 70.022us 2.918us 0.000us 0.00% 0.000us 0.000us 24 - aten::as_strided 0.21% 17.671us 0.21% 17.671us 0.736us 0.000us 0.00% 0.000us 0.000us 24 - aten::empty_like 0.15% 12.653us 0.64% 53.763us 5.974us 0.000us 0.00% 0.000us 0.000us 9 - aten::empty 0.79% 66.761us 0.79% 66.761us 3.179us 0.000us 0.00% 0.000us 0.000us 21 - cudaLaunchKernel 3.19% 267.907us 3.19% 267.907us 22.326us 0.000us 0.00% 0.000us 0.000us 12 - cudaStreamIsCapturing 0.03% 2.430us 0.03% 2.430us 0.810us 0.000us 0.00% 0.000us 0.000us 3 - cudaFuncSetAttribute 0.04% 3.350us 0.04% 3.350us 1.117us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 73.00% 6.134ms 73.00% 6.134ms 6.134ms 0.000us 0.00% 0.000us 0.000us 1 + torch_mem_eff 2.78% 238.352us 29.12% 2.495ms 2.495ms 0.000us 0.00% 6.680ms 6.680ms 1 + torch_mem_eff 0.00% 0.000us 0.00% 0.000us 0.000us 6.628ms 100.13% 6.628ms 6.628ms 1 + aten::scaled_dot_product_attention 0.31% 26.242us 1.71% 146.743us 48.914us 0.000us 0.00% 5.945ms 1.982ms 3 + aten::_scaled_dot_product_efficient_attention 0.23% 19.839us 1.41% 120.501us 
40.167us 0.000us 0.00% 5.945ms 1.982ms 3 + aten::_efficient_attention_forward 0.31% 26.859us 0.92% 78.900us 26.300us 5.945ms 89.80% 5.945ms 1.982ms 3 +fmha_cutlassF_bf16_aligned_64x128_rf_sm80(PyTorchMem... 0.00% 0.000us 0.00% 0.000us 0.000us 5.945ms 89.80% 5.945ms 1.982ms 3 + aten::contiguous 0.09% 7.528us 24.13% 2.068ms 229.726us 0.000us 0.00% 735.685us 81.743us 9 + aten::clone 0.24% 20.962us 24.04% 2.060ms 228.889us 0.000us 0.00% 735.685us 81.743us 9 + aten::copy_ 0.75% 64.071us 23.20% 1.988ms 220.897us 675.044us 10.20% 735.685us 81.743us 9 +void at::native::elementwise_kernel<128, 4, at::nati... 0.00% 0.000us 0.00% 0.000us 0.000us 675.044us 10.20% 675.044us 75.005us 9 + Activity Buffer Request 19.86% 1.702ms 19.86% 1.702ms 1.702ms 60.641us 0.92% 60.641us 60.641us 1 + aten::transpose 0.56% 47.940us 0.74% 63.783us 2.658us 0.000us 0.00% 0.000us 0.000us 24 + aten::as_strided 0.18% 15.843us 0.18% 15.843us 0.660us 0.000us 0.00% 0.000us 0.000us 24 + aten::empty_like 0.13% 11.513us 0.59% 50.972us 5.664us 0.000us 0.00% 0.000us 0.000us 9 + aten::empty 0.75% 64.430us 0.75% 64.430us 3.068us 0.000us 0.00% 0.000us 0.000us 21 + cudaLaunchKernel 2.85% 243.883us 2.85% 243.883us 20.324us 0.000us 0.00% 0.000us 0.000us 12 + cudaStreamIsCapturing 0.03% 2.530us 0.03% 2.530us 0.843us 0.000us 0.00% 0.000us 0.000us 3 + cudaFuncSetAttribute 0.04% 3.050us 0.04% 3.050us 1.017us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 70.88% 6.073ms 70.88% 6.073ms 6.073ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 8.404ms -Self CUDA time total: 6.697ms +Self CPU time total: 8.568ms +Self CUDA time total: 6.620ms impl wl p50(ms) ok -torch_mem_eff cuda_attn_L128_bfloat16 1.85 True -torch_mem_eff cuda_attn_L256_bfloat16 1.95 True -torch_mem_eff cuda_attn_L320_bfloat16 1.99 True -torch_mem_eff 
cuda_attn_L384_bfloat16 2.07 True -torch_mem_eff cuda_attn_L448_bfloat16 2.06 True -torch_mem_eff cuda_attn_L512_bfloat16 2.25 True +torch_mem_eff cuda_attn_L128_bfloat16 1.83 True +torch_mem_eff cuda_attn_L256_bfloat16 1.94 True +torch_mem_eff cuda_attn_L320_bfloat16 1.96 True +torch_mem_eff cuda_attn_L384_bfloat16 2.03 True +torch_mem_eff cuda_attn_L448_bfloat16 2.02 True +torch_mem_eff cuda_attn_L512_bfloat16 2.23 True

Artifacts:

diff --git a/flash_attn/impls/sage_attention.html b/flash_attn/impls/sage_attention.html index c964f0f922939bcdffdf70f7e986e24de2938dac..6358ec975b53c371d4ab1abb8760cb58631b1257 100644 --- a/flash_attn/impls/sage_attention.html +++ b/flash_attn/impls/sage_attention.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: 
var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } 
+:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: 
var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: 
flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); 
border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); 
background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { 
font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: 
var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid 
var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); 
@@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 
{ margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; 
} - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight 
.gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ 
+[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { 
color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* 
Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] 
.highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] 
.highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - 
} -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ 
body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4104,9 +3886,9 @@ body[data-tool="eraser"] .main-content { ▼ code ▼ output - ▶ uv-logs + ▶ uv-logs | -Cell: benchmark | 4.69s +Cell: benchmark | 4.59s | Raw @@ -4156,27 +3938,23 @@ Cell: benchmark | 4.69s
Running attention benchmark on cuda with 6 workloads.
 impl                     wl                  p50(ms)  ok
 sage_int8_fp16           cuda_attn_L128_bfloat16    FAIL  False
-  Error: module 'sage_attention_ef0573391bb63704' has no attribute 'fwd'
+  Error: module 'sage_attention_d37081df98a5208e' has no attribute 'fwd'
 sage_int8_fp16           cuda_attn_L256_bfloat16    FAIL  False
-  Error: module 'sage_attention_ef0573391bb63704' has no attribute 'fwd'
+  Error: module 'sage_attention_d37081df98a5208e' has no attribute 'fwd'
 sage_int8_fp16           cuda_attn_L320_bfloat16    FAIL  False
-  Error: module 'sage_attention_ef0573391bb63704' has no attribute 'fwd'
+  Error: module 'sage_attention_d37081df98a5208e' has no attribute 'fwd'
 sage_int8_fp16           cuda_attn_L384_bfloat16    FAIL  False
-  Error: module 'sage_attention_ef0573391bb63704' has no attribute 'fwd'
+  Error: module 'sage_attention_d37081df98a5208e' has no attribute 'fwd'
 sage_int8_fp16           cuda_attn_L448_bfloat16    FAIL  False
-  Error: module 'sage_attention_ef0573391bb63704' has no attribute 'fwd'
+  Error: module 'sage_attention_d37081df98a5208e' has no attribute 'fwd'
 sage_int8_fp16           cuda_attn_L512_bfloat16    FAIL  False
-  Error: module 'sage_attention_ef0573391bb63704' has no attribute 'fwd'
+  Error: module 'sage_attention_d37081df98a5208e' has no attribute 'fwd'
 
-
-
▶ UV Install Logs
- +
+Fetching 11 files: 0%| | 0/11 [00:00<?, ?it/s] +Fetching 11 files: 73%|███████▎ | 8/11 [00:00<00:00, 11.98it/s] +Fetching 11 files: 100%|██████████| 11/11 [00:00<00:00, 16.47it/s]
-
Fetching 11 files: 0%| | 0/11 [00:00<?, ?it/s] -Fetching 11 files: 73%|███████▎ | 8/11 [00:00<00:00, 11.73it/s] -Fetching 11 files: 100%|██████████| 11/11 [00:00<00:00, 16.12it/s]

Artifacts:

attention.jsonl diff --git a/flash_attn/impls/xformers.html b/flash_attn/impls/xformers.html index 3e1c781413a91f403396426a1c99ea9ec7673187..091690152ddc57b2216a17d1440a96b4ad207eff 100644 --- a/flash_attn/impls/xformers.html +++ b/flash_attn/impls/xformers.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: 
var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } 
+:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: 
var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: 
flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); 
border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); 
background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { 
font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: 
var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid 
var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); 
@@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 
{ margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; 
} - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight 
.gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ 
+[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { 
color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* 
Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] 
.highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] 
.highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - 
} -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ 
body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4106,7 +3888,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: benchmark | 33.71s +Cell: benchmark | 5.74s | Raw @@ -4158,21 +3940,21 @@ PROFILE TRACE: xformers_meff | cuda_attn_L128_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - xformers_meff 10.98% 488.134us 52.82% 2.349ms 2.349ms 0.000us 0.00% 3.539ms 3.539ms 1 - xformers_flash3::flash_fwd 4.45% 198.034us 41.02% 1.824ms 608.009us 0.000us 0.00% 3.539ms 1.180ms 3 - flash_attn_3::fwd 1.81% 80.354us 36.57% 1.626ms 541.997us 2.647ms 100.00% 3.539ms 1.180ms 3 - xformers_meff 0.00% 0.000us 0.00% 0.000us 0.000us 2.648ms 100.06% 2.648ms 2.648ms 1 -void cutlass::device_kernel<flash::enable_sm80_to_sm... 
0.00% 0.000us 0.00% 0.000us 0.000us 2.647ms 100.00% 2.647ms 882.203us 3 - Activity Buffer Request 32.65% 1.452ms 32.65% 1.452ms 1.452ms 892.891us 33.74% 892.891us 892.891us 1 - aten::empty 0.78% 34.470us 0.78% 34.470us 5.745us 0.000us 0.00% 0.000us 0.000us 6 - cudaFuncSetAttribute 0.26% 11.370us 0.26% 11.370us 3.790us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 1.08% 47.851us 1.08% 47.851us 15.950us 0.000us 0.00% 0.000us 0.000us 3 - aten::reshape 0.28% 12.261us 0.82% 36.420us 6.070us 0.000us 0.00% 0.000us 0.000us 6 - aten::view 0.54% 24.159us 0.54% 24.159us 4.026us 0.000us 0.00% 0.000us 0.000us 6 - cudaDeviceSynchronize 47.18% 2.098ms 47.18% 2.098ms 2.098ms 0.000us 0.00% 0.000us 0.000us 1 + xformers_meff 9.51% 455.697us 55.56% 2.663ms 2.663ms 0.000us 0.00% 3.558ms 3.558ms 1 + xformers_flash3::flash_fwd 4.08% 195.443us 45.35% 2.174ms 724.544us 0.000us 0.00% 3.558ms 1.186ms 3 + flash_attn_3::fwd 1.49% 71.640us 41.28% 1.978ms 659.396us 2.651ms 100.00% 3.558ms 1.186ms 3 + xformers_meff 0.00% 0.000us 0.00% 0.000us 0.000us 2.653ms 100.06% 2.653ms 2.653ms 1 +void cutlass::device_kernel<flash::enable_sm80_to_sm... 
0.00% 0.000us 0.00% 0.000us 0.000us 2.651ms 100.00% 2.651ms 883.711us 3 + Activity Buffer Request 37.88% 1.816ms 37.88% 1.816ms 1.816ms 906.719us 34.20% 906.719us 906.719us 1 + aten::empty 0.75% 35.911us 0.75% 35.911us 5.985us 0.000us 0.00% 0.000us 0.000us 6 + cudaFuncSetAttribute 0.26% 12.331us 0.26% 12.331us 4.110us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 0.89% 42.730us 0.89% 42.730us 14.243us 0.000us 0.00% 0.000us 0.000us 3 + aten::reshape 0.24% 11.531us 0.69% 33.171us 5.529us 0.000us 0.00% 0.000us 0.000us 6 + aten::view 0.45% 21.640us 0.45% 21.640us 3.607us 0.000us 0.00% 0.000us 0.000us 6 + cudaDeviceSynchronize 44.44% 2.130ms 44.44% 2.130ms 2.130ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 4.447ms -Self CUDA time total: 2.647ms +Self CPU time total: 4.793ms +Self CUDA time total: 2.651ms @@ -4182,21 +3964,21 @@ PROFILE TRACE: xformers_meff | cuda_attn_L256_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - xformers_meff 7.22% 318.208us 46.97% 2.070ms 2.070ms 0.000us 0.00% 3.700ms 3.700ms 1 - xformers_flash3::flash_fwd 3.33% 146.973us 39.20% 1.728ms 575.898us 0.000us 0.00% 3.700ms 1.233ms 3 - flash_attn_3::fwd 1.20% 53.004us 35.87% 1.581ms 526.907us 2.767ms 100.00% 3.700ms 1.233ms 3 - xformers_meff 0.00% 0.000us 0.00% 0.000us 0.000us 2.769ms 100.05% 2.769ms 2.769ms 1 -void 
cutlass::device_kernel<flash::enable_sm80_to_sm... 0.00% 0.000us 0.00% 0.000us 0.000us 2.767ms 100.00% 2.767ms 922.499us 3 - Activity Buffer Request 33.12% 1.459ms 33.12% 1.459ms 1.459ms 932.857us 33.71% 932.857us 932.857us 1 - aten::empty 0.65% 28.790us 0.65% 28.790us 4.798us 0.000us 0.00% 0.000us 0.000us 6 - cudaFuncSetAttribute 0.13% 5.860us 0.13% 5.860us 1.953us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 0.76% 33.580us 0.76% 33.580us 11.193us 0.000us 0.00% 0.000us 0.000us 3 - aten::reshape 0.21% 9.291us 0.54% 23.901us 3.983us 0.000us 0.00% 0.000us 0.000us 6 - aten::view 0.33% 14.610us 0.33% 14.610us 2.435us 0.000us 0.00% 0.000us 0.000us 6 - cudaDeviceSynchronize 53.03% 2.337ms 53.03% 2.337ms 2.337ms 0.000us 0.00% 0.000us 0.000us 1 + xformers_meff 6.26% 307.825us 49.96% 2.457ms 2.457ms 0.000us 0.00% 3.857ms 3.857ms 1 + xformers_flash3::flash_fwd 2.96% 145.722us 43.25% 2.127ms 708.950us 0.000us 0.00% 3.857ms 1.286ms 3 + flash_attn_3::fwd 1.03% 50.571us 40.29% 1.981ms 660.376us 2.878ms 100.00% 3.857ms 1.286ms 3 + xformers_meff 0.00% 0.000us 0.00% 0.000us 0.000us 2.879ms 100.06% 2.879ms 2.879ms 1 +void cutlass::device_kernel<flash::enable_sm80_to_sm... 
0.00% 0.000us 0.00% 0.000us 0.000us 2.878ms 100.00% 2.878ms 959.213us 3 + Activity Buffer Request 37.86% 1.862ms 37.86% 1.862ms 1.862ms 979.202us 34.03% 979.202us 979.202us 1 + aten::empty 0.61% 29.881us 0.61% 29.881us 4.980us 0.000us 0.00% 0.000us 0.000us 6 + cudaFuncSetAttribute 0.11% 5.570us 0.11% 5.570us 1.857us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 0.67% 33.080us 0.67% 33.080us 11.027us 0.000us 0.00% 0.000us 0.000us 3 + aten::reshape 0.18% 8.899us 0.46% 22.400us 3.733us 0.000us 0.00% 0.000us 0.000us 6 + aten::view 0.27% 13.501us 0.27% 13.501us 2.250us 0.000us 0.00% 0.000us 0.000us 6 + cudaDeviceSynchronize 50.04% 2.461ms 50.04% 2.461ms 2.461ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 4.407ms -Self CUDA time total: 2.767ms +Self CPU time total: 4.918ms +Self CUDA time total: 2.878ms @@ -4206,21 +3988,21 @@ PROFILE TRACE: xformers_meff | cuda_attn_L320_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - xformers_meff 6.87% 306.279us 45.67% 2.036ms 2.036ms 0.000us 0.00% 3.803ms 3.803ms 1 - xformers_flash3::flash_fwd 3.28% 146.193us 38.29% 1.707ms 568.871us 0.000us 0.00% 3.803ms 1.268ms 3 - flash_attn_3::fwd 1.22% 54.360us 35.01% 1.560ms 520.140us 2.841ms 100.00% 3.803ms 1.268ms 3 - xformers_meff 0.00% 0.000us 0.00% 0.000us 0.000us 2.843ms 100.05% 2.843ms 2.843ms 1 -void 
cutlass::device_kernel<flash::enable_sm80_to_sm... 0.00% 0.000us 0.00% 0.000us 0.000us 2.841ms 100.00% 2.841ms 947.064us 3 - Activity Buffer Request 32.21% 1.435ms 32.21% 1.435ms 1.435ms 961.848us 33.85% 961.848us 961.848us 1 - aten::empty 0.68% 30.200us 0.68% 30.200us 5.033us 0.000us 0.00% 0.000us 0.000us 6 - cudaFuncSetAttribute 0.12% 5.560us 0.12% 5.560us 1.853us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 0.78% 34.863us 0.78% 34.863us 11.621us 0.000us 0.00% 0.000us 0.000us 3 - aten::reshape 0.20% 8.808us 0.51% 22.610us 3.768us 0.000us 0.00% 0.000us 0.000us 6 - aten::view 0.31% 13.802us 0.31% 13.802us 2.300us 0.000us 0.00% 0.000us 0.000us 6 - cudaDeviceSynchronize 54.33% 2.422ms 54.33% 2.422ms 2.422ms 0.000us 0.00% 0.000us 0.000us 1 + xformers_meff 6.21% 306.054us 48.92% 2.410ms 2.410ms 0.000us 0.00% 3.933ms 3.933ms 1 + xformers_flash3::flash_fwd 2.99% 147.392us 42.27% 2.082ms 693.957us 0.000us 0.00% 3.933ms 1.311ms 3 + flash_attn_3::fwd 1.07% 52.480us 39.27% 1.934ms 644.826us 2.941ms 100.00% 3.933ms 1.311ms 3 + xformers_meff 0.00% 0.000us 0.00% 0.000us 0.000us 2.942ms 100.05% 2.942ms 2.942ms 1 +void cutlass::device_kernel<flash::enable_sm80_to_sm... 
0.00% 0.000us 0.00% 0.000us 0.000us 2.941ms 100.00% 2.941ms 980.234us 3 + Activity Buffer Request 36.76% 1.811ms 36.76% 1.811ms 1.811ms 991.807us 33.73% 991.807us 991.807us 1 + aten::empty 0.60% 29.531us 0.60% 29.531us 4.922us 0.000us 0.00% 0.000us 0.000us 6 + cudaFuncSetAttribute 0.13% 6.550us 0.13% 6.550us 2.183us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 0.71% 35.120us 0.71% 35.120us 11.707us 0.000us 0.00% 0.000us 0.000us 3 + aten::reshape 0.17% 8.281us 0.44% 21.831us 3.638us 0.000us 0.00% 0.000us 0.000us 6 + aten::view 0.28% 13.550us 0.28% 13.550us 2.258us 0.000us 0.00% 0.000us 0.000us 6 + cudaDeviceSynchronize 51.08% 2.516ms 51.08% 2.516ms 2.516ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 4.457ms -Self CUDA time total: 2.841ms +Self CPU time total: 4.926ms +Self CUDA time total: 2.941ms @@ -4230,21 +4012,21 @@ PROFILE TRACE: xformers_meff | cuda_attn_L384_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - xformers_meff 6.67% 311.798us 48.16% 2.253ms 2.253ms 0.000us 0.00% 3.854ms 3.854ms 1 - xformers_flash3::flash_fwd 3.68% 172.144us 40.98% 1.917ms 638.949us 0.000us 0.00% 3.854ms 1.285ms 3 - flash_attn_3::fwd 1.19% 55.670us 37.30% 1.745ms 581.568us 2.881ms 100.00% 3.854ms 1.285ms 3 - xformers_meff 0.00% 0.000us 0.00% 0.000us 0.000us 2.883ms 100.05% 2.883ms 2.883ms 1 -void 
cutlass::device_kernel<flash::enable_sm80_to_sm... 0.00% 0.000us 0.00% 0.000us 0.000us 2.881ms 100.00% 2.881ms 960.465us 3 - Activity Buffer Request 30.77% 1.440ms 30.77% 1.440ms 1.440ms 972.603us 33.75% 972.603us 972.603us 1 - aten::empty 0.63% 29.580us 0.63% 29.580us 4.930us 0.000us 0.00% 0.000us 0.000us 6 - cudaFuncSetAttribute 0.12% 5.801us 0.12% 5.801us 1.934us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 4.58% 214.036us 4.58% 214.036us 71.345us 0.000us 0.00% 0.000us 0.000us 3 - aten::reshape 0.19% 9.019us 0.51% 24.051us 4.009us 0.000us 0.00% 0.000us 0.000us 6 - aten::view 0.32% 15.032us 0.32% 15.032us 2.505us 0.000us 0.00% 0.000us 0.000us 6 - cudaDeviceSynchronize 51.84% 2.425ms 51.84% 2.425ms 2.425ms 0.000us 0.00% 0.000us 0.000us 1 + xformers_meff 6.17% 315.944us 50.13% 2.567ms 2.567ms 0.000us 0.00% 4.004ms 4.004ms 1 + xformers_flash3::flash_fwd 2.87% 146.993us 43.50% 2.228ms 742.605us 0.000us 0.00% 4.004ms 1.335ms 3 + flash_attn_3::fwd 0.96% 49.370us 40.63% 2.081ms 693.607us 2.988ms 100.00% 4.004ms 1.335ms 3 + xformers_meff 0.00% 0.000us 0.00% 0.000us 0.000us 2.990ms 100.05% 2.990ms 2.990ms 1 +void cutlass::device_kernel<flash::enable_sm80_to_sm... 
0.00% 0.000us 0.00% 0.000us 0.000us 2.988ms 100.00% 2.988ms 996.112us 3 + Activity Buffer Request 35.27% 1.806ms 35.27% 1.806ms 1.806ms 1.016ms 34.00% 1.016ms 1.016ms 1 + aten::empty 0.59% 30.371us 0.59% 30.371us 5.062us 0.000us 0.00% 0.000us 0.000us 6 + cudaFuncSetAttribute 0.11% 5.580us 0.11% 5.580us 1.860us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 3.69% 189.213us 3.69% 189.213us 63.071us 0.000us 0.00% 0.000us 0.000us 3 + aten::reshape 0.19% 9.850us 0.46% 23.640us 3.940us 0.000us 0.00% 0.000us 0.000us 6 + aten::view 0.27% 13.790us 0.27% 13.790us 2.298us 0.000us 0.00% 0.000us 0.000us 6 + cudaDeviceSynchronize 49.87% 2.554ms 49.87% 2.554ms 2.554ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 4.678ms -Self CUDA time total: 2.881ms +Self CPU time total: 5.122ms +Self CUDA time total: 2.988ms @@ -4254,21 +4036,21 @@ PROFILE TRACE: xformers_meff | cuda_attn_L448_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - xformers_meff 5.88% 304.576us 42.22% 2.188ms 2.188ms 0.000us 0.00% 4.552ms 4.552ms 1 - xformers_flash3::flash_fwd 2.84% 147.154us 35.91% 1.861ms 620.213us 0.000us 0.00% 4.552ms 1.517ms 3 - flash_attn_3::fwd 1.02% 52.961us 33.07% 1.713ms 571.161us 3.412ms 100.00% 4.552ms 1.517ms 3 - xformers_meff 0.00% 0.000us 0.00% 0.000us 0.000us 3.414ms 100.04% 3.414ms 3.414ms 1 -void 
cutlass::device_kernel<flash::enable_sm80_to_sm... 0.00% 0.000us 0.00% 0.000us 0.000us 3.412ms 100.00% 3.412ms 1.137ms 3 - Activity Buffer Request 27.95% 1.448ms 27.95% 1.448ms 1.448ms 1.140ms 33.41% 1.140ms 1.140ms 1 - aten::empty 0.56% 29.272us 0.56% 29.272us 4.879us 0.000us 0.00% 0.000us 0.000us 6 - cudaFuncSetAttribute 0.12% 6.180us 0.12% 6.180us 2.060us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 3.41% 176.624us 3.41% 176.624us 58.875us 0.000us 0.00% 0.000us 0.000us 3 - aten::reshape 0.17% 9.052us 0.44% 22.882us 3.814us 0.000us 0.00% 0.000us 0.000us 6 - aten::view 0.27% 13.830us 0.27% 13.830us 2.305us 0.000us 0.00% 0.000us 0.000us 6 - cudaDeviceSynchronize 57.78% 2.994ms 57.78% 2.994ms 2.994ms 0.000us 0.00% 0.000us 0.000us 1 + xformers_meff 5.38% 306.205us 45.35% 2.581ms 2.581ms 0.000us 0.00% 4.704ms 4.704ms 1 + xformers_flash3::flash_fwd 2.54% 144.312us 39.58% 2.253ms 750.894us 0.000us 0.00% 4.704ms 1.568ms 3 + flash_attn_3::fwd 0.92% 52.341us 37.04% 2.108ms 702.790us 3.526ms 100.00% 4.704ms 1.568ms 3 + xformers_meff 0.00% 0.000us 0.00% 0.000us 0.000us 3.528ms 100.05% 3.528ms 3.528ms 1 +void cutlass::device_kernel<flash::enable_sm80_to_sm... 
0.00% 0.000us 0.00% 0.000us 0.000us 3.526ms 100.00% 3.526ms 1.175ms 3 + Activity Buffer Request 32.26% 1.836ms 32.26% 1.836ms 1.836ms 1.177ms 33.39% 1.177ms 1.177ms 1 + aten::empty 0.52% 29.660us 0.52% 29.660us 4.943us 0.000us 0.00% 0.000us 0.000us 6 + cudaFuncSetAttribute 0.10% 5.499us 0.10% 5.499us 1.833us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 3.24% 184.684us 3.24% 184.684us 61.561us 0.000us 0.00% 0.000us 0.000us 3 + aten::reshape 0.15% 8.640us 0.39% 22.430us 3.738us 0.000us 0.00% 0.000us 0.000us 6 + aten::view 0.24% 13.790us 0.24% 13.790us 2.298us 0.000us 0.00% 0.000us 0.000us 6 + cudaDeviceSynchronize 54.65% 3.111ms 54.65% 3.111ms 3.111ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 5.182ms -Self CUDA time total: 3.412ms +Self CPU time total: 5.692ms +Self CUDA time total: 3.526ms @@ -4278,83 +4060,37 @@ PROFILE TRACE: xformers_meff | cuda_attn_L512_bfloat16 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - xformers_meff 5.58% 285.697us 41.87% 2.143ms 2.143ms 0.000us 0.00% 4.544ms 4.544ms 1 - xformers_flash3::flash_fwd 2.91% 148.714us 35.83% 1.834ms 611.255us 0.000us 0.00% 4.544ms 1.515ms 3 - flash_attn_3::fwd 1.04% 53.311us 32.92% 1.685ms 561.684us 3.402ms 100.00% 4.544ms 1.515ms 3 - xformers_meff 0.00% 0.000us 0.00% 0.000us 0.000us 3.403ms 100.05% 3.403ms 3.403ms 1 -void cutlass::device_kernel<flash::enable_sm80_to_sm... 
0.00% 0.000us 0.00% 0.000us 0.000us 3.402ms 100.00% 3.402ms 1.134ms 3 - Activity Buffer Request 27.78% 1.422ms 27.78% 1.422ms 1.422ms 1.142ms 33.57% 1.142ms 1.142ms 1 - aten::empty 0.58% 29.640us 0.58% 29.640us 4.940us 0.000us 0.00% 0.000us 0.000us 6 - cudaFuncSetAttribute 0.12% 5.990us 0.12% 5.990us 1.997us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 3.40% 174.134us 3.40% 174.134us 58.045us 0.000us 0.00% 0.000us 0.000us 3 - aten::reshape 0.17% 8.543us 0.45% 23.191us 3.865us 0.000us 0.00% 0.000us 0.000us 6 - aten::view 0.29% 14.648us 0.29% 14.648us 2.441us 0.000us 0.00% 0.000us 0.000us 6 - cudaDeviceSynchronize 58.13% 2.975ms 58.13% 2.975ms 2.975ms 0.000us 0.00% 0.000us 0.000us 1 + xformers_meff 5.52% 307.264us 44.82% 2.494ms 2.494ms 0.000us 0.00% 4.662ms 4.662ms 1 + xformers_flash3::flash_fwd 2.63% 146.303us 38.91% 2.164ms 721.461us 0.000us 0.00% 4.662ms 1.554ms 3 + flash_attn_3::fwd 0.91% 50.371us 36.28% 2.018ms 672.693us 3.490ms 100.00% 4.662ms 1.554ms 3 + xformers_meff 0.00% 0.000us 0.00% 0.000us 0.000us 3.491ms 100.04% 3.491ms 3.491ms 1 +void cutlass::device_kernel<flash::enable_sm80_to_sm... 
0.00% 0.000us 0.00% 0.000us 0.000us 3.490ms 100.00% 3.490ms 1.163ms 3 + Activity Buffer Request 31.37% 1.745ms 31.37% 1.745ms 1.745ms 1.172ms 33.59% 1.172ms 1.172ms 1 + aten::empty 0.54% 29.920us 0.54% 29.920us 4.987us 0.000us 0.00% 0.000us 0.000us 6 + cudaFuncSetAttribute 0.10% 5.750us 0.10% 5.750us 1.917us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 3.36% 187.102us 3.36% 187.102us 62.367us 0.000us 0.00% 0.000us 0.000us 3 + aten::reshape 0.15% 8.539us 0.39% 21.890us 3.648us 0.000us 0.00% 0.000us 0.000us 6 + aten::view 0.24% 13.351us 0.24% 13.351us 2.225us 0.000us 0.00% 0.000us 0.000us 6 + cudaDeviceSynchronize 55.18% 3.069ms 55.18% 3.069ms 3.069ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 5.118ms -Self CUDA time total: 3.402ms +Self CPU time total: 5.563ms +Self CUDA time total: 3.490ms impl wl p50(ms) ok -xformers_meff cuda_attn_L128_bfloat16 1.00 True -xformers_meff cuda_attn_L256_bfloat16 1.03 True -xformers_meff cuda_attn_L320_bfloat16 1.08 True -xformers_meff cuda_attn_L384_bfloat16 1.08 True -xformers_meff cuda_attn_L448_bfloat16 1.25 True -xformers_meff cuda_attn_L512_bfloat16 1.23 True +xformers_meff cuda_attn_L128_bfloat16 0.99 True +xformers_meff cuda_attn_L256_bfloat16 1.05 True +xformers_meff cuda_attn_L320_bfloat16 1.09 True +xformers_meff cuda_attn_L384_bfloat16 1.09 True +xformers_meff cuda_attn_L448_bfloat16 1.27 True +xformers_meff cuda_attn_L512_bfloat16 1.28 True
▶ UV Install Logs
diff --git a/flash_attn/results/artifacts/combine/latency.svg b/flash_attn/results/artifacts/combine/latency.svg index 31d30c5dcfa68f4fc35593a1422ddd982b5374d8..bb2245dfc90473a095da0e7b8d1d0831331609a9 100644 --- a/flash_attn/results/artifacts/combine/latency.svg +++ b/flash_attn/results/artifacts/combine/latency.svg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:520b28a43c879f6952cf0ddeade1438dbb5bd7caf01b6509254a4c68e9446ee6 -size 24783 +oid sha256:d3a72737b2e51b137700eeffb292bcf730686137439c6c61e905fd6d06c1d87d +size 24785 diff --git a/flash_attn/results/combined_results.html b/flash_attn/results/combined_results.html index 0682107b1540718d4e870417450dee78797760de..4f874afb4f36cd28a709c333fc39d5b07e4a8002 100644 --- a/flash_attn/results/combined_results.html +++ b/flash_attn/results/combined_results.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: 
#ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] 
.artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, 
:root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 
+227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; 
box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; 
opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ 
-792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: 
var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 
0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { 
margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: 
background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ 
-1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ 
+[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* 
Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* 
Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ 
+[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { 
color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ 
+[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our 
code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px 
var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading 
state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4107,7 +3889,7 @@ body[data-tool="eraser"] .main-content { - 2025-10-31T20:14:18.946177 + 2025-11-10T22:12:19.411851 image/svg+xml @@ -4217,96 +3999,96 @@ body[data-tool="eraser"] .main-content { - + - + - 1.0 + 1.0 - + - + - 1.2 + 1.2 - + - + - 1.4 + 1.4 - + - + - 1.6 + 1.6 - + - + - 1.8 + 1.8 - + - + - 2.0 + 2.0 - + - + - 2.2 + 2.2 @@ -4314,73 +4096,73 @@ body[data-tool="eraser"] .main-content { - + - - - - - - + + + + + + - + - - - - - + + + + + - + - - - - - - + + + + + + - + - - - - - - + + + + + + - + - - - - - + + + + + @@ -4465,7 +4247,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: combine | 4.31s +Cell: combine | 4.53s | Raw @@ -4548,7 +4330,7 @@ LOADING BENCHMARK DATA ✓ Flash (PyTorch SDPA) : /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/4b81c2b991fc4a0f70c4117e933abc4007fd7f3f55394d7778a4074adf29df04 ✓ MemEff (PyTorch SDPA) : /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/a23b7ad9cfb9e9968ec4a8f126174dc4a3ab5e6999c65a44570f93656598bd2f ✓ xFormers : /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/269846603898e0ee1872d7a8b40fca43ba558b2f3400f8a7bedb1ee79df7da58 -✓ HF Kernels Flash Attn : /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/c1c92a22d205ca145ffb0083188c0f8eef512cfd6aa091b1e49d6329fbd08849 +✓ HF Kernels Flash Attn : /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/a489d302a4d791c1f52cf174509f175d34c40f30517dcfa7c498bb8e0bf92d22 ✓ HF Kernels Flash Attn3 : /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/8d741e4aa09c527ddf0f50ffa03a7e840559990c66178bfb9cf04bd97f3efd20 ✓ SageAttention : /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/f6be24aff45575cad8d1df490ac5fe9ec944103fb255665c71719ca2d7efea4e @@ -4559,7 +4341,7 @@ LOADING BENCHMARK DATA ✓ Found xFormers Path: 
/__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/269846603898e0ee1872d7a8b40fca43ba558b2f3400f8a7bedb1ee79df7da58/attention.jsonl ✓ Found HF Kernels Flash Attn - Path: /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/c1c92a22d205ca145ffb0083188c0f8eef512cfd6aa091b1e49d6329fbd08849/attention.jsonl + Path: /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/a489d302a4d791c1f52cf174509f175d34c40f30517dcfa7c498bb8e0bf92d22/attention.jsonl ✓ Found HF Kernels Flash Attn3 Path: /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/8d741e4aa09c527ddf0f50ffa03a7e840559990c66178bfb9cf04bd97f3efd20/attention.jsonl ✓ Found SageAttention @@ -4572,48 +4354,48 @@ Summary: 6 found, 0 skipped, 0 missing COMBINED BENCHMARK SUMMARY impl wl p50(ms) ok -hf_kernels_flash_attn cuda_attn_L128_bfloat16 0.95 True -hf_kernels_flash_attn cuda_attn_L256_bfloat16 1.00 True -hf_kernels_flash_attn cuda_attn_L320_bfloat16 1.05 True -hf_kernels_flash_attn cuda_attn_L384_bfloat16 1.06 True -hf_kernels_flash_attn cuda_attn_L448_bfloat16 1.23 True -hf_kernels_flash_attn cuda_attn_L512_bfloat16 1.23 True -hf_kernels_flash_attn3 cuda_attn_L128_bfloat16 0.92 True -hf_kernels_flash_attn3 cuda_attn_L256_bfloat16 0.96 True -hf_kernels_flash_attn3 cuda_attn_L320_bfloat16 1.01 True -hf_kernels_flash_attn3 cuda_attn_L384_bfloat16 1.03 True -hf_kernels_flash_attn3 cuda_attn_L448_bfloat16 1.20 True -hf_kernels_flash_attn3 cuda_attn_L512_bfloat16 1.18 True +hf_kernels_flash_attn cuda_attn_L128_bfloat16 0.99 True +hf_kernels_flash_attn cuda_attn_L256_bfloat16 1.04 True +hf_kernels_flash_attn cuda_attn_L320_bfloat16 1.07 True +hf_kernels_flash_attn cuda_attn_L384_bfloat16 1.08 True +hf_kernels_flash_attn cuda_attn_L448_bfloat16 1.26 True +hf_kernels_flash_attn cuda_attn_L512_bfloat16 1.25 True +hf_kernels_flash_attn3 cuda_attn_L128_bfloat16 0.94 True +hf_kernels_flash_attn3 
cuda_attn_L256_bfloat16 0.98 True +hf_kernels_flash_attn3 cuda_attn_L320_bfloat16 1.05 True +hf_kernels_flash_attn3 cuda_attn_L384_bfloat16 1.04 True +hf_kernels_flash_attn3 cuda_attn_L448_bfloat16 1.22 True +hf_kernels_flash_attn3 cuda_attn_L512_bfloat16 1.21 True sage_int8_fp16 cuda_attn_L128_bfloat16 FAIL False - Error: module 'sage_attention_ef0573391bb63704' has no attribute 'fwd' + Error: module 'sage_attention_d37081df98a5208e' has no attribute 'fwd' sage_int8_fp16 cuda_attn_L256_bfloat16 FAIL False - Error: module 'sage_attention_ef0573391bb63704' has no attribute 'fwd' + Error: module 'sage_attention_d37081df98a5208e' has no attribute 'fwd' sage_int8_fp16 cuda_attn_L320_bfloat16 FAIL False - Error: module 'sage_attention_ef0573391bb63704' has no attribute 'fwd' + Error: module 'sage_attention_d37081df98a5208e' has no attribute 'fwd' sage_int8_fp16 cuda_attn_L384_bfloat16 FAIL False - Error: module 'sage_attention_ef0573391bb63704' has no attribute 'fwd' + Error: module 'sage_attention_d37081df98a5208e' has no attribute 'fwd' sage_int8_fp16 cuda_attn_L448_bfloat16 FAIL False - Error: module 'sage_attention_ef0573391bb63704' has no attribute 'fwd' + Error: module 'sage_attention_d37081df98a5208e' has no attribute 'fwd' sage_int8_fp16 cuda_attn_L512_bfloat16 FAIL False - Error: module 'sage_attention_ef0573391bb63704' has no attribute 'fwd' -torch_flash_ma cuda_attn_L128_bfloat16 1.22 True + Error: module 'sage_attention_d37081df98a5208e' has no attribute 'fwd' +torch_flash_ma cuda_attn_L128_bfloat16 1.23 True torch_flash_ma cuda_attn_L256_bfloat16 1.28 True torch_flash_ma cuda_attn_L320_bfloat16 1.30 True torch_flash_ma cuda_attn_L384_bfloat16 1.33 True -torch_flash_ma cuda_attn_L448_bfloat16 1.50 True -torch_flash_ma cuda_attn_L512_bfloat16 1.51 True -torch_mem_eff cuda_attn_L128_bfloat16 1.85 True -torch_mem_eff cuda_attn_L256_bfloat16 1.95 True -torch_mem_eff cuda_attn_L320_bfloat16 1.99 True -torch_mem_eff cuda_attn_L384_bfloat16 2.07 True -torch_mem_eff 
cuda_attn_L448_bfloat16 2.06 True -torch_mem_eff cuda_attn_L512_bfloat16 2.25 True -xformers_meff cuda_attn_L128_bfloat16 1.00 True -xformers_meff cuda_attn_L256_bfloat16 1.03 True -xformers_meff cuda_attn_L320_bfloat16 1.08 True -xformers_meff cuda_attn_L384_bfloat16 1.08 True -xformers_meff cuda_attn_L448_bfloat16 1.25 True -xformers_meff cuda_attn_L512_bfloat16 1.23 True +torch_flash_ma cuda_attn_L448_bfloat16 1.48 True +torch_flash_ma cuda_attn_L512_bfloat16 1.52 True +torch_mem_eff cuda_attn_L128_bfloat16 1.83 True +torch_mem_eff cuda_attn_L256_bfloat16 1.94 True +torch_mem_eff cuda_attn_L320_bfloat16 1.96 True +torch_mem_eff cuda_attn_L384_bfloat16 2.03 True +torch_mem_eff cuda_attn_L448_bfloat16 2.02 True +torch_mem_eff cuda_attn_L512_bfloat16 2.23 True +xformers_meff cuda_attn_L128_bfloat16 0.99 True +xformers_meff cuda_attn_L256_bfloat16 1.05 True +xformers_meff cuda_attn_L320_bfloat16 1.09 True +xformers_meff cuda_attn_L384_bfloat16 1.09 True +xformers_meff cuda_attn_L448_bfloat16 1.27 True +xformers_meff cuda_attn_L512_bfloat16 1.28 True GENERATING COMBINED VISUALIZATION @@ -4637,7 +4419,7 @@ Implementations included:
▶ UV Install Logs
@@ -4650,7 +4432,7 @@ Installed 37 packages in 225ms - 2025-10-31T20:14:18.946177 + 2025-11-10T22:12:19.411851 image/svg+xml @@ -4760,96 +4542,96 @@ Installed 37 packages in 225ms - + - + - 1.0 + 1.0 - + - + - 1.2 + 1.2 - + - + - 1.4 + 1.4 - + - + - 1.6 + 1.6 - + - + - 1.8 + 1.8 - + - + - 2.0 + 2.0 - + - + - 2.2 + 2.2 @@ -4857,73 +4639,73 @@ Installed 37 packages in 225ms - + - - - - - - + + + + + + - + - - - - - + + + + + - + - - - - - - + + + + + + - + - - - - - - + + + + + + - + - - - - - + + + + + diff --git a/index.html b/index.html index 11cdf1eef85f4dda68d9e978af612e8aae0078bb..c420d25f6116cc545e85d40f5b94a4fe08fba4b7 100644 --- a/index.html +++ b/index.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - 
--mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: 
var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - 
-:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', 
monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - 
-:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: 
flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, 
.tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - 
.collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: 
background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - 
.cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: 
none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: 
var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; 
font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: 
bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: 
#BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: 
#66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* 
Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ 
+[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF 
Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: 
bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4124,10 +3906,13 @@ Each section includes:

RUN YOURSELF

To run the benchmarks locally, clone the repository and use uvx to build and run the benchmarks:

Note: benches are made to run on a machine with a compatible NVIDIA GPU and CUDA installed; other hardware may not work as expected.

-
git clone https://github.com/huggingface/kernels-benchmarks.git
-cd kernels-benchmarks
-uvx https://github.com/drbh/uvnote.git build benches
-
+
+
+git clone https://github.com/huggingface/kernels-benchmarks.git
+cd kernels-benchmarks
+uvx https://github.com/drbh/uvnote.git build benches
+
+

METHODOLOGY

Each benchmark is run with the @@ -4176,7 +3961,7 @@ uvx https://github.com/drbh/uvnote.git

diff --git a/layer_norm/impls/artifacts/benchmark/layer_norm.jsonl b/layer_norm/impls/artifacts/benchmark/layer_norm.jsonl index 1c38cebcfad6cb20f64b80a0e6db2e230635d410..95c6a0c25f212838f051a570aa081cdc8ded7e36 100644 --- a/layer_norm/impls/artifacts/benchmark/layer_norm.jsonl +++ b/layer_norm/impls/artifacts/benchmark/layer_norm.jsonl @@ -1,4 +1,4 @@ -{"ts": "2025-10-31T20:00:11Z", "run": "f2cf664f6646484f88815be637f5bc9d", "impl": "torch_layer_norm", "tags": {"family": "torch", "op": "layer_norm"}, "wl": {"name": "LN_B16_S2048_D4096", "batch": 16, "seq_len": 2048, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.807951000012963, "p50": 0.8174310000299556, "p90": 0.8198709999760467, "mean": 0.8162470000002031, "iqr": 0.0038399999766625115, "raw_times": [0.8160309999993842, 0.8198709999760467, 0.8174310000299556, 0.807951000012963, 0.819950999982666], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.8318710000025931, "peak_bytes": 2415935488, "ok": true, "absmax": 0.03125, "corr": {"ok": true, "rtol": 0.001, "atol": 0.03125, "absmax": 0.03125, "mae": 0.0015411376953125, "mse": 1.1205673217773438e-05, "ref": "layer_norm_ref"}, "err": null} -{"ts": "2025-10-31T20:00:11Z", "run": "f2cf664f6646484f88815be637f5bc9d", "impl": "torch_layer_norm", "tags": {"family": "torch", "op": "layer_norm"}, "wl": {"name": "LN_B16_S2048_D8192", "batch": 16, "seq_len": 2048, "hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.6729929999996784, "p50": 1.6790130000003956, "p90": 1.685203000022284, "mean": 1.6802827999867986, "iqr": 0.007120000077520672, 
"raw_times": [1.685203000022284, 1.6790130000003956, 1.6729929999996784, 1.686121999966872, 1.6780829999447633], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.6821429999822612, "peak_bytes": 4831870976, "ok": true, "absmax": 0.03125, "corr": {"ok": true, "rtol": 0.001, "atol": 0.03125, "absmax": 0.03125, "mae": 0.0015106201171875, "mse": 1.1086463928222656e-05, "ref": "layer_norm_ref"}, "err": null} -{"ts": "2025-10-31T20:00:12Z", "run": "f2cf664f6646484f88815be637f5bc9d", "impl": "torch_layer_norm", "tags": {"family": "torch", "op": "layer_norm"}, "wl": {"name": "LN_B16_S4096_D4096", "batch": 16, "seq_len": 4096, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.6009309999844845, "p50": 1.6056009999942944, "p90": 1.611341000000266, "mean": 1.606853000009778, "iqr": 0.008409999963987502, "raw_times": [1.6009309999844845, 1.6056009999942944, 1.613461000033567, 1.6029310000362784, 1.611341000000266], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.6386120000220217, "peak_bytes": 4831854592, "ok": true, "absmax": 0.03125, "corr": {"ok": true, "rtol": 0.001, "atol": 0.03125, "absmax": 0.03125, "mae": 0.0015411376953125, "mse": 1.1205673217773438e-05, "ref": "layer_norm_ref"}, "err": null} -{"ts": "2025-10-31T20:00:12Z", "run": "f2cf664f6646484f88815be637f5bc9d", "impl": "torch_layer_norm", "tags": {"family": "torch", "op": "layer_norm"}, "wl": {"name": "LN_B16_S4096_D8192", "batch": 16, "seq_len": 4096, "hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 3.3123249999675863, "p50": 3.327974000001177, "p90": 3.3289149999973233, 
"mean": 3.3240905999946335, "iqr": 0.010180999993281148, "raw_times": [3.3325050000030387, 3.3289149999973233, 3.3123249999675863, 3.318734000004042, 3.327974000001177], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 3.335275000040383, "peak_bytes": 9663709184, "ok": true, "absmax": 0.03125, "corr": {"ok": true, "rtol": 0.001, "atol": 0.03125, "absmax": 0.03125, "mae": 0.0015106201171875, "mse": 1.1026859283447266e-05, "ref": "layer_norm_ref"}, "err": null} +{"ts": "2025-11-10T22:11:32Z", "run": "08926e8525be4ec6b9adc7957d91ab7e", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "LN_B16_S2048_D4096", "batch": 16, "seq_len": 2048, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.8238129998971999, "p50": 0.8308330000090791, "p90": 0.8364840000467666, "mean": 0.8310172000165039, "iqr": 0.012130999948567478, "raw_times": [0.8243530000981991, 0.8396030000312749, 0.8364840000467666, 0.8238129998971999, 0.8308330000090791], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.8376129999305704, "peak_bytes": 2415935488, "ok": true, "absmax": 0.03125, "corr": {"ok": true, "rtol": 0.001, "atol": 0.03125, "absmax": 0.03125, "mae": 0.0015411376953125, "mse": 1.1205673217773438e-05, "ref": "layer_norm_ref"}, "err": null} +{"ts": "2025-11-10T22:11:32Z", "run": "08926e8525be4ec6b9adc7957d91ab7e", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "LN_B16_S2048_D8192", "batch": 16, "seq_len": 2048, "hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", 
"plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.6412459999628481, "p50": 1.64963599991097, "p90": 1.6500760000326409, "mean": 1.6485119999288145, "iqr": 0.005011000212107319, "raw_times": [1.6500760000326409, 1.6450649998205336, 1.6412459999628481, 1.6565369999170798, 1.64963599991097], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.6503259998899011, "peak_bytes": 4831870976, "ok": true, "absmax": 0.03125, "corr": {"ok": true, "rtol": 0.001, "atol": 0.03125, "absmax": 0.03125, "mae": 0.0015106201171875, "mse": 1.1086463928222656e-05, "ref": "layer_norm_ref"}, "err": null} +{"ts": "2025-11-10T22:11:33Z", "run": "08926e8525be4ec6b9adc7957d91ab7e", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "LN_B16_S4096_D4096", "batch": 16, "seq_len": 4096, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1.635675999978048, "p50": 1.649695999958567, "p90": 1.6533860000436107, "mean": 1.6475760000048467, "iqr": 0.008750000006330083, "raw_times": [1.649695999958567, 1.635675999978048, 1.6533860000436107, 1.6544860000067274, 1.6446360000372806], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1.6516960001808911, "peak_bytes": 4831854592, "ok": true, "absmax": 0.03125, "corr": {"ok": true, "rtol": 0.001, "atol": 0.03125, "absmax": 0.03125, "mae": 0.0015411376953125, "mse": 1.1205673217773438e-05, "ref": "layer_norm_ref"}, "err": null} +{"ts": "2025-11-10T22:11:33Z", "run": "08926e8525be4ec6b9adc7957d91ab7e", "impl": "hf_kernels_layer_norm", "tags": {"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, "wl": {"name": "LN_B16_S4096_D8192", "batch": 16, "seq_len": 4096, 
"hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 3.2460209999953804, "p50": 3.252669999938007, "p90": 3.25303099998564, "mean": 3.2530550000046787, "iqr": 0.002919999815276242, "raw_times": [3.2634419999340025, 3.25303099998564, 3.252669999938007, 3.2501110001703637, 3.2460209999953804], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 3.265330999965954, "peak_bytes": 9663709184, "ok": true, "absmax": 0.03125, "corr": {"ok": true, "rtol": 0.001, "atol": 0.03125, "absmax": 0.03125, "mae": 0.0015106201171875, "mse": 1.1026859283447266e-05, "ref": "layer_norm_ref"}, "err": null} diff --git a/layer_norm/impls/cells/benchmark.py b/layer_norm/impls/cells/benchmark.py index 6a00a9f99d8d044ab5f9dc0f5019344cef0612b9..d871d1b25fedf8b294c567e9ac582decb62f3cde 100644 --- a/layer_norm/impls/cells/benchmark.py +++ b/layer_norm/impls/cells/benchmark.py @@ -3,6 +3,7 @@ # dependencies = [ # "numpy", # "torch==2.8.0", +# "kernels", # "kernels-benchmark-tools", # ] # @@ -12,15 +13,37 @@ import torch import sys from kernels_benchmark_tools import KernelTypeEnum, run_benchmark +from kernels import get_kernel +# Load the layer norm kernel +layer_norm_kernel = get_kernel("kernels-community/layer-norm") -def torch_layer_norm(x, weight, bias, eps: float = 1e-5): - return torch.nn.functional.layer_norm(x, (x.shape[-1],), weight, bias, eps) + +def hf_kernels_layer_norm(x, weight, bias, eps: float = 1e-5): + B, S, D = x.shape + # The kernel expects [N, D] input; support beta (bias) if provided. 
+ out = layer_norm_kernel.dropout_add_ln_fwd( + input=x.view(-1, D), + gamma=weight, + beta=bias, + rowscale=None, + colscale=None, + x0_subset=None, + z_subset=None, + dropout_p=0.0, + epsilon=eps, + rowscale_const=1.0, + z_numrows=S, + gen=None, + residual_in_fp32=False, + is_rms_norm=False, + )[0].view(B, S, D) + return out run_benchmark( kernel_type=KernelTypeEnum.LAYER_NORM, - impl_name="torch_layer_norm", - impl_tags={"family": "torch", "op": "layer_norm"}, - impl_func=torch_layer_norm, + impl_name="hf_kernels_layer_norm", + impl_tags={"family": "hf-kernels", "repo": "kernels-community/layer-norm", "op": "layer_norm"}, + impl_func=hf_kernels_layer_norm, ) \ No newline at end of file diff --git a/layer_norm/impls/hf_kernels_layer_norm.html b/layer_norm/impls/hf_kernels_layer_norm.html index 1f158344e6570f7297be0322ab6a513d52dde712..030ed3457f77e56e79537ca9f55afa5f0e82335a 100644 --- a/layer_norm/impls/hf_kernels_layer_norm.html +++ b/layer_norm/impls/hf_kernels_layer_norm.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; 
--text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - 
-:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - 
:root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; 
--code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - 
border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - 
/* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 
0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; 
border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - 
.uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); 
font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); 
text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* 
Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: 
#3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ 
+[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { 
color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: 
#959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ 
+[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { 
color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* 
Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); 
border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - 
background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {

Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4107,7 +3889,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: benchmark | 10.09s +Cell: benchmark | 6.65s | Raw @@ -4179,19 +3961,19 @@ PROFILE TRACE: hf_kernels_layer_norm | LN_B16_S2048_D4096 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - hf_kernels_layer_norm 5.01% 203.177us 46.78% 1.895ms 1.895ms 0.000us 0.00% 3.141ms 3.141ms 1 - _layer_norm_f8ec252::dropout_add_ln_fwd 1.71% 69.312us 41.16% 1.668ms 555.914us 2.399ms 100.00% 3.141ms 1.047ms 3 - hf_kernels_layer_norm 0.00% 0.000us 0.00% 0.000us 0.000us 2.401ms 100.06% 2.401ms 2.401ms 1 -void layer_norm::ln_fwd_kernel<layer_norm::Kernel_tr... 0.00% 0.000us 0.00% 0.000us 0.000us 2.399ms 100.00% 2.399ms 799.825us 3 - Activity Buffer Request 36.95% 1.497ms 36.95% 1.497ms 1.497ms 742.012us 30.92% 742.012us 742.012us 1 - aten::view 0.61% 24.559us 0.61% 24.559us 4.093us 0.000us 0.00% 0.000us 0.000us 6 - aten::empty 1.20% 48.622us 1.20% 48.622us 5.402us 0.000us 0.00% 0.000us 0.000us 9 -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFla... 
0.23% 9.170us 0.23% 9.170us 3.057us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 1.07% 43.390us 1.07% 43.390us 14.463us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 53.22% 2.156ms 53.22% 2.156ms 2.156ms 0.000us 0.00% 0.000us 0.000us 1 + hf_kernels_layer_norm 4.99% 214.535us 50.40% 2.165ms 2.165ms 0.000us 0.00% 3.089ms 3.089ms 1 + _layer_norm_f8ec252::dropout_add_ln_fwd 1.39% 59.840us 44.89% 1.928ms 642.793us 2.355ms 100.00% 3.089ms 1.030ms 3 + hf_kernels_layer_norm 0.00% 0.000us 0.00% 0.000us 0.000us 2.357ms 100.06% 2.357ms 2.357ms 1 +void layer_norm::ln_fwd_kernel<layer_norm::Kernel_tr... 0.00% 0.000us 0.00% 0.000us 0.000us 2.355ms 100.00% 2.355ms 785.131us 3 + Activity Buffer Request 41.22% 1.771ms 41.22% 1.771ms 1.771ms 733.313us 31.13% 733.313us 733.313us 1 + aten::view 0.51% 21.919us 0.51% 21.919us 3.653us 0.000us 0.00% 0.000us 0.000us 6 + aten::empty 1.06% 45.591us 1.06% 45.591us 5.066us 0.000us 0.00% 0.000us 0.000us 9 +cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFla... 
0.22% 9.340us 0.22% 9.340us 3.113us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 1.00% 42.910us 1.00% 42.910us 14.303us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 49.60% 2.131ms 49.60% 2.131ms 2.131ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 4.052ms -Self CUDA time total: 2.399ms +Self CPU time total: 4.295ms +Self CUDA time total: 2.355ms @@ -4201,19 +3983,19 @@ PROFILE TRACE: hf_kernels_layer_norm | LN_B16_S2048_D8192 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - hf_kernels_layer_norm 1.88% 119.443us 26.75% 1.701ms 1.701ms 0.000us 0.00% 6.407ms 6.407ms 1 - _layer_norm_f8ec252::dropout_add_ln_fwd 0.71% 45.121us 24.67% 1.568ms 522.677us 4.827ms 100.00% 6.407ms 2.136ms 3 - hf_kernels_layer_norm 0.00% 0.000us 0.00% 0.000us 0.000us 4.829ms 100.03% 4.829ms 4.829ms 1 -void layer_norm::ln_fwd_kernel<layer_norm::Kernel_tr... 0.00% 0.000us 0.00% 0.000us 0.000us 4.827ms 100.00% 4.827ms 1.609ms 3 - Activity Buffer Request 22.91% 1.456ms 22.91% 1.456ms 1.456ms 1.580ms 32.72% 1.580ms 1.580ms 1 - aten::view 0.21% 13.200us 0.21% 13.200us 2.200us 0.000us 0.00% 0.000us 0.000us 6 - aten::empty 0.51% 32.711us 0.51% 32.711us 3.635us 0.000us 0.00% 0.000us 0.000us 9 -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFla... 
0.08% 5.289us 0.08% 5.289us 1.763us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 0.45% 28.522us 0.45% 28.522us 9.507us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 73.25% 4.656ms 73.25% 4.656ms 4.656ms 0.000us 0.00% 0.000us 0.000us 1 + hf_kernels_layer_norm 2.21% 146.665us 30.12% 2.003ms 2.003ms 0.000us 0.00% 6.394ms 6.394ms 1 + _layer_norm_f8ec252::dropout_add_ln_fwd 0.64% 42.811us 27.74% 1.845ms 614.956us 4.819ms 100.00% 6.394ms 2.131ms 3 + hf_kernels_layer_norm 0.00% 0.000us 0.00% 0.000us 0.000us 4.820ms 100.03% 4.820ms 4.820ms 1 +void layer_norm::ln_fwd_kernel<layer_norm::Kernel_tr... 0.00% 0.000us 0.00% 0.000us 0.000us 4.819ms 100.00% 4.819ms 1.606ms 3 + Activity Buffer Request 26.14% 1.739ms 26.14% 1.739ms 1.739ms 1.575ms 32.69% 1.575ms 1.575ms 1 + aten::view 0.18% 11.889us 0.18% 11.889us 1.981us 0.000us 0.00% 0.000us 0.000us 6 + aten::empty 0.44% 29.319us 0.44% 29.319us 3.258us 0.000us 0.00% 0.000us 0.000us 9 +cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFla... 
0.07% 4.690us 0.07% 4.690us 1.563us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 0.44% 29.150us 0.44% 29.150us 9.717us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 69.88% 4.648ms 69.88% 4.648ms 4.648ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 6.357ms -Self CUDA time total: 4.827ms +Self CPU time total: 6.651ms +Self CUDA time total: 4.819ms @@ -4223,19 +4005,19 @@ PROFILE TRACE: hf_kernels_layer_norm | LN_B16_S4096_D4096 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - hf_kernels_layer_norm 1.89% 118.801us 26.85% 1.686ms 1.686ms 0.000us 0.00% 6.309ms 6.309ms 1 - _layer_norm_f8ec252::dropout_add_ln_fwd 0.78% 49.183us 24.77% 1.555ms 518.493us 4.763ms 100.00% 6.309ms 2.103ms 3 - hf_kernels_layer_norm 0.00% 0.000us 0.00% 0.000us 0.000us 4.765ms 100.03% 4.765ms 4.765ms 1 -void layer_norm::ln_fwd_kernel<layer_norm::Kernel_tr... 0.00% 0.000us 0.00% 0.000us 0.000us 4.763ms 100.00% 4.763ms 1.588ms 3 - Activity Buffer Request 22.96% 1.442ms 22.96% 1.442ms 1.442ms 1.546ms 32.46% 1.546ms 1.546ms 1 - aten::view 0.19% 11.741us 0.19% 11.741us 1.957us 0.000us 0.00% 0.000us 0.000us 6 - aten::empty 0.49% 30.460us 0.49% 30.460us 3.384us 0.000us 0.00% 0.000us 0.000us 9 -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFla... 
0.08% 4.920us 0.08% 4.920us 1.640us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 0.46% 29.050us 0.46% 29.050us 9.683us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 73.15% 4.593ms 73.15% 4.593ms 4.593ms 0.000us 0.00% 0.000us 0.000us 1 + hf_kernels_layer_norm 2.00% 133.492us 30.10% 2.007ms 2.007ms 0.000us 0.00% 6.406ms 6.406ms 1 + _layer_norm_f8ec252::dropout_add_ln_fwd 0.67% 44.942us 27.93% 1.863ms 620.970us 4.818ms 100.00% 6.406ms 2.135ms 3 + hf_kernels_layer_norm 0.00% 0.000us 0.00% 0.000us 0.000us 4.819ms 100.03% 4.819ms 4.819ms 1 +void layer_norm::ln_fwd_kernel<layer_norm::Kernel_tr... 0.00% 0.000us 0.00% 0.000us 0.000us 4.818ms 100.00% 4.818ms 1.606ms 3 + Activity Buffer Request 26.34% 1.756ms 26.34% 1.756ms 1.756ms 1.588ms 32.97% 1.588ms 1.588ms 1 + aten::view 0.16% 10.780us 0.16% 10.780us 1.797us 0.000us 0.00% 0.000us 0.000us 6 + aten::empty 0.44% 29.582us 0.44% 29.582us 3.287us 0.000us 0.00% 0.000us 0.000us 9 +cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFla... 
0.07% 4.759us 0.07% 4.759us 1.586us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 0.41% 27.190us 0.41% 27.190us 9.063us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 69.90% 4.662ms 69.90% 4.662ms 4.662ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 6.279ms -Self CUDA time total: 4.763ms +Self CPU time total: 6.669ms +Self CUDA time total: 4.818ms @@ -4245,38 +4027,36 @@ PROFILE TRACE: hf_kernels_layer_norm | LN_B16_S4096_D8192 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ Name Self CPU % Self CPU CPU total % CPU total CPU time avg Self CUDA Self CUDA % CUDA total CUDA time avg # of Calls ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ - hf_kernels_layer_norm 1.11% 112.814us 7.31% 743.908us 743.908us 0.000us 0.00% 12.737ms 12.737ms 1 - _layer_norm_f8ec252::dropout_add_ln_fwd 0.47% 47.722us 6.09% 619.105us 206.368us 9.594ms 100.00% 12.737ms 4.246ms 3 - hf_kernels_layer_norm 0.00% 0.000us 0.00% 0.000us 0.000us 9.595ms 100.02% 9.595ms 9.595ms 1 -void layer_norm::ln_fwd_kernel<layer_norm::Kernel_tr... 0.00% 0.000us 0.00% 0.000us 0.000us 9.594ms 100.00% 9.594ms 3.198ms 3 - Activity Buffer Request 2.50% 254.176us 2.50% 254.176us 254.176us 3.143ms 32.76% 3.143ms 3.143ms 1 - aten::view 0.12% 11.989us 0.12% 11.989us 1.998us 0.000us 0.00% 0.000us 0.000us 6 - aten::empty 0.30% 30.280us 0.30% 30.280us 3.364us 0.000us 0.00% 0.000us 0.000us 9 -cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFla... 
0.05% 5.000us 0.05% 5.000us 1.667us 0.000us 0.00% 0.000us 0.000us 3 - cudaLaunchKernel 2.77% 281.927us 2.77% 281.927us 93.976us 0.000us 0.00% 0.000us 0.000us 3 - cudaDeviceSynchronize 92.69% 9.430ms 92.69% 9.430ms 9.430ms 0.000us 0.00% 0.000us 0.000us 1 + hf_kernels_layer_norm 1.10% 128.730us 19.25% 2.252ms 2.252ms 0.000us 0.00% 12.776ms 12.776ms 1 + _layer_norm_f8ec252::dropout_add_ln_fwd 0.38% 44.142us 18.05% 2.112ms 704.015us 9.608ms 100.00% 12.776ms 4.259ms 3 + hf_kernels_layer_norm 0.00% 0.000us 0.00% 0.000us 0.000us 9.609ms 100.01% 9.609ms 9.609ms 1 +void layer_norm::ln_fwd_kernel<layer_norm::Kernel_tr... 0.00% 0.000us 0.00% 0.000us 0.000us 9.608ms 100.00% 9.608ms 3.203ms 3 + Activity Buffer Request 15.07% 1.763ms 15.07% 1.763ms 1.763ms 3.168ms 32.98% 3.168ms 3.168ms 1 + aten::view 0.10% 11.611us 0.10% 11.611us 1.935us 0.000us 0.00% 0.000us 0.000us 6 + aten::empty 0.25% 29.429us 0.25% 29.429us 3.270us 0.000us 0.00% 0.000us 0.000us 9 +cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFla... 
0.04% 4.891us 0.04% 4.891us 1.630us 0.000us 0.00% 0.000us 0.000us 3 + cudaLaunchKernel 2.31% 270.775us 2.31% 270.775us 90.258us 0.000us 0.00% 0.000us 0.000us 3 + cudaDeviceSynchronize 80.75% 9.448ms 80.75% 9.448ms 9.448ms 0.000us 0.00% 0.000us 0.000us 1 ------------------------------------------------------- ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ ------------ -Self CPU time total: 10.174ms -Self CUDA time total: 9.594ms +Self CPU time total: 11.700ms +Self CUDA time total: 9.608ms impl wl p50(ms) ok -hf_kernels_layer_norm LN_B16_S2048_D4096 0.84 True -hf_kernels_layer_norm LN_B16_S2048_D8192 1.66 True -hf_kernels_layer_norm LN_B16_S4096_D4096 1.66 True -hf_kernels_layer_norm LN_B16_S4096_D8192 3.27 True +hf_kernels_layer_norm LN_B16_S2048_D4096 0.83 True +hf_kernels_layer_norm LN_B16_S2048_D8192 1.65 True +hf_kernels_layer_norm LN_B16_S4096_D4096 1.65 True +hf_kernels_layer_norm LN_B16_S4096_D8192 3.25 True
▶ UV Install Logs
Fetching 4 files: 0%| | 0/4 [00:00<?, ?it/s] -Fetching 4 files: 50%|█████ | 2/4 [00:01<00:01, 1.22it/s] -Fetching 4 files: 100%|██████████| 4/4 [00:01<00:00, 2.45it/s]
+Fetching 4 files: 50%|█████ | 2/4 [00:01<00:01, 1.12it/s] +Fetching 4 files: 100%|██████████| 4/4 [00:01<00:00, 2.24it/s]

Artifacts:

layer_norm.jsonl diff --git a/layer_norm/impls/torch_layer_norm.html b/layer_norm/impls/torch_layer_norm.html index 1205e964abde5a00c2c82d107fdba63f6b4fce51..cf92ad8b84409d27cb57a090b8380b6b2c5c27b0 100644 --- a/layer_norm/impls/torch_layer_norm.html +++ b/layer_norm/impls/torch_layer_norm.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - 
-:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); 
border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } 
+:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: 
flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: 
var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { 
color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: 
var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ 
transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s 
ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { 
background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { 
margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { 
.cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; 
font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception 
*/ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: 
#BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* 
Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] 
.highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] 
.highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: 
var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - ; -} - - { - % else % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; 
margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4106,7 +3888,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: nv | 0.23s +Cell: nv | 0.22s | Raw @@ -4122,16 +3904,16 @@ Cell: nv | 0.23s
-
Fri Oct 31 20:00:08 2025       
+
Mon Nov 10 22:11:21 2025       
 +-----------------------------------------------------------------------------------------+
-| NVIDIA-SMI 570.195.03             Driver Version: 570.195.03     CUDA Version: 12.8     |
-|-----------------------------------------+------------------------+----------------------+
+| NVIDIA-SMI 580.95.05              Driver Version: 580.95.05      CUDA Version: 13.0     |
++-----------------------------------------+------------------------+----------------------+
 | GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
 | Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
 |                                         |                        |               MIG M. |
 |=========================================+========================+======================|
 |   0  NVIDIA L40S                    On  |   00000000:4D:00.0 Off |                    0 |
-| N/A   32C    P0             85W /  350W |       0MiB /  46068MiB |     22%      Default |
+| N/A   36C    P0            121W /  350W |       0MiB /  46068MiB |     27%      Default |
 |                                         |                        |                  N/A |
 +-----------------------------------------+------------------------+----------------------+
 
@@ -4153,9 +3935,9 @@ Cell: nv | 0.23s
 
 ▼ code 
 ▼ output
- ▶ uv-logs
+ ▶ uv-logs
  | 
-Cell: benchmark | 3.89s
+Cell: benchmark | 7.73s
  | 
 
 Raw
@@ -4203,19 +3985,19 @@ PROFILE TRACE: torch_layer_norm | LN_B16_S2048_D4096
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                       torch_layer_norm         3.88%     150.743us        46.08%       1.790ms       1.790ms       0.000us         0.00%       3.031ms       3.031ms             1  
-                                       aten::layer_norm         0.46%      17.882us        42.20%       1.639ms     546.344us       0.000us         0.00%       3.031ms       1.010ms             3  
-                                aten::native_layer_norm         2.05%      79.451us        41.74%       1.621ms     540.384us       2.322ms       100.00%       3.031ms       1.010ms             3  
-                                       torch_layer_norm         0.00%       0.000us         0.00%       0.000us       0.000us       2.323ms       100.06%       2.323ms       2.323ms             1  
-void at::native::(anonymous namespace)::vectorized_l...         0.00%       0.000us         0.00%       0.000us       0.000us       2.322ms       100.00%       2.322ms     773.873us             3  
-                                Activity Buffer Request        37.13%       1.442ms        37.13%       1.442ms       1.442ms     709.660us        30.57%     709.660us     709.660us             1  
-                                            aten::empty         1.23%      47.623us         1.23%      47.623us       5.291us       0.000us         0.00%       0.000us       0.000us             9  
-                                       cudaLaunchKernel         1.17%      45.281us         1.17%      45.281us      15.094us       0.000us         0.00%       0.000us       0.000us             3  
-                                             aten::view         0.17%       6.710us         0.17%       6.710us       1.118us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize        53.92%       2.094ms        53.92%       2.094ms       2.094ms       0.000us         0.00%       0.000us       0.000us             1  
+                                       torch_layer_norm         3.69%     156.741us        50.70%       2.155ms       2.155ms       0.000us         0.00%       3.028ms       3.028ms             1  
+                                       aten::layer_norm         0.35%      14.940us        47.01%       1.998ms     666.050us       0.000us         0.00%       3.028ms       1.009ms             3  
+                                aten::native_layer_norm         1.75%      74.522us        46.66%       1.983ms     661.070us       2.321ms       100.00%       3.028ms       1.009ms             3  
+                                       torch_layer_norm         0.00%       0.000us         0.00%       0.000us       0.000us       2.322ms       100.06%       2.322ms       2.322ms             1  
+void at::native::(anonymous namespace)::vectorized_l...         0.00%       0.000us         0.00%       0.000us       0.000us       2.321ms       100.00%       2.321ms     773.663us             3  
+                                Activity Buffer Request        42.51%       1.807ms        42.51%       1.807ms       1.807ms     707.360us        30.48%     707.360us     707.360us             1  
+                                            aten::empty         1.11%      47.041us         1.11%      47.041us       5.227us       0.000us         0.00%       0.000us       0.000us             9  
+                                       cudaLaunchKernel         1.12%      47.761us         1.12%      47.761us      15.920us       0.000us         0.00%       0.000us       0.000us             3  
+                                             aten::view         0.17%       7.200us         0.17%       7.200us       1.200us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize        49.30%       2.095ms        49.30%       2.095ms       2.095ms       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 3.884ms
-Self CUDA time total: 2.322ms
+Self CPU time total: 4.250ms
+Self CUDA time total: 2.321ms
 
 
 
@@ -4225,19 +4007,19 @@ PROFILE TRACE: torch_layer_norm | LN_B16_S2048_D8192
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                       torch_layer_norm         1.99%     129.362us        27.22%       1.769ms       1.769ms       0.000us         0.00%       6.490ms       6.490ms             1  
-                                       aten::layer_norm         0.17%      10.831us        25.23%       1.640ms     546.698us       0.000us         0.00%       6.490ms       2.163ms             3  
-                                aten::native_layer_norm         0.91%      59.414us        25.06%       1.629ms     543.087us       4.900ms       100.00%       6.490ms       2.163ms             3  
-                                       torch_layer_norm         0.00%       0.000us         0.00%       0.000us       0.000us       4.901ms       100.03%       4.901ms       4.901ms             1  
-void at::native::(anonymous namespace)::vectorized_l...         0.00%       0.000us         0.00%       0.000us       0.000us       4.900ms       100.00%       4.900ms       1.633ms             3  
-                                Activity Buffer Request        23.14%       1.504ms        23.14%       1.504ms       1.504ms       1.590ms        32.46%       1.590ms       1.590ms             1  
-                                            aten::empty         0.46%      29.779us         0.46%      29.779us       3.309us       0.000us         0.00%       0.000us       0.000us             9  
-                                       cudaLaunchKernel         0.49%      31.860us         0.49%      31.860us      10.620us       0.000us         0.00%       0.000us       0.000us             3  
-                                             aten::view         0.06%       3.750us         0.06%       3.750us       0.625us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize        72.78%       4.732ms        72.78%       4.732ms       4.732ms       0.000us         0.00%       0.000us       0.000us             1  
+                                       torch_layer_norm         1.08%      72.370us        29.56%       1.986ms       1.986ms       0.000us         0.00%       6.439ms       6.439ms             1  
+                                       aten::layer_norm         0.14%       9.121us        28.49%       1.914ms     637.916us       0.000us         0.00%       6.439ms       2.146ms             3  
+                                aten::native_layer_norm         0.74%      49.777us        28.35%       1.905ms     634.876us       4.867ms       100.00%       6.439ms       2.146ms             3  
+                                       torch_layer_norm         0.00%       0.000us         0.00%       0.000us       0.000us       4.868ms       100.03%       4.868ms       4.868ms             1  
+void at::native::(anonymous namespace)::vectorized_l...         0.00%       0.000us         0.00%       0.000us       0.000us       4.867ms       100.00%       4.867ms       1.622ms             3  
+                                Activity Buffer Request        26.73%       1.796ms        26.73%       1.796ms       1.796ms       1.572ms        32.30%       1.572ms       1.572ms             1  
+                                            aten::empty         0.42%      28.501us         0.42%      28.501us       3.167us       0.000us         0.00%       0.000us       0.000us             9  
+                                       cudaLaunchKernel         0.40%      26.970us         0.40%      26.970us       8.990us       0.000us         0.00%       0.000us       0.000us             3  
+                                             aten::view         0.06%       3.863us         0.06%       3.863us       0.644us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize        70.44%       4.732ms        70.44%       4.732ms       4.732ms       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 6.501ms
-Self CUDA time total: 4.900ms
+Self CPU time total: 6.718ms
+Self CUDA time total: 4.867ms
 
 
 
@@ -4247,19 +4029,19 @@ PROFILE TRACE: torch_layer_norm | LN_B16_S4096_D4096
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                       torch_layer_norm         1.73%     108.072us        26.73%       1.674ms       1.674ms       0.000us         0.00%       6.258ms       6.258ms             1  
-                                       aten::layer_norm         0.14%       8.910us        25.01%       1.566ms     522.010us       0.000us         0.00%       6.258ms       2.086ms             3  
-                                aten::native_layer_norm         0.87%      54.314us        24.86%       1.557ms     519.040us       4.736ms       100.00%       6.258ms       2.086ms             3  
-                                       torch_layer_norm         0.00%       0.000us         0.00%       0.000us       0.000us       4.737ms       100.03%       4.737ms       4.737ms             1  
-void at::native::(anonymous namespace)::vectorized_l...         0.00%       0.000us         0.00%       0.000us       0.000us       4.736ms       100.00%       4.736ms       1.579ms             3  
-                                Activity Buffer Request        23.05%       1.444ms        23.05%       1.444ms       1.444ms       1.522ms        32.13%       1.522ms       1.522ms             1  
-                                            aten::empty         0.46%      28.531us         0.46%      28.531us       3.170us       0.000us         0.00%       0.000us       0.000us             9  
-                                       cudaLaunchKernel         0.43%      26.620us         0.43%      26.620us       8.873us       0.000us         0.00%       0.000us       0.000us             3  
-                                             aten::view         0.06%       4.039us         0.06%       4.039us       0.673us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize        73.27%       4.589ms        73.27%       4.589ms       4.589ms       0.000us         0.00%       0.000us       0.000us             1  
+                                       torch_layer_norm         1.07%      70.921us        30.56%       2.021ms       2.021ms       0.000us         0.00%       6.238ms       6.238ms             1  
+                                       aten::layer_norm         0.13%       8.430us        29.49%       1.951ms     650.186us       0.000us         0.00%       6.238ms       2.079ms             3  
+                                aten::native_layer_norm         0.76%      50.331us        29.36%       1.942ms     647.376us       4.725ms       100.00%       6.238ms       2.079ms             3  
+                                       torch_layer_norm         0.00%       0.000us         0.00%       0.000us       0.000us       4.726ms       100.03%       4.726ms       4.726ms             1  
+void at::native::(anonymous namespace)::vectorized_l...         0.00%       0.000us         0.00%       0.000us       0.000us       4.725ms       100.00%       4.725ms       1.575ms             3  
+                                Activity Buffer Request        27.69%       1.832ms        27.69%       1.832ms       1.832ms       1.513ms        32.02%       1.513ms       1.513ms             1  
+                                            aten::empty         0.42%      27.940us         0.42%      27.940us       3.104us       0.000us         0.00%       0.000us       0.000us             9  
+                                       cudaLaunchKernel         0.42%      27.891us         0.42%      27.891us       9.297us       0.000us         0.00%       0.000us       0.000us             3  
+                                             aten::view         0.06%       4.260us         0.06%       4.260us       0.710us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize        69.44%       4.592ms        69.44%       4.592ms       4.592ms       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 6.263ms
-Self CUDA time total: 4.736ms
+Self CPU time total: 6.614ms
+Self CUDA time total: 4.725ms
 
 
 
@@ -4269,27 +4051,33 @@ PROFILE TRACE: torch_layer_norm | LN_B16_S4096_D8192
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                       torch_layer_norm         0.85%     101.562us        19.08%       2.285ms       2.285ms       0.000us         0.00%      13.093ms      13.093ms             1  
-                                       aten::layer_norm         0.08%       9.511us        18.23%       2.184ms     727.942us       0.000us         0.00%      13.093ms       4.364ms             3  
-                                aten::native_layer_norm         0.48%      57.051us        18.15%       2.174ms     724.772us       9.846ms       100.00%      13.093ms       4.364ms             3  
-                                       torch_layer_norm         0.00%       0.000us         0.00%       0.000us       0.000us       9.848ms       100.01%       9.848ms       9.848ms             1  
-void at::native::(anonymous namespace)::vectorized_l...         0.00%       0.000us         0.00%       0.000us       0.000us       9.846ms       100.00%       9.846ms       3.282ms             3  
-                                Activity Buffer Request        11.95%       1.431ms        11.95%       1.431ms       1.431ms       3.247ms        32.97%       3.247ms       3.247ms             1  
-                                            aten::empty         0.24%      29.142us         0.24%      29.142us       3.238us       0.000us         0.00%       0.000us       0.000us             9  
-                                       cudaLaunchKernel         5.45%     653.217us         5.45%     653.217us     217.739us       0.000us         0.00%       0.000us       0.000us             3  
-                                             aten::view         0.03%       3.890us         0.03%       3.890us       0.648us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize        80.92%       9.693ms        80.92%       9.693ms       9.693ms       0.000us         0.00%       0.000us       0.000us             1  
+                                       torch_layer_norm         0.62%      70.560us        14.96%       1.705ms       1.705ms       0.000us         0.00%      13.056ms      13.056ms             1  
+                                       aten::layer_norm         0.08%       8.830us        14.34%       1.634ms     544.695us       0.000us         0.00%      13.056ms       4.352ms             3  
+                                aten::native_layer_norm         0.44%      49.828us        14.26%       1.625ms     541.752us       9.820ms       100.00%      13.056ms       4.352ms             3  
+                                       torch_layer_norm         0.00%       0.000us         0.00%       0.000us       0.000us       9.821ms       100.01%       9.821ms       9.821ms             1  
+void at::native::(anonymous namespace)::vectorized_l...         0.00%       0.000us         0.00%       0.000us       0.000us       9.820ms       100.00%       9.820ms       3.273ms             3  
+                                Activity Buffer Request        11.47%       1.307ms        11.47%       1.307ms       1.307ms       3.236ms        32.96%       3.236ms       3.236ms             1  
+                                            aten::empty         0.24%      27.683us         0.24%      27.683us       3.076us       0.000us         0.00%       0.000us       0.000us             9  
+                                       cudaLaunchKernel         2.07%     236.314us         2.07%     236.314us      78.771us       0.000us         0.00%       0.000us       0.000us             3  
+                                             aten::view         0.03%       3.970us         0.03%       3.970us       0.662us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize        85.04%       9.690ms        85.04%       9.690ms       9.690ms       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 11.979ms
-Self CUDA time total: 9.846ms
+Self CPU time total: 11.395ms
+Self CUDA time total: 9.820ms
 
 
 impl                     wl                  p50(ms)  ok
-torch_layer_norm         LN_B16_S2048_D4096     0.82  True
+torch_layer_norm         LN_B16_S2048_D4096     0.83  True
 torch_layer_norm         LN_B16_S2048_D8192     1.68  True
 torch_layer_norm         LN_B16_S4096_D4096     1.61  True
 torch_layer_norm         LN_B16_S4096_D8192     3.33  True
 
+
+
▶ UV Install Logs
+ +

Artifacts:

layer_norm.jsonl diff --git a/layer_norm/results/artifacts/combine/latency.svg b/layer_norm/results/artifacts/combine/latency.svg index 3ed8cd82e2a1a2caaeef1654e62fb6cc1922ef61..782090adb248f0ecb282571436ebdf1442b5e0d7 100644 --- a/layer_norm/results/artifacts/combine/latency.svg +++ b/layer_norm/results/artifacts/combine/latency.svg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be29ece5a8e85e2941ac21710ec16efd87996aaf0e9b42756a2189660af81a2c -size 14642 +oid sha256:89b9bc3a43e718cff3bcbca28b57be1049bf5906666d437e6fb05e36ce003086 +size 14641 diff --git a/layer_norm/results/combined_results.html b/layer_norm/results/combined_results.html index 37977b7fe79b0f6b62ae7797328594688de81469..cecee637d98ffea1e0041327e3b057a3c3a4ff36 100644 --- a/layer_norm/results/combined_results.html +++ b/layer_norm/results/combined_results.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; 
--border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - 
-:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - 
:root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ 
:root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] 
.menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: 
auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ 
+ cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: 
transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 
0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { 
background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid 
var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: 
var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight 
.cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ 
+[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ 
+[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { 
color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* 
Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] 
.highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: 
#AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { 
border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 
0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4107,7 +3889,7 @@ body[data-tool="eraser"] .main-content { - 2025-10-31T20:13:56.885734 + 2025-11-10T22:12:10.245468 image/svg+xml @@ -4191,70 +3973,70 @@ body[data-tool="eraser"] .main-content { - + - + - 1.0 + 1.0 - + - + - 1.5 + 1.5 - + - + - 2.0 + 2.0 - + - + - 2.5 + 2.5 - + - + - 3.0 + 3.0 @@ -4262,27 +4044,27 @@ body[data-tool="eraser"] .main-content { - + - - + + - + - - - - + + + + @@ -4340,7 +4122,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: combine | 4.28s +Cell: combine | 4.44s | Raw @@ -4427,11 +4209,11 @@ Summary: 2 found, 0 skipped, 0 missing COMBINED BENCHMARK SUMMARY impl wl p50(ms) ok -hf_kernels_layer_norm LN_B16_S2048_D4096 0.84 True -hf_kernels_layer_norm LN_B16_S2048_D8192 1.66 True -hf_kernels_layer_norm LN_B16_S4096_D4096 1.66 True -hf_kernels_layer_norm LN_B16_S4096_D8192 3.27 True -torch_layer_norm LN_B16_S2048_D4096 0.82 True +hf_kernels_layer_norm LN_B16_S2048_D4096 0.83 True +hf_kernels_layer_norm LN_B16_S2048_D8192 1.65 True +hf_kernels_layer_norm LN_B16_S4096_D4096 1.65 True +hf_kernels_layer_norm LN_B16_S4096_D8192 3.25 True +torch_layer_norm LN_B16_S2048_D4096 0.83 True torch_layer_norm LN_B16_S2048_D8192 1.68 True torch_layer_norm LN_B16_S4096_D4096 1.61 True torch_layer_norm LN_B16_S4096_D8192 3.33 True @@ -4454,7 +4236,7 @@ Implementations included:
▶ UV Install Logs
@@ -4467,7 +4249,7 @@ Installed 37 packages in 216ms - 2025-10-31T20:13:56.885734 + 2025-11-10T22:12:10.245468 image/svg+xml @@ -4551,70 +4333,70 @@ Installed 37 packages in 216ms - + - + - 1.0 + 1.0 - + - + - 1.5 + 1.5 - + - + - 2.0 + 2.0 - + - + - 2.5 + 2.5 - + - + - 3.0 + 3.0 @@ -4622,27 +4404,27 @@ Installed 37 packages in 216ms - + - - + + - + - - - - + + + + diff --git a/openai_moe/impls/artifacts/benchmark/openai_moe.jsonl b/openai_moe/impls/artifacts/benchmark/openai_moe.jsonl index 777cf4efb2a6108a3b57a081de8b6b9f1ee3abd3..ca2cfcab1314b2a668bdae06cd3961c2afe193c1 100644 --- a/openai_moe/impls/artifacts/benchmark/openai_moe.jsonl +++ b/openai_moe/impls/artifacts/benchmark/openai_moe.jsonl @@ -1,8 +1,8 @@ -{"ts": "2025-10-31T20:01:48Z", "run": "cee70b6f35064c71bc12a633683f7c01", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B1_S512_E2", "batch": 1, "seq_len": 512, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 2, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 149.85902099999748, "p50": 150.05062800003088, "p90": 150.2997029999733, "mean": 150.08009959999526, "iqr": 0.4259410000031494, "raw_times": [149.85902099999748, 150.3173840000045, 150.2997029999733, 149.87376199997016, 150.05062800003088], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 150.9511389999716, "peak_bytes": 416866816, "ok": true, "absmax": 2.765655517578125e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 2.765655517578125e-05, "mae": 2.0696452338597737e-06, "mse": 7.332408985538663e-12, "ref": "naive_moe"}, "err": null} -{"ts": "2025-10-31T20:02:12Z", "run": "cee70b6f35064c71bc12a633683f7c01", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B1_S512_E4", "batch": 1, 
"seq_len": 512, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 4, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 199.76808500001653, "p50": 200.257487999977, "p90": 201.3672960000008, "mean": 200.6008808000047, "iqr": 1.3947150000035435, "raw_times": [200.257487999977, 201.63895400003184, 201.3672960000008, 199.97258099999726, 199.76808500001653], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 200.2076969999962, "peak_bytes": 632035840, "ok": true, "absmax": 1.621246337890625e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 1.621246337890625e-05, "mae": 9.61917862696282e-07, "mse": 1.59423277530657e-12, "ref": "naive_moe"}, "err": null} -{"ts": "2025-10-31T20:02:55Z", "run": "cee70b6f35064c71bc12a633683f7c01", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B1_S1024_E2", "batch": 1, "seq_len": 1024, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 2, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 370.4508769999961, "p50": 372.7904090000038, "p90": 374.84007900002325, "mean": 372.8004498000132, "iqr": 3.7740770000027624, "raw_times": [374.84007900002325, 371.0660020000205, 370.4508769999961, 374.85488200002237, 372.7904090000038], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 371.103493000021, "peak_bytes": 643844608, "ok": true, "absmax": 2.6226043701171875e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 2.6226043701171875e-05, "mae": 2.0501920516835526e-06, "mse": 7.1848811622476916e-12, "ref": "naive_moe"}, "err": null} -{"ts": "2025-10-31T20:03:43Z", "run": 
"cee70b6f35064c71bc12a633683f7c01", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B1_S1024_E4", "batch": 1, "seq_len": 1024, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 4, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 381.2919249999709, "p50": 382.6824700000202, "p90": 382.6975609999863, "mean": 382.48455139998896, "iqr": 0.3518089999943186, "raw_times": [382.345751999992, 381.2919249999709, 383.4050489999754, 382.6975609999863, 382.6824700000202], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 384.12325699999883, "peak_bytes": 823386112, "ok": true, "absmax": 1.3589859008789062e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 1.3589859008789062e-05, "mae": 9.400179123986163e-07, "mse": 1.5130355735665235e-12, "ref": "naive_moe"}, "err": null} -{"ts": "2025-10-31T20:05:12Z", "run": "cee70b6f35064c71bc12a633683f7c01", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_S512_E2", "batch": 4, "seq_len": 512, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 2, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 735.1488859999336, "p50": 742.0204380000541, "p90": 746.9078719999516, "mean": 742.4016768000001, "iqr": 5.8942259998957525, "raw_times": [746.9175420000056, 746.9078719999516, 742.0204380000541, 735.1488859999336, 741.0136460000558], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 715.4345070000545, "peak_bytes": 1036112384, "ok": true, "absmax": 3.2901763916015625e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 
3.2901763916015625e-05, "mae": 2.0572656467265915e-06, "mse": 7.247809123700488e-12, "ref": "naive_moe"}, "err": null} -{"ts": "2025-10-31T20:06:54Z", "run": "cee70b6f35064c71bc12a633683f7c01", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_S512_E4", "batch": 4, "seq_len": 512, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 4, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 799.7175619999553, "p50": 801.8970370000034, "p90": 803.0568570000014, "mean": 801.7179149999947, "iqr": 2.358569999955762, "raw_times": [799.7175619999553, 800.6982870000456, 803.2198319999679, 803.0568570000014, 801.8970370000034], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 797.9236759999822, "peak_bytes": 1235263488, "ok": true, "absmax": 1.430511474609375e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 1.430511474609375e-05, "mae": 9.400343401466671e-07, "mse": 1.5107844445957919e-12, "ref": "naive_moe"}, "err": null} -{"ts": "2025-10-31T20:09:51Z", "run": "cee70b6f35064c71bc12a633683f7c01", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_S1024_E2", "batch": 4, "seq_len": 1024, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 2, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1484.2085319999114, "p50": 1486.4837999999736, "p90": 1487.529773999995, "mean": 1488.3352192000075, "iqr": 2.3281069999256943, "raw_times": [1498.252323000088, 1486.4837999999736, 1484.2085319999114, 1485.2016670000694, 1487.529773999995], "has_warnings": false, "reps": 5, "warmup": 2}, 
"compile_ms": 1502.5766269999394, "peak_bytes": 1861947904, "ok": true, "absmax": 2.6226043701171875e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 2.6226043701171875e-05, "mae": 2.060702854578267e-06, "mse": 7.262949790198814e-12, "ref": "naive_moe"}, "err": null} -{"ts": "2025-10-31T20:13:14Z", "run": "cee70b6f35064c71bc12a633683f7c01", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_S1024_E4", "batch": 4, "seq_len": 1024, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 4, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1520.7084719999102, "p50": 1524.500331000013, "p90": 1525.4868470000247, "mean": 1524.7435091999705, "iqr": 1.6920530000561484, "raw_times": [1529.2271019999362, 1524.500331000013, 1523.7947939999685, 1525.4868470000247, 1520.7084719999102], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1532.9394789999924, "peak_bytes": 2062163968, "ok": true, "absmax": 1.5974044799804688e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 1.5974044799804688e-05, "mae": 9.529014732834185e-07, "mse": 1.5621694476192216e-12, "ref": "naive_moe"}, "err": null} +{"ts": "2025-11-10T21:59:28Z", "run": "1939dc0ee47a4164bf38304335c67bc8", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B1_S512_E2", "batch": 1, "seq_len": 512, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 2, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 153.23935800000754, "p50": 154.66906200003905, "p90": 155.3045599999905, "mean": 154.4065966000062, "iqr": 
1.9825210000021798, "raw_times": [154.66906200003905, 153.23935800000754, 153.3220389999883, 155.3045599999905, 155.4979640000056], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 150.5313740000247, "peak_bytes": 416866816, "ok": true, "absmax": 2.765655517578125e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 2.765655517578125e-05, "mae": 2.0696452338597737e-06, "mse": 7.332408985538663e-12, "ref": "naive_moe"}, "err": null} +{"ts": "2025-11-10T21:59:51Z", "run": "1939dc0ee47a4164bf38304335c67bc8", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B1_S512_E4", "batch": 1, "seq_len": 512, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 4, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 200.93769199996814, "p50": 201.49722299998984, "p90": 202.5282779999884, "mean": 202.0041708000008, "iqr": 1.4469799999687893, "raw_times": [201.08129800001961, 203.97636300003796, 200.93769199996814, 201.49722299998984, 202.5282779999884], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 201.3829520000172, "peak_bytes": 632035840, "ok": true, "absmax": 1.621246337890625e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 1.621246337890625e-05, "mae": 9.61917862696282e-07, "mse": 1.59423277530657e-12, "ref": "naive_moe"}, "err": null} +{"ts": "2025-11-10T22:00:35Z", "run": "1939dc0ee47a4164bf38304335c67bc8", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B1_S1024_E2", "batch": 1, "seq_len": 1024, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 2, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": 
"Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 367.22704099997827, "p50": 367.62146799998163, "p90": 367.7445199999738, "mean": 367.9209119999882, "iqr": 0.4843269999810218, "raw_times": [369.7513380000146, 367.2601929999928, 367.62146799998163, 367.7445199999738, 367.22704099997827], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 368.07921899998064, "peak_bytes": 643844608, "ok": true, "absmax": 2.6226043701171875e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 2.6226043701171875e-05, "mae": 2.0501920516835526e-06, "mse": 7.1848811622476916e-12, "ref": "naive_moe"}, "err": null} +{"ts": "2025-11-10T22:01:22Z", "run": "1939dc0ee47a4164bf38304335c67bc8", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B1_S1024_E4", "batch": 1, "seq_len": 1024, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 4, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 392.9537719999985, "p50": 394.19261099999403, "p90": 394.552635000025, "mean": 394.1458786000112, "iqr": 1.3762300000053074, "raw_times": [393.1764050000197, 394.19261099999403, 392.9537719999985, 394.552635000025, 395.85397000001876], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 399.8835020000229, "peak_bytes": 823386112, "ok": true, "absmax": 1.3589859008789062e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 1.3589859008789062e-05, "mae": 9.400179123986163e-07, "mse": 1.5130355735665235e-12, "ref": "naive_moe"}, "err": null} +{"ts": "2025-11-10T22:02:51Z", "run": "1939dc0ee47a4164bf38304335c67bc8", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_S512_E2", "batch": 4, "seq_len": 512, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 2, 
"top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 734.9415649999855, "p50": 736.2597970000024, "p90": 736.4179590000504, "mean": 736.8552042000147, "iqr": 0.5320090000395794, "raw_times": [735.8859500000108, 736.4179590000504, 734.9415649999855, 736.2597970000024, 740.7707500000242], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 738.5199589999729, "peak_bytes": 1036112384, "ok": true, "absmax": 3.2901763916015625e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 3.2901763916015625e-05, "mae": 2.0572656467265915e-06, "mse": 7.247809123700488e-12, "ref": "naive_moe"}, "err": null} +{"ts": "2025-11-10T22:04:32Z", "run": "1939dc0ee47a4164bf38304335c67bc8", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_S512_E4", "batch": 4, "seq_len": 512, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 4, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 795.7670519999738, "p50": 798.8816239999323, "p90": 799.2389810000304, "mean": 798.3748011999751, "iqr": 0.5543240000633887, "raw_times": [798.684656999967, 798.8816239999323, 799.3016919999718, 799.2389810000304, 795.7670519999738], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 796.8497300000763, "peak_bytes": 1235263488, "ok": true, "absmax": 1.430511474609375e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 1.430511474609375e-05, "mae": 9.400343401466671e-07, "mse": 1.5107844445957919e-12, "ref": "naive_moe"}, "err": null} +{"ts": "2025-11-10T22:07:29Z", "run": "1939dc0ee47a4164bf38304335c67bc8", "impl": "binned_torch", "tags": {"family": 
"pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_S1024_E2", "batch": 4, "seq_len": 1024, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 2, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1474.9918590000561, "p50": 1483.5365430000138, "p90": 1484.378332999995, "mean": 1483.3181600000216, "iqr": 3.7910559999545512, "raw_times": [1480.5872770000406, 1474.9918590000561, 1484.378332999995, 1483.5365430000138, 1493.0967880000026], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1492.4540110000635, "peak_bytes": 1861947904, "ok": true, "absmax": 2.6226043701171875e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 2.6226043701171875e-05, "mae": 2.060702854578267e-06, "mse": 7.262949790198814e-12, "ref": "naive_moe"}, "err": null} +{"ts": "2025-11-10T22:10:52Z", "run": "1939dc0ee47a4164bf38304335c67bc8", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_S1024_E4", "batch": 4, "seq_len": 1024, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 4, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1594.949616000008, "p50": 1601.9022579999955, "p90": 1602.6959760000636, "mean": 1600.7068320000144, "iqr": 2.6664300000902585, "raw_times": [1601.9022579999955, 1600.0295459999734, 1594.949616000008, 1602.6959760000636, 1603.9567640000314], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1575.0532499999963, "peak_bytes": 2062163968, "ok": true, "absmax": 1.5974044799804688e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 1.5974044799804688e-05, "mae": 9.529014732834185e-07, "mse": 
1.5621694476192216e-12, "ref": "naive_moe"}, "err": null} diff --git a/openai_moe/impls/binned_torch.html b/openai_moe/impls/binned_torch.html index a785c694cb0f0b00ce5a5c1d57f2f2717b2be01d..b9730920cd0c741eb712b131abc276de2e42df42 100644 --- a/openai_moe/impls/binned_torch.html +++ b/openai_moe/impls/binned_torch.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: 
none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); 
border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } 
+:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: 
flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: 
var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { 
color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: 
var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ 
transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s 
ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { 
background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { 
margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { 
.cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; 
font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception 
*/ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: 
#BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* 
Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] 
.highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] 
.highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: 
var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; 
margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4106,7 +3888,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: nv | 0.24s +Cell: nv | 0.22s | Raw @@ -4122,16 +3904,16 @@ Cell: nv | 0.24s
-
Fri Oct 31 20:00:34 2025       
+
Mon Nov 10 21:58:43 2025       
 +-----------------------------------------------------------------------------------------+
-| NVIDIA-SMI 570.195.03             Driver Version: 570.195.03     CUDA Version: 12.8     |
-|-----------------------------------------+------------------------+----------------------+
+| NVIDIA-SMI 580.95.05              Driver Version: 580.95.05      CUDA Version: 13.0     |
++-----------------------------------------+------------------------+----------------------+
 | GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
 | Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
 |                                         |                        |               MIG M. |
 |=========================================+========================+======================|
 |   0  NVIDIA L40S                    On  |   00000000:4D:00.0 Off |                    0 |
-| N/A   34C    P0             81W /  350W |       0MiB /  46068MiB |     18%      Default |
+| N/A   31C    P0             78W /  350W |       0MiB /  46068MiB |     17%      Default |
 |                                         |                        |                  N/A |
 +-----------------------------------------+------------------------+----------------------+
 
@@ -4155,7 +3937,7 @@ Cell: nv | 0.24s
 ▼ output
  ▶ uv-logs
  | 
-Cell: benchmark | 727.85s
+Cell: benchmark | 727.18s
  | 
 
 Raw
@@ -4313,29 +4095,29 @@ PROFILE TRACE: binned_torch | cuda_B1_S512_E2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                           binned_torch         0.00%       0.000us         0.00%       0.000us       0.000us     906.550ms      1808.50%     906.550ms     906.550ms             1  
-                                           binned_torch        25.29%     229.728ms       100.00%     908.308ms     908.308ms       0.000us         0.00%      50.129ms      50.129ms             1  
-                                             aten::item         1.81%      16.434ms        25.66%     233.033ms      15.186us       0.000us         0.00%      15.809ms       1.030us         15345  
-                              aten::_local_scalar_dense         6.08%      55.189ms        23.85%     216.599ms      14.115us      15.808ms        31.54%      15.809ms       1.030us         15345  
-                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us      15.808ms        31.54%      15.808ms       1.030us         15345  
-                                              aten::bmm         0.02%     187.925us         0.02%     226.636us      37.773us       7.688ms        15.34%       7.688ms       1.281ms             6  
-                                ampere_sgemm_128x128_nn         0.00%       0.000us         0.00%       0.000us       0.000us       7.688ms        15.34%       7.688ms       1.281ms             6  
-                                     aten::floor_divide         5.37%      48.789ms        13.13%     119.247ms      19.409us       7.554ms        15.07%       7.554ms       1.230us          6144  
-                                            aten::copy_         3.71%      33.699ms         9.08%      82.451ms      13.394us       6.606ms        13.18%       6.607ms       1.073us          6156  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       6.602ms        13.17%       6.602ms       1.073us          6153  
-                                              aten::mul         3.08%      27.972ms         5.49%      49.893ms      16.194us       4.718ms         9.41%       4.718ms       1.531us          3081  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       4.471ms         8.92%       4.471ms       1.456us          3072  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       4.032ms         8.04%       4.032ms       1.312us          3072  
-                                        aten::remainder         3.03%      27.567ms         4.66%      42.309ms      13.772us       3.722ms         7.42%       3.722ms       1.212us          3072  
-                                              aten::add         2.91%      26.436ms         4.87%      44.207ms      14.575us       3.546ms         7.07%       3.546ms       1.169us          3033  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       3.524ms         7.03%       3.524ms       1.147us          3072  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       3.156ms         6.30%       3.156ms       1.042us          3030  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       1.964ms         3.92%       1.964ms       1.279us          1536  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       1.758ms         3.51%       1.758ms       1.145us          1536  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     286.305us         0.57%     286.305us      47.718us             6  
+                                           binned_torch         0.00%       0.000us         0.00%       0.000us       0.000us     931.122ms      1835.78%     931.122ms     931.122ms             1  
+                                           binned_torch        25.32%     236.300ms       100.00%     933.185ms     933.185ms       0.000us         0.00%      50.723ms      50.723ms             1  
+                                             aten::item         1.92%      17.916ms        25.08%     234.061ms      15.253us       0.000us         0.00%      15.750ms       1.026us         15345  
+                              aten::_local_scalar_dense         5.72%      53.357ms        23.16%     216.145ms      14.086us      15.749ms        31.05%      15.750ms       1.026us         15345  
+                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us      15.749ms        31.05%      15.749ms       1.026us         15345  
+                                     aten::floor_divide         5.56%      51.923ms        13.14%     122.652ms      19.963us       7.815ms        15.41%       7.815ms       1.272us          6144  
+                                              aten::bmm         0.02%     190.442us         0.02%     231.383us      38.564us       7.780ms        15.34%       7.780ms       1.297ms             6  
+                                ampere_sgemm_128x128_nn         0.00%       0.000us         0.00%       0.000us       0.000us       7.780ms        15.34%       7.780ms       1.297ms             6  
+                                            aten::copy_         3.79%      35.401ms         9.18%      85.713ms      13.923us       6.584ms        12.98%       6.585ms       1.070us          6156  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       6.579ms        12.97%       6.579ms       1.069us          6153  
+                                              aten::mul         3.06%      28.578ms         5.54%      51.726ms      16.789us       4.711ms         9.29%       4.711ms       1.529us          3081  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       4.480ms         8.83%       4.480ms       1.458us          3072  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       4.161ms         8.20%       4.161ms       1.354us          3072  
+                                        aten::remainder         3.12%      29.137ms         4.83%      45.065ms      14.669us       3.840ms         7.57%       3.840ms       1.250us          3072  
+                                              aten::add         2.80%      26.083ms         4.76%      44.381ms      14.633us       3.757ms         7.41%       3.757ms       1.239us          3033  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       3.656ms         7.21%       3.656ms       1.190us          3072  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       3.366ms         6.64%       3.366ms       1.111us          3030  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       2.023ms         3.99%       2.023ms       1.317us          1536  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       1.817ms         3.58%       1.817ms       1.183us          1536  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     283.649us         0.56%     283.649us      47.275us             6  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 908.315ms
-Self CUDA time total: 50.127ms
+Self CPU time total: 933.193ms
+Self CUDA time total: 50.721ms
 
 
 
@@ -4345,29 +4127,29 @@ PROFILE TRACE: binned_torch | cuda_B1_S512_E4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                           binned_torch         0.00%       0.000us         0.00%       0.000us       0.000us     939.657ms      1760.51%     939.657ms     939.657ms             1  
-                                           binned_torch        24.72%     232.366ms       100.00%     940.175ms     940.175ms       0.000us         0.00%      53.379ms      53.379ms             1  
-                                             aten::item         1.65%      15.471ms        26.56%     249.752ms      14.748us       0.000us         0.00%      17.339ms       1.024us         16935  
-                              aten::_local_scalar_dense         6.16%      57.893ms        24.92%     234.282ms      13.834us      17.337ms        32.48%      17.339ms       1.024us         16935  
-                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us      17.337ms        32.48%      17.337ms       1.024us         16935  
-                                              aten::bmm         0.02%     191.684us         0.02%     230.777us      38.463us       7.882ms        14.77%       7.882ms       1.314ms             6  
-                                ampere_sgemm_128x128_nn         0.00%       0.000us         0.00%       0.000us       0.000us       7.882ms        14.77%       7.882ms       1.314ms             6  
-                                     aten::floor_divide         5.10%      47.974ms        12.37%     116.337ms      18.935us       7.540ms        14.13%       7.541ms       1.227us          6144  
-                                            aten::copy_         3.80%      35.738ms         9.00%      84.586ms      13.740us       6.593ms        12.35%       6.595ms       1.071us          6156  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       6.590ms        12.35%       6.590ms       1.071us          6153  
-                                              aten::add         4.16%      39.066ms         7.01%      65.874ms      14.342us       5.113ms         9.58%       5.113ms       1.113us          4593  
-                                              aten::mul         2.92%      27.472ms         5.20%      48.883ms      15.866us       4.715ms         8.83%       4.715ms       1.530us          3081  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       4.472ms         8.38%       4.472ms       1.456us          3072  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       4.021ms         7.53%       4.021ms       1.309us          3072  
-                                        aten::remainder         2.73%      25.664ms         4.27%      40.147ms      13.069us       3.707ms         6.95%       3.707ms       1.207us          3072  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       3.519ms         6.59%       3.519ms       1.146us          3072  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       3.178ms         5.95%       3.178ms       1.049us          3030  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       1.958ms         3.67%       1.958ms       1.275us          1536  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       1.749ms         3.28%       1.749ms       1.139us          1536  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       1.537ms         2.88%       1.537ms       0.985us          1560  
+                                           binned_torch         0.00%       0.000us         0.00%       0.000us       0.000us     938.961ms      1720.32%     938.961ms     938.961ms             1  
+                                           binned_torch        25.07%     235.565ms       100.00%     939.473ms     939.473ms       0.000us         0.00%      54.589ms      54.589ms             1  
+                                             aten::item         1.76%      16.540ms        26.46%     248.589ms      14.679us       0.000us         0.00%      17.855ms       1.054us         16935  
+                              aten::_local_scalar_dense         5.69%      53.475ms        24.70%     232.048ms      13.702us      17.853ms        32.71%      17.855ms       1.054us         16935  
+                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us      17.853ms        32.71%      17.853ms       1.054us         16935  
+                                              aten::bmm         0.02%     182.580us         0.02%     223.522us      37.254us       7.981ms        14.62%       7.981ms       1.330ms             6  
+                                ampere_sgemm_128x128_nn         0.00%       0.000us         0.00%       0.000us       0.000us       7.981ms        14.62%       7.981ms       1.330ms             6  
+                                     aten::floor_divide         5.18%      48.644ms        12.51%     117.515ms      19.127us       7.813ms        14.31%       7.816ms       1.272us          6144  
+                                            aten::copy_         3.69%      34.686ms         8.73%      82.032ms      13.325us       6.629ms        12.15%       6.630ms       1.077us          6156  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       6.626ms        12.14%       6.626ms       1.077us          6153  
+                                              aten::add         3.97%      37.266ms         6.91%      64.908ms      14.132us       5.261ms         9.64%       5.261ms       1.145us          4593  
+                                              aten::mul         2.87%      26.992ms         5.23%      49.129ms      15.946us       4.699ms         8.61%       4.699ms       1.525us          3081  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       4.475ms         8.20%       4.475ms       1.457us          3072  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       4.158ms         7.62%       4.158ms       1.353us          3072  
+                                        aten::remainder         2.85%      26.773ms         4.50%      42.318ms      13.775us       3.852ms         7.06%       3.852ms       1.254us          3072  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       3.655ms         6.70%       3.655ms       1.190us          3072  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       3.271ms         5.99%       3.271ms       1.080us          3030  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       2.030ms         3.72%       2.030ms       1.322us          1536  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       1.822ms         3.34%       1.822ms       1.186us          1536  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       1.585ms         2.90%       1.585ms       1.016us          1560  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 940.182ms
-Self CUDA time total: 53.374ms
+Self CPU time total: 939.480ms
+Self CUDA time total: 54.581ms
 
 
 
@@ -4377,29 +4159,29 @@ PROFILE TRACE: binned_torch | cuda_B1_S1024_E2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                           binned_torch         0.00%       0.000us         0.00%       0.000us       0.000us        1.751s      1703.41%        1.751s        1.751s             1  
-                                           binned_torch        24.63%     431.727ms       100.00%        1.753s        1.753s       0.000us         0.00%     102.829ms     102.829ms             1  
-                                             aten::item         1.69%      29.621ms        25.96%     455.095ms      14.915us       0.000us         0.00%      31.387ms       1.029us         30513  
-                              aten::_local_scalar_dense         5.96%     104.552ms        24.27%     425.474ms      13.944us      31.383ms        30.52%      31.387ms       1.029us         30513  
-                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us      31.383ms        30.52%      31.383ms       1.029us         30513  
-                                              aten::bmm         0.01%     224.614us         0.02%     267.595us      44.599us      15.143ms        14.73%      15.143ms       2.524ms             6  
-                                ampere_sgemm_128x128_nn         0.00%       0.000us         0.00%       0.000us       0.000us      15.143ms        14.73%      15.143ms       2.524ms             6  
-                                     aten::floor_divide         5.56%      97.549ms        13.34%     233.779ms      19.025us      15.089ms        14.68%      15.090ms       1.228us         12288  
-                                            aten::copy_         4.01%      70.283ms         9.47%     166.011ms      13.497us      13.317ms        12.95%      13.317ms       1.083us         12300  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      13.313ms        12.95%      13.313ms       1.083us         12294  
-                                              aten::mul         3.14%      55.060ms         5.66%      99.236ms      16.128us      11.295ms        10.99%      11.297ms       1.836us          6153  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       9.940ms         9.67%       9.940ms       1.618us          6144  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       8.059ms         7.84%       8.059ms       1.312us          6144  
-                                              aten::add         2.85%      49.952ms         4.90%      85.866ms      14.522us       7.505ms         7.30%       7.506ms       1.269us          5913  
-                                        aten::remainder         3.02%      53.015ms         4.74%      83.117ms      13.528us       7.414ms         7.21%       7.416ms       1.207us          6144  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       7.031ms         6.84%       7.031ms       1.144us          6144  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       6.224ms         6.05%       6.224ms       1.053us          5910  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       3.914ms         3.81%       3.914ms       1.274us          3072  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       3.500ms         3.40%       3.500ms       1.139us          3072  
-                                            aten::clamp         0.00%      71.603us         0.01%     117.833us      19.639us       1.180ms         1.15%       1.180ms     196.722us             6  
+                                           binned_torch         0.00%       0.000us         0.00%       0.000us       0.000us        1.710s      1645.94%        1.710s        1.710s             1  
+                                           binned_torch        23.47%     401.594ms       100.00%        1.711s        1.711s       0.000us         0.00%     103.932ms     103.932ms             1  
+                                             aten::item         1.77%      30.361ms        27.00%     461.971ms      15.140us       0.000us         0.00%      31.541ms       1.034us         30513  
+                              aten::_local_scalar_dense         5.97%     102.153ms        25.22%     431.610ms      14.145us      31.538ms        30.35%      31.541ms       1.034us         30513  
+                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us      31.538ms        30.35%      31.538ms       1.034us         30513  
+                                     aten::floor_divide         5.77%      98.697ms        13.68%     234.018ms      19.044us      15.598ms        15.01%      15.600ms       1.270us         12288  
+                                              aten::bmm         0.01%     219.084us         0.02%     260.723us      43.454us      15.235ms        14.66%      15.235ms       2.539ms             6  
+                                ampere_sgemm_128x128_nn         0.00%       0.000us         0.00%       0.000us       0.000us      15.235ms        14.66%      15.235ms       2.539ms             6  
+                                            aten::copy_         3.97%      67.926ms         9.38%     160.451ms      13.045us      13.315ms        12.81%      13.316ms       1.083us         12300  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      13.311ms        12.81%      13.311ms       1.083us         12294  
+                                              aten::mul         3.19%      54.637ms         5.82%      99.678ms      16.200us      11.250ms        10.83%      11.252ms       1.829us          6153  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       9.903ms         9.53%       9.903ms       1.612us          6144  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       8.304ms         7.99%       8.304ms       1.352us          6144  
+                                        aten::remainder         3.07%      52.461ms         4.79%      82.008ms      13.348us       7.670ms         7.38%       7.671ms       1.249us          6144  
+                                              aten::add         2.76%      47.163ms         4.86%      83.106ms      14.055us       7.632ms         7.34%       7.633ms       1.291us          5913  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       7.294ms         7.02%       7.294ms       1.187us          6144  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       6.354ms         6.11%       6.354ms       1.075us          5910  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       4.041ms         3.89%       4.041ms       1.316us          3072  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       3.629ms         3.49%       3.629ms       1.181us          3072  
+                                            aten::clamp         0.00%      71.350us         0.01%     113.931us      18.988us       1.190ms         1.15%       1.190ms     198.366us             6  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.753s
-Self CUDA time total: 102.819ms
+Self CPU time total: 1.711s
+Self CUDA time total: 103.922ms
 
 
 
@@ -4409,29 +4191,29 @@ PROFILE TRACE: binned_torch | cuda_B1_S1024_E4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                           binned_torch         0.00%       0.000us         0.00%       0.000us       0.000us        1.834s      1680.90%        1.834s        1.834s             1  
-                                           binned_torch        24.76%     454.393ms       100.00%        1.835s        1.835s       0.000us         0.00%     109.119ms     109.119ms             1  
-                                             aten::item         1.65%      30.229ms        26.42%     484.819ms      14.374us       0.000us         0.00%      34.734ms       1.030us         33729  
-                              aten::_local_scalar_dense         6.08%     111.551ms        24.77%     454.590ms      13.478us      34.731ms        31.83%      34.734ms       1.030us         33729  
-                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us      34.731ms        31.83%      34.731ms       1.030us         33729  
-                                              aten::bmm         0.01%     219.836us         0.01%     260.868us      43.478us      15.243ms        13.97%      15.243ms       2.540ms             6  
-                                ampere_sgemm_128x128_nn         0.00%       0.000us         0.00%       0.000us       0.000us      15.243ms        13.97%      15.243ms       2.540ms             6  
-                                     aten::floor_divide         5.37%      98.619ms        12.62%     231.581ms      18.846us      15.065ms        13.81%      15.065ms       1.226us         12288  
-                                            aten::copy_         3.65%      66.986ms         8.64%     158.623ms      12.896us      13.313ms        12.20%      13.316ms       1.083us         12300  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      13.309ms        12.20%      13.309ms       1.082us         12297  
-                                              aten::mul         2.96%      54.365ms         5.27%      96.616ms      15.702us      10.967ms        10.05%      10.969ms       1.783us          6153  
-                                              aten::add         4.05%      74.247ms         6.97%     127.934ms      14.060us      10.631ms         9.74%      10.631ms       1.168us          9099  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       9.613ms         8.81%       9.613ms       1.565us          6144  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       8.047ms         7.37%       8.047ms       1.310us          6144  
-                                        aten::remainder         2.81%      51.641ms         4.37%      80.193ms      13.052us       7.438ms         6.82%       7.438ms       1.211us          6144  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       7.018ms         6.43%       7.018ms       1.142us          6144  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       6.225ms         5.71%       6.225ms       1.053us          5910  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       3.928ms         3.60%       3.928ms       1.279us          3072  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       3.510ms         3.22%       3.510ms       1.143us          3072  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       3.154ms         2.89%       3.154ms       0.990us          3186  
+                                           binned_torch         0.00%       0.000us         0.00%       0.000us       0.000us        1.831s      1659.19%        1.831s        1.831s             1  
+                                           binned_torch        23.77%     435.469ms       100.00%        1.832s        1.832s       0.000us         0.00%     110.361ms     110.361ms             1  
+                                             aten::item         1.74%      31.875ms        27.52%     504.183ms      14.948us       0.000us         0.00%      34.964ms       1.037us         33729  
+                              aten::_local_scalar_dense         6.20%     113.521ms        25.78%     472.309ms      14.003us      34.961ms        31.68%      34.964ms       1.037us         33729  
+                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us      34.961ms        31.68%      34.961ms       1.037us         33729  
+                                     aten::floor_divide         5.21%      95.369ms        12.55%     229.877ms      18.707us      15.595ms        14.13%      15.597ms       1.269us         12288  
+                                              aten::bmm         0.01%     225.035us         0.01%     267.825us      44.638us      15.231ms        13.80%      15.231ms       2.539ms             6  
+                                ampere_sgemm_128x128_nn         0.00%       0.000us         0.00%       0.000us       0.000us      15.231ms        13.80%      15.231ms       2.539ms             6  
+                                            aten::copy_         3.69%      67.648ms         8.80%     161.241ms      13.109us      13.343ms        12.09%      13.347ms       1.085us         12300  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      13.340ms        12.09%      13.340ms       1.085us         12297  
+                                              aten::mul         2.99%      54.761ms         5.39%      98.799ms      16.057us      10.934ms         9.91%      10.936ms       1.777us          6153  
+                                              aten::add         3.91%      71.612ms         6.90%     126.397ms      13.891us      10.863ms         9.84%      10.863ms       1.194us          9099  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       9.586ms         8.69%       9.586ms       1.560us          6144  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       8.308ms         7.53%       8.308ms       1.352us          6144  
+                                        aten::remainder         2.81%      51.395ms         4.41%      80.796ms      13.150us       7.688ms         6.97%       7.688ms       1.251us          6144  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       7.287ms         6.60%       7.287ms       1.186us          6144  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       6.364ms         5.77%       6.364ms       1.077us          5910  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       4.054ms         3.67%       4.054ms       1.320us          3072  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       3.634ms         3.29%       3.634ms       1.183us          3072  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       3.232ms         2.93%       3.232ms       1.014us          3186  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.835s
-Self CUDA time total: 109.111ms
+Self CPU time total: 1.832s
+Self CUDA time total: 110.351ms
 
 
 
@@ -4441,29 +4223,29 @@ PROFILE TRACE: binned_torch | cuda_B4_S512_E2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                           binned_torch         0.00%       0.000us         0.00%       0.000us       0.000us        3.518s      1672.53%        3.518s        3.518s             1  
-                                           binned_torch        24.37%     858.118ms       100.00%        3.521s        3.521s       0.000us         0.00%     210.357ms     210.357ms             1  
-                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us      63.177ms        30.04%      63.177ms       1.026us         61586  
-                                             aten::item         1.69%      59.432ms        26.02%     916.275ms      14.878us       0.000us         0.00%      63.177ms       1.026us         61587  
-                              aten::_local_scalar_dense         5.96%     209.806ms        24.34%     856.843ms      13.913us      63.176ms        30.03%      63.177ms       1.026us         61587  
-                                     aten::floor_divide         5.42%     190.698ms        13.50%     475.217ms      19.337us      30.482ms        14.49%      30.486ms       1.240us         24576  
-                                              aten::bmm         0.01%     235.397us         0.01%     281.998us      47.000us      29.291ms        13.93%      29.291ms       4.882ms             6  
-                                ampere_sgemm_128x128_nn         0.00%       0.000us         0.00%       0.000us       0.000us      29.291ms        13.93%      29.291ms       4.882ms             6  
-                                            aten::copy_         3.77%     132.744ms         9.15%     322.282ms      13.107us      26.808ms        12.75%      26.810ms       1.090us         24588  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      26.805ms        12.74%      26.805ms       1.090us         24582  
-                                              aten::mul         3.15%     110.895ms         5.78%     203.457ms      16.545us      25.566ms        12.15%      25.568ms       2.079us         12297  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      22.101ms        10.51%      22.101ms       1.799us         12288  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      16.470ms         7.83%      16.470ms       1.340us         12288  
-                                              aten::add         2.99%     105.439ms         5.15%     181.211ms      14.601us      16.115ms         7.66%      16.116ms       1.298us         12411  
-                                        aten::remainder         2.99%     105.111ms         4.72%     166.195ms      13.525us      14.836ms         7.05%      14.838ms       1.208us         12288  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      14.014ms         6.66%      14.014ms       1.140us         12288  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      12.996ms         6.18%      12.996ms       1.047us         12408  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       7.830ms         3.72%       7.830ms       1.274us          6144  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       7.006ms         3.33%       7.006ms       1.140us          6144  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       2.626ms         1.25%       2.626ms     437.595us             6  
+                                           binned_torch         0.00%       0.000us         0.00%       0.000us       0.000us        3.493s      1641.52%        3.493s        3.493s             1  
+                                           binned_torch        23.72%     828.141ms       100.00%        3.492s        3.492s       0.000us         0.00%     212.777ms     212.777ms             1  
+                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us      63.619ms        29.90%      63.619ms       1.033us         61586  
+                                             aten::item         1.76%      61.470ms        26.76%     934.319ms      15.171us       0.000us         0.00%      63.619ms       1.033us         61587  
+                              aten::_local_scalar_dense         5.95%     207.894ms        25.00%     872.849ms      14.173us      63.616ms        29.90%      63.619ms       1.033us         61587  
+                                     aten::floor_divide         5.53%     193.077ms        13.34%     465.879ms      18.957us      31.606ms        14.86%      31.612ms       1.286us         24576  
+                                              aten::bmm         0.01%     236.694us         0.01%     284.594us      47.432us      29.067ms        13.66%      29.067ms       4.844ms             6  
+                                ampere_sgemm_128x128_nn         0.00%       0.000us         0.00%       0.000us       0.000us      29.067ms        13.66%      29.067ms       4.844ms             6  
+                                            aten::copy_         3.89%     135.756ms         9.33%     325.881ms      13.254us      26.713ms        12.56%      26.714ms       1.086us         24588  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      26.711ms        12.55%      26.711ms       1.087us         24582  
+                                              aten::mul         3.15%     110.066ms         5.73%     199.944ms      16.260us      25.593ms        12.03%      25.595ms       2.081us         12297  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      22.131ms        10.40%      22.131ms       1.801us         12288  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      17.009ms         7.99%      17.009ms       1.384us         12288  
+                                              aten::add         2.82%      98.495ms         4.98%     173.932ms      14.014us      16.658ms         7.83%      16.659ms       1.342us         12411  
+                                        aten::remainder         3.04%     106.037ms         4.77%     166.563ms      13.555us      15.433ms         7.25%      15.435ms       1.256us         12288  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      14.597ms         6.86%      14.597ms       1.188us         12288  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      13.527ms         6.36%      13.527ms       1.090us         12408  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       8.132ms         3.82%       8.132ms       1.324us          6144  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       7.300ms         3.43%       7.300ms       1.188us          6144  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       2.623ms         1.23%       2.623ms     437.201us             6  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 3.521s
-Self CUDA time total: 210.342ms
+Self CPU time total: 3.492s
+Self CUDA time total: 212.763ms
 
 
 
@@ -4473,29 +4255,29 @@ PROFILE TRACE: binned_torch | cuda_B4_S512_E4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                           binned_torch         0.00%       0.000us         0.00%       0.000us       0.000us        3.742s      1679.57%        3.742s        3.742s             1  
-                                           binned_torch        24.42%     914.204ms       100.00%        3.744s        3.744s       0.000us         0.00%     222.834ms     222.834ms             1  
-                                             aten::item         1.73%      64.729ms        26.53%     993.125ms      14.638us       0.000us         0.00%      69.848ms       1.030us         67845  
-                              aten::_local_scalar_dense         6.14%     229.850ms        24.80%     928.396ms      13.684us      69.844ms        31.35%      69.848ms       1.030us         67845  
-                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us      69.844ms        31.35%      69.844ms       1.030us         67841  
-                                     aten::floor_divide         5.29%     197.931ms        12.52%     468.921ms      19.080us      30.509ms        13.69%      30.515ms       1.242us         24576  
-                                ampere_sgemm_128x128_nn         0.00%       0.000us         0.00%       0.000us       0.000us      29.140ms        13.08%      29.140ms       4.857ms             6  
-                                              aten::bmm         0.01%     232.675us         0.01%     273.538us      45.590us      29.140ms        13.08%      29.140ms       4.857ms             6  
-                                            aten::copy_         3.66%     136.881ms         8.73%     326.908ms      13.295us      26.646ms        11.96%      26.647ms       1.084us         24588  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      26.643ms        11.96%      26.643ms       1.084us         24581  
-                                              aten::mul         2.96%     110.832ms         5.24%     196.253ms      15.959us      25.520ms        11.45%      25.522ms       2.075us         12297  
-                                              aten::add         4.16%     155.619ms         7.13%     266.948ms      14.322us      22.169ms         9.95%      22.169ms       1.189us         18639  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      22.076ms         9.91%      22.076ms       1.797us         12288  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      16.462ms         7.39%      16.462ms       1.340us         12287  
-                                        aten::remainder         2.77%     103.887ms         4.33%     162.240ms      13.203us      14.877ms         6.68%      14.879ms       1.211us         12288  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      14.047ms         6.30%      14.047ms       1.143us         12287  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      12.957ms         5.82%      12.957ms       1.044us         12407  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       7.856ms         3.53%       7.856ms       1.279us          6144  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       7.021ms         3.15%       7.021ms       1.143us          6144  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.109ms         2.74%       6.109ms       0.981us          6228  
+                                           binned_torch         0.00%       0.000us         0.00%       0.000us       0.000us        3.669s      1629.04%        3.669s        3.669s             1  
+                                           binned_torch        23.71%     870.025ms       100.00%        3.670s        3.670s       0.000us         0.00%     225.217ms     225.217ms             1  
+                                             aten::item         1.74%      63.801ms        26.98%     990.130ms      14.594us       0.000us         0.00%      69.736ms       1.028us         67845  
+                              aten::_local_scalar_dense         5.93%     217.737ms        25.24%     926.329ms      13.654us      69.731ms        30.96%      69.736ms       1.028us         67845  
+                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us      69.731ms        30.96%      69.731ms       1.028us         67841  
+                                     aten::floor_divide         5.15%     189.112ms        12.36%     453.770ms      18.464us      31.523ms        14.00%      31.529ms       1.283us         24576  
+                                              aten::bmm         0.01%     229.594us         0.01%     272.075us      45.346us      28.926ms        12.84%      28.926ms       4.821ms             6  
+                                ampere_sgemm_128x128_nn         0.00%       0.000us         0.00%       0.000us       0.000us      28.926ms        12.84%      28.926ms       4.821ms             6  
+                                            aten::copy_         3.90%     143.149ms         8.93%     327.628ms      13.325us      26.721ms        11.87%      26.722ms       1.087us         24588  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      26.719ms        11.86%      26.719ms       1.087us         24581  
+                                              aten::mul         3.13%     114.822ms         5.47%     200.852ms      16.333us      25.594ms        11.37%      25.596ms       2.081us         12297  
+                                              aten::add         3.87%     141.881ms         6.78%     248.742ms      13.345us      23.243ms        10.32%      23.243ms       1.247us         18639  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      22.132ms         9.83%      22.132ms       1.801us         12288  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      16.988ms         7.54%      16.988ms       1.383us         12287  
+                                        aten::remainder         2.85%     104.729ms         4.42%     162.304ms      13.208us      15.354ms         6.82%      15.355ms       1.250us         12288  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      14.535ms         6.45%      14.535ms       1.183us         12287  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      13.676ms         6.07%      13.676ms       1.102us         12407  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us       8.096ms         3.60%       8.096ms       1.318us          6144  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       7.258ms         3.22%       7.258ms       1.181us          6144  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us       6.475ms         2.88%       6.475ms       1.040us          6228  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 3.744s
-Self CUDA time total: 222.814ms
+Self CPU time total: 3.670s
+Self CUDA time total: 225.199ms
 
 
 
@@ -4505,29 +4287,29 @@ PROFILE TRACE: binned_torch | cuda_B4_S1024_E2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                           binned_torch         0.00%       0.000us         0.00%       0.000us       0.000us        6.967s      1665.27%        6.967s        6.967s             1  
-                                           binned_torch        24.68%        1.721s       100.00%        6.973s        6.973s       0.000us         0.00%     418.392ms     418.392ms             1  
-                                             aten::item         1.64%     114.231ms        25.94%        1.809s      14.732us       0.000us         0.00%     125.163ms       1.020us        122763  
-                              aten::_local_scalar_dense         5.97%     416.624ms        24.30%        1.694s      13.802us     125.151ms        29.91%     125.163ms       1.020us        122763  
-                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us     125.151ms        29.91%     125.151ms       1.019us        122762  
-                                     aten::floor_divide         5.62%     391.846ms        13.33%     929.253ms      18.906us      61.051ms        14.59%      61.053ms       1.242us         49152  
-                                ampere_sgemm_128x128_nn         0.00%       0.000us         0.00%       0.000us       0.000us      57.281ms        13.69%      57.281ms       9.547ms             6  
-                                              aten::bmm         0.00%     234.996us         0.00%     276.787us      46.131us      57.281ms        13.69%      57.281ms       9.547ms             6  
-                                            aten::copy_         3.92%     273.517ms         9.35%     652.240ms      13.268us      53.435ms        12.77%      53.437ms       1.087us         49158  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      53.433ms        12.77%      53.433ms       1.087us         49154  
-                                              aten::mul         3.15%     219.950ms         5.62%     391.612ms      15.929us      51.411ms        12.29%      51.419ms       2.091us         24585  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      44.451ms        10.62%      44.451ms       1.809us         24576  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      32.993ms         7.89%      32.993ms       1.343us         24576  
-                                              aten::add         2.87%     200.428ms         4.94%     344.166ms      14.085us      31.887ms         7.62%      31.889ms       1.305us         24435  
-                                        aten::remainder         3.00%     208.953ms         4.67%     325.902ms      13.261us      29.680ms         7.09%      29.684ms       1.208us         24576  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      28.059ms         6.71%      28.059ms       1.142us         24576  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      25.247ms         6.03%      25.247ms       1.033us         24431  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      15.667ms         3.74%      15.667ms       1.275us         12288  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      14.014ms         3.35%      14.014ms       1.140us         12288  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       5.233ms         1.25%       5.233ms     872.184us             6  
+                                           binned_torch         0.00%       0.000us         0.00%       0.000us       0.000us        6.859s      1611.59%        6.859s        6.859s             1  
+                                           binned_torch        24.10%        1.655s       100.00%        6.866s        6.866s       0.000us         0.00%     425.661ms     425.661ms             1  
+                                             aten::item         1.68%     115.068ms        26.29%        1.805s      14.704us       0.000us         0.00%     127.116ms       1.035us        122763  
+                              aten::_local_scalar_dense         5.74%     393.879ms        24.61%        1.690s      13.764us     127.109ms        29.86%     127.116ms       1.035us        122763  
+                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us     127.110ms        29.86%     127.110ms       1.035us        122762  
+                                     aten::floor_divide         5.46%     374.656ms        13.09%     898.826ms      18.287us      63.404ms        14.90%      63.408ms       1.290us         49152  
+                                              aten::bmm         0.00%     234.973us         0.00%     276.793us      46.132us      56.971ms        13.38%      56.971ms       9.495ms             6  
+                                ampere_sgemm_128x128_nn         0.00%       0.000us         0.00%       0.000us       0.000us      56.971ms        13.38%      56.971ms       9.495ms             6  
+                                            aten::copy_         4.17%     286.167ms         9.49%     651.750ms      13.258us      53.615ms        12.60%      53.616ms       1.091us         49158  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      53.612ms        12.60%      53.612ms       1.091us         49154  
+                                              aten::mul         3.34%     229.543ms         5.86%     402.465ms      16.370us      51.556ms        12.11%      51.561ms       2.097us         24585  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      44.609ms        10.48%      44.609ms       1.815us         24576  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      34.184ms         8.03%      34.184ms       1.391us         24576  
+                                              aten::add         2.69%     184.813ms         4.71%     323.308ms      13.231us      33.584ms         7.89%      33.588ms       1.375us         24435  
+                                        aten::remainder         3.06%     210.055ms         4.75%     326.044ms      13.267us      30.927ms         7.27%      30.931ms       1.259us         24576  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      29.221ms         6.87%      29.221ms       1.189us         24576  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      26.946ms         6.33%      26.946ms       1.103us         24431  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      16.291ms         3.83%      16.291ms       1.326us         12288  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      14.637ms         3.44%      14.637ms       1.191us         12288  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       5.222ms         1.23%       5.222ms     870.407us             6  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 6.973s
-Self CUDA time total: 418.361ms
+Self CPU time total: 6.866s
+Self CUDA time total: 425.634ms
 
 
 
@@ -4537,40 +4319,40 @@ PROFILE TRACE: binned_torch | cuda_B4_S1024_E4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                           binned_torch         0.00%       0.000us         0.00%       0.000us       0.000us        7.368s      1660.72%        7.368s        7.368s             1  
-                                           binned_torch        24.39%        1.797s       100.00%        7.370s        7.370s       0.000us         0.00%     443.698ms     443.698ms             1  
-                                             aten::item         1.69%     124.742ms        26.51%        1.954s      14.504us       0.000us         0.00%     137.717ms       1.022us        134715  
-                              aten::_local_scalar_dense         6.11%     450.407ms        24.82%        1.829s      13.577us     137.708ms        31.04%     137.717ms       1.022us        134715  
-                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us     137.710ms        31.04%     137.710ms       1.022us        134711  
-                                     aten::floor_divide         5.42%     399.563ms        12.65%     932.414ms      18.970us      61.071ms        13.77%      61.077ms       1.243us         49152  
-                                              aten::bmm         0.00%     230.664us         0.00%     272.466us      45.411us      57.304ms        12.92%      57.304ms       9.551ms             6  
-                                ampere_sgemm_128x128_nn         0.00%       0.000us         0.00%       0.000us       0.000us      57.304ms        12.92%      57.304ms       9.551ms             6  
-                                            aten::copy_         3.65%     269.132ms         8.67%     639.259ms      13.004us      54.065ms        12.19%      54.067ms       1.100us         49158  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      54.062ms        12.19%      54.062ms       1.100us         49153  
-                                              aten::mul         2.96%     217.959ms         5.26%     387.551ms      15.764us      51.653ms        11.64%      51.660ms       2.101us         24585  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      44.653ms        10.06%      44.653ms       1.817us         24576  
-                                              aten::add         4.03%     296.962ms         6.96%     512.647ms      14.100us      43.690ms         9.85%      43.694ms       1.202us         36357  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      32.954ms         7.43%      32.954ms       1.341us         24575  
-                                        aten::remainder         2.83%     208.527ms         4.40%     323.906ms      13.180us      29.662ms         6.69%      29.664ms       1.207us         24576  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      28.119ms         6.34%      28.119ms       1.144us         24576  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      25.409ms         5.73%      25.409ms       1.040us         24431  
-void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      15.666ms         3.53%      15.666ms       1.275us         12288  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      13.995ms         3.15%      13.995ms       1.139us         12288  
-void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      11.644ms         2.62%      11.644ms       0.977us         11922  
+                                           binned_torch         0.00%       0.000us         0.00%       0.000us       0.000us        7.331s      1630.84%        7.331s        7.331s             1  
+                                           binned_torch        23.92%        1.754s       100.00%        7.333s        7.333s       0.000us         0.00%     449.578ms     449.578ms             1  
+                                             aten::item         1.73%     127.153ms        27.44%        2.013s      14.940us       0.000us         0.00%     139.264ms       1.034us        134715  
+                              aten::_local_scalar_dense         6.23%     456.926ms        25.71%        1.885s      13.996us     139.253ms        30.98%     139.264ms       1.034us        134715  
+                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us     139.255ms        30.98%     139.255ms       1.034us        134707  
+                                     aten::floor_divide         5.02%     368.091ms        12.28%     900.843ms      18.328us      63.383ms        14.10%      63.388ms       1.290us         49152  
+                                ampere_sgemm_128x128_nn         0.00%       0.000us         0.00%       0.000us       0.000us      56.831ms        12.64%      56.831ms       9.472ms             6  
+                                              aten::bmm         0.00%     231.002us         0.00%     273.424us      45.571us      56.831ms        12.64%      56.831ms       9.472ms             6  
+                                            aten::copy_         3.67%     268.957ms         8.71%     638.523ms      12.989us      53.771ms        11.96%      53.773ms       1.094us         49158  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      53.768ms        11.96%      53.768ms       1.094us         49149  
+                                              aten::mul         2.96%     217.228ms         5.34%     391.576ms      15.927us      51.518ms        11.46%      51.524ms       2.096us         24585  
+                                              aten::add         3.83%     280.607ms         6.79%     497.692ms      13.689us      45.514ms        10.12%      45.518ms       1.252us         36357  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      44.542ms         9.91%      44.542ms       1.812us         24576  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      34.127ms         7.59%      34.127ms       1.389us         24573  
+                                        aten::remainder         2.85%     209.203ms         4.50%     330.314ms      13.441us      30.793ms         6.85%      30.795ms       1.253us         24576  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      29.257ms         6.51%      29.257ms       1.191us         24573  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      26.610ms         5.92%      26.610ms       1.089us         24431  
+void at::native::vectorized_elementwise_kernel<2, at...         0.00%       0.000us         0.00%       0.000us       0.000us      16.233ms         3.61%      16.233ms       1.321us         12288  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      14.559ms         3.24%      14.559ms       1.185us         12288  
+void at::native::unrolled_elementwise_kernel<at::nat...         0.00%       0.000us         0.00%       0.000us       0.000us      12.261ms         2.73%      12.261ms       1.028us         11922  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 7.370s
-Self CUDA time total: 443.660ms
+Self CPU time total: 7.333s
+Self CUDA time total: 449.542ms
 
 
 impl                     wl                  p50(ms)  ok
-binned_torch             cuda_B1_S1024_E2     372.79  True
-binned_torch             cuda_B1_S1024_E4     382.68  True
-binned_torch             cuda_B1_S512_E2      150.05  True
-binned_torch             cuda_B1_S512_E4      200.26  True
-binned_torch             cuda_B4_S1024_E2    1486.48  True
-binned_torch             cuda_B4_S1024_E4    1524.50  True
-binned_torch             cuda_B4_S512_E2      742.02  True
-binned_torch             cuda_B4_S512_E4      801.90  True
+binned_torch             cuda_B1_S1024_E2     367.62  True
+binned_torch             cuda_B1_S1024_E4     394.19  True
+binned_torch             cuda_B1_S512_E2      154.67  True
+binned_torch             cuda_B1_S512_E4      201.50  True
+binned_torch             cuda_B4_S1024_E2    1483.54  True
+binned_torch             cuda_B4_S1024_E4    1601.90  True
+binned_torch             cuda_B4_S512_E2      736.26  True
+binned_torch             cuda_B4_S512_E4      798.88  True
 

Artifacts:

diff --git a/openai_moe/impls/gpt_oss_moe.html b/openai_moe/impls/gpt_oss_moe.html index 2133ccac99c4f05bc3163b7f04c006955d4539b2..81d9b0320c1bf7519801b765d0d45e8340ae6e9f 100644 --- a/openai_moe/impls/gpt_oss_moe.html +++ b/openai_moe/impls/gpt_oss_moe.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: 
var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } 
+:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: 
var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: 
flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); 
border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); 
background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { 
font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: 
var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid 
var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); 
@@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 
{ margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; 
} - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight 
.gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ 
+[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { 
color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* 
Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] 
.highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] 
.highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - 
} -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ 
body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4106,7 +3888,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: nv | 0.24s +Cell: nv | 0.22s | Raw @@ -4123,16 +3905,16 @@ Cell: nv | 0.24s
-
Fri Oct 31 20:00:34 2025       
+
Mon Nov 10 21:58:43 2025       
 +-----------------------------------------------------------------------------------------+
-| NVIDIA-SMI 570.195.03             Driver Version: 570.195.03     CUDA Version: 12.8     |
-|-----------------------------------------+------------------------+----------------------+
+| NVIDIA-SMI 580.95.05              Driver Version: 580.95.05      CUDA Version: 13.0     |
++-----------------------------------------+------------------------+----------------------+
 | GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
 | Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
 |                                         |                        |               MIG M. |
 |=========================================+========================+======================|
 |   0  NVIDIA L40S                    On  |   00000000:4D:00.0 Off |                    0 |
-| N/A   34C    P0             81W /  350W |       0MiB /  46068MiB |     18%      Default |
+| N/A   31C    P0             78W /  350W |       0MiB /  46068MiB |     17%      Default |
 |                                         |                        |                  N/A |
 +-----------------------------------------+------------------------+----------------------+
 
@@ -4156,7 +3938,7 @@ Cell: nv | 0.24s
 ▼ output
  ▶ uv-logs
  | 
-Cell: benchmark | 24.32s
+Cell: benchmark | 25.04s
  | 
 
 Raw
@@ -4260,29 +4042,29 @@ PROFILE TRACE: gpt_oss_experts | cuda_B1_S512_E2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                        gpt_oss_experts         0.00%       0.000us         0.00%       0.000us       0.000us      10.211ms       197.81%      10.211ms      10.211ms             1  
-                                        gpt_oss_experts        16.48%       2.023ms        99.94%      12.270ms      12.270ms       0.000us         0.00%       5.165ms       5.165ms             1  
-                                           aten::matmul         0.22%      26.489us         3.82%     468.520us      39.043us       0.000us         0.00%       4.540ms     378.357us            12  
-                                               aten::mm         2.36%     289.825us         3.60%     442.031us      36.836us       4.540ms        87.96%       4.540ms     378.357us            12  
-                                 ampere_sgemm_128x64_nn         0.00%       0.000us         0.00%       0.000us       0.000us       3.078ms        59.62%       3.078ms     341.948us             9  
-void cutlass::Kernel2<cutlass_80_simt_sgemm_128x64_8...         0.00%       0.000us         0.00%       0.000us       0.000us       1.457ms        28.23%       1.457ms     485.813us             3  
-                                              aten::mul         1.42%     174.948us         2.34%     287.701us      11.988us     109.119us         2.11%     109.119us       4.547us            24  
-                                              aten::add         1.61%     197.786us         3.85%     472.357us      26.242us     103.039us         2.00%     103.039us       5.724us            18  
-                                            aten::index         1.73%     212.127us         2.86%     350.900us      29.242us      86.591us         1.68%      86.591us       7.216us            12  
-                                       aten::index_add_         0.51%      62.499us         0.79%      97.312us      16.219us      82.688us         1.60%      82.688us      13.781us             6  
-void at::native::indexFuncLargeIndex<float, long, un...         0.00%       0.000us         0.00%       0.000us       0.000us      82.688us         1.60%      82.688us      13.781us             6  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      80.511us         1.56%      80.511us       6.709us            12  
-                                          aten::nonzero         2.20%     270.146us         6.58%     808.380us      89.820us      63.743us         1.23%      74.368us       8.263us             9  
-                                            aten::clamp         0.98%     120.045us         1.63%     200.026us      16.669us      64.705us         1.25%      64.705us       5.392us            12  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      64.705us         1.25%      64.705us       5.392us            12  
-                                            aten::where         0.06%       7.400us         5.25%     644.007us     107.334us       0.000us         0.00%      60.384us      10.064us             6  
-                                    aten::nonzero_numpy         0.11%      13.320us         5.19%     636.607us     106.101us       0.000us         0.00%      60.384us      10.064us             6  
-void at::native::vectorized_gather_kernel<16, long>(...         0.00%       0.000us         0.00%       0.000us       0.000us      60.063us         1.16%      60.063us      10.011us             6  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      56.800us         1.10%      56.800us       4.733us            12  
-                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us      50.911us         0.99%      50.911us       1.131us            45  
+                                        gpt_oss_experts         0.00%       0.000us         0.00%       0.000us       0.000us      10.360ms       190.98%      10.360ms      10.360ms             1  
+                                        gpt_oss_experts        15.12%       1.924ms        99.94%      12.713ms      12.713ms       0.000us         0.00%       5.428ms       5.428ms             1  
+                                           aten::matmul         0.18%      22.311us         3.73%     473.846us      39.487us       0.000us         0.00%       4.800ms     400.041us            12  
+                                               aten::mm         2.34%     297.100us         3.55%     451.535us      37.628us       4.800ms        88.50%       4.800ms     400.041us            12  
+                                 ampere_sgemm_128x64_nn         0.00%       0.000us         0.00%       0.000us       0.000us       3.258ms        60.07%       3.258ms     362.028us             9  
+void cutlass::Kernel2<cutlass_80_simt_sgemm_128x64_8...         0.00%       0.000us         0.00%       0.000us       0.000us       1.536ms        28.31%       1.536ms     511.862us             3  
+                                              aten::mul         1.29%     163.978us         2.14%     271.630us      11.318us     109.411us         2.02%     109.411us       4.559us            24  
+                                              aten::add         1.51%     192.130us         3.80%     483.423us      26.857us     103.358us         1.91%     103.358us       5.742us            18  
+                                            aten::index         1.52%     193.374us         2.62%     333.164us      27.764us      88.224us         1.63%      88.224us       7.352us            12  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      80.864us         1.49%      80.864us       6.739us            12  
+                                       aten::index_add_         0.46%      58.130us         0.76%      97.241us      16.207us      80.064us         1.48%      80.064us      13.344us             6  
+void at::native::indexFuncLargeIndex<float, long, un...         0.00%       0.000us         0.00%       0.000us       0.000us      80.064us         1.48%      80.064us      13.344us             6  
+                                          aten::nonzero         2.05%     260.439us         6.29%     799.492us      88.832us      65.278us         1.20%      76.093us       8.455us             9  
+                                            aten::clamp         0.99%     126.442us         1.60%     203.852us      16.988us      63.456us         1.17%      63.456us       5.288us            12  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      63.456us         1.17%      63.456us       5.288us            12  
+                                            aten::where         0.06%       7.391us         5.01%     637.190us     106.198us       0.000us         0.00%      61.533us      10.256us             6  
+                                    aten::nonzero_numpy         0.09%      11.880us         4.95%     629.799us     104.967us       0.000us         0.00%      61.533us      10.256us             6  
+void at::native::vectorized_gather_kernel<16, long>(...         0.00%       0.000us         0.00%       0.000us       0.000us      60.544us         1.12%      60.544us      10.091us             6  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      56.929us         1.05%      56.929us       4.744us            12  
+                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us      51.073us         0.94%      51.073us       1.135us            45  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 12.278ms
-Self CUDA time total: 5.162ms
+Self CPU time total: 12.720ms
+Self CUDA time total: 5.425ms
 
 
 
@@ -4292,29 +4074,29 @@ PROFILE TRACE: gpt_oss_experts | cuda_B1_S512_E4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                        gpt_oss_experts         0.00%       0.000us         0.00%       0.000us       0.000us      13.933ms       229.38%      13.933ms      13.933ms             1  
-                                        gpt_oss_experts        16.29%       2.560ms        99.97%      15.712ms      15.712ms       0.000us         0.00%       6.077ms       6.077ms             1  
-                                           aten::matmul         0.30%      47.223us         5.17%     812.581us      33.858us       0.000us         0.00%       5.268ms     219.512us            24  
-                                               aten::mm         3.09%     485.951us         4.87%     765.358us      31.890us       5.268ms        86.73%       5.268ms     219.512us            24  
-                                 ampere_sgemm_128x64_nn         0.00%       0.000us         0.00%       0.000us       0.000us       5.213ms        85.81%       5.213ms     217.198us            24  
-                                          aten::nonzero         2.45%     385.408us         7.89%       1.240ms      82.649us     112.163us         1.85%     134.498us       8.967us            15  
-                                              aten::mul         2.03%     318.275us         3.36%     528.222us      11.005us     130.496us         2.15%     130.496us       2.719us            48  
-                                              aten::add         2.25%     353.820us         3.74%     587.771us      16.327us     127.072us         2.09%     127.072us       3.530us            36  
-                                            aten::where         0.08%      11.882us         7.49%       1.177ms      98.080us       0.000us         0.00%     120.705us      10.059us            12  
-                                    aten::nonzero_numpy         0.15%      24.083us         7.41%       1.165ms      97.090us       0.000us         0.00%     120.705us      10.059us            12  
-                                            aten::index         2.31%     363.442us         3.93%     617.030us      25.710us     110.145us         1.81%     110.145us       4.589us            24  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     101.312us         1.67%     101.312us       4.221us            24  
-                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us      91.447us         1.51%      91.447us       1.051us            87  
-                                            aten::clamp         1.32%     207.076us         2.26%     355.011us      14.792us      85.793us         1.41%      85.793us       3.575us            24  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      85.793us         1.41%      85.793us       3.575us            24  
-                                             aten::item         0.52%      81.620us        38.60%       6.066ms      84.255us       0.000us         0.00%      75.446us       1.048us            72  
-                              aten::_local_scalar_dense         2.00%     315.046us        38.08%       5.985ms      83.122us      75.446us         1.24%      75.446us       1.048us            72  
-                                       aten::index_add_         0.75%     118.511us         1.16%     182.084us      15.174us      72.926us         1.20%      72.926us       6.077us            12  
-void at::native::indexFuncLargeIndex<float, long, un...         0.00%       0.000us         0.00%       0.000us       0.000us      72.926us         1.20%      72.926us       6.077us            12  
-void at::native::vectorized_gather_kernel<16, long>(...         0.00%       0.000us         0.00%       0.000us       0.000us      65.857us         1.08%      65.857us       5.488us            12  
+                                        gpt_oss_experts         0.00%       0.000us         0.00%       0.000us       0.000us      13.942ms       218.38%      13.942ms      13.942ms             1  
+                                        gpt_oss_experts        15.57%       2.499ms        99.97%      16.048ms      16.048ms       0.000us         0.00%       6.387ms       6.387ms             1  
+                                           aten::matmul         0.25%      39.461us         4.79%     769.170us      32.049us       0.000us         0.00%       5.570ms     232.102us            24  
+                                               aten::mm         2.77%     444.894us         4.55%     729.709us      30.405us       5.570ms        87.25%       5.570ms     232.102us            24  
+                                 ampere_sgemm_128x64_nn         0.00%       0.000us         0.00%       0.000us       0.000us       5.515ms        86.38%       5.515ms     229.794us            24  
+                                          aten::nonzero         2.34%     374.919us         7.60%       1.220ms      81.308us     114.786us         1.80%     137.349us       9.157us            15  
+                                              aten::mul         1.86%     298.668us         3.09%     496.508us      10.344us     131.614us         2.06%     131.614us       2.742us            48  
+                                              aten::add         2.06%     330.439us         3.47%     556.980us      15.472us     127.904us         2.00%     127.904us       3.553us            36  
+                                            aten::where         0.07%      11.120us         7.17%       1.151ms      95.939us       0.000us         0.00%     123.109us      10.259us            12  
+                                    aten::nonzero_numpy         0.13%      20.771us         7.10%       1.140ms      95.012us       0.000us         0.00%     123.109us      10.259us            12  
+                                            aten::index         2.15%     344.365us         3.72%     597.667us      24.903us     111.391us         1.74%     111.391us       4.641us            24  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     101.985us         1.60%     101.985us       4.249us            24  
+                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us      91.395us         1.43%      91.395us       1.051us            87  
+                                            aten::clamp         1.30%     208.833us         2.21%     355.215us      14.801us      88.257us         1.38%      88.257us       3.677us            24  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      88.257us         1.38%      88.257us       3.677us            24  
+                                             aten::item         0.49%      78.042us        39.66%       6.367ms      88.433us       0.000us         0.00%      75.297us       1.046us            72  
+                              aten::_local_scalar_dense         1.92%     308.797us        39.18%       6.289ms      87.349us      75.297us         1.18%      75.297us       1.046us            72  
+                                       aten::index_add_         0.59%      94.029us         0.99%     158.640us      13.220us      71.454us         1.12%      71.454us       5.954us            12  
+void at::native::indexFuncLargeIndex<float, long, un...         0.00%       0.000us         0.00%       0.000us       0.000us      71.454us         1.12%      71.454us       5.954us            12  
+void at::native::vectorized_gather_kernel<16, long>(...         0.00%       0.000us         0.00%       0.000us       0.000us      66.271us         1.04%      66.271us       5.523us            12  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 15.717ms
-Self CUDA time total: 6.074ms
+Self CPU time total: 16.053ms
+Self CUDA time total: 6.384ms
 
 
 
@@ -4324,29 +4106,29 @@ PROFILE TRACE: gpt_oss_experts | cuda_B1_S1024_E2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                        gpt_oss_experts         0.00%       0.000us         0.00%       0.000us       0.000us      12.540ms       148.48%      12.540ms      12.540ms             1  
-                                        gpt_oss_experts        11.83%       1.734ms        99.96%      14.654ms      14.654ms       0.000us         0.00%       8.451ms       8.451ms             1  
-                                           aten::matmul         0.16%      23.602us         3.00%     439.592us      36.633us       0.000us         0.00%       7.417ms     618.087us            12  
-                                               aten::mm         1.78%     261.037us         2.84%     415.990us      34.666us       7.417ms        87.82%       7.417ms     618.087us            12  
-void cutlass::Kernel2<cutlass_80_simt_sgemm_256x128_...         0.00%       0.000us         0.00%       0.000us       0.000us       4.532ms        53.65%       4.532ms     755.263us             6  
-                                 ampere_sgemm_128x64_nn         0.00%       0.000us         0.00%       0.000us       0.000us       1.475ms        17.46%       1.475ms     491.509us             3  
-void cutlass::Kernel2<cutlass_80_simt_sgemm_128x64_8...         0.00%       0.000us         0.00%       0.000us       0.000us       1.405ms        16.64%       1.405ms     468.490us             3  
-                                              aten::mul         1.05%     153.262us         1.78%     261.173us      10.882us     197.791us         2.34%     197.791us       8.241us            24  
-                                              aten::add         1.26%     184.574us         2.07%     304.007us      16.889us     188.543us         2.23%     188.543us      10.475us            18  
-                                       aten::index_add_         0.35%      50.951us         0.57%      83.553us      13.925us     169.408us         2.01%     169.408us      28.235us             6  
-void at::native::indexFuncLargeIndex<float, long, un...         0.00%       0.000us         0.00%       0.000us       0.000us     169.408us         2.01%     169.408us      28.235us             6  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     149.663us         1.77%     149.663us      12.472us            12  
-                                            aten::index         1.27%     186.102us         2.16%     316.927us      26.411us     146.942us         1.74%     146.942us      12.245us            12  
-void at::native::vectorized_gather_kernel<16, long>(...         0.00%       0.000us         0.00%       0.000us       0.000us     117.440us         1.39%     117.440us      19.573us             6  
-                                            aten::clamp         0.71%     104.743us         1.22%     178.924us      14.910us     110.912us         1.31%     110.912us       9.243us            12  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     110.912us         1.31%     110.912us       9.243us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     104.864us         1.24%     104.864us       8.739us            12  
-                                          aten::nonzero         1.58%     232.211us         4.94%     724.348us      80.483us      69.633us         0.82%      81.377us       9.042us             9  
-                                            aten::where         0.04%       6.259us         4.08%     597.684us      99.614us       0.000us         0.00%      66.816us      11.136us             6  
-                                    aten::nonzero_numpy         0.08%      11.999us         4.03%     591.425us      98.571us       0.000us         0.00%      66.816us      11.136us             6  
+                                        gpt_oss_experts         0.00%       0.000us         0.00%       0.000us       0.000us      12.597ms       146.28%      12.597ms      12.597ms             1  
+                                        gpt_oss_experts        11.26%       1.671ms        99.96%      14.835ms      14.835ms       0.000us         0.00%       8.616ms       8.616ms             1  
+                                           aten::matmul         0.13%      19.980us         2.85%     423.596us      35.300us       0.000us         0.00%       7.614ms     634.486us            12  
+                                               aten::mm         1.70%     251.563us         2.72%     403.616us      33.635us       7.614ms        88.42%       7.614ms     634.486us            12  
+void cutlass::Kernel2<cutlass_80_simt_sgemm_256x128_...         0.00%       0.000us         0.00%       0.000us       0.000us       4.628ms        53.74%       4.628ms     771.312us             6  
+                                 ampere_sgemm_128x64_nn         0.00%       0.000us         0.00%       0.000us       0.000us       1.524ms        17.70%       1.524ms     508.107us             3  
+void cutlass::Kernel2<cutlass_80_simt_sgemm_128x64_8...         0.00%       0.000us         0.00%       0.000us       0.000us       1.455ms        16.90%       1.455ms     485.046us             3  
+                                              aten::mul         1.00%     148.488us         1.71%     253.960us      10.582us     188.737us         2.19%     188.737us       7.864us            24  
+                                              aten::add         1.14%     169.821us         1.97%     292.395us      16.244us     180.606us         2.10%     180.606us      10.034us            18  
+                                       aten::index_add_         0.32%      47.691us         0.57%      84.001us      14.000us     164.000us         1.90%     164.000us      27.333us             6  
+void at::native::indexFuncLargeIndex<float, long, un...         0.00%       0.000us         0.00%       0.000us       0.000us     164.000us         1.90%     164.000us      27.333us             6  
+                                            aten::index         1.23%     181.951us         2.12%     314.145us      26.179us     144.608us         1.68%     144.608us      12.051us            12  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     142.815us         1.66%     142.815us      11.901us            12  
+void at::native::vectorized_gather_kernel<16, long>(...         0.00%       0.000us         0.00%       0.000us       0.000us     114.816us         1.33%     114.816us      19.136us             6  
+                                            aten::clamp         0.72%     107.083us         1.24%     184.134us      15.345us     106.818us         1.24%     106.818us       8.902us            12  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     106.818us         1.24%     106.818us       8.902us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     100.513us         1.17%     100.513us       8.376us            12  
+                                          aten::nonzero         1.51%     224.830us         4.84%     718.263us      79.807us      68.894us         0.80%      80.029us       8.892us             9  
+                                            aten::where         0.04%       5.681us         3.95%     586.411us      97.735us       0.000us         0.00%      65.405us      10.901us             6  
+                                    aten::nonzero_numpy         0.07%      10.160us         3.91%     580.730us      96.788us       0.000us         0.00%      65.405us      10.901us             6  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 14.659ms
-Self CUDA time total: 8.446ms
+Self CPU time total: 14.841ms
+Self CUDA time total: 8.611ms
 
 
 
@@ -4356,29 +4138,29 @@ PROFILE TRACE: gpt_oss_experts | cuda_B1_S1024_E4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                        gpt_oss_experts         0.00%       0.000us         0.00%       0.000us       0.000us      18.317ms       174.31%      18.317ms      18.317ms             1  
-                                        gpt_oss_experts        13.54%       2.761ms        99.97%      20.385ms      20.385ms       0.000us         0.00%      10.514ms      10.514ms             1  
-                                           aten::matmul         0.23%      47.082us         4.02%     819.853us      34.161us       0.000us         0.00%       9.237ms     384.865us            24  
-                                               aten::mm         2.37%     482.255us         3.79%     772.771us      32.199us       9.237ms        87.90%       9.237ms     384.865us            24  
-                                 ampere_sgemm_128x64_nn         0.00%       0.000us         0.00%       0.000us       0.000us       6.282ms        59.78%       6.282ms     349.001us            18  
-void cutlass::Kernel2<cutlass_80_simt_sgemm_128x64_8...         0.00%       0.000us         0.00%       0.000us       0.000us       2.944ms        28.01%       2.944ms     490.655us             6  
-                                              aten::mul         1.50%     305.331us         2.55%     520.818us      10.850us     235.298us         2.24%     235.298us       4.902us            48  
-                                              aten::add         1.72%     351.113us         2.86%     584.036us      16.223us     213.502us         2.03%     213.502us       5.931us            36  
-                                            aten::index         1.95%     397.314us         3.28%     668.454us      27.852us     205.349us         1.95%     205.349us       8.556us            24  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     166.720us         1.59%     166.720us       6.947us            24  
-                                       aten::index_add_         0.50%     101.340us         0.81%     165.573us      13.798us     155.585us         1.48%     155.585us      12.965us            12  
-void at::native::indexFuncLargeIndex<float, long, un...         0.00%       0.000us         0.00%       0.000us       0.000us     155.585us         1.48%     155.585us      12.965us            12  
-void at::native::vectorized_gather_kernel<16, long>(...         0.00%       0.000us         0.00%       0.000us       0.000us     146.947us         1.40%     146.947us      12.246us            12  
-                                          aten::nonzero         1.95%     398.176us         6.26%       1.276ms      85.090us     121.380us         1.16%     145.668us       9.711us            15  
-                                            aten::clamp         1.04%     212.193us         1.79%     365.180us      15.216us     134.239us         1.28%     134.239us       5.593us            24  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     134.239us         1.28%     134.239us       5.593us            24  
-                                            aten::where         0.06%      11.340us         5.97%       1.216ms     101.373us       0.000us         0.00%     131.522us      10.960us            12  
-                                    aten::nonzero_numpy         0.12%      24.140us         5.91%       1.205ms     100.428us       0.000us         0.00%     131.522us      10.960us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     119.840us         1.14%     119.840us       4.993us            24  
-                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us     100.830us         0.96%     100.830us       1.159us            87  
+                                        gpt_oss_experts         0.00%       0.000us         0.00%       0.000us       0.000us      18.460ms       171.74%      18.460ms      18.460ms             1  
+                                        gpt_oss_experts        12.58%       2.618ms        99.97%      20.806ms      20.806ms       0.000us         0.00%      10.754ms      10.754ms             1  
+                                           aten::matmul         0.19%      39.724us         3.85%     801.313us      33.388us       0.000us         0.00%       9.496ms     395.681us            24  
+                                               aten::mm         2.21%     460.813us         3.66%     761.589us      31.733us       9.496ms        88.35%       9.496ms     395.681us            24  
+                                 ampere_sgemm_128x64_nn         0.00%       0.000us         0.00%       0.000us       0.000us       6.491ms        60.39%       6.491ms     360.603us            18  
+void cutlass::Kernel2<cutlass_80_simt_sgemm_128x64_8...         0.00%       0.000us         0.00%       0.000us       0.000us       2.993ms        27.84%       2.993ms     498.774us             6  
+                                              aten::mul         2.25%     467.369us         3.28%     683.452us      14.239us     226.014us         2.10%     226.014us       4.709us            48  
+                                              aten::add         1.60%     332.210us         2.74%     569.351us      15.815us     207.013us         1.93%     207.013us       5.750us            36  
+                                            aten::index         1.72%     357.427us         2.99%     622.664us      25.944us     203.329us         1.89%     203.329us       8.472us            24  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     162.243us         1.51%     162.243us       6.760us            24  
+                                       aten::index_add_         0.45%      94.395us         0.78%     161.485us      13.457us     155.167us         1.44%     155.167us      12.931us            12  
+void at::native::indexFuncLargeIndex<float, long, un...         0.00%       0.000us         0.00%       0.000us       0.000us     155.167us         1.44%     155.167us      12.931us            12  
+                                          aten::nonzero         1.86%     386.184us         6.07%       1.263ms      84.202us     120.989us         1.13%     144.894us       9.660us            15  
+void at::native::vectorized_gather_kernel<16, long>(...         0.00%       0.000us         0.00%       0.000us       0.000us     144.769us         1.35%     144.769us      12.064us            12  
+                                            aten::where         0.05%      10.779us         5.71%       1.188ms      99.031us       0.000us         0.00%     130.270us      10.856us            12  
+                                    aten::nonzero_numpy         0.10%      20.452us         5.66%       1.178ms      98.133us       0.000us         0.00%     130.270us      10.856us            12  
+                                            aten::clamp         1.04%     217.185us         1.79%     373.407us      15.559us     129.252us         1.20%     129.252us       5.386us            24  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     129.252us         1.20%     129.252us       5.386us            24  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     115.584us         1.08%     115.584us       4.816us            24  
+                         Memcpy DtoH (Device -> Pinned)         0.00%       0.000us         0.00%       0.000us       0.000us     107.234us         1.00%     107.234us       1.233us            87  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 20.390ms
-Self CUDA time total: 10.509ms
+Self CPU time total: 20.812ms
+Self CUDA time total: 10.749ms
 
 
 
@@ -4388,29 +4170,29 @@ PROFILE TRACE: gpt_oss_experts | cuda_B4_S512_E2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                        gpt_oss_experts         0.00%       0.000us         0.00%       0.000us       0.000us      21.031ms       119.92%      21.031ms      21.031ms             1  
-                                        gpt_oss_experts         7.59%       1.747ms        99.98%      23.024ms      23.024ms       0.000us         0.00%      17.548ms      17.548ms             1  
-                                           aten::matmul         0.10%      23.660us         1.94%     446.020us      37.168us       0.000us         0.00%      14.659ms       1.222ms            12  
-                                               aten::mm         1.17%     268.524us         1.83%     422.360us      35.197us      14.659ms        83.59%      14.659ms       1.222ms            12  
-void cutlass::Kernel2<cutlass_80_simt_sgemm_256x128_...         0.00%       0.000us         0.00%       0.000us       0.000us       8.967ms        51.13%       8.967ms       1.495ms             6  
-                                 ampere_sgemm_128x64_nn         0.00%       0.000us         0.00%       0.000us       0.000us       5.685ms        32.42%       5.685ms     947.562us             6  
-                                              aten::add         0.82%     187.722us         1.36%     312.616us      17.368us     785.408us         4.48%     785.408us      43.634us            18  
-                                              aten::mul         0.68%     156.369us         1.15%     264.222us      11.009us     674.688us         3.85%     674.688us      28.112us            24  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     504.575us         2.88%     504.575us      42.048us            12  
-                                       aten::index_add_         0.22%      50.951us         0.37%      86.132us      14.355us     448.545us         2.56%     448.545us      74.757us             6  
-void at::native::indexFuncLargeIndex<float, long, un...         0.00%       0.000us         0.00%       0.000us       0.000us     448.545us         2.56%     448.545us      74.757us             6  
-                                            aten::clamp         0.46%     107.053us         0.80%     183.295us      15.275us     336.000us         1.92%     336.000us      28.000us            12  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     336.000us         1.92%     336.000us      28.000us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     314.239us         1.79%     314.239us      52.373us             6  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     280.833us         1.60%     280.833us      46.806us             6  
-                                            aten::index         0.81%     185.806us         1.39%     320.548us      26.712us     259.102us         1.48%     259.102us      21.592us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     258.944us         1.48%     258.944us      21.579us            12  
-void at::native::vectorized_gather_kernel<16, long>(...         0.00%       0.000us         0.00%       0.000us       0.000us     225.407us         1.29%     225.407us      37.568us             6  
-                                          aten::sigmoid         0.16%      36.131us         0.27%      61.901us      10.317us     175.073us         1.00%     175.073us      29.179us             6  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     175.073us         1.00%     175.073us      29.179us             6  
+                                        gpt_oss_experts         0.00%       0.000us         0.00%       0.000us       0.000us      21.083ms       119.21%      21.083ms      21.083ms             1  
+                                        gpt_oss_experts         7.12%       1.665ms        99.98%      23.365ms      23.365ms       0.000us         0.00%      17.695ms      17.695ms             1  
+                                           aten::matmul         0.09%      20.129us         1.89%     441.429us      36.786us       0.000us         0.00%      14.828ms       1.236ms            12  
+                                               aten::mm         1.11%     260.517us         1.80%     421.300us      35.108us      14.828ms        83.84%      14.828ms       1.236ms            12  
+void cutlass::Kernel2<cutlass_80_simt_sgemm_256x128_...         0.00%       0.000us         0.00%       0.000us       0.000us       9.047ms        51.15%       9.047ms       1.508ms             6  
+                                 ampere_sgemm_128x64_nn         0.00%       0.000us         0.00%       0.000us       0.000us       5.773ms        32.64%       5.773ms     962.167us             6  
+                                              aten::add         0.74%     174.025us         1.27%     296.156us      16.453us     776.579us         4.39%     776.579us      43.143us            18  
+                                              aten::mul         0.64%     149.555us         1.10%     257.226us      10.718us     654.338us         3.70%     654.338us      27.264us            24  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     499.874us         2.83%     499.874us      41.656us            12  
+                                       aten::index_add_         0.21%      48.400us         0.36%      84.241us      14.040us     449.985us         2.54%     449.985us      74.998us             6  
+void at::native::indexFuncLargeIndex<float, long, un...         0.00%       0.000us         0.00%       0.000us       0.000us     449.985us         2.54%     449.985us      74.998us             6  
+                                            aten::clamp         0.46%     107.321us         0.79%     185.253us      15.438us     329.054us         1.86%     329.054us      27.421us            12  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     329.054us         1.86%     329.054us      27.421us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     300.737us         1.70%     300.737us      50.123us             6  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     276.705us         1.56%     276.705us      46.117us             6  
+                                            aten::index         0.76%     178.051us         1.32%     309.462us      25.788us     268.800us         1.52%     268.800us      22.400us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     253.889us         1.44%     253.889us      21.157us            12  
+void at::native::vectorized_gather_kernel<16, long>(...         0.00%       0.000us         0.00%       0.000us       0.000us     236.095us         1.33%     236.095us      39.349us             6  
+                                          aten::sigmoid         0.16%      36.571us         0.27%      63.572us      10.595us     176.833us         1.00%     176.833us      29.472us             6  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     176.833us         1.00%     176.833us      29.472us             6  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 23.030ms
-Self CUDA time total: 17.537ms
+Self CPU time total: 23.371ms
+Self CUDA time total: 17.686ms
 
 
 
@@ -4420,29 +4202,29 @@ PROFILE TRACE: gpt_oss_experts | cuda_B4_S512_E4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                        gpt_oss_experts         0.00%       0.000us         0.00%       0.000us       0.000us      24.377ms       140.11%      24.377ms      24.377ms             1  
-                                        gpt_oss_experts        10.50%       2.651ms        99.98%      25.237ms      25.237ms       0.000us         0.00%      17.408ms      17.408ms             1  
-                                           aten::matmul         0.19%      47.519us         3.41%     860.801us      35.867us       0.000us         0.00%      15.185ms     632.705us            24  
-                                               aten::mm         2.06%     521.061us         3.22%     813.282us      33.887us      15.185ms        87.28%      15.185ms     632.705us            24  
-void cutlass::Kernel2<cutlass_80_simt_sgemm_256x128_...         0.00%       0.000us         0.00%       0.000us       0.000us       9.179ms        52.76%       9.179ms     764.922us            12  
-                                 ampere_sgemm_128x64_nn         0.00%       0.000us         0.00%       0.000us       0.000us       3.124ms        17.96%       3.124ms     520.682us             6  
-void cutlass::Kernel2<cutlass_80_simt_sgemm_128x64_8...         0.00%       0.000us         0.00%       0.000us       0.000us       2.871ms        16.50%       2.871ms     478.432us             6  
-                                              aten::add         1.42%     359.495us         2.37%     598.003us      16.611us     427.713us         2.46%     427.713us      11.881us            36  
-                                              aten::mul         1.23%     309.946us         2.09%     527.073us      10.981us     420.510us         2.42%     420.510us       8.761us            48  
-                                       aten::index_add_         0.40%     101.283us         0.66%     166.886us      13.907us     383.489us         2.20%     383.489us      31.957us            12  
-void at::native::indexFuncLargeIndex<float, long, un...         0.00%       0.000us         0.00%       0.000us       0.000us     383.489us         2.20%     383.489us      31.957us            12  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     343.712us         1.98%     343.712us      14.321us            24  
-                                            aten::index         1.56%     393.991us         2.62%     662.158us      27.590us     337.086us         1.94%     337.086us      14.045us            24  
-void at::native::vectorized_gather_kernel<16, long>(...         0.00%       0.000us         0.00%       0.000us       0.000us     272.926us         1.57%     272.926us      22.744us            12  
-                                            aten::clamp         0.84%     212.993us         1.44%     363.038us      15.127us     230.431us         1.32%     230.431us       9.601us            24  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     230.431us         1.32%     230.431us       9.601us            24  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     223.071us         1.28%     223.071us       9.295us            24  
-                                          aten::nonzero         1.57%     395.401us         5.00%       1.262ms      84.127us     128.836us         0.74%     156.164us      10.411us            15  
-                                            aten::where         0.05%      12.011us         4.77%       1.205ms     100.378us       0.000us         0.00%     140.900us      11.742us            12  
-                                    aten::nonzero_numpy         0.10%      25.021us         4.72%       1.193ms      99.377us       0.000us         0.00%     140.900us      11.742us            12  
+                                        gpt_oss_experts         0.00%       0.000us         0.00%       0.000us       0.000us      24.709ms       139.35%      24.709ms      24.709ms             1  
+                                        gpt_oss_experts         9.76%       2.650ms        99.98%      27.156ms      27.156ms       0.000us         0.00%      17.741ms      17.741ms             1  
+                                           aten::matmul         0.15%      40.162us         3.17%     860.144us      35.839us       0.000us         0.00%      15.537ms     647.383us            24  
+                                               aten::mm         1.90%     517.331us         3.02%     819.982us      34.166us      15.537ms        87.63%      15.537ms     647.383us            24  
+void cutlass::Kernel2<cutlass_80_simt_sgemm_256x128_...         0.00%       0.000us         0.00%       0.000us       0.000us       9.352ms        52.74%       9.352ms     779.317us            12  
+                                 ampere_sgemm_128x64_nn         0.00%       0.000us         0.00%       0.000us       0.000us       3.225ms        18.19%       3.225ms     537.452us             6  
+void cutlass::Kernel2<cutlass_80_simt_sgemm_128x64_8...         0.00%       0.000us         0.00%       0.000us       0.000us       2.947ms        16.62%       2.947ms     491.169us             6  
+                                              aten::add         1.29%     349.077us         2.22%     601.999us      16.722us     419.552us         2.37%     419.552us      11.654us            36  
+                                              aten::mul         1.15%     311.953us         1.98%     539.014us      11.229us     410.371us         2.31%     410.371us       8.549us            48  
+                                       aten::index_add_         0.36%      97.270us         0.61%     164.412us      13.701us     379.682us         2.14%     379.682us      31.640us            12  
+void at::native::indexFuncLargeIndex<float, long, un...         0.00%       0.000us         0.00%       0.000us       0.000us     379.682us         2.14%     379.682us      31.640us            12  
+                                            aten::index         1.31%     354.897us         2.36%     641.129us      26.714us     344.639us         1.94%     344.639us      14.360us            24  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     337.056us         1.90%     337.056us      14.044us            24  
+void at::native::vectorized_gather_kernel<16, long>(...         0.00%       0.000us         0.00%       0.000us       0.000us     280.607us         1.58%     280.607us      23.384us            12  
+                                            aten::clamp         0.78%     212.661us         1.36%     368.626us      15.359us     225.662us         1.27%     225.662us       9.403us            24  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     225.662us         1.27%     225.662us       9.403us            24  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     218.112us         1.23%     218.112us       9.088us            24  
+                                          aten::nonzero         1.41%     383.824us         4.68%       1.271ms      84.702us     127.715us         0.72%     153.604us      10.240us            15  
+                                            aten::where         0.04%      11.073us         4.43%       1.203ms     100.252us       0.000us         0.00%     138.052us      11.504us            12  
+                                    aten::nonzero_numpy         0.07%      20.230us         4.39%       1.192ms      99.329us       0.000us         0.00%     138.052us      11.504us            12  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 25.242ms
-Self CUDA time total: 17.398ms
+Self CPU time total: 27.162ms
+Self CUDA time total: 17.731ms
 
 
 
@@ -4452,29 +4234,29 @@ PROFILE TRACE: gpt_oss_experts | cuda_B4_S1024_E2
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                        gpt_oss_experts         0.00%       0.000us         0.00%       0.000us       0.000us      40.556ms       109.47%      40.556ms      40.556ms             1  
-                                        gpt_oss_experts         4.33%       1.794ms        99.85%      41.353ms      41.353ms       0.000us         0.00%      37.080ms      37.080ms             1  
-                                           aten::matmul         0.06%      24.371us         1.08%     445.903us      37.159us       0.000us         0.00%      27.082ms       2.257ms            12  
-                                               aten::mm         0.70%     291.738us         1.02%     421.532us      35.128us      27.082ms        73.10%      27.082ms       2.257ms            12  
-void cutlass::Kernel2<cutlass_80_simt_sgemm_256x128_...         0.00%       0.000us         0.00%       0.000us       0.000us      27.079ms        73.09%      27.079ms       2.257ms            12  
-                                              aten::mul         0.38%     159.199us         0.65%     268.178us      11.174us       2.983ms         8.05%       2.983ms     124.287us            24  
-                                              aten::add         0.48%     198.424us         1.09%     451.763us      25.098us       2.404ms         6.49%       2.404ms     133.559us            18  
-                                            aten::clamp         0.27%     112.290us         0.46%     189.433us      15.786us       2.392ms         6.46%       2.392ms     199.373us            12  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       2.392ms         6.46%       2.392ms     199.373us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       1.988ms         5.37%       1.988ms     165.669us            12  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       1.629ms         4.40%       1.629ms     135.763us            12  
-                                       aten::index_add_         0.12%      50.103us         0.20%      84.453us      14.076us     899.456us         2.43%     899.456us     149.909us             6  
-void at::native::indexFuncLargeIndex<float, long, un...         0.00%       0.000us         0.00%       0.000us       0.000us     899.456us         2.43%     899.456us     149.909us             6  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     774.912us         2.09%     774.912us     129.152us             6  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     733.217us         1.98%     733.217us     122.203us             6  
-                                            aten::index         0.45%     187.302us         0.77%     318.787us      26.566us     712.767us         1.92%     712.767us      59.397us            12  
-void at::native::vectorized_gather_kernel<16, long>(...         0.00%       0.000us         0.00%       0.000us       0.000us     678.496us         1.83%     678.496us     113.083us             6  
-                                          aten::sigmoid         0.09%      36.082us         0.15%      63.023us      10.504us     323.008us         0.87%     323.008us      53.835us             6  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     323.008us         0.87%     323.008us      53.835us             6  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     261.631us         0.71%     261.631us      43.605us             6  
+                                        gpt_oss_experts         0.00%       0.000us         0.00%       0.000us       0.000us      40.750ms       109.03%      40.750ms      40.750ms             1  
+                                        gpt_oss_experts         4.08%       1.695ms        99.82%      41.512ms      41.512ms       0.000us         0.00%      37.407ms      37.407ms             1  
+                                           aten::matmul         0.05%      20.951us         1.02%     424.118us      35.343us       0.000us         0.00%      27.409ms       2.284ms            12  
+                                               aten::mm         0.67%     277.566us         0.97%     403.167us      33.597us      27.409ms        73.34%      27.409ms       2.284ms            12  
+void cutlass::Kernel2<cutlass_80_simt_sgemm_256x128_...         0.00%       0.000us         0.00%       0.000us       0.000us      27.406ms        73.33%      27.406ms       2.284ms            12  
+                                              aten::mul         0.37%     154.550us         0.63%     261.852us      10.911us       2.976ms         7.96%       2.976ms     124.014us            24  
+                                              aten::add         0.45%     185.160us         1.07%     445.895us      24.772us       2.401ms         6.42%       2.401ms     133.369us            18  
+                                            aten::clamp         0.28%     116.599us         0.48%     198.482us      16.540us       2.391ms         6.40%       2.391ms     199.291us            12  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       2.391ms         6.40%       2.391ms     199.291us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       1.983ms         5.30%       1.983ms     165.222us            12  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us       1.625ms         4.35%       1.625ms     135.419us            12  
+                                       aten::index_add_         0.12%      48.080us         0.21%      86.751us      14.459us     910.402us         2.44%     910.402us     151.734us             6  
+void at::native::indexFuncLargeIndex<float, long, un...         0.00%       0.000us         0.00%       0.000us       0.000us     910.402us         2.44%     910.402us     151.734us             6  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     775.618us         2.08%     775.618us     129.270us             6  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     740.611us         1.98%     740.611us     123.435us             6  
+                                            aten::index         0.44%     181.234us         0.76%     317.848us      26.487us     714.884us         1.91%     714.884us      59.574us            12  
+void at::native::vectorized_gather_kernel<16, long>(...         0.00%       0.000us         0.00%       0.000us       0.000us     681.379us         1.82%     681.379us     113.563us             6  
+                                          aten::sigmoid         0.09%      38.611us         0.16%      65.922us      10.987us     320.927us         0.86%     320.927us      53.488us             6  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     320.927us         0.86%     320.927us      53.488us             6  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     253.057us         0.68%     253.057us      42.176us             6  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 41.415ms
-Self CUDA time total: 37.046ms
+Self CPU time total: 41.585ms
+Self CUDA time total: 37.374ms
 
 
 
@@ -4484,55 +4266,54 @@ PROFILE TRACE: gpt_oss_experts | cuda_B4_S1024_E4
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                        gpt_oss_experts         0.00%       0.000us         0.00%       0.000us       0.000us      41.050ms       117.27%      41.050ms      41.050ms             1  
-                                        gpt_oss_experts         6.46%       2.709ms        99.99%      41.912ms      41.912ms       0.000us         0.00%      35.025ms      35.025ms             1  
-                                           aten::matmul         0.11%      47.590us         2.12%     888.873us      37.036us       0.000us         0.00%      29.051ms       1.210ms            24  
-                                               aten::mm         1.28%     536.727us         2.01%     841.283us      35.053us      29.051ms        82.99%      29.051ms       1.210ms            24  
-void cutlass::Kernel2<cutlass_80_simt_sgemm_256x128_...         0.00%       0.000us         0.00%       0.000us       0.000us      20.585ms        58.81%      20.585ms       1.372ms            15  
-                                 ampere_sgemm_128x64_nn         0.00%       0.000us         0.00%       0.000us       0.000us       8.453ms        24.15%       8.453ms     939.204us             9  
-                                              aten::add         0.88%     367.610us         1.45%     609.056us      16.918us       1.486ms         4.24%       1.486ms      41.264us            36  
-                                              aten::mul         0.74%     309.128us         1.24%     518.283us      10.798us       1.380ms         3.94%       1.380ms      28.757us            48  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     925.695us         2.64%     925.695us      38.571us            24  
-                                       aten::index_add_         0.24%      99.111us         0.40%     167.273us      13.939us     903.487us         2.58%     903.487us      75.291us            12  
-void at::native::indexFuncLargeIndex<float, long, un...         0.00%       0.000us         0.00%       0.000us       0.000us     903.487us         2.58%     903.487us      75.291us            12  
-                                            aten::clamp         0.51%     214.986us         0.87%     364.790us      15.200us     775.806us         2.22%     775.806us      32.325us            24  
-void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     775.806us         2.22%     775.806us      32.325us            24  
-                                            aten::index         0.89%     373.269us         1.50%     629.207us      26.217us     670.881us         1.92%     670.881us      27.953us            24  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     631.200us         1.80%     631.200us      52.600us            12  
-void at::native::vectorized_gather_kernel<16, long>(...         0.00%       0.000us         0.00%       0.000us       0.000us     600.224us         1.71%     600.224us      50.019us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     559.808us         1.60%     559.808us      46.651us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     540.611us         1.54%     540.611us      22.525us            24  
-                                          aten::sigmoid         0.17%      72.182us         0.29%     123.582us      10.298us     351.039us         1.00%     351.039us      29.253us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     351.039us         1.00%     351.039us      29.253us            12  
+                                        gpt_oss_experts         0.00%       0.000us         0.00%       0.000us       0.000us      41.218ms       116.52%      41.218ms      41.218ms             1  
+                                        gpt_oss_experts         6.00%       2.524ms        99.99%      42.088ms      42.088ms       0.000us         0.00%      35.395ms      35.395ms             1  
+                                           aten::matmul         0.10%      40.160us         2.08%     875.043us      36.460us       0.000us         0.00%      29.436ms       1.226ms            24  
+                                               aten::mm         1.24%     520.099us         1.98%     834.883us      34.787us      29.436ms        83.21%      29.436ms       1.226ms            24  
+void cutlass::Kernel2<cutlass_80_simt_sgemm_256x128_...         0.00%       0.000us         0.00%       0.000us       0.000us      20.785ms        58.75%      20.785ms       1.386ms            15  
+                                 ampere_sgemm_128x64_nn         0.00%       0.000us         0.00%       0.000us       0.000us       8.635ms        24.41%       8.635ms     959.410us             9  
+                                              aten::add         0.83%     349.812us         1.43%     602.505us      16.736us       1.482ms         4.19%       1.482ms      41.161us            36  
+                                              aten::mul         0.72%     302.661us         1.25%     525.878us      10.956us       1.369ms         3.87%       1.369ms      28.527us            48  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     928.163us         2.62%     928.163us      38.673us            24  
+                                       aten::index_add_         0.23%      95.791us         0.40%     170.382us      14.198us     908.198us         2.57%     908.198us      75.683us            12  
+void at::native::indexFuncLargeIndex<float, long, un...         0.00%       0.000us         0.00%       0.000us       0.000us     908.198us         2.57%     908.198us      75.683us            12  
+                                            aten::clamp         0.52%     220.263us         0.90%     378.355us      15.765us     771.551us         2.18%     771.551us      32.148us            24  
+void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     771.551us         2.18%     771.551us      32.148us            24  
+                                            aten::index         0.83%     351.191us         1.46%     613.487us      25.562us     665.121us         1.88%     665.121us      27.713us            24  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     648.065us         1.83%     648.065us      54.005us            12  
+void at::native::vectorized_gather_kernel<16, long>(...         0.00%       0.000us         0.00%       0.000us       0.000us     594.560us         1.68%     594.560us      49.547us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     553.635us         1.57%     553.635us      46.136us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     519.010us         1.47%     519.010us      21.625us            24  
+                                          aten::sigmoid         0.17%      72.451us         0.30%     125.701us      10.475us     356.257us         1.01%     356.257us      29.688us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     356.257us         1.01%     356.257us      29.688us            12  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 41.917ms
-Self CUDA time total: 35.005ms
+Self CPU time total: 42.094ms
+Self CUDA time total: 35.375ms
 
 
 impl                     wl                  p50(ms)  ok
-gpt_oss_experts          cuda_B1_S1024_E2       3.79  True
-gpt_oss_experts          cuda_B1_S1024_E4       5.24  True
-gpt_oss_experts          cuda_B1_S512_E2        2.63  True
-gpt_oss_experts          cuda_B1_S512_E4        3.89  True
-gpt_oss_experts          cuda_B4_S1024_E2      13.28  True
-gpt_oss_experts          cuda_B4_S1024_E4      13.19  True
-gpt_oss_experts          cuda_B4_S512_E2        6.74  True
-gpt_oss_experts          cuda_B4_S512_E4        7.36  True
+gpt_oss_experts          cuda_B1_S1024_E2       3.84  True
+gpt_oss_experts          cuda_B1_S1024_E4       5.30  True
+gpt_oss_experts          cuda_B1_S512_E2        2.68  True
+gpt_oss_experts          cuda_B1_S512_E4        3.91  True
+gpt_oss_experts          cuda_B4_S1024_E2      13.35  True
+gpt_oss_experts          cuda_B4_S1024_E4      13.35  True
+gpt_oss_experts          cuda_B4_S512_E2        6.80  True
+gpt_oss_experts          cuda_B4_S512_E4        7.46  True
 
▶ UV Install Logs
Fetching 6 files: 0%| | 0/6 [00:00<?, ?it/s] -Fetching 6 files: 33%|███▎ | 2/6 [00:00<00:00, 16.13it/s] -Fetching 6 files: 67%|██████▋ | 4/6 [00:00<00:00, 7.33it/s] -Fetching 6 files: 100%|██████████| 6/6 [00:00<00:00, 11.97it/s]
+Fetching 6 files: 50%|█████ | 3/6 [00:00<00:00, 3.54it/s] +Fetching 6 files: 100%|██████████| 6/6 [00:00<00:00, 7.08it/s]

Artifacts:

openai_moe.jsonl diff --git a/openai_moe/results/artifacts/combine/latency.svg b/openai_moe/results/artifacts/combine/latency.svg index 10dbc66aeb1ffe85716a2da3bc2a8a2ad4700bc3..349be26a9334038da38b3dc1b49d2b0171bb138c 100644 --- a/openai_moe/results/artifacts/combine/latency.svg +++ b/openai_moe/results/artifacts/combine/latency.svg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b6b68c91c95cfb46a71083a3812949c831a6e82a5f655eb32ed7c0b19426124d -size 21857 +oid sha256:b2bc6dda123451533c1a79e388cff2690a74060cba2a49b113394b35cfec34c2 +size 21861 diff --git a/openai_moe/results/combined_results.html b/openai_moe/results/combined_results.html index bedcc83cf8db7d27f0e74cfbb9d1c9ceb5663901..d2d91debd63a24a32b83ca0ce612fbab5b46b84b 100644 --- a/openai_moe/results/combined_results.html +++ b/openai_moe/results/combined_results.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; 
--border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - 
-:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - 
:root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ 
:root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] 
.menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: 
auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ 
+ cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: 
transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 
0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { 
background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid 
var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: 
var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight 
.cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ 
+[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ 
+[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { 
color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* 
Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] 
.highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: 
#AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { 
border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 
0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4107,7 +3889,7 @@ body[data-tool="eraser"] .main-content { - 2025-10-31T20:14:14.575906 + 2025-11-10T22:12:05.730920 image/svg+xml @@ -4243,122 +4025,122 @@ body[data-tool="eraser"] .main-content { - + - + - 0 + 0 - + - + - 200 + 200 - + - + - 400 + 400 - + - + - 600 + 600 - + - + - 800 + 800 - + - + - 1000 + 1000 - + - + - 1200 + 1200 - + - + - 1400 + 1400 - + - + - 1600 + 1600 @@ -4366,35 +4148,35 @@ body[data-tool="eraser"] .main-content { - + - - - - - - - + + + + + + + - + - - - - - - - + + + + + + + @@ -4452,7 +4234,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: combine | 4.26s +Cell: combine | 4.50s | Raw @@ -4541,22 +4323,22 @@ Summary: 2 found, 0 skipped, 0 missing COMBINED BENCHMARK SUMMARY impl wl p50(ms) ok -binned_torch cuda_B1_S1024_E2 372.79 True -binned_torch cuda_B1_S1024_E4 382.68 True -binned_torch cuda_B1_S512_E2 150.05 True -binned_torch cuda_B1_S512_E4 200.26 True -binned_torch cuda_B4_S1024_E2 1486.48 True -binned_torch cuda_B4_S1024_E4 1524.50 True -binned_torch cuda_B4_S512_E2 742.02 True -binned_torch cuda_B4_S512_E4 801.90 True -gpt_oss_experts cuda_B1_S1024_E2 3.79 True -gpt_oss_experts cuda_B1_S1024_E4 5.24 True -gpt_oss_experts cuda_B1_S512_E2 2.63 True -gpt_oss_experts cuda_B1_S512_E4 3.89 True -gpt_oss_experts cuda_B4_S1024_E2 13.28 True -gpt_oss_experts cuda_B4_S1024_E4 13.19 True -gpt_oss_experts cuda_B4_S512_E2 6.74 True -gpt_oss_experts cuda_B4_S512_E4 7.36 True +binned_torch cuda_B1_S1024_E2 367.62 True +binned_torch cuda_B1_S1024_E4 394.19 True +binned_torch cuda_B1_S512_E2 154.67 True +binned_torch cuda_B1_S512_E4 201.50 True +binned_torch cuda_B4_S1024_E2 1483.54 True +binned_torch cuda_B4_S1024_E4 1601.90 True +binned_torch cuda_B4_S512_E2 736.26 True +binned_torch cuda_B4_S512_E4 798.88 True +gpt_oss_experts cuda_B1_S1024_E2 3.84 True +gpt_oss_experts cuda_B1_S1024_E4 5.30 True +gpt_oss_experts cuda_B1_S512_E2 2.68 True +gpt_oss_experts cuda_B1_S512_E4 3.91 True +gpt_oss_experts 
cuda_B4_S1024_E2 13.35 True +gpt_oss_experts cuda_B4_S1024_E4 13.35 True +gpt_oss_experts cuda_B4_S512_E2 6.80 True +gpt_oss_experts cuda_B4_S512_E4 7.46 True GENERATING COMBINED VISUALIZATION @@ -4576,7 +4358,7 @@ Implementations included:
▶ UV Install Logs
@@ -4589,7 +4371,7 @@ Installed 37 packages in 196ms - 2025-10-31T20:14:14.575906 + 2025-11-10T22:12:05.730920 image/svg+xml @@ -4725,122 +4507,122 @@ Installed 37 packages in 196ms - + - + - 0 + 0 - + - + - 200 + 200 - + - + - 400 + 400 - + - + - 600 + 600 - + - + - 800 + 800 - + - + - 1000 + 1000 - + - + - 1200 + 1200 - + - + - 1400 + 1400 - + - + - 1600 + 1600 @@ -4848,35 +4630,35 @@ Installed 37 packages in 196ms - + - - - - - - - + + + + + + + - + - - - - - - - + + + + + + + diff --git a/rotary/impls/artifacts/benchmark/rotary.jsonl b/rotary/impls/artifacts/benchmark/rotary.jsonl index 2f046365b897b6b0052a6d0c4d2d39bda02f57ee..b3823789e8f57ae4dfc9f22b0db882237d8d8f85 100644 --- a/rotary/impls/artifacts/benchmark/rotary.jsonl +++ b/rotary/impls/artifacts/benchmark/rotary.jsonl @@ -1,24 +1,24 @@ -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S128_H8_D64_R32", "batch": 1, "seqlen": 128, "num_heads": 8, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.0773009999761598, "p50": 0.07878200000277502, "p90": 0.07927199999357981, "mean": 0.08125379999910365, "iqr": 0.0008899999670575198, "raw_times": [0.07927199999357981, 0.07878200000277502, 0.09253199999648132, 0.07838200002652229, 0.0773009999761598], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08711200001698671, "peak_bytes": 3178496, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.590452924915553e-08, "mae_k": 1.5487040982975486e-08, "mse_q": 2.5241010080938753e-15, "mse_k": 2.364223539299626e-15, "ref": "rotary_torch"}, "err": null} 
-{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S128_H8_D128_R64", "batch": 1, "seqlen": 128, "num_heads": 8, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09208300002683245, "p50": 0.09279300002162927, "p90": 0.09387199997945572, "mean": 0.09325840001110919, "iqr": 0.0014699999724143709, "raw_times": [0.09208300002683245, 0.09240200000704135, 0.09387199997945572, 0.09514200002058715, 0.09279300002162927], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0956929999915701, "peak_bytes": 6356992, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5508486939097565e-08, "mae_k": 1.567566698668088e-08, "mse_q": 2.3630110116356316e-15, "mse_k": 2.416562128626943e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S128_H32_D64_R32", "batch": 1, "seqlen": 128, "num_heads": 32, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09122299996988659, "p50": 0.09174199999506527, "p90": 0.09311200000183817, "mean": 0.09907239998483419, "iqr": 0.0014700000292577897, "raw_times": [0.12764299998480055, 0.09311200000183817, 0.09174199999506527, 0.09164199997258038, 0.09122299996988659], "has_warnings": false, "reps": 5, 
"warmup": 2}, "compile_ms": 0.09512200000472149, "peak_bytes": 12615680, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5856898016863852e-08, "mae_k": 1.572981211950264e-08, "mse_q": 2.4771055025978386e-15, "mse_k": 2.4544071371937915e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S128_H32_D128_R64", "batch": 1, "seqlen": 128, "num_heads": 32, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09014300002263553, "p50": 0.09057199997641874, "p90": 0.09099299995796173, "mean": 0.09084659998279676, "iqr": 0.0004309999894758221, "raw_times": [0.09014300002263553, 0.0905619999684859, 0.09099299995796173, 0.09057199997641874, 0.0919629999884819], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09270300000707721, "peak_bytes": 25231360, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5617658277733426e-08, "mae_k": 1.5788685914230882e-08, "mse_q": 2.4549424620164562e-15, "mse_k": 2.492823469483563e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S512_H8_D64_R32", "batch": 1, "seqlen": 512, "num_heads": 8, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": 
"3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09128200002805897, "p50": 0.09358200003362072, "p90": 0.09361200000057579, "mean": 0.0932360000092558, "iqr": 5.9999990753567545e-05, "raw_times": [0.09358200003362072, 0.09415199997420132, 0.09361200000057579, 0.09128200002805897, 0.09355200000982222], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09915200001842095, "peak_bytes": 12779520, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5962712041073246e-08, "mae_k": 1.5743363945830424e-08, "mse_q": 2.534145124782417e-15, "mse_k": 2.451281585618423e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S512_H8_D128_R64", "batch": 1, "seqlen": 512, "num_heads": 8, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09190200000830373, "p50": 0.09338199998865093, "p90": 0.09447299999010283, "mean": 0.09361019999687414, "iqr": 0.0011509999922054703, "raw_times": [0.09190200000830373, 0.09338199998865093, 0.09497199999941586, 0.09332199999789736, 0.09447299999010283], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09578299994927875, "peak_bytes": 25427968, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.578730035589615e-08, "mae_k": 1.5859711766097462e-08, "mse_q": 2.440287521479536e-15, "mse_k": 2.477901290051784e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", 
"run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S512_H32_D64_R32", "batch": 1, "seqlen": 512, "num_heads": 32, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09281299998065151, "p50": 0.09429199997157411, "p90": 0.09554199999683988, "mean": 0.0945923999893239, "iqr": 0.0018490000002202578, "raw_times": [0.09662200000093435, 0.09281299998065151, 0.09369299999661962, 0.09554199999683988, 0.09429199997157411], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09821199995485586, "peak_bytes": 50462720, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5775295736375483e-08, "mae_k": 1.5847881229547056e-08, "mse_q": 2.471039476146077e-15, "mse_k": 2.472378635235686e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S512_H32_D128_R64", "batch": 1, "seqlen": 512, "num_heads": 32, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09230199998455646, "p50": 0.09352199998602373, "p90": 0.09397200000194061, "mean": 0.09366439998075293, "iqr": 0.00047900005029077874, "raw_times": [0.09230199998455646, 0.09503299997959402, 0.09349299995164984, 0.09352199998602373, 0.09397200000194061], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 
0.09802200003150574, "peak_bytes": 100925440, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5959869870130206e-08, "mae_k": 1.588083975434529e-08, "mse_q": 2.510663677418633e-15, "mse_k": 2.502786271009168e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S2048_H8_D64_R32", "batch": 1, "seqlen": 2048, "num_heads": 8, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.0920319999977437, "p50": 0.0931920000084574, "p90": 0.09354200000188939, "mean": 0.09315399998968132, "iqr": 0.0011200000358257967, "raw_times": [0.0931920000084574, 0.0920319999977437, 0.0924219999660636, 0.09458199997425254, 0.09354200000188939], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09710300003007433, "peak_bytes": 51118080, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5894533689220225e-08, "mae_k": 1.5873395042831362e-08, "mse_q": 2.5093181655819197e-15, "mse_k": 2.488611809911578e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S2048_H8_D128_R64", "batch": 1, "seqlen": 2048, "num_heads": 8, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": 
"Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09380299997019392, "p50": 0.0960819999704654, "p90": 0.10296200002812839, "mean": 0.0988743999982944, "iqr": 0.00756899999032612, "raw_times": [0.10613199998488199, 0.09539300003780227, 0.09380299997019392, 0.10296200002812839, 0.0960819999704654], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0969220000115456, "peak_bytes": 101711872, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5936768349433805e-08, "mae_k": 1.5960043953100467e-08, "mse_q": 2.51039008577667e-15, "mse_k": 2.5111253103748867e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S2048_H32_D64_R32", "batch": 1, "seqlen": 2048, "num_heads": 32, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09422199997288772, "p50": 0.0958319999995183, "p90": 0.09810200003812497, "mean": 0.09699820000150794, "iqr": 0.0028600000518963498, "raw_times": [0.0958319999995183, 0.09524199998622862, 0.09422199997288772, 0.09810200003812497, 0.10159300001078009], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09825199998658718, "peak_bytes": 201850880, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 1.9073486328125e-06, "absmax_k": 9.5367431640625e-07, "mae_q": 1.586510300910504e-08, "mae_k": 1.5813935050346117e-08, "mse_q": 2.499836478770355e-15, "mse_k": 2.4755639026338358e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": 
"eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S2048_H32_D128_R64", "batch": 1, "seqlen": 2048, "num_heads": 32, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.26106699999672855, "p50": 0.2625369999691429, "p90": 0.266995999993469, "mean": 0.2640226000039547, "iqr": 0.0046789999714746955, "raw_times": [0.2625369999691429, 0.2671960000384388, 0.2623170000219943, 0.266995999993469, 0.26106699999672855], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.26397600004202104, "peak_bytes": 403701760, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.581049247079136e-08, "mae_k": 1.5861061797295406e-08, "mse_q": 2.4735094242202705e-15, "mse_k": 2.486832828964107e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S128_H8_D64_R32", "batch": 2, "seqlen": 128, "num_heads": 8, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09289299998727074, "p50": 0.09412200000724624, "p90": 0.0941720000469104, "mean": 0.09422220001624737, "iqr": 0.0009999999974752427, "raw_times": [0.09412200000724624, 0.09317200004943516, 0.0941720000469104, 0.09289299998727074, 0.09675199999037432], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09680300001946307, 
"peak_bytes": 137396224, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5824980437173508e-08, "mae_k": 1.5615324144846454e-08, "mse_q": 2.488090249374306e-15, "mse_k": 2.425079044911585e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S128_H8_D128_R64", "batch": 2, "seqlen": 128, "num_heads": 8, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09332299998732196, "p50": 0.09457300001258773, "p90": 0.10429200000316996, "mean": 0.09875060001149905, "iqr": 0.009959999999864522, "raw_times": [0.10723300005111014, 0.09332299998732196, 0.09457300001258773, 0.10429200000316996, 0.09433200000330544], "has_warnings": true, "reps": 5, "warmup": 2}, "compile_ms": 0.0961519999691518, "peak_bytes": 12648448, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5683587761827766e-08, "mae_k": 1.574532682013796e-08, "mse_q": 2.4310271220254415e-15, "mse_k": 2.4601385856313877e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S128_H32_D64_R32", "batch": 2, "seqlen": 128, "num_heads": 32, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": 
"Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.0932629999965684, "p50": 0.09406200001649268, "p90": 0.09426200000461904, "mean": 0.09393640000325831, "iqr": 0.0008400000410802022, "raw_times": [0.0932629999965684, 0.09406200001649268, 0.09342199996353884, 0.09467300003507262, 0.09426200000461904], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09624299997312846, "peak_bytes": 25198592, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5835009747888762e-08, "mae_k": 1.572560215379326e-08, "mse_q": 2.478222950813504e-15, "mse_k": 2.4541699679685603e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S128_H32_D128_R64", "batch": 2, "seqlen": 128, "num_heads": 32, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09137300003203563, "p50": 0.09416199998213415, "p90": 0.09422200002973113, "mean": 0.09578819999660482, "iqr": 0.00042000004896181053, "raw_times": [0.09422200002973113, 0.09137300003203563, 0.09416199998213415, 0.10538199995835384, 0.09380199998076932], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09458300002052056, "peak_bytes": 50397184, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5876850056883995e-08, "mae_k": 1.5927410501603845e-08, "mse_q": 2.504224532953606e-15, "mse_k": 2.503892919554756e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": 
"eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S512_H8_D64_R32", "batch": 2, "seqlen": 512, "num_heads": 8, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09282199999915974, "p50": 0.09416199998213415, "p90": 0.09431199998743978, "mean": 0.09398199999850476, "iqr": 0.00039999997625272954, "raw_times": [0.09416199998213415, 0.0947020000126031, 0.09431199998743978, 0.09282199999915974, 0.09391200001118705], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09807300000375108, "peak_bytes": 25362432, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5820052823301012e-08, "mae_k": 1.580205122309053e-08, "mse_q": 2.4876468276264184e-15, "mse_k": 2.4866062476507165e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S512_H8_D128_R64", "batch": 2, "seqlen": 512, "num_heads": 8, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09392299995170106, "p50": 0.09451299996499074, "p90": 0.09455299999672206, "mean": 0.09461079997663546, "iqr": 0.00017000002117129043, "raw_times": [0.09392299995170106, 0.09568199999421267, 0.09451299996499074, 0.09438299997555077, 0.09455299999672206], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 
0.09652299996787406, "peak_bytes": 50593792, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5823172105911e-08, "mae_k": 1.582038855474366e-08, "mse_q": 2.464257071579175e-15, "mse_k": 2.4775099608301526e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S512_H32_D64_R32", "batch": 2, "seqlen": 512, "num_heads": 32, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.0935829999662019, "p50": 0.0949919999584381, "p90": 0.09520300000076531, "mean": 0.09494659997244526, "iqr": 0.0008610000463704637, "raw_times": [0.09434199995439485, 0.09661299998242612, 0.0935829999662019, 0.09520300000076531, 0.0949919999584381], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09693200001947844, "peak_bytes": 100794368, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5888783622131086e-08, "mae_k": 1.5861886026868888e-08, "mse_q": 2.4766798685418433e-15, "mse_k": 2.475923891636419e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S512_H32_D128_R64", "batch": 2, "seqlen": 512, "num_heads": 32, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": 
"Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09247200000572775, "p50": 0.09415199997420132, "p90": 0.09440299999141644, "mean": 0.09443839999221382, "iqr": 0.001340999972399004, "raw_times": [0.09440299999141644, 0.09415199997420132, 0.09810299997070615, 0.09306200001901743, 0.09247200000572775], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09771300000238625, "peak_bytes": 201588736, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5826390864503992e-08, "mae_k": 1.5792682717119533e-08, "mse_q": 2.480465258783123e-15, "mse_k": 2.475580631534544e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S2048_H8_D64_R32", "batch": 2, "seqlen": 2048, "num_heads": 8, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09252199998854849, "p50": 0.093122000009771, "p90": 0.09490200000072946, "mean": 0.09375020000561562, "iqr": 0.0023690000148235413, "raw_times": [0.093122000009771, 0.09252199998854849, 0.09490200000072946, 0.09567200004312326, 0.09253299998590592], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09691200000361277, "peak_bytes": 101449728, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.592899323554775e-08, "mae_k": 1.5925031959795888e-08, "mse_q": 2.50783882253954e-15, "mse_k": 2.5015648494992274e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": 
"eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S2048_H8_D128_R64", "batch": 2, "seqlen": 2048, "num_heads": 8, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09136299996725938, "p50": 0.09425199999668621, "p90": 0.0960129999612036, "mean": 0.0991567999903964, "iqr": 0.002309999956651154, "raw_times": [0.09370300000455245, 0.09136299996725938, 0.12045300002228032, 0.09425199999668621, 0.0960129999612036], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09500200002321435, "peak_bytes": 202375168, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.590209919299923e-08, "mae_k": 1.590130160877834e-08, "mse_q": 2.4971026799330918e-15, "mse_k": 2.506967649153289e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S2048_H32_D64_R32", "batch": 2, "seqlen": 2048, "num_heads": 32, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.26061699998081167, "p50": 0.26556599999594255, "p90": 0.26563699998405355, "mean": 0.2649027999950704, "iqr": 0.001249999968422344, "raw_times": [0.26061699998081167, 0.26830699999891294, 0.2643870000156312, 0.26563699998405355, 0.26556599999594255], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 
0.26123600002847525, "peak_bytes": 403177472, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5847520629108658e-08, "mae_k": 1.5862454461057496e-08, "mse_q": 2.4917348203881045e-15, "mse_k": 2.491306009958557e-15, "ref": "rotary_torch"}, "err": null} -{"ts": "2025-10-31T20:01:22Z", "run": "eaf2d47fcdc24840a68457c07da24ae9", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S2048_H32_D128_R64", "batch": 2, "seqlen": 2048, "num_heads": 32, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.8460020000029544, "p50": 0.8488419999821417, "p90": 0.8517510000274342, "mean": 0.8514335999961986, "iqr": 0.004409000041505351, "raw_times": [0.8632309999825338, 0.8488419999821417, 0.8517510000274342, 0.8473419999859289, 0.8460020000029544], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.8540019999827564, "peak_bytes": 806354944, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.585225106737198e-08, "mae_k": 1.581303976649906e-08, "mse_q": 2.4866460581992374e-15, "mse_k": 2.4721545950211372e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:55Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S128_H8_D64_R32", "batch": 1, "seqlen": 128, "num_heads": 8, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": 
"Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.07309099999019963, "p50": 0.07444199997053147, "p90": 0.07482099999833736, "mean": 0.07456319998482286, "iqr": 0.00039000002516331733, "raw_times": [0.07443099997317404, 0.0760309999918718, 0.07482099999833736, 0.07444199997053147, 0.07309099999019963], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08203099997672325, "peak_bytes": 3178496, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.590452924915553e-08, "mae_k": 1.5487040982975486e-08, "mse_q": 2.5241010080938753e-15, "mse_k": 2.364223539299626e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:55Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S128_H8_D128_R64", "batch": 1, "seqlen": 128, "num_heads": 8, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.09060200000021723, "p50": 0.09103200000026845, "p90": 0.09151199998314041, "mean": 0.09118959999341314, "iqr": 0.0008709999974598759, "raw_times": [0.09060200000021723, 0.09216099999775906, 0.09103200000026845, 0.09151199998314041, 0.09064099998568054], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09373199998208293, "peak_bytes": 6356992, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5508486939097565e-08, "mae_k": 1.567566698668088e-08, "mse_q": 2.3630110116356316e-15, "mse_k": 2.416562128626943e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:55Z", "run": 
"3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S128_H32_D64_R32", "batch": 1, "seqlen": 128, "num_heads": 32, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08705100003680855, "p50": 0.0875320000091051, "p90": 0.08769100003291896, "mean": 0.08876720002035654, "iqr": 0.0002699999868127634, "raw_times": [0.0874210000461062, 0.09414099997684389, 0.08705100003680855, 0.08769100003291896, 0.0875320000091051], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09279099998593665, "peak_bytes": 12615680, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5856898016863852e-08, "mae_k": 1.572981211950264e-08, "mse_q": 2.4771055025978386e-15, "mse_k": 2.4544071371937915e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:55Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S128_H32_D128_R64", "batch": 1, "seqlen": 128, "num_heads": 32, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08560100002341642, "p50": 0.08801100000255246, "p90": 0.08860200000526675, "mean": 0.08908540002039445, "iqr": 0.0012109999829590379, "raw_times": [0.08739100002230771, 0.08801100000255246, 0.08860200000526675, 0.09582200004842889, 0.08560100002341642], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 
0.09329199997409887, "peak_bytes": 25231360, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5617658277733426e-08, "mae_k": 1.5788685914230882e-08, "mse_q": 2.4549424620164562e-15, "mse_k": 2.492823469483563e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:55Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S512_H8_D64_R32", "batch": 1, "seqlen": 512, "num_heads": 8, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08852199999864752, "p50": 0.08866100000659571, "p90": 0.08963200002654048, "mean": 0.08911940001326002, "iqr": 0.0010109999948326731, "raw_times": [0.08963200002654048, 0.08862100003170781, 0.08852199999864752, 0.08866100000659571, 0.09016100000280858], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0921010000070055, "peak_bytes": 12779520, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5962712041073246e-08, "mae_k": 1.5743363945830424e-08, "mse_q": 2.534145124782417e-15, "mse_k": 2.451281585618423e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:55Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S512_H8_D128_R64", "batch": 1, "seqlen": 512, "num_heads": 8, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": 
"Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08753199995226169, "p50": 0.08826099997349957, "p90": 0.08928100004368389, "mean": 0.08895959999790648, "iqr": 0.001079000014669873, "raw_times": [0.08753199995226169, 0.08928100004368389, 0.08820200002901402, 0.09152199999107324, 0.08826099997349957], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0922809999792662, "peak_bytes": 25427968, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.578730035589615e-08, "mae_k": 1.5859711766097462e-08, "mse_q": 2.440287521479536e-15, "mse_k": 2.477901290051784e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:55Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S512_H32_D64_R32", "batch": 1, "seqlen": 512, "num_heads": 32, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08700099999714439, "p50": 0.08810100001710452, "p90": 0.08876099997223719, "mean": 0.08815519998961463, "iqr": 0.0012099999935344385, "raw_times": [0.08700099999714439, 0.0893619999828843, 0.08876099997223719, 0.08810100001710452, 0.08755099997870275], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09151099999371581, "peak_bytes": 50462720, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5775295736375483e-08, "mae_k": 1.5847881229547056e-08, "mse_q": 2.471039476146077e-15, "mse_k": 2.472378635235686e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:55Z", "run": 
"3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S512_H32_D128_R64", "batch": 1, "seqlen": 512, "num_heads": 32, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08636100000103397, "p50": 0.08706099998789796, "p90": 0.0880219999999099, "mean": 0.08728360000986868, "iqr": 0.0015599999869664316, "raw_times": [0.08646200001294346, 0.0885120000475581, 0.0880219999999099, 0.08706099998789796, 0.08636100000103397], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09124100000690305, "peak_bytes": 100925440, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5959869870130206e-08, "mae_k": 1.588083975434529e-08, "mse_q": 2.510663677418633e-15, "mse_k": 2.502786271009168e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:55Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S2048_H8_D64_R32", "batch": 1, "seqlen": 2048, "num_heads": 8, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08730199999718025, "p50": 0.08869100003039421, "p90": 0.09006199996974829, "mean": 0.08888559999604695, "iqr": 0.0023309999619414157, "raw_times": [0.08773100000780687, 0.09064199997510514, 0.09006199996974829, 0.08730199999718025, 0.08869100003039421], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 
0.09268100001236235, "peak_bytes": 51118080, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5894533689220225e-08, "mae_k": 1.5873395042831362e-08, "mse_q": 2.5093181655819197e-15, "mse_k": 2.488611809911578e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:55Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S2048_H8_D128_R64", "batch": 1, "seqlen": 2048, "num_heads": 8, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08675099996935387, "p50": 0.08854100002508858, "p90": 0.08863200002906524, "mean": 0.08941120000827141, "iqr": 0.00029099999210302485, "raw_times": [0.09479099998088714, 0.08863200002906524, 0.08854100002508858, 0.08675099996935387, 0.08834100003696221], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09256099997401179, "peak_bytes": 101711872, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5936768349433805e-08, "mae_k": 1.5960043953100467e-08, "mse_q": 2.51039008577667e-15, "mse_k": 2.5111253103748867e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:55Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S2048_H32_D64_R32", "batch": 1, "seqlen": 2048, "num_heads": 32, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": 
"Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08763099998532198, "p50": 0.08916199999475793, "p90": 0.08947200001330202, "mean": 0.08909940000876304, "iqr": 0.000891000013325538, "raw_times": [0.08947200001330202, 0.08763099998532198, 0.08916199999475793, 0.09065100005045679, 0.08858099999997648], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0931619999846589, "peak_bytes": 201850880, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 1.9073486328125e-06, "absmax_k": 9.5367431640625e-07, "mae_q": 1.586510300910504e-08, "mae_k": 1.5813935050346117e-08, "mse_q": 2.499836478770355e-15, "mse_k": 2.4755639026338358e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:55Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B1_S2048_H32_D128_R64", "batch": 1, "seqlen": 2048, "num_heads": 32, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.2592540000136978, "p50": 0.2617740000232516, "p90": 0.2619539999955123, "mean": 0.2612200000157827, "iqr": 0.0011099999710495467, "raw_times": [0.2617740000232516, 0.2622740000219892, 0.2619539999955123, 0.26084400002446273, 0.2592540000136978], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.2616440000338116, "peak_bytes": 403701760, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.581049247079136e-08, "mae_k": 1.5861061797295406e-08, "mse_q": 2.4735094242202705e-15, "mse_k": 2.486832828964107e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:55Z", "run": 
"3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S128_H8_D64_R32", "batch": 2, "seqlen": 128, "num_heads": 8, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08504199996650641, "p50": 0.08663200003411475, "p90": 0.0882019999721706, "mean": 0.08694359999026346, "iqr": 0.0022109999804342806, "raw_times": [0.08663200003411475, 0.0882019999721706, 0.08885099998678925, 0.08599099999173632, 0.08504199996650641], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08975200000804762, "peak_bytes": 137396224, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5824980437173508e-08, "mae_k": 1.5615324144846454e-08, "mse_q": 2.488090249374306e-15, "mse_k": 2.425079044911585e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:56Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S128_H8_D128_R64", "batch": 2, "seqlen": 128, "num_heads": 8, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.0891509999974005, "p50": 0.08992100003979431, "p90": 0.09012199996050185, "mean": 0.0899451999998746, "iqr": 0.0002709999762373627, "raw_times": [0.08985099998426449, 0.09068100001741186, 0.09012199996050185, 0.08992100003979431, 0.0891509999974005], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 
0.09275200000047334, "peak_bytes": 12648448, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5683587761827766e-08, "mae_k": 1.574532682013796e-08, "mse_q": 2.4310271220254415e-15, "mse_k": 2.4601385856313877e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:56Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S128_H32_D64_R32", "batch": 2, "seqlen": 128, "num_heads": 32, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08789200001046993, "p50": 0.08992099998295089, "p90": 0.0902720000226509, "mean": 0.09012159999883806, "iqr": 0.0012010000318696257, "raw_times": [0.08789200001046993, 0.08907099999078127, 0.09345199998733733, 0.0902720000226509, 0.08992099998295089], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09202100000038627, "peak_bytes": 25198592, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5835009747888762e-08, "mae_k": 1.572560215379326e-08, "mse_q": 2.478222950813504e-15, "mse_k": 2.4541699679685603e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:56Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S128_H32_D128_R64", "batch": 2, "seqlen": 128, "num_heads": 32, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": 
"Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08713099998658436, "p50": 0.08851199999071468, "p90": 0.08962200001860765, "mean": 0.09088959999417057, "iqr": 0.0023510000346504967, "raw_times": [0.08713099998658436, 0.08851199999071468, 0.08962200001860765, 0.08727099998395715, 0.10191199999098899], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08909199999607154, "peak_bytes": 50397184, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5876850056883995e-08, "mae_k": 1.5927410501603845e-08, "mse_q": 2.504224532953606e-15, "mse_k": 2.503892919554756e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:56Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S512_H8_D64_R32", "batch": 2, "seqlen": 512, "num_heads": 8, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08660100002089166, "p50": 0.08889200000794517, "p90": 0.08962200001860765, "mean": 0.08841560002110782, "iqr": 0.002391000009538402, "raw_times": [0.08889200000794517, 0.08660100002089166, 0.08723100000906925, 0.08962200001860765, 0.08973200004902537], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.10579199999938282, "peak_bytes": 25362432, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5820052823301012e-08, "mae_k": 1.580205122309053e-08, "mse_q": 2.4876468276264184e-15, "mse_k": 2.4866062476507165e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:56Z", "run": 
"3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S512_H8_D128_R64", "batch": 2, "seqlen": 512, "num_heads": 8, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08826199996292416, "p50": 0.08903100001589337, "p90": 0.0892219999855115, "mean": 0.0892053999905329, "iqr": 0.0008609999895270448, "raw_times": [0.08826199996292416, 0.09115099999235099, 0.0892219999855115, 0.08836099999598446, 0.08903100001589337], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09266099999649668, "peak_bytes": 50593792, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5823172105911e-08, "mae_k": 1.582038855474366e-08, "mse_q": 2.464257071579175e-15, "mse_k": 2.4775099608301526e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:56Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S512_H32_D64_R32", "batch": 2, "seqlen": 512, "num_heads": 32, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08734199997206815, "p50": 0.0891519999868251, "p90": 0.09024100000942781, "mean": 0.0889337999979034, "iqr": 0.002558999995017075, "raw_times": [0.08734199997206815, 0.09024100000942781, 0.09025200000678524, 0.0891519999868251, 0.08768200001441073], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09219200001098216, 
"peak_bytes": 100794368, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5888783622131086e-08, "mae_k": 1.5861886026868888e-08, "mse_q": 2.4766798685418433e-15, "mse_k": 2.475923891636419e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:56Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S512_H32_D128_R64", "batch": 2, "seqlen": 512, "num_heads": 32, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08766199999854507, "p50": 0.0882019999721706, "p90": 0.08848099997749159, "mean": 0.08829339999465446, "iqr": 0.0005199999577598646, "raw_times": [0.08766199999854507, 0.08848099997749159, 0.08916100000533334, 0.08796100001973173, 0.0882019999721706], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.0894309999921461, "peak_bytes": 201588736, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5826390864503992e-08, "mae_k": 1.5792682717119533e-08, "mse_q": 2.480465258783123e-15, "mse_k": 2.475580631534544e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:56Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S2048_H8_D64_R32", "batch": 2, "seqlen": 2048, "num_heads": 8, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": 
"Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08695100001432365, "p50": 0.08826100003034298, "p90": 0.08882200000925877, "mean": 0.08816519999754746, "iqr": 0.001351000037175254, "raw_times": [0.08747099997208352, 0.08882200000925877, 0.08826100003034298, 0.08695100001432365, 0.08932099996172838], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.09112100002539592, "peak_bytes": 101449728, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.592899323554775e-08, "mae_k": 1.5925031959795888e-08, "mse_q": 2.50783882253954e-15, "mse_k": 2.5015648494992274e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:56Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S2048_H8_D128_R64", "batch": 2, "seqlen": 2048, "num_heads": 8, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.08768100002498613, "p50": 0.08872199998677388, "p90": 0.08966100000407096, "mean": 0.09043360000760003, "iqr": 0.0018989999830409943, "raw_times": [0.09834200000113924, 0.08966100000407096, 0.08872199998677388, 0.08768100002498613, 0.08776200002102996], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.08996200000410681, "peak_bytes": 202375168, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.590209919299923e-08, "mae_k": 1.590130160877834e-08, "mse_q": 2.4971026799330918e-15, "mse_k": 2.506967649153289e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:56Z", "run": 
"3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S2048_H32_D64_R32", "batch": 2, "seqlen": 2048, "num_heads": 32, "head_dim": 64, "rotary_dim": 32, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.2556240000330945, "p50": 0.2579839999725664, "p90": 0.2584439999964161, "mean": 0.258233800002472, "iqr": 0.0005109999960950518, "raw_times": [0.2579839999725664, 0.2584439999964161, 0.2556240000330945, 0.2611840000099619, 0.25793300000032104], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.2541540000038367, "peak_bytes": 403177472, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.5847520629108658e-08, "mae_k": 1.5862454461057496e-08, "mse_q": 2.4917348203881045e-15, "mse_k": 2.491306009958557e-15, "ref": "rotary_torch"}, "err": null} +{"ts": "2025-11-10T21:58:56Z", "run": "3078eb3fdd0247809e806075fdf7ee85", "impl": "hf_kernels_rotary", "tags": {"family": "hf-kernels", "backend": "cuda"}, "wl": {"name": "cuda_B2_S2048_H32_D128_R64", "batch": 2, "seqlen": 2048, "num_heads": 32, "head_dim": 128, "rotary_dim": 64, "dtype": "float32", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 0.8455130000015743, "p50": 0.8465030000479601, "p90": 0.850922999973136, "mean": 0.8485591999942699, "iqr": 0.005059999978129781, "raw_times": [0.850922999973136, 0.8465030000479601, 0.8458629999950062, 0.8455130000015743, 0.8539939999536728], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.8586040000295725, 
"peak_bytes": 806354944, "ok": true, "absmax": null, "corr": {"ok": true, "rtol": 1e-05, "atol": 1e-05, "absmax_q": 9.5367431640625e-07, "absmax_k": 9.5367431640625e-07, "mae_q": 1.585225106737198e-08, "mae_k": 1.581303976649906e-08, "mse_q": 2.4866460581992374e-15, "mse_k": 2.4721545950211372e-15, "ref": "rotary_torch"}, "err": null} diff --git a/rotary/impls/hf_kernels_rotary.html b/rotary/impls/hf_kernels_rotary.html index 330944cc91943bb2b53930714f4af06cb0ca72b7..35d48f93791ee1139fa8b65e4b4cf5848ad3af73 100644 --- a/rotary/impls/hf_kernels_rotary.html +++ b/rotary/impls/hf_kernels_rotary.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; 
} :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { 
background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - 
border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 
0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} 
+:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; 
flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; 
background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - 
.collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - 
.cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header 
{ background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after 
{ content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - 
background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: 
italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ 
+[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; 
font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ 
+[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo 
*/ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] 
.highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: 
var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 
@@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4106,7 +3888,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: nv | 0.23s +Cell: nv | 0.22s | Raw @@ -4123,16 +3905,16 @@ Cell: nv | 0.23s
-
Fri Oct 31 20:00:00 2025       
+
Mon Nov 10 21:57:39 2025       
 +-----------------------------------------------------------------------------------------+
-| NVIDIA-SMI 570.195.03             Driver Version: 570.195.03     CUDA Version: 12.8     |
-|-----------------------------------------+------------------------+----------------------+
+| NVIDIA-SMI 580.95.05              Driver Version: 580.95.05      CUDA Version: 13.0     |
++-----------------------------------------+------------------------+----------------------+
 | GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
 | Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
 |                                         |                        |               MIG M. |
 |=========================================+========================+======================|
 |   0  NVIDIA L40S                    On  |   00000000:4D:00.0 Off |                    0 |
-| N/A   32C    P0            101W /  350W |       0MiB /  46068MiB |    100%      Default |
+| N/A   26C    P0             88W /  350W |       0MiB /  46068MiB |     22%      Default |
 |                                         |                        |                  N/A |
 +-----------------------------------------+------------------------+----------------------+
 
@@ -4156,7 +3938,7 @@ Cell: nv | 0.23s
 ▼ output
  ▶ uv-logs
  | 
-Cell: benchmark | 4.67s
+Cell: benchmark | 4.74s
  | 
 
 Raw
@@ -4227,23 +4009,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B1_S128_H8_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     426.303us      1837.51%     426.303us     426.303us             1  
-                                      hf_kernels_rotary        12.40%     260.056us        99.66%       2.090ms       2.090ms       0.000us         0.00%      24.480us      24.480us             1  
-                          _rotary_dba7d1e::apply_rotary         2.75%      57.674us         5.07%     106.315us      17.719us      16.128us        69.52%      16.128us       2.688us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      16.128us        69.52%      16.128us       2.688us             6  
-                                            aten::clone         2.13%      44.582us        79.34%       1.664ms     277.309us       0.000us         0.00%       8.352us       1.392us             6  
-                                            aten::copy_         1.84%      38.562us        74.44%       1.561ms     260.165us       7.072us        30.48%       8.352us       1.392us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.072us        30.48%       7.072us       1.179us             6  
-                                Activity Buffer Request        69.01%       1.447ms        69.01%       1.447ms       1.447ms       1.280us         5.52%       1.280us       1.280us             1  
-                                    aten::empty_strided         2.78%      58.281us         2.78%      58.281us       9.713us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         3.58%      75.121us         3.58%      75.121us      12.520us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         2.14%      44.780us         2.85%      59.790us       4.983us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         0.72%      15.010us         0.72%      15.010us       1.251us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         2.32%      48.641us         2.32%      48.641us       8.107us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.34%       7.100us         0.34%       7.100us       7.100us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     403.678us      1730.44%     403.678us     403.678us             1  
+                                      hf_kernels_rotary         9.63%     231.023us        99.37%       2.384ms       2.384ms       0.000us         0.00%      24.608us      24.608us             1  
+                          _rotary_dba7d1e::apply_rotary         2.18%      52.340us         4.07%      97.602us      16.267us      16.224us        69.55%      16.224us       2.704us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      16.224us        69.55%      16.224us       2.704us             6  
+                                            aten::clone         1.53%      36.662us        83.59%       2.005ms     334.171us       0.000us         0.00%       8.384us       1.397us             6  
+                                            aten::copy_         1.80%      43.260us        79.70%       1.912ms     318.600us       7.104us        30.45%       8.384us       1.397us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.104us        30.45%       7.104us       1.184us             6  
+                                Activity Buffer Request        74.82%       1.795ms        74.82%       1.795ms       1.795ms       1.280us         5.49%       1.280us       1.280us             1  
+                                    aten::empty_strided         2.37%      56.761us         2.37%      56.761us       9.460us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         3.07%      73.591us         3.07%      73.591us      12.265us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         1.65%      39.481us         2.08%      49.901us       4.158us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.43%      10.420us         0.43%      10.420us       0.868us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         1.89%      45.262us         1.89%      45.262us       7.544us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.63%      15.070us         0.63%      15.070us      15.070us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.097ms
-Self CUDA time total: 23.200us
+Self CPU time total: 2.399ms
+Self CUDA time total: 23.328us
 
 
 
@@ -4253,23 +4035,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B1_S128_H8_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     340.796us      1422.00%     340.796us     340.796us             1  
-                                      hf_kernels_rotary         9.48%     182.026us        99.73%       1.916ms       1.916ms       0.000us         0.00%      25.278us      25.278us             1  
-                          _rotary_dba7d1e::apply_rotary         2.22%      42.701us         4.40%      84.531us      14.088us      16.159us        67.42%      16.159us       2.693us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      16.159us        67.42%      16.159us       2.693us             6  
-                                            aten::clone         1.41%      27.120us        83.58%       1.605ms     267.570us       0.000us         0.00%       9.119us       1.520us             6  
-                                            aten::copy_         2.02%      38.773us        80.45%       1.545ms     257.555us       7.807us        32.58%       9.119us       1.520us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.807us        32.58%       7.807us       1.301us             6  
-                                Activity Buffer Request        75.56%       1.451ms        75.56%       1.451ms       1.451ms       1.312us         5.47%       1.312us       1.312us             1  
-                                    aten::empty_strided         1.72%      32.970us         1.72%      32.970us       5.495us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         2.88%      55.291us         2.88%      55.291us       9.215us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         1.76%      33.749us         2.27%      43.642us       3.637us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         0.52%       9.893us         0.52%       9.893us       0.824us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         2.18%      41.830us         2.18%      41.830us       6.972us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.27%       5.161us         0.27%       5.161us       5.161us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     334.494us      1388.06%     334.494us     334.494us             1  
+                                      hf_kernels_rotary         8.19%     181.152us        99.73%       2.206ms       2.206ms       0.000us         0.00%      25.410us      25.410us             1  
+                          _rotary_dba7d1e::apply_rotary         1.81%      39.991us         3.60%      79.751us      13.292us      16.193us        67.20%      16.193us       2.699us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      16.193us        67.20%      16.193us       2.699us             6  
+                                            aten::clone         1.33%      29.430us        86.17%       1.906ms     317.722us       0.000us         0.00%       9.217us       1.536us             6  
+                                            aten::copy_         1.70%      37.720us        83.32%       1.843ms     307.237us       7.905us        32.80%       9.217us       1.536us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.905us        32.80%       7.905us       1.317us             6  
+                                Activity Buffer Request        79.13%       1.751ms        79.13%       1.751ms       1.751ms       1.312us         5.44%       1.312us       1.312us             1  
+                                    aten::empty_strided         1.51%      33.481us         1.51%      33.481us       5.580us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         2.49%      55.161us         2.49%      55.161us       9.194us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         1.38%      30.530us         1.77%      39.222us       3.268us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.39%       8.692us         0.39%       8.692us       0.724us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         1.80%      39.760us         1.80%      39.760us       6.627us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.27%       5.870us         0.27%       5.870us       5.870us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.921ms
-Self CUDA time total: 23.966us
+Self CPU time total: 2.212ms
+Self CUDA time total: 24.098us
 
 
 
@@ -4279,23 +4061,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B1_S128_H32_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     339.421us      1391.81%     339.421us     339.421us             1  
-                                      hf_kernels_rotary         9.18%     172.926us        99.76%       1.879ms       1.879ms       0.000us         0.00%      25.699us      25.699us             1  
-                          _rotary_dba7d1e::apply_rotary         2.20%      41.409us         4.51%      85.000us      14.167us      16.481us        67.58%      16.481us       2.747us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      16.481us        67.58%      16.481us       2.747us             6  
-                                            aten::clone         1.46%      27.581us        83.73%       1.577ms     262.862us       0.000us         0.00%       9.218us       1.536us             6  
-                                            aten::copy_         1.97%      37.091us        80.45%       1.515ms     252.563us       7.906us        32.42%       9.218us       1.536us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.906us        32.42%       7.906us       1.318us             6  
-                                Activity Buffer Request        75.71%       1.426ms        75.71%       1.426ms       1.426ms       1.312us         5.38%       1.312us       1.312us             1  
-                                    aten::empty_strided         1.82%      34.210us         1.82%      34.210us       5.702us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         2.77%      52.231us         2.77%      52.231us       8.705us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         1.80%      33.892us         2.33%      43.952us       3.663us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         0.53%      10.060us         0.53%      10.060us       0.838us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         2.31%      43.591us         2.31%      43.591us       7.265us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.24%       4.550us         0.24%       4.550us       4.550us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     333.020us      1374.81%     333.020us     333.020us             1  
+                                      hf_kernels_rotary         8.22%     183.662us        99.77%       2.229ms       2.229ms       0.000us         0.00%      25.535us      25.535us             1  
+                          _rotary_dba7d1e::apply_rotary         1.78%      39.771us         3.54%      79.142us      13.190us      16.479us        68.03%      16.479us       2.747us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      16.479us        68.03%      16.479us       2.747us             6  
+                                            aten::clone         1.23%      27.502us        86.14%       1.925ms     320.808us       0.000us         0.00%       9.056us       1.509us             6  
+                                            aten::copy_         1.51%      33.780us        83.43%       1.864ms     310.723us       7.744us        31.97%       9.056us       1.509us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.744us        31.97%       7.744us       1.291us             6  
+                                Activity Buffer Request        79.60%       1.779ms        79.60%       1.779ms       1.779ms       1.312us         5.42%       1.312us       1.312us             1  
+                                    aten::empty_strided         1.48%      33.009us         1.48%      33.009us       5.501us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         2.32%      51.921us         2.32%      51.921us       8.654us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         1.44%      32.260us         1.87%      41.742us       3.478us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.42%       9.482us         0.42%       9.482us       0.790us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         1.76%      39.371us         1.76%      39.371us       6.562us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.23%       5.150us         0.23%       5.150us       5.150us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.884ms
-Self CUDA time total: 24.387us
+Self CPU time total: 2.235ms
+Self CUDA time total: 24.223us
 
 
 
@@ -4305,23 +4087,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B1_S128_H32_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     353.466us      1252.36%     353.466us     353.466us             1  
-                                      hf_kernels_rotary         8.35%     176.747us        99.76%       2.111ms       2.111ms       0.000us         0.00%      30.048us      30.048us             1  
-                          _rotary_dba7d1e::apply_rotary         2.17%      45.850us         4.21%      89.000us      14.833us      17.664us        62.59%      17.664us       2.944us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      17.664us        62.59%      17.664us       2.944us             6  
-                                            aten::clone         1.36%      28.714us        85.13%       1.802ms     300.274us       0.000us         0.00%      12.384us       2.064us             6  
-                                            aten::copy_         1.83%      38.751us        82.20%       1.740ms     289.944us      10.560us        37.41%      12.384us       2.064us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      10.560us        37.41%      10.560us       1.760us             6  
-                                Activity Buffer Request        67.60%       1.431ms        67.60%       1.431ms       1.431ms       1.824us         6.46%       1.824us       1.824us             1  
-                                    aten::empty_strided         1.57%      33.269us         1.57%      33.269us       5.545us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        12.77%     270.306us        12.77%     270.306us      45.051us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         1.59%      33.568us         2.07%      43.911us       3.659us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         0.49%      10.343us         0.49%      10.343us       0.862us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         2.04%      43.150us         2.04%      43.150us       7.192us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.24%       5.130us         0.24%       5.130us       5.130us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     330.396us      1170.66%     330.396us     330.396us             1  
+                                      hf_kernels_rotary        19.88%     180.354us        99.43%     901.975us     901.975us       0.000us         0.00%      29.983us      29.983us             1  
+                          _rotary_dba7d1e::apply_rotary         4.33%      39.273us         8.60%      78.013us      13.002us      17.759us        62.92%      17.759us       2.960us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      17.759us        62.92%      17.759us       2.960us             6  
+                                            aten::clone         2.43%      22.040us        66.64%     604.579us     100.763us       0.000us         0.00%      12.224us       2.037us             6  
+                                            aten::copy_         3.81%      34.600us        60.79%     551.459us      91.910us      10.464us        37.08%      12.224us       2.037us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      10.464us        37.08%      10.464us       1.744us             6  
+                                Activity Buffer Request        27.63%     250.684us        27.63%     250.684us     250.684us       1.760us         6.24%       1.760us       1.760us             1  
+                                    aten::empty_strided         3.43%      31.080us         3.43%      31.080us       5.180us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        29.34%     266.175us        29.34%     266.175us      44.362us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         3.36%      30.489us         4.30%      39.029us       3.252us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.94%       8.540us         0.94%       8.540us       0.712us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         4.27%      38.740us         4.27%      38.740us       6.457us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.57%       5.209us         0.57%       5.209us       5.209us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.116ms
-Self CUDA time total: 28.224us
+Self CPU time total: 907.184us
+Self CUDA time total: 28.223us
 
 
 
@@ -4331,23 +4113,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B1_S512_H8_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     351.740us      1444.46%     351.740us     351.740us             1  
-                                      hf_kernels_rotary         8.68%     176.155us        99.77%       2.024ms       2.024ms       0.000us         0.00%      25.663us      25.663us             1  
-                          _rotary_dba7d1e::apply_rotary         2.27%      46.099us         4.32%      87.680us      14.613us      16.479us        67.67%      16.479us       2.747us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      16.479us        67.67%      16.479us       2.747us             6  
-                                            aten::clone         1.42%      28.832us        84.62%       1.717ms     286.091us       0.000us         0.00%       9.184us       1.531us             6  
-                                            aten::copy_         1.86%      37.831us        81.49%       1.653ms     275.519us       7.872us        32.33%       9.184us       1.531us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.872us        32.33%       7.872us       1.312us             6  
-                                Activity Buffer Request        70.03%       1.420ms        70.03%       1.420ms       1.420ms       1.312us         5.39%       1.312us       1.312us             1  
-                                    aten::empty_strided         1.71%      34.601us         1.71%      34.601us       5.767us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         9.60%     194.784us         9.60%     194.784us      32.464us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         1.63%      33.102us         2.14%      43.512us       3.626us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         0.51%      10.410us         0.51%      10.410us       0.867us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         2.05%      41.581us         2.05%      41.581us       6.930us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.23%       4.660us         0.23%       4.660us       4.660us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     333.950us      1371.35%     333.950us     333.950us             1  
+                                      hf_kernels_rotary         7.53%     182.915us        99.79%       2.425ms       2.425ms       0.000us         0.00%      25.664us      25.664us             1  
+                          _rotary_dba7d1e::apply_rotary         1.65%      40.000us         3.26%      79.130us      13.188us      16.545us        67.94%      16.545us       2.758us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      16.545us        67.94%      16.545us       2.758us             6  
+                                            aten::clone         1.26%      30.642us        87.34%       2.122ms     353.721us       0.000us         0.00%       9.119us       1.520us             6  
+                                            aten::copy_         1.47%      35.799us        84.75%       2.059ms     343.229us       7.807us        32.06%       9.119us       1.520us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.807us        32.06%       7.807us       1.301us             6  
+                                Activity Buffer Request        73.06%       1.775ms        73.06%       1.775ms       1.775ms       1.312us         5.39%       1.312us       1.312us             1  
+                                    aten::empty_strided         1.33%      32.310us         1.33%      32.310us       5.385us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        10.22%     248.434us        10.22%     248.434us      41.406us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         1.31%      31.720us         1.66%      40.370us       3.364us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.36%       8.650us         0.36%       8.650us       0.721us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         1.61%      39.130us         1.61%      39.130us       6.522us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.21%       5.100us         0.21%       5.100us       5.100us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.029ms
-Self CUDA time total: 24.351us
+Self CPU time total: 2.430ms
+Self CUDA time total: 24.352us
 
 
 
@@ -4357,23 +4139,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B1_S512_H8_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     349.111us      1238.38%     349.111us     349.111us             1  
-                                      hf_kernels_rotary        23.24%     192.013us        99.32%     820.571us     820.571us       0.000us         0.00%      30.015us      30.015us             1  
-                          _rotary_dba7d1e::apply_rotary         5.42%      44.795us        10.63%      87.866us      14.644us      17.632us        62.54%      17.632us       2.939us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      17.632us        62.54%      17.632us       2.939us             6  
-                                            aten::clone         2.69%      22.223us        60.09%     496.442us      82.740us       0.000us         0.00%      12.383us       2.064us             6  
-                                            aten::copy_         4.60%      38.000us        53.48%     441.890us      73.648us      10.559us        37.46%      12.383us       2.064us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      10.559us        37.46%      10.559us       1.760us             6  
-                                Activity Buffer Request        26.48%     218.816us        26.48%     218.816us     218.816us       1.824us         6.47%       1.824us       1.824us             1  
-                                    aten::empty_strided         3.91%      32.329us         3.91%      32.329us       5.388us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        22.40%     185.074us        22.40%     185.074us      30.846us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.04%      33.410us         5.36%      44.250us       3.688us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         1.31%      10.840us         1.31%      10.840us       0.903us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         5.21%      43.071us         5.21%      43.071us       7.178us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.68%       5.641us         0.68%       5.641us       5.641us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     330.717us      1169.19%     330.717us     330.717us             1  
+                                      hf_kernels_rotary         7.60%     182.573us        99.80%       2.396ms       2.396ms       0.000us         0.00%      30.046us      30.046us             1  
+                          _rotary_dba7d1e::apply_rotary         1.66%      39.960us         3.28%      78.811us      13.135us      17.758us        62.78%      17.758us       2.960us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      17.758us        62.78%      17.758us       2.960us             6  
+                                            aten::clone         1.18%      28.252us        87.25%       2.095ms     349.108us       0.000us         0.00%      12.288us       2.048us             6  
+                                            aten::copy_         1.56%      37.480us        84.78%       2.035ms     339.209us      10.528us        37.22%      12.288us       2.048us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      10.528us        37.22%      10.528us       1.755us             6  
+                                Activity Buffer Request        73.02%       1.753ms        73.02%       1.753ms       1.753ms       1.760us         6.22%       1.760us       1.760us             1  
+                                    aten::empty_strided         1.30%      31.140us         1.30%      31.140us       5.190us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        10.19%     244.675us        10.19%     244.675us      40.779us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         1.30%      31.158us         1.66%      39.899us       3.325us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.36%       8.741us         0.36%       8.741us       0.728us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         1.62%      38.851us         1.62%      38.851us       6.475us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.20%       4.770us         0.20%       4.770us       4.770us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 826.212us
-Self CUDA time total: 28.191us
+Self CPU time total: 2.401ms
+Self CUDA time total: 28.286us
 
 
 
@@ -4383,23 +4165,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B1_S512_H32_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     344.984us       852.93%     344.984us     344.984us             1  
-                                      hf_kernels_rotary        22.02%     168.975us        99.39%     762.759us     762.759us       0.000us         0.00%      43.263us      43.263us             1  
-                          _rotary_dba7d1e::apply_rotary         5.75%      44.162us        11.18%      85.802us      14.300us      23.456us        57.99%      23.456us       3.909us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      23.456us        57.99%      23.456us       3.909us             6  
-                                            aten::clone         2.91%      22.350us        60.45%     463.932us      77.322us       0.000us         0.00%      19.807us       3.301us             6  
-                                            aten::copy_         4.98%      38.249us        53.45%     410.170us      68.362us      16.991us        42.01%      19.807us       3.301us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      16.991us        42.01%      16.991us       2.832us             6  
-                                Activity Buffer Request        24.55%     188.395us        24.55%     188.395us     188.395us       2.816us         6.96%       2.816us       2.816us             1  
-                                    aten::empty_strided         4.09%      31.412us         4.09%      31.412us       5.235us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        23.91%     183.526us        23.91%     183.526us      30.588us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.40%      33.790us         5.74%      44.050us       3.671us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         1.34%      10.260us         1.34%      10.260us       0.855us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         5.43%      41.640us         5.43%      41.640us       6.940us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.61%       4.661us         0.61%       4.661us       4.661us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     331.263us       811.96%     331.263us     331.263us             1  
+                                      hf_kernels_rotary         7.62%     179.163us        99.79%       2.346ms       2.346ms       0.000us         0.00%      43.646us      43.646us             1  
+                          _rotary_dba7d1e::apply_rotary         1.67%      39.309us         3.29%      77.411us      12.902us      23.680us        58.04%      23.680us       3.947us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      23.680us        58.04%      23.680us       3.947us             6  
+                                            aten::clone         1.17%      27.469us        87.14%       2.049ms     341.486us       0.000us         0.00%      19.966us       3.328us             6  
+                                            aten::copy_         1.49%      35.141us        84.62%       1.990ms     331.589us      17.118us        41.96%      19.966us       3.328us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      17.118us        41.96%      17.118us       2.853us             6  
+                                Activity Buffer Request        73.01%       1.717ms        73.01%       1.717ms       1.717ms       2.848us         6.98%       2.848us       2.848us             1  
+                                    aten::empty_strided         1.36%      31.912us         1.36%      31.912us       5.319us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        10.11%     237.764us        10.11%     237.764us      39.627us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         1.35%      31.810us         1.74%      40.800us       3.400us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.38%       8.990us         0.38%       8.990us       0.749us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         1.62%      38.102us         1.62%      38.102us       6.350us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.21%       4.871us         0.21%       4.871us       4.871us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 767.420us
-Self CUDA time total: 40.447us
+Self CPU time total: 2.351ms
+Self CUDA time total: 40.798us
 
 
 
@@ -4409,23 +4191,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B1_S512_H32_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     347.453us       442.64%     347.453us     347.453us             1  
-                                      hf_kernels_rotary        20.37%     160.826us        99.39%     784.751us     784.751us       0.000us         0.00%      91.040us      91.040us             1  
-                                            aten::clone         2.83%      22.340us        62.44%     492.983us      82.164us       0.000us         0.00%      52.865us       8.811us             6  
-                                            aten::copy_         4.65%      36.740us        55.30%     436.663us      72.777us      40.321us        51.37%      52.865us       8.811us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      40.321us        51.37%      40.321us       6.720us             6  
-                          _rotary_dba7d1e::apply_rotary         5.74%      45.350us        11.00%      86.891us      14.482us      38.175us        48.63%      38.175us       6.362us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      38.175us        48.63%      38.175us       6.362us             6  
-                                Activity Buffer Request        27.86%     219.946us        27.86%     219.946us     219.946us      12.544us        15.98%      12.544us      12.544us             1  
-                                    aten::empty_strided         4.30%      33.980us         4.30%      33.980us       5.663us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        22.79%     179.977us        22.79%     179.977us      29.996us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.35%      34.361us         5.58%      44.051us       3.671us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         1.23%       9.690us         1.23%       9.690us       0.808us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         5.26%      41.541us         5.26%      41.541us       6.924us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.61%       4.830us         0.61%       4.830us       4.830us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     336.387us       451.94%     336.387us     336.387us             1  
+                                      hf_kernels_rotary         7.84%     184.420us        99.78%       2.346ms       2.346ms       0.000us         0.00%      82.976us      82.976us             1  
+                                            aten::clone         1.21%      28.560us        86.97%       2.045ms     340.779us       0.000us         0.00%      43.553us       7.259us             6  
+                                            aten::copy_         1.54%      36.092us        84.34%       1.983ms     330.495us      35.009us        47.03%      43.553us       7.259us             6  
+                          _rotary_dba7d1e::apply_rotary         1.67%      39.331us         3.28%      77.091us      12.849us      39.423us        52.97%      39.423us       6.571us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      39.423us        52.97%      39.423us       6.571us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      35.009us        47.03%      35.009us       5.835us             6  
+                                Activity Buffer Request        73.02%       1.717ms        73.02%       1.717ms       1.717ms       8.544us        11.48%       8.544us       8.544us             1  
+                                    aten::empty_strided         1.41%      33.141us         1.41%      33.141us       5.523us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         9.79%     230.064us         9.79%     230.064us      38.344us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         1.34%      31.492us         1.69%      39.832us       3.319us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.35%       8.340us         0.35%       8.340us       0.695us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         1.61%      37.760us         1.61%      37.760us       6.293us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.22%       5.070us         0.22%       5.070us       5.070us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 789.581us
-Self CUDA time total: 78.496us
+Self CPU time total: 2.351ms
+Self CUDA time total: 74.432us
 
 
 
@@ -4435,23 +4217,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B1_S2048_H8_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     347.324us       858.06%     347.324us     347.324us             1  
-                                      hf_kernels_rotary         8.65%     173.958us        99.77%       2.007ms       2.007ms       0.000us         0.00%      43.325us      43.325us             1  
-                          _rotary_dba7d1e::apply_rotary         2.18%      43.910us         4.21%      84.770us      14.128us      23.423us        57.87%      23.423us       3.904us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      23.423us        57.87%      23.423us       3.904us             6  
-                                            aten::clone         1.35%      27.211us        84.83%       1.706ms     284.405us       0.000us         0.00%      19.902us       3.317us             6  
-                                            aten::copy_         1.92%      38.681us        81.76%       1.645ms     274.138us      17.055us        42.13%      19.902us       3.317us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      17.055us        42.13%      17.055us       2.842us             6  
-                                Activity Buffer Request        70.68%       1.422ms        70.68%       1.422ms       1.422ms       2.847us         7.03%       2.847us       2.847us             1  
-                                    aten::empty_strided         1.71%      34.392us         1.71%      34.392us       5.732us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         9.16%     184.363us         9.16%     184.363us      30.727us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         1.62%      32.593us         2.08%      41.861us       3.488us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         0.46%       9.268us         0.46%       9.268us       0.772us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         2.03%      40.860us         2.03%      40.860us       6.810us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.23%       4.670us         0.23%       4.670us       4.670us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     334.720us       824.27%     334.720us     334.720us             1  
+                                      hf_kernels_rotary         7.69%     178.052us        99.76%       2.310ms       2.310ms       0.000us         0.00%      43.488us      43.488us             1  
+                          _rotary_dba7d1e::apply_rotary         1.77%      40.921us         3.42%      79.272us      13.212us      23.680us        58.31%      23.680us       3.947us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      23.680us        58.31%      23.680us       3.947us             6  
+                                            aten::clone         1.23%      28.463us        86.92%       2.013ms     335.521us       0.000us         0.00%      19.808us       3.301us             6  
+                                            aten::copy_         1.52%      35.247us        84.34%       1.953ms     325.533us      16.928us        41.69%      19.808us       3.301us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      16.928us        41.69%      16.928us       2.821us             6  
+                                Activity Buffer Request        73.01%       1.691ms        73.01%       1.691ms       1.691ms       2.880us         7.09%       2.880us       2.880us             1  
+                                    aten::empty_strided         1.36%      31.460us         1.36%      31.460us       5.243us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         9.81%     227.126us         9.81%     227.126us      37.854us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         1.37%      31.801us         1.73%      40.020us       3.335us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.35%       8.219us         0.35%       8.219us       0.685us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         1.66%      38.351us         1.66%      38.351us       6.392us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.24%       5.500us         0.24%       5.500us       5.500us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.012ms
-Self CUDA time total: 40.478us
+Self CPU time total: 2.316ms
+Self CUDA time total: 40.608us
 
 
 
@@ -4461,23 +4243,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B1_S2048_H8_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     361.785us       476.45%     361.785us     361.785us             1  
-                                      hf_kernels_rotary         8.64%     176.662us        99.77%       2.040ms       2.040ms       0.000us         0.00%      86.685us      86.685us             1  
-                                            aten::clone         1.40%      28.682us        84.64%       1.731ms     288.486us       0.000us         0.00%      47.871us       7.979us             6  
-                                            aten::copy_         1.80%      36.737us        81.55%       1.668ms     277.962us      37.119us        48.88%      47.871us       7.979us             6  
-                          _rotary_dba7d1e::apply_rotary         2.24%      45.910us         4.34%      88.820us      14.803us      38.814us        51.12%      38.814us       6.469us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      38.814us        51.12%      38.814us       6.469us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      37.119us        48.88%      37.119us       6.187us             6  
-                                Activity Buffer Request        70.82%       1.448ms        70.82%       1.448ms       1.448ms      10.752us        14.16%      10.752us      10.752us             1  
-                                    aten::empty_strided         1.69%      34.462us         1.69%      34.462us       5.744us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         8.93%     182.677us         8.93%     182.677us      30.446us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         1.66%      33.994us         2.15%      43.925us       3.660us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         0.49%       9.931us         0.49%       9.931us       0.828us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         2.10%      42.910us         2.10%      42.910us       7.152us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.23%       4.670us         0.23%       4.670us       4.670us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     343.357us       451.99%     343.357us     343.357us             1  
+                                      hf_kernels_rotary         7.23%     182.803us        99.81%       2.522ms       2.522ms       0.000us         0.00%      85.341us      85.341us             1  
+                                            aten::clone         1.16%      29.441us        87.88%       2.221ms     370.131us       0.000us         0.00%      46.013us       7.669us             6  
+                                            aten::copy_         1.42%      35.932us        85.39%       2.158ms     359.654us      36.637us        48.23%      46.013us       7.669us             6  
+                          _rotary_dba7d1e::apply_rotary         1.58%      39.950us         3.09%      78.111us      13.018us      39.328us        51.77%      39.328us       6.555us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      39.328us        51.77%      39.328us       6.555us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      36.637us        48.23%      36.637us       6.106us             6  
+                                Activity Buffer Request        75.16%       1.899ms        75.16%       1.899ms       1.899ms       9.376us        12.34%       9.376us       9.376us             1  
+                                    aten::empty_strided         1.32%      33.420us         1.32%      33.420us       5.570us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         8.81%     222.633us         8.81%     222.633us      37.105us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         1.25%      31.613us         1.61%      40.701us       3.392us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.36%       9.088us         0.36%       9.088us       0.757us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         1.51%      38.161us         1.51%      38.161us       6.360us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.19%       4.790us         0.19%       4.790us       4.790us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.045ms
-Self CUDA time total: 75.933us
+Self CPU time total: 2.527ms
+Self CUDA time total: 75.965us
 
 
 
@@ -4487,23 +4269,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B1_S2048_H32_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     373.629us       268.97%     373.629us     373.629us             1  
-                                      hf_kernels_rotary         8.95%     179.578us        99.78%       2.002ms       2.002ms       0.000us         0.00%     162.750us     162.750us             1  
-                                            aten::clone         1.48%      29.597us        83.94%       1.684ms     280.680us       0.000us         0.00%     102.944us      17.157us             6  
-                                            aten::copy_         1.82%      36.553us        80.73%       1.620ms     269.962us      79.104us        56.95%     102.944us      17.157us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      79.104us        56.95%      79.104us      13.184us             6  
-                          _rotary_dba7d1e::apply_rotary         2.30%      46.131us         4.57%      91.713us      15.285us      59.806us        43.05%      59.806us       9.968us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      59.806us        43.05%      59.806us       9.968us             6  
-                                Activity Buffer Request        69.91%       1.403ms        69.91%       1.403ms       1.403ms      23.840us        17.16%      23.840us      23.840us             1  
-                                    aten::empty_strided         1.73%      34.712us         1.73%      34.712us       5.785us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         9.00%     180.563us         9.00%     180.563us      30.094us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         1.75%      35.198us         2.31%      46.409us       3.867us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         0.56%      11.211us         0.56%      11.211us       0.934us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         2.27%      45.582us         2.27%      45.582us       7.597us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.22%       4.510us         0.22%       4.510us       4.510us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     335.487us       241.29%     335.487us     335.487us             1  
+                                      hf_kernels_rotary         7.48%     174.562us        99.79%       2.329ms       2.329ms       0.000us         0.00%     162.718us     162.718us             1  
+                                            aten::clone         1.24%      29.010us        87.24%       2.036ms     339.299us       0.000us         0.00%     102.494us      17.082us             6  
+                                            aten::copy_         1.51%      35.312us        84.60%       1.974ms     329.037us      78.815us        56.69%     102.494us      17.082us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      78.815us        56.69%      78.815us      13.136us             6  
+                          _rotary_dba7d1e::apply_rotary         1.71%      39.800us         3.37%      78.741us      13.124us      60.224us        43.31%      60.224us      10.037us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      60.224us        43.31%      60.224us      10.037us             6  
+                                Activity Buffer Request        73.92%       1.725ms        73.92%       1.725ms       1.725ms      23.679us        17.03%      23.679us      23.679us             1  
+                                    aten::empty_strided         1.40%      32.561us         1.40%      32.561us       5.427us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         9.17%     213.963us         9.17%     213.963us      35.660us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         1.33%      31.050us         1.69%      39.471us       3.289us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.36%       8.421us         0.36%       8.421us       0.702us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         1.67%      38.941us         1.67%      38.941us       6.490us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.21%       4.971us         0.21%       4.971us       4.971us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.006ms
-Self CUDA time total: 138.910us
+Self CPU time total: 2.334ms
+Self CUDA time total: 139.039us
 
 
 
@@ -4513,23 +4295,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B1_S2048_H32_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         7.56%     177.196us        86.68%       2.032ms       2.032ms       0.000us         0.00%     778.402us     778.402us             1  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     717.248us       101.07%     717.248us     717.248us             1  
-                                            aten::clone         1.23%      28.772us        72.98%       1.711ms     285.141us       0.000us         0.00%     578.626us      96.438us             6  
-                                            aten::copy_         1.64%      38.341us        70.23%       1.646ms     274.415us     509.889us        71.85%     578.626us      96.438us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us     509.889us        71.85%     509.889us      84.982us             6  
-                          _rotary_dba7d1e::apply_rotary         2.34%      54.801us         4.25%      99.591us      16.598us     199.776us        28.15%     199.776us      33.296us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us     199.776us        28.15%     199.776us      33.296us             6  
-                                Activity Buffer Request        60.86%       1.427ms        60.86%       1.427ms       1.427ms      68.737us         9.69%      68.737us      68.737us             1  
-                                    aten::empty_strided         1.52%      35.581us         1.52%      35.581us       5.930us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         7.74%     181.435us         7.74%     181.435us      30.239us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         1.41%      33.151us         1.89%      44.330us       3.694us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         0.48%      11.179us         0.48%      11.179us       0.932us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         1.91%      44.790us         1.91%      44.790us       7.465us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize        13.32%     312.348us        13.32%     312.348us     312.348us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary        13.11%     152.482us        70.07%     814.833us     814.833us       0.000us         0.00%     767.862us     767.862us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     709.398us       101.13%     709.398us     709.398us             1  
+                                            aten::clone         1.92%      22.371us        46.79%     544.150us      90.692us       0.000us         0.00%     567.671us      94.612us             6  
+                                            aten::copy_         3.06%      35.584us        42.24%     491.229us      81.872us     501.304us        71.46%     567.671us      94.612us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us     501.304us        71.46%     501.304us      83.551us             6  
+                          _rotary_dba7d1e::apply_rotary         3.52%      40.960us         6.87%      79.901us      13.317us     200.191us        28.54%     200.191us      33.365us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us     200.191us        28.54%     200.191us      33.365us             6  
+                                Activity Buffer Request        20.99%     244.144us        20.99%     244.144us     244.144us      66.367us         9.46%      66.367us      66.367us             1  
+                                    aten::empty_strided         2.63%      30.550us         2.63%      30.550us       5.092us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        18.19%     211.501us        18.19%     211.501us      35.250us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         2.57%      29.881us         3.29%      38.300us       3.192us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.72%       8.419us         0.72%       8.419us       0.702us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         3.35%      38.941us         3.35%      38.941us       6.490us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize        29.93%     348.096us        29.93%     348.096us     348.096us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.344ms
-Self CUDA time total: 709.665us
+Self CPU time total: 1.163ms
+Self CUDA time total: 701.495us
 
 
 
@@ -4539,23 +4321,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B2_S128_H8_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     349.629us      1313.11%     349.629us     349.629us             1  
-                                      hf_kernels_rotary         8.75%     174.875us        99.76%       1.994ms       1.994ms       0.000us         0.00%      27.938us      27.938us             1  
-                          _rotary_dba7d1e::apply_rotary         2.16%      43.200us         4.40%      87.900us      14.650us      18.754us        70.43%      18.754us       3.126us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      18.754us        70.43%      18.754us       3.126us             6  
-                                            aten::clone         1.44%      28.720us        84.48%       1.688ms     281.365us       0.000us         0.00%       9.184us       1.531us             6  
-                                            aten::copy_         1.82%      36.432us        81.36%       1.626ms     271.003us       7.872us        29.57%       9.184us       1.531us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.872us        29.57%       7.872us       1.312us             6  
-                                Activity Buffer Request        70.53%       1.410ms        70.53%       1.410ms       1.410ms       1.312us         4.93%       1.312us       1.312us             1  
-                                    aten::empty_strided         1.67%      33.452us         1.67%      33.452us       5.575us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         9.01%     180.083us         9.01%     180.083us      30.014us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         1.63%      32.560us         2.14%      42.684us       3.557us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         0.51%      10.124us         0.51%      10.124us       0.844us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         2.24%      44.700us         2.24%      44.700us       7.450us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.24%       4.780us         0.24%       4.780us       4.780us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     326.016us      1225.99%     326.016us     326.016us             1  
+                                      hf_kernels_rotary        18.50%     152.323us        99.40%     818.663us     818.663us       0.000us         0.00%      27.904us      27.904us             1  
+                          _rotary_dba7d1e::apply_rotary         4.86%      40.039us         9.57%      78.850us      13.142us      18.752us        70.52%      18.752us       3.125us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      18.752us        70.52%      18.752us       3.125us             6  
+                                            aten::clone         2.56%      21.061us        66.62%     548.640us      91.440us       0.000us         0.00%       9.152us       1.525us             6  
+                                            aten::copy_         4.19%      34.519us        60.27%     496.387us      82.731us       7.840us        29.48%       9.152us       1.525us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.840us        29.48%       7.840us       1.307us             6  
+                                Activity Buffer Request        29.97%     246.784us        29.97%     246.784us     246.784us       1.312us         4.93%       1.312us       1.312us             1  
+                                    aten::empty_strided         3.79%      31.192us         3.79%      31.192us       5.199us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        26.12%     215.084us        26.12%     215.084us      35.847us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         3.71%      30.531us         4.72%      38.850us       3.237us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         1.01%       8.319us         1.01%       8.319us       0.693us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         4.71%      38.811us         4.71%      38.811us       6.469us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.60%       4.910us         0.60%       4.910us       4.910us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.998ms
-Self CUDA time total: 26.626us
+Self CPU time total: 823.573us
+Self CUDA time total: 26.592us
 
 
 
@@ -4565,23 +4347,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B2_S128_H8_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     344.698us      1282.22%     344.698us     344.698us             1  
-                                      hf_kernels_rotary        22.61%     152.757us        99.23%     670.538us     670.538us       0.000us         0.00%      28.195us      28.195us             1  
-                          _rotary_dba7d1e::apply_rotary         6.64%      44.870us        12.97%      87.630us      14.605us      19.009us        70.71%      19.009us       3.168us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      19.009us        70.71%      19.009us       3.168us             6  
-                                            aten::clone         3.38%      22.839us        57.25%     386.869us      64.478us       0.000us         0.00%       9.186us       1.531us             6  
-                                            aten::copy_         5.63%      38.041us        49.11%     331.829us      55.305us       7.874us        29.29%       9.186us       1.531us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.874us        29.29%       7.874us       1.312us             6  
-                                Activity Buffer Request        16.48%     111.363us        16.48%     111.363us     111.363us       1.312us         4.88%       1.312us       1.312us             1  
-                                    aten::empty_strided         4.77%      32.201us         4.77%      32.201us       5.367us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        27.00%     182.425us        27.00%     182.425us      30.404us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.90%      33.085us         6.41%      43.282us       3.607us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         1.51%      10.197us         1.51%      10.197us       0.850us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         6.33%      42.760us         6.33%      42.760us       7.127us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.77%       5.200us         0.77%       5.200us       5.200us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     323.263us      1209.82%     323.263us     323.263us             1  
+                                      hf_kernels_rotary        17.52%     147.623us        99.42%     837.623us     837.623us       0.000us         0.00%      28.032us      28.032us             1  
+                          _rotary_dba7d1e::apply_rotary         4.62%      38.930us         9.25%      77.941us      12.990us      18.944us        70.90%      18.944us       3.157us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      18.944us        70.90%      18.944us       3.157us             6  
+                                            aten::clone         2.83%      23.880us        68.02%     573.009us      95.502us       0.000us         0.00%       9.088us       1.515us             6  
+                                            aten::copy_         4.05%      34.160us        61.53%     518.397us      86.400us       7.776us        29.10%       9.088us       1.515us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.776us        29.10%       7.776us       1.296us             6  
+                                Activity Buffer Request        32.41%     273.024us        32.41%     273.024us     273.024us       1.312us         4.91%       1.312us       1.312us             1  
+                                    aten::empty_strided         3.65%      30.732us         3.65%      30.732us       5.122us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        25.07%     211.213us        25.07%     211.213us      35.202us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         3.65%      30.720us         4.64%      39.050us       3.254us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.99%       8.330us         0.99%       8.330us       0.694us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         4.63%      39.011us         4.63%      39.011us       6.502us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.58%       4.850us         0.58%       4.850us       4.850us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 675.738us
-Self CUDA time total: 26.883us
+Self CPU time total: 842.473us
+Self CUDA time total: 26.720us
 
 
 
@@ -4591,22 +4373,22 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B2_S128_H32_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     350.004us      1141.75%     350.004us     350.004us             1  
-                                      hf_kernels_rotary        19.05%     154.214us        99.36%     804.261us     804.261us       0.000us         0.00%      32.414us      32.414us             1  
-                          _rotary_dba7d1e::apply_rotary         5.47%      44.240us        10.98%      88.910us      14.818us      20.064us        65.45%      20.064us       3.344us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      20.064us        65.45%      20.064us       3.344us             6  
-                                            aten::clone         3.02%      24.421us        63.80%     516.433us      86.072us       0.000us         0.00%      12.350us       2.058us             6  
-                                            aten::copy_         4.66%      37.732us        56.69%     458.901us      76.483us      10.591us        34.55%      12.350us       2.058us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      10.591us        34.55%      10.591us       1.765us             6  
-                                Activity Buffer Request        29.69%     240.306us        29.69%     240.306us     240.306us       1.759us         5.74%       1.759us       1.759us             1  
-                                    aten::empty_strided         4.09%      33.111us         4.09%      33.111us       5.518us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        22.34%     180.863us        22.34%     180.863us      30.144us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.15%      33.594us         5.52%      44.704us       3.725us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         1.37%      11.110us         1.37%      11.110us       0.926us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         5.52%      44.670us         5.52%      44.670us       7.445us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.64%       5.201us         0.64%       5.201us       5.201us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     317.947us      1037.18%     317.947us     317.947us             1  
+                                      hf_kernels_rotary        18.00%     147.321us        99.35%     812.963us     812.963us       0.000us         0.00%      32.383us      32.383us             1  
+                          _rotary_dba7d1e::apply_rotary         4.88%      39.901us         9.44%      77.251us      12.875us      20.255us        66.07%      20.255us       3.376us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      20.255us        66.07%      20.255us       3.376us             6  
+                                            aten::clone         2.41%      19.693us        67.19%     549.781us      91.630us       0.000us         0.00%      12.128us       2.021us             6  
+                                            aten::copy_         4.28%      35.023us        61.13%     500.160us      83.360us      10.400us        33.93%      12.128us       2.021us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      10.400us        33.93%      10.400us       1.733us             6  
+                                Activity Buffer Request        31.00%     253.664us        31.00%     253.664us     253.664us       1.728us         5.64%       1.728us       1.728us             1  
+                                    aten::empty_strided         3.66%      29.928us         3.66%      29.928us       4.988us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        25.84%     211.473us        25.84%     211.473us      35.245us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         3.72%      30.411us         4.72%      38.610us       3.218us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         1.00%       8.199us         1.00%       8.199us       0.683us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         4.56%      37.350us         4.56%      37.350us       6.225us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.65%       5.289us         0.65%       5.289us       5.289us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 809.462us
+Self CPU time total: 818.252us
 Self CUDA time total: 30.655us
 
 
@@ -4617,23 +4399,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B2_S128_H32_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     350.355us       822.64%     350.355us     350.355us             1  
-                                      hf_kernels_rotary        19.55%     155.605us        99.35%     790.981us     790.981us       0.000us         0.00%      45.469us      45.469us             1  
-                          _rotary_dba7d1e::apply_rotary         5.55%      44.191us        11.02%      87.731us      14.622us      25.565us        60.03%      25.565us       4.261us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      25.565us        60.03%      25.565us       4.261us             6  
-                                            aten::clone         2.81%      22.389us        63.13%     502.593us      83.766us       0.000us         0.00%      19.904us       3.317us             6  
-                                            aten::copy_         4.90%      39.043us        56.13%     446.833us      74.472us      17.024us        39.97%      19.904us       3.317us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      17.024us        39.97%      17.024us       2.837us             6  
-                                Activity Buffer Request        28.37%     225.886us        28.37%     225.886us     225.886us       2.880us         6.76%       2.880us       2.880us             1  
-                                    aten::empty_strided         4.19%      33.371us         4.19%      33.371us       5.562us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        22.85%     181.904us        22.85%     181.904us      30.317us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.29%      34.142us         5.66%      45.052us       3.754us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         1.37%      10.910us         1.37%      10.910us       0.909us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         5.47%      43.540us         5.47%      43.540us       7.257us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.65%       5.140us         0.65%       5.140us       5.140us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     331.769us       777.76%     331.769us     331.769us             1  
+                                      hf_kernels_rotary        19.70%     168.549us        99.44%     850.864us     850.864us       0.000us         0.00%      45.537us      45.537us             1  
+                          _rotary_dba7d1e::apply_rotary         4.73%      40.431us         9.19%      78.662us      13.110us      25.697us        60.24%      25.697us       4.283us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      25.697us        60.24%      25.697us       4.283us             6  
+                                            aten::clone         2.97%      25.433us        65.78%     562.881us      93.814us       0.000us         0.00%      19.840us       3.307us             6  
+                                            aten::copy_         4.23%      36.170us        59.14%     506.068us      84.345us      16.960us        39.76%      19.840us       3.307us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      16.960us        39.76%      16.960us       2.827us             6  
+                                Activity Buffer Request        30.43%     260.334us        30.43%     260.334us     260.334us       2.880us         6.75%       2.880us       2.880us             1  
+                                    aten::empty_strided         3.67%      31.380us         3.67%      31.380us       5.230us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        24.49%     209.564us        24.49%     209.564us      34.927us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         3.75%      32.092us         4.77%      40.772us       3.398us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         1.01%       8.680us         1.01%       8.680us       0.723us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         4.47%      38.231us         4.47%      38.231us       6.372us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.56%       4.789us         0.56%       4.789us       4.789us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 796.121us
-Self CUDA time total: 42.589us
+Self CPU time total: 855.653us
+Self CUDA time total: 42.657us
 
 
 
@@ -4643,23 +4425,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B2_S512_H8_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     344.951us      1133.59%     344.951us     344.951us             1  
-                                      hf_kernels_rotary        19.05%     153.418us        99.42%     800.680us     800.680us       0.000us         0.00%      32.125us      32.125us             1  
-                          _rotary_dba7d1e::apply_rotary         5.43%      43.718us        10.83%      87.180us      14.530us      20.095us        66.04%      20.095us       3.349us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      20.095us        66.04%      20.095us       3.349us             6  
-                                            aten::clone         2.75%      22.180us        64.20%     517.012us      86.169us       0.000us         0.00%      12.030us       2.005us             6  
-                                            aten::copy_         4.82%      38.813us        57.22%     460.802us      76.800us      10.335us        33.96%      12.030us       2.005us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      10.335us        33.96%      10.335us       1.722us             6  
-                                Activity Buffer Request        30.13%     242.666us        30.13%     242.666us     242.666us       1.695us         5.57%       1.695us       1.695us             1  
-                                    aten::empty_strided         4.23%      34.030us         4.23%      34.030us       5.672us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        22.27%     179.323us        22.27%     179.323us      29.887us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.11%      33.131us         5.35%      43.070us       3.589us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         1.23%       9.939us         1.23%       9.939us       0.828us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         5.40%      43.462us         5.40%      43.462us       7.244us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.58%       4.660us         0.58%       4.660us       4.660us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     324.568us      1058.74%     324.568us     324.568us             1  
+                                      hf_kernels_rotary        19.85%     169.202us        99.36%     847.094us     847.094us       0.000us         0.00%      32.384us      32.384us             1  
+                          _rotary_dba7d1e::apply_rotary         4.69%      39.959us         9.27%      78.991us      13.165us      20.352us        66.39%      20.352us       3.392us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      20.352us        66.39%      20.352us       3.392us             6  
+                                            aten::clone         2.92%      24.890us        65.73%     560.410us      93.402us       0.000us         0.00%      12.032us       2.005us             6  
+                                            aten::copy_         4.20%      35.769us        59.19%     504.659us      84.110us      10.304us        33.61%      12.032us       2.005us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      10.304us        33.61%      10.304us       1.717us             6  
+                                Activity Buffer Request        30.61%     260.975us        30.61%     260.975us     260.975us       1.728us         5.64%       1.728us       1.728us             1  
+                                    aten::empty_strided         3.62%      30.861us         3.62%      30.861us       5.143us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        24.39%     207.915us        24.39%     207.915us      34.652us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         3.54%      30.221us         4.51%      38.491us       3.208us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.97%       8.270us         0.97%       8.270us       0.689us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         4.58%      39.032us         4.58%      39.032us       6.505us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.64%       5.460us         0.64%       5.460us       5.460us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 805.340us
-Self CUDA time total: 30.430us
+Self CPU time total: 852.554us
+Self CUDA time total: 30.656us
 
 
 
@@ -4669,23 +4451,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B2_S512_H8_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     358.905us       840.15%     358.905us     358.905us             1  
-                                      hf_kernels_rotary        15.26%     159.123us        99.55%       1.038ms       1.038ms       0.000us         0.00%      45.598us      45.598us             1  
-                          _rotary_dba7d1e::apply_rotary         4.27%      44.490us         8.42%      87.790us      14.632us      25.600us        59.93%      25.600us       4.267us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      25.600us        59.93%      25.600us       4.267us             6  
-                                            aten::clone         2.23%      23.211us        71.54%     746.059us     124.343us       0.000us         0.00%      19.998us       3.333us             6  
-                                            aten::copy_         3.70%      38.572us        65.96%     687.817us     114.636us      17.119us        40.07%      19.998us       3.333us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      17.119us        40.07%      17.119us       2.853us             6  
-                                Activity Buffer Request        44.90%     468.242us        44.90%     468.242us     468.242us       2.879us         6.74%       2.879us       2.879us             1  
-                                    aten::empty_strided         3.36%      35.031us         3.36%      35.031us       5.838us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        17.36%     181.003us        17.36%     181.003us      30.167us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         3.32%      34.604us         4.33%      45.135us       3.761us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         1.01%      10.531us         1.01%      10.531us       0.878us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         4.15%      43.300us         4.15%      43.300us       7.217us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.45%       4.700us         0.45%       4.700us       4.700us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     328.702us       766.04%     328.702us     328.702us             1  
+                                      hf_kernels_rotary        18.09%     152.853us        99.33%     839.363us     839.363us       0.000us         0.00%      45.788us      45.788us             1  
+                          _rotary_dba7d1e::apply_rotary         4.68%      39.541us         9.21%      77.782us      12.964us      25.887us        60.33%      25.887us       4.314us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      25.887us        60.33%      25.887us       4.314us             6  
+                                            aten::clone         2.66%      22.468us        67.35%     569.108us      94.851us       0.000us         0.00%      19.901us       3.317us             6  
+                                            aten::copy_         4.16%      35.173us        60.88%     514.450us      85.742us      17.022us        39.67%      19.901us       3.317us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      17.022us        39.67%      17.022us       2.837us             6  
+                                Activity Buffer Request        32.07%     270.965us        32.07%     270.965us     270.965us       2.879us         6.71%       2.879us       2.879us             1  
+                                    aten::empty_strided         3.81%      32.190us         3.81%      32.190us       5.365us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        24.65%     208.312us        24.65%     208.312us      34.719us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         3.71%      31.390us         4.69%      39.620us       3.302us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.97%       8.230us         0.97%       8.230us       0.686us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         4.53%      38.241us         4.53%      38.241us       6.374us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.67%       5.631us         0.67%       5.631us       5.631us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.043ms
-Self CUDA time total: 42.719us
+Self CPU time total: 844.994us
+Self CUDA time total: 42.909us
 
 
 
@@ -4695,23 +4477,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B2_S512_H32_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     383.638us       432.19%     383.638us     383.638us             1  
-                                      hf_kernels_rotary        19.20%     158.364us        99.38%     819.611us     819.611us       0.000us         0.00%     103.870us     103.870us             1  
-                                            aten::clone         2.74%      22.581us        61.51%     507.313us      84.552us       0.000us         0.00%      63.135us      10.522us             6  
-                                            aten::copy_         4.83%      39.811us        54.76%     451.622us      75.270us      48.031us        54.11%      63.135us      10.522us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      48.031us        54.11%      48.031us       8.005us             6  
-                          _rotary_dba7d1e::apply_rotary         5.49%      45.243us        13.16%     108.504us      18.084us      40.735us        45.89%      40.735us       6.789us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      40.735us        45.89%      40.735us       6.789us             6  
-                                Activity Buffer Request        27.50%     226.825us        27.50%     226.825us     226.825us      15.104us        17.02%      15.104us      15.104us             1  
-                                    aten::empty_strided         4.01%      33.110us         4.01%      33.110us       5.518us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        22.43%     184.986us        22.43%     184.986us      30.831us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.25%      35.021us         5.51%      45.430us       3.786us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         1.26%      10.409us         1.26%      10.409us       0.867us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         7.67%      63.261us         7.67%      63.261us      10.543us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.62%       5.141us         0.62%       5.141us       5.141us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     337.246us       364.66%     337.246us     337.246us             1  
+                                      hf_kernels_rotary         7.43%     178.431us        99.78%       2.398ms       2.398ms       0.000us         0.00%     107.425us     107.425us             1  
+                                            aten::clone         1.14%      27.439us        87.31%       2.098ms     349.642us       0.000us         0.00%      65.823us      10.970us             6  
+                                            aten::copy_         1.39%      33.333us        84.85%       2.039ms     339.779us      50.880us        55.02%      65.823us      10.970us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      50.880us        55.02%      50.880us       8.480us             6  
+                          _rotary_dba7d1e::apply_rotary         1.70%      40.740us         3.29%      79.070us      13.178us      41.602us        44.98%      41.602us       6.934us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      41.602us        44.98%      41.602us       6.934us             6  
+                                Activity Buffer Request        74.72%       1.795ms        74.72%       1.795ms       1.795ms      14.943us        16.16%      14.943us      14.943us             1  
+                                    aten::empty_strided         1.32%      31.741us         1.32%      31.741us       5.290us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         8.74%     209.903us         8.74%     209.903us      34.984us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         1.35%      32.344us         1.76%      42.183us       3.515us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.41%       9.839us         0.41%       9.839us       0.820us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         1.60%      38.330us         1.60%      38.330us       6.388us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.22%       5.280us         0.22%       5.280us       5.280us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 824.752us
-Self CUDA time total: 88.766us
+Self CPU time total: 2.403ms
+Self CUDA time total: 92.482us
 
 
 
@@ -4721,23 +4503,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B2_S512_H32_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     359.259us       247.18%     359.259us     359.259us             1  
-                                      hf_kernels_rotary        19.06%     158.337us        99.39%     825.781us     825.781us       0.000us         0.00%     168.829us     168.829us             1  
-                                            aten::clone         2.83%      23.549us        64.09%     532.493us      88.749us       0.000us         0.00%     105.470us      17.578us             6  
-                                            aten::copy_         4.58%      38.013us        57.29%     475.972us      79.329us      81.982us        56.41%     105.470us      17.578us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      81.982us        56.41%      81.982us      13.664us             6  
-                          _rotary_dba7d1e::apply_rotary         5.47%      45.451us        10.86%      90.251us      15.042us      63.359us        43.59%      63.359us      10.560us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      63.359us        43.59%      63.359us      10.560us             6  
-                                Activity Buffer Request        31.29%     259.966us        31.29%     259.966us     259.966us      23.488us        16.16%      23.488us      23.488us             1  
-                                    aten::empty_strided         3.97%      32.972us         3.97%      32.972us       5.495us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        21.42%     177.993us        21.42%     177.993us      29.665us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.19%      34.839us         5.38%      44.700us       3.725us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         1.19%       9.861us         1.19%       9.861us       0.822us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         5.39%      44.800us         5.39%      44.800us       7.467us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.61%       5.100us         0.61%       5.100us       5.100us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     331.357us       227.98%     331.357us     331.357us             1  
+                                      hf_kernels_rotary        19.22%     153.403us        99.38%     793.253us     793.253us       0.000us         0.00%     169.054us     169.054us             1  
+                                            aten::clone         2.47%      19.681us        65.33%     521.479us      86.913us       0.000us         0.00%     105.151us      17.525us             6  
+                                            aten::copy_         4.41%      35.219us        59.11%     471.788us      78.631us      81.439us        56.03%     105.151us      17.525us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      81.439us        56.03%      81.439us      13.573us             6  
+                          _rotary_dba7d1e::apply_rotary         5.09%      40.640us         9.93%      79.270us      13.212us      63.903us        43.97%      63.903us      10.650us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      63.903us        43.97%      63.903us      10.650us             6  
+                                Activity Buffer Request        29.11%     232.364us        29.11%     232.364us     232.364us      23.712us        16.31%      23.712us      23.712us             1  
+                                    aten::empty_strided         3.76%      30.010us         3.76%      30.010us       5.002us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        25.58%     204.205us        25.58%     204.205us      34.034us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         3.78%      30.171us         4.90%      39.101us       3.258us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         1.12%       8.930us         1.12%       8.930us       0.744us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         4.84%      38.630us         4.84%      38.630us       6.438us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.62%       4.940us         0.62%       4.940us       4.940us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 830.881us
-Self CUDA time total: 145.341us
+Self CPU time total: 798.193us
+Self CUDA time total: 145.342us
 
 
 
@@ -4747,23 +4529,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B2_S2048_H8_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     385.725us       509.05%     385.725us     385.725us             1  
-                                      hf_kernels_rotary         8.62%     176.456us        99.78%       2.043ms       2.043ms       0.000us         0.00%      82.558us      82.558us             1  
-                          _rotary_dba7d1e::apply_rotary         2.32%      47.603us         4.41%      90.273us      15.045us      41.694us        55.02%      41.694us       6.949us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      41.694us        55.02%      41.694us       6.949us             6  
-                                            aten::clone         1.42%      29.000us        84.54%       1.731ms     288.534us       0.000us         0.00%      40.864us       6.811us             6  
-                                            aten::copy_         1.93%      39.552us        80.14%       1.641ms     273.497us      34.080us        44.98%      40.864us       6.811us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      34.080us        44.98%      34.080us       5.680us             6  
-                                Activity Buffer Request        69.16%       1.416ms        69.16%       1.416ms       1.416ms       6.784us         8.95%       6.784us       6.784us             1  
-                                    aten::empty_strided         2.99%      61.221us         2.99%      61.221us      10.204us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         9.05%     185.224us         9.05%     185.224us      30.871us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         1.69%      34.591us         2.21%      45.260us       3.772us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         0.52%      10.669us         0.52%      10.669us       0.889us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         2.08%      42.670us         2.08%      42.670us       7.112us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.22%       4.530us         0.22%       4.530us       4.530us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     327.384us       410.23%     327.384us     327.384us             1  
+                                      hf_kernels_rotary        18.75%     148.421us        99.39%     786.852us     786.852us       0.000us         0.00%      89.981us      89.981us             1  
+                                            aten::clone         2.67%      21.153us        65.81%     521.010us      86.835us       0.000us         0.00%      47.613us       7.935us             6  
+                                            aten::copy_         4.62%      36.560us        59.19%     468.587us      78.098us      37.437us        46.91%      47.613us       7.935us             6  
+                          _rotary_dba7d1e::apply_rotary         5.10%      40.369us         9.95%      78.790us      13.132us      42.368us        53.09%      42.368us       7.061us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      42.368us        53.09%      42.368us       7.061us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      37.437us        46.91%      37.437us       6.240us             6  
+                                Activity Buffer Request        28.86%     228.474us        28.86%     228.474us     228.474us      10.176us        12.75%      10.176us      10.176us             1  
+                                    aten::empty_strided         3.95%      31.270us         3.95%      31.270us       5.212us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        25.71%     203.553us        25.71%     203.553us      33.925us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         3.86%      30.542us         4.88%      38.631us       3.219us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         1.02%       8.089us         1.02%       8.089us       0.674us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         4.85%      38.421us         4.85%      38.421us       6.403us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.61%       4.869us         0.61%       4.869us       4.869us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.048ms
-Self CUDA time total: 75.774us
+Self CPU time total: 791.721us
+Self CUDA time total: 79.805us
 
 
 
@@ -4773,23 +4555,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B2_S2048_H8_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     368.925us       253.94%     368.925us     368.925us             1  
-                                      hf_kernels_rotary         8.62%     177.641us        99.74%       2.055ms       2.055ms       0.000us         0.00%     169.118us     169.118us             1  
-                                            aten::clone         1.42%      29.322us        84.62%       1.743ms     290.539us       0.000us         0.00%     105.470us      17.578us             6  
-                                            aten::copy_         1.92%      39.462us        81.52%       1.679ms     279.897us      81.631us        56.19%     105.470us      17.578us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      81.631us        56.19%      81.631us      13.605us             6  
-                          _rotary_dba7d1e::apply_rotary         2.27%      46.683us         4.40%      90.665us      15.111us      63.648us        43.81%      63.648us      10.608us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      63.648us        43.81%      63.648us      10.608us             6  
-                                Activity Buffer Request        70.79%       1.458ms        70.79%       1.458ms       1.458ms      23.839us        16.41%      23.839us      23.839us             1  
-                                    aten::empty_strided         1.68%      34.530us         1.68%      34.530us       5.755us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         8.81%     181.504us         8.81%     181.504us      30.251us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         1.62%      33.289us         2.09%      43.080us       3.590us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         0.48%       9.791us         0.48%       9.791us       0.816us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         2.13%      43.982us         2.13%      43.982us       7.330us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize         0.26%       5.450us         0.26%       5.450us       5.450us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     334.133us       229.04%     334.133us     334.133us             1  
+                                      hf_kernels_rotary        18.91%     152.747us        99.33%     802.303us     802.303us       0.000us         0.00%     169.593us     169.593us             1  
+                                            aten::clone         2.63%      21.282us        65.81%     531.500us      88.583us       0.000us         0.00%     105.244us      17.541us             6  
+                                            aten::copy_         4.22%      34.070us        59.15%     477.709us      79.618us      81.533us        55.89%     105.244us      17.541us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      81.533us        55.89%      81.533us      13.589us             6  
+                          _rotary_dba7d1e::apply_rotary         4.95%      39.971us         9.71%      78.412us      13.069us      64.349us        44.11%      64.349us      10.725us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us      64.349us        44.11%      64.349us      10.725us             6  
+                                Activity Buffer Request        29.92%     241.694us        29.92%     241.694us     241.694us      23.711us        16.25%      23.711us      23.711us             1  
+                                    aten::empty_strided         4.02%      32.509us         4.02%      32.509us       5.418us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        25.00%     201.945us        25.00%     201.945us      33.657us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         3.87%      31.225us         4.91%      39.644us       3.304us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         1.04%       8.419us         1.04%       8.419us       0.702us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         4.76%      38.441us         4.76%      38.441us       6.407us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize         0.67%       5.380us         0.67%       5.380us       5.380us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.060ms
-Self CUDA time total: 145.279us
+Self CPU time total: 807.683us
+Self CUDA time total: 145.882us
 
 
 
@@ -4799,23 +4581,23 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B2_S2048_H32_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary        20.72%     223.838us        78.32%     845.992us     845.992us       0.000us         0.00%     747.476us     747.476us             1  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     688.117us       101.15%     688.117us     688.117us             1  
-                                            aten::clone         2.05%      22.091us        45.23%     488.522us      81.420us       0.000us         0.00%     558.423us      93.070us             6  
-                                            aten::copy_         3.67%      39.650us        40.20%     434.190us      72.365us     491.256us        72.21%     558.423us      93.070us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us     491.256us        72.21%     491.256us      81.876us             6  
-                          _rotary_dba7d1e::apply_rotary         4.18%      45.161us         8.45%      91.252us      15.209us     189.053us        27.79%     189.053us      31.509us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us     189.053us        27.79%     189.053us      31.509us             6  
-                                Activity Buffer Request        19.62%     211.896us        19.62%     211.896us     211.896us      67.167us         9.87%      67.167us      67.167us             1  
-                                    aten::empty_strided         2.98%      32.241us         2.98%      32.241us       5.374us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        16.91%     182.644us        16.91%     182.644us      30.441us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         3.05%      32.939us         3.92%      42.380us       3.532us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         0.87%       9.441us         0.87%       9.441us       0.787us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         4.27%      46.091us         4.27%      46.091us       7.682us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize        21.68%     234.186us        21.68%     234.186us     234.186us       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary        13.54%     152.254us        71.57%     804.992us     804.992us       0.000us         0.00%     741.111us     741.111us             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us     682.359us       101.20%     682.359us     682.359us             1  
+                                            aten::clone         1.94%      21.788us        47.45%     533.747us      88.958us       0.000us         0.00%     557.274us      92.879us             6  
+                                            aten::copy_         3.08%      34.611us        42.75%     480.788us      80.131us     490.426us        72.74%     557.274us      92.879us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us     490.426us        72.74%     490.426us      81.738us             6  
+                          _rotary_dba7d1e::apply_rotary         3.61%      40.571us         7.01%      78.811us      13.135us     183.837us        27.26%     183.837us      30.639us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us     183.837us        27.26%     183.837us      30.639us             6  
+                                Activity Buffer Request        21.83%     245.524us        21.83%     245.524us     245.524us      66.848us         9.91%      66.848us      66.848us             1  
+                                    aten::empty_strided         2.77%      31.171us         2.77%      31.171us       5.195us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        17.84%     200.653us        17.84%     200.653us      33.442us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         2.81%      31.570us         3.57%      40.180us       3.348us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.77%       8.610us         0.77%       8.610us       0.718us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         3.40%      38.240us         3.40%      38.240us       6.373us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize        28.43%     319.765us        28.43%     319.765us     319.765us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.080ms
-Self CUDA time total: 680.309us
+Self CPU time total: 1.125ms
+Self CUDA time total: 674.263us
 
 
 
@@ -4825,33 +4607,33 @@ PROFILE TRACE: hf_kernels_rotary | cuda_B2_S2048_H32_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                      hf_kernels_rotary         5.41%     154.946us        27.83%     797.061us     797.061us       0.000us         0.00%       2.625ms       2.625ms             1  
-                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us       2.453ms       100.31%       2.453ms       2.453ms             1  
-                                            aten::clone         0.79%      22.601us        17.83%     510.683us      85.114us       0.000us         0.00%       1.396ms     232.586us             6  
-                                            aten::copy_         1.43%      40.940us        15.89%     455.120us      75.853us       1.216ms        49.74%       1.396ms     232.586us             6  
-                          _rotary_dba7d1e::apply_rotary         1.59%      45.590us         3.06%      87.640us      14.607us       1.229ms        50.26%       1.229ms     204.885us             6  
-void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us       1.229ms        50.26%       1.229ms     204.885us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       1.216ms        49.74%       1.216ms     202.730us             6  
-                                Activity Buffer Request         7.23%     207.076us         7.23%     207.076us     207.076us     179.136us         7.32%     179.136us     179.136us             1  
-                                    aten::empty_strided         1.15%      32.962us         1.15%      32.962us       5.494us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         7.23%     207.104us         7.23%     207.104us      34.517us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         1.15%      33.011us         1.53%      43.792us       3.649us       0.000us         0.00%       0.000us       0.000us            12  
-                                       aten::as_strided         0.38%      10.781us         0.38%      10.781us       0.898us       0.000us         0.00%       0.000us       0.000us            12  
-                                       cudaLaunchKernel         1.47%      42.050us         1.47%      42.050us       7.008us       0.000us         0.00%       0.000us       0.000us             6  
-                                  cudaDeviceSynchronize        72.17%       2.067ms        72.17%       2.067ms       2.067ms       0.000us         0.00%       0.000us       0.000us             1  
+                                      hf_kernels_rotary         5.26%     152.407us        28.24%     818.853us     818.853us       0.000us         0.00%       2.611ms       2.611ms             1  
+                                      hf_kernels_rotary         0.00%       0.000us         0.00%       0.000us       0.000us       2.442ms       100.34%       2.442ms       2.442ms             1  
+                                            aten::clone         0.72%      20.941us        18.92%     548.700us      91.450us       0.000us         0.00%       1.390ms     231.619us             6  
+                                            aten::copy_         1.19%      34.511us        17.07%     495.108us      82.518us       1.212ms        49.82%       1.390ms     231.619us             6  
+                          _rotary_dba7d1e::apply_rotary         1.41%      40.761us         2.75%      79.892us      13.315us       1.221ms        50.18%       1.221ms     203.523us             6  
+void at::native::(anonymous namespace)::unrolled_ele...         0.00%       0.000us         0.00%       0.000us       0.000us       1.221ms        50.18%       1.221ms     203.523us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       1.212ms        49.82%       1.212ms     202.067us             6  
+                                Activity Buffer Request         8.94%     259.144us         8.94%     259.144us     259.144us     177.311us         7.29%     177.311us     177.311us             1  
+                                    aten::empty_strided         1.13%      32.651us         1.13%      32.651us       5.442us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         6.95%     201.453us         6.95%     201.453us      33.575us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         1.03%      29.842us         1.31%      37.854us       3.154us       0.000us         0.00%       0.000us       0.000us            12  
+                                       aten::as_strided         0.28%       8.012us         0.28%       8.012us       0.668us       0.000us         0.00%       0.000us       0.000us            12  
+                                       cudaLaunchKernel         1.35%      39.131us         1.35%      39.131us       6.522us       0.000us         0.00%       0.000us       0.000us             6  
+                                  cudaDeviceSynchronize        71.76%       2.081ms        71.76%       2.081ms       2.081ms       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.864ms
-Self CUDA time total: 2.446ms
+Self CPU time total: 2.900ms
+Self CUDA time total: 2.434ms
 
 
 impl                     wl                  p50(ms)  ok
 hf_kernels_rotary        cuda_B1_S128_H32_D128_R64     0.09  True
 hf_kernels_rotary        cuda_B1_S128_H32_D64_R32     0.09  True
 hf_kernels_rotary        cuda_B1_S128_H8_D128_R64     0.09  True
-hf_kernels_rotary        cuda_B1_S128_H8_D64_R32     0.08  True
+hf_kernels_rotary        cuda_B1_S128_H8_D64_R32     0.07  True
 hf_kernels_rotary        cuda_B1_S2048_H32_D128_R64     0.26  True
-hf_kernels_rotary        cuda_B1_S2048_H32_D64_R32     0.10  True
-hf_kernels_rotary        cuda_B1_S2048_H8_D128_R64     0.10  True
+hf_kernels_rotary        cuda_B1_S2048_H32_D64_R32     0.09  True
+hf_kernels_rotary        cuda_B1_S2048_H8_D128_R64     0.09  True
 hf_kernels_rotary        cuda_B1_S2048_H8_D64_R32     0.09  True
 hf_kernels_rotary        cuda_B1_S512_H32_D128_R64     0.09  True
 hf_kernels_rotary        cuda_B1_S512_H32_D64_R32     0.09  True
@@ -4862,7 +4644,7 @@ hf_kernels_rotary        cuda_B2_S128_H32_D64_R32     0.09  True
 hf_kernels_rotary        cuda_B2_S128_H8_D128_R64     0.09  True
 hf_kernels_rotary        cuda_B2_S128_H8_D64_R32     0.09  True
 hf_kernels_rotary        cuda_B2_S2048_H32_D128_R64     0.85  True
-hf_kernels_rotary        cuda_B2_S2048_H32_D64_R32     0.27  True
+hf_kernels_rotary        cuda_B2_S2048_H32_D64_R32     0.26  True
 hf_kernels_rotary        cuda_B2_S2048_H8_D128_R64     0.09  True
 hf_kernels_rotary        cuda_B2_S2048_H8_D64_R32     0.09  True
 hf_kernels_rotary        cuda_B2_S512_H32_D128_R64     0.09  True
@@ -4873,12 +4655,12 @@ hf_kernels_rotary        cuda_B2_S512_H8_D64_R32     0.09  True
 
▶ UV Install Logs
Fetching 5 files: 0%| | 0/5 [00:00<?, ?it/s] -Fetching 5 files: 60%|██████ | 3/5 [00:00<00:00, 28.46it/s] -Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 9.80it/s]
+Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 12.23it/s] +Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 12.22it/s]

Artifacts:

rotary.jsonl diff --git a/rotary/impls/torch_rotary.html b/rotary/impls/torch_rotary.html index d7b34676102680b464b702d7de0525c0d9d460d2..25112ab7856f582c9c54186076c6ee1c859bac70 100644 --- a/rotary/impls/torch_rotary.html +++ b/rotary/impls/torch_rotary.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: 
var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } 
+:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: 
var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: 
flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); 
border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); 
background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { 
font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: 
var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid 
var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); 
@@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 
{ margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; 
} - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight 
.gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ 
+[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { 
color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* 
Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] 
.highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] 
.highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - 
} -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ 
body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4106,7 +3888,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: nv | 0.23s +Cell: nv | 0.22s | Raw @@ -4122,16 +3904,16 @@ Cell: nv | 0.23s
-
Fri Oct 31 20:00:00 2025       
+
Mon Nov 10 21:57:39 2025       
 +-----------------------------------------------------------------------------------------+
-| NVIDIA-SMI 570.195.03             Driver Version: 570.195.03     CUDA Version: 12.8     |
-|-----------------------------------------+------------------------+----------------------+
+| NVIDIA-SMI 580.95.05              Driver Version: 580.95.05      CUDA Version: 13.0     |
++-----------------------------------------+------------------------+----------------------+
 | GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
 | Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
 |                                         |                        |               MIG M. |
 |=========================================+========================+======================|
 |   0  NVIDIA L40S                    On  |   00000000:4D:00.0 Off |                    0 |
-| N/A   32C    P0            101W /  350W |       0MiB /  46068MiB |    100%      Default |
+| N/A   26C    P0             88W /  350W |       0MiB /  46068MiB |     22%      Default |
 |                                         |                        |                  N/A |
 +-----------------------------------------+------------------------+----------------------+
 
@@ -4155,7 +3937,7 @@ Cell: nv | 0.23s
 ▼ output
  ▶ uv-logs
  | 
-Cell: benchmark | 7.58s
+Cell: benchmark | 38.43s
  | 
 
 Raw
@@ -4234,27 +4016,27 @@ PROFILE TRACE: torch_eager | cuda_B1_S128_H8_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us       1.068ms      1195.27%       1.068ms       1.068ms             1  
-                                            torch_eager        14.00%     388.140us        99.71%       2.764ms       2.764ms       0.000us         0.00%      90.528us      90.528us             1  
-                                              aten::mul         6.16%     170.676us        10.43%     289.217us      12.051us      46.911us        52.52%      46.911us       1.955us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      46.911us        52.52%      46.911us       1.955us            24  
-                                            aten::copy_         4.25%     117.935us        62.65%       1.737ms      96.500us      29.185us        32.68%      30.401us       1.689us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      22.561us        25.26%      22.561us       1.880us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      13.216us        14.80%      13.216us       1.101us            12  
-                                            aten::clone         1.62%      44.961us        61.78%       1.713ms     285.451us       0.000us         0.00%       7.840us       1.307us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       6.624us         7.42%       6.624us       1.104us             6  
-                                              aten::sub         1.59%      44.071us         2.54%      70.301us      11.717us       6.624us         7.42%       6.624us       1.104us             6  
-                                              aten::add         1.26%      34.801us         2.08%      57.721us       9.620us       6.592us         7.38%       6.592us       1.099us             6  
-                                Activity Buffer Request        53.17%       1.474ms        53.17%       1.474ms       1.474ms       1.216us         1.36%       1.216us       1.216us             1  
-                                    aten::empty_strided         2.35%      65.251us         2.35%      65.251us      10.875us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         2.98%      82.752us         2.98%      82.752us      13.792us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         3.05%      84.591us         4.03%     111.694us       4.654us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.98%      27.103us         0.98%      27.103us       1.129us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         8.29%     229.882us         8.29%     229.882us       4.789us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.29%       8.120us         0.29%       8.120us       8.120us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us       1.315ms      1474.39%       1.315ms       1.315ms             1  
+                                            torch_eager         7.00%     401.548us        82.40%       4.729ms       4.729ms       0.000us         0.00%      90.432us      90.432us             1  
+                                              aten::mul         3.25%     186.430us         5.35%     307.044us      12.793us      46.943us        52.62%      46.943us       1.956us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      46.943us        52.62%      46.943us       1.956us            24  
+                                            aten::copy_         2.48%     142.261us        48.48%       2.782ms     154.576us      29.122us        32.64%      30.338us       1.685us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      22.433us        25.14%      22.433us       1.869us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      13.151us        14.74%      13.151us       1.096us            12  
+                                            aten::clone         0.88%      50.441us        59.65%       3.423ms     570.575us       0.000us         0.00%       7.905us       1.318us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       6.689us         7.50%       6.689us       1.115us             6  
+                                              aten::sub         0.82%      47.350us         1.28%      73.411us      12.235us       6.591us         7.39%       6.591us       1.098us             6  
+                                              aten::add         0.64%      36.811us         1.04%      59.601us       9.934us       6.560us         7.35%       6.560us       1.093us             6  
+                                Activity Buffer Request        39.92%       2.291ms        39.92%       2.291ms       2.291ms       1.216us         1.36%       1.216us       1.216us             1  
+                                    aten::empty_strided        16.52%     948.386us        16.52%     948.386us     158.064us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         1.38%      78.980us         1.38%      78.980us      13.163us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         1.46%      83.925us         1.86%     106.703us       4.446us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.40%      22.778us         0.40%      22.778us       0.949us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         7.66%     439.430us         7.66%     439.430us       9.155us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize        17.60%       1.010ms        17.60%       1.010ms       1.010ms       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.772ms
-Self CUDA time total: 89.312us
+Self CPU time total: 5.740ms
+Self CUDA time total: 89.216us
 
 
 
@@ -4264,27 +4046,27 @@ PROFILE TRACE: torch_eager | cuda_B1_S128_H8_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     960.345us      1063.10%     960.345us     960.345us             1  
-                                            torch_eager        11.94%     304.272us        99.78%       2.543ms       2.543ms       0.000us         0.00%      91.454us      91.454us             1  
-                                              aten::mul         6.19%     157.625us        10.77%     274.398us      11.433us      47.776us        52.89%      47.776us       1.991us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      47.776us        52.89%      47.776us       1.991us            24  
-                                            aten::copy_         4.14%     105.392us        66.58%       1.697ms      94.258us      29.343us        32.48%      30.463us       1.692us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      22.559us        24.97%      22.559us       1.880us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      13.215us        14.63%      13.215us       1.101us            12  
-                                            aten::clone         0.97%      24.733us        63.76%       1.625ms     270.825us       0.000us         0.00%       7.904us       1.317us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       6.784us         7.51%       6.784us       1.131us             6  
-                                              aten::add         1.23%      31.452us         2.12%      54.072us       9.012us       6.623us         7.33%       6.623us       1.104us             6  
-                                              aten::sub         1.53%      39.032us         2.55%      64.964us      10.827us       6.592us         7.30%       6.592us       1.099us             6  
-                                Activity Buffer Request        57.59%       1.468ms        57.59%       1.468ms       1.468ms       1.120us         1.24%       1.120us       1.120us             1  
-                                    aten::empty_strided         1.31%      33.410us         1.31%      33.410us       5.568us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         2.43%      61.963us         2.43%      61.963us      10.327us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         2.76%      70.222us         3.54%      90.271us       3.761us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.79%      20.049us         0.79%      20.049us       0.835us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         8.91%     226.937us         8.91%     226.937us       4.728us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.22%       5.590us         0.22%       5.590us       5.590us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     967.576us      1072.55%     967.576us     967.576us             1  
+                                            torch_eager        10.80%     301.919us        99.80%       2.790ms       2.790ms       0.000us         0.00%      91.365us      91.365us             1  
+                                              aten::mul         5.82%     162.824us         9.87%     275.997us      11.500us      47.523us        52.68%      47.523us       1.980us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      47.523us        52.68%      47.523us       1.980us            24  
+                                            aten::copy_         4.18%     116.751us        70.01%       1.957ms     108.723us      29.282us        32.46%      30.434us       1.691us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      22.497us        24.94%      22.497us       1.875us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      13.408us        14.86%      13.408us       1.117us            12  
+                                            aten::clone         0.79%      22.172us        66.92%       1.871ms     311.782us       0.000us         0.00%       7.937us       1.323us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       6.785us         7.52%       6.785us       1.131us             6  
+                                              aten::add         1.23%      34.361us         2.02%      56.562us       9.427us       6.720us         7.45%       6.720us       1.120us             6  
+                                              aten::sub         1.36%      38.010us         2.19%      61.310us      10.218us       6.688us         7.41%       6.688us       1.115us             6  
+                                Activity Buffer Request        61.66%       1.724ms        61.66%       1.724ms       1.724ms       1.152us         1.28%       1.152us       1.152us             1  
+                                    aten::empty_strided         1.16%      32.541us         1.16%      32.541us       5.424us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         2.01%      56.260us         2.01%      56.260us       9.377us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         2.34%      65.363us         2.94%      82.214us       3.426us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.60%      16.851us         0.60%      16.851us       0.702us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         7.84%     219.114us         7.84%     219.114us       4.565us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.20%       5.580us         0.20%       5.580us       5.580us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.548ms
-Self CUDA time total: 90.334us
+Self CPU time total: 2.795ms
+Self CUDA time total: 90.213us
 
 
 
@@ -4294,27 +4076,27 @@ PROFILE TRACE: torch_eager | cuda_B1_S128_H32_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     989.616us      1051.23%     989.616us     989.616us             1  
-                                            torch_eager        12.09%     307.194us        99.76%       2.536ms       2.536ms       0.000us         0.00%      95.450us      95.450us             1  
-                                              aten::mul         6.35%     161.494us        11.09%     281.865us      11.744us      48.958us        52.01%      48.958us       2.040us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      48.958us        52.01%      48.958us       2.040us            24  
-                                            aten::copy_         4.30%     109.293us        66.10%       1.680ms      93.343us      30.814us        32.73%      32.125us       1.785us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      22.943us        24.37%      22.943us       1.912us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      14.367us        15.26%      14.367us       1.197us            12  
-                                            aten::clone         0.97%      24.599us        62.75%       1.595ms     265.823us       0.000us         0.00%       9.182us       1.530us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.871us         8.36%       7.871us       1.312us             6  
-                                              aten::add         1.20%      30.579us         2.08%      52.891us       8.815us       7.199us         7.65%       7.199us       1.200us             6  
-                                              aten::sub         1.49%      37.871us         2.53%      64.231us      10.705us       7.168us         7.61%       7.168us       1.195us             6  
-                                Activity Buffer Request        56.57%       1.438ms        56.57%       1.438ms       1.438ms       1.311us         1.39%       1.311us       1.311us             1  
-                                    aten::empty_strided         1.38%      35.041us         1.38%      35.041us       5.840us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         2.38%      60.441us         2.38%      60.441us      10.074us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         2.77%      70.298us         3.53%      89.841us       3.743us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.77%      19.543us         0.77%      19.543us       0.814us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         9.50%     241.544us         9.50%     241.544us       5.032us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.24%       6.100us         0.24%       6.100us       6.100us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     927.639us       987.31%     927.639us     927.639us             1  
+                                            torch_eager        10.07%     282.335us        99.80%       2.798ms       2.798ms       0.000us         0.00%      95.268us      95.268us             1  
+                                              aten::mul         5.75%     161.290us         9.68%     271.373us      11.307us      48.769us        51.91%      48.769us       2.032us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      48.769us        51.91%      48.769us       2.032us            24  
+                                            aten::copy_         3.66%     102.626us        71.21%       1.996ms     110.912us      30.720us        32.70%      32.032us       1.780us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      22.912us        24.39%      22.912us       1.909us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      14.467us        15.40%      14.467us       1.206us            12  
+                                            aten::clone         0.79%      22.060us        68.41%       1.918ms     319.628us       0.000us         0.00%       9.120us       1.520us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.808us         8.31%       7.808us       1.301us             6  
+                                              aten::sub         1.36%      38.040us         2.18%      61.002us      10.167us       7.265us         7.73%       7.265us       1.211us             6  
+                                              aten::add         1.15%      32.220us         1.90%      53.280us       8.880us       7.202us         7.67%       7.202us       1.200us             6  
+                                Activity Buffer Request        63.51%       1.780ms        63.51%       1.780ms       1.780ms       1.312us         1.40%       1.312us       1.312us             1  
+                                    aten::empty_strided         1.12%      31.490us         1.12%      31.490us       5.248us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         1.87%      52.452us         1.87%      52.452us       8.742us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         2.25%      63.104us         2.86%      80.042us       3.335us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.60%      16.938us         0.60%      16.938us       0.706us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         7.67%     215.090us         7.67%     215.090us       4.481us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.20%       5.470us         0.20%       5.470us       5.470us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.542ms
-Self CUDA time total: 94.139us
+Self CPU time total: 2.803ms
+Self CUDA time total: 93.956us
 
 
 
@@ -4324,27 +4106,27 @@ PROFILE TRACE: torch_eager | cuda_B1_S128_H32_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     928.327us       916.02%     928.327us     928.327us             1  
-                                            torch_eager        12.51%     290.049us        99.77%       2.313ms       2.313ms       0.000us         0.00%     102.689us     102.689us             1  
-                                              aten::mul         6.36%     147.401us        11.12%     257.946us      10.748us      52.800us        52.10%      52.800us       2.200us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      52.800us        52.10%      52.800us       2.200us            24  
-                                            aten::copy_         4.62%     107.204us        65.04%       1.508ms      83.777us      32.415us        31.99%      33.760us       1.876us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      24.511us        24.19%      24.511us       2.043us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      16.129us        15.92%      16.129us       1.344us            12  
-                                            aten::clone         0.98%      22.822us        61.74%       1.431ms     238.579us       0.000us         0.00%       9.249us       1.542us             6  
-                                              aten::add         1.37%      31.668us         2.34%      54.320us       9.053us       8.096us         7.99%       8.096us       1.349us             6  
-                                              aten::sub         1.57%      36.291us         2.61%      60.431us      10.072us       8.033us         7.93%       8.033us       1.339us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.904us         7.80%       7.904us       1.317us             6  
-                                Activity Buffer Request        46.02%       1.067ms        46.02%       1.067ms       1.067ms       1.345us         1.33%       1.345us       1.345us             1  
-                                    aten::empty_strided         1.38%      31.940us         1.38%      31.940us       5.323us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        11.71%     271.508us        11.71%     271.508us      45.251us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         2.99%      69.429us         3.79%      87.781us       3.658us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.79%      18.352us         0.79%      18.352us       0.765us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         9.47%     219.548us         9.47%     219.548us       4.574us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.23%       5.380us         0.23%       5.380us       5.380us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     918.847us       904.69%     918.847us     918.847us             1  
+                                            torch_eager        11.08%     278.185us        99.79%       2.506ms       2.506ms       0.000us         0.00%     102.877us     102.877us             1  
+                                              aten::mul         6.15%     154.372us        10.54%     264.762us      11.032us      52.638us        51.83%      52.638us       2.193us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      52.638us        51.83%      52.638us       2.193us            24  
+                                            aten::copy_         4.16%     104.580us        68.26%       1.714ms      95.219us      32.416us        31.92%      33.728us       1.874us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      24.641us        24.26%      24.641us       2.053us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      16.511us        16.26%      16.511us       1.376us            12  
+                                            aten::clone         0.84%      21.090us        65.15%       1.636ms     272.671us       0.000us         0.00%       9.087us       1.514us             6  
+                                              aten::sub         1.51%      38.031us         2.44%      61.190us      10.198us       8.288us         8.16%       8.288us       1.381us             6  
+                                              aten::add         1.29%      32.470us         2.19%      54.880us       9.147us       8.223us         8.10%       8.223us       1.371us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.775us         7.66%       7.775us       1.296us             6  
+                                Activity Buffer Request        52.27%       1.312ms        52.27%       1.312ms       1.312ms       1.312us         1.29%       1.312us       1.312us             1  
+                                    aten::empty_strided         1.29%      32.302us         1.29%      32.302us       5.384us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         9.44%     236.943us         9.44%     236.943us      39.491us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         2.53%      63.496us         3.16%      79.393us       3.308us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.63%      15.897us         0.63%      15.897us       0.662us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         8.60%     215.892us         8.60%     215.892us       4.498us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.21%       5.340us         0.21%       5.340us       5.340us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.319ms
-Self CUDA time total: 101.344us
+Self CPU time total: 2.511ms
+Self CUDA time total: 101.565us
 
 
 
@@ -4354,27 +4136,27 @@ PROFILE TRACE: torch_eager | cuda_B1_S512_H8_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us       1.018ms      1082.59%       1.018ms       1.018ms             1  
-                                            torch_eager        11.47%     329.955us        99.81%       2.870ms       2.870ms       0.000us         0.00%      95.358us      95.358us             1  
-                                              aten::mul         5.65%     162.614us         9.86%     283.677us      11.820us      49.056us        52.16%      49.056us       2.044us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      49.056us        52.16%      49.056us       2.044us            24  
-                                            aten::copy_         3.88%     111.664us        68.17%       1.960ms     108.907us      30.720us        32.66%      32.032us       1.780us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      22.880us        24.33%      22.880us       1.907us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      14.270us        15.17%      14.270us       1.189us            12  
-                                            aten::clone         1.07%      30.831us        65.73%       1.890ms     315.021us       0.000us         0.00%       9.152us       1.525us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.840us         8.34%       7.840us       1.307us             6  
-                                              aten::add         1.15%      33.191us         2.07%      59.441us       9.907us       7.167us         7.62%       7.167us       1.194us             6  
-                                              aten::sub         1.59%      45.863us         2.59%      74.463us      12.411us       7.103us         7.55%       7.103us       1.184us             6  
-                                Activity Buffer Request        50.07%       1.440ms        50.07%       1.440ms       1.440ms       1.312us         1.40%       1.312us       1.312us             1  
-                                    aten::empty_strided         1.26%      36.310us         1.26%      36.310us       6.052us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        11.96%     343.839us        11.96%     343.839us      57.306us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         2.64%      75.860us         3.31%      95.264us       3.969us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.67%      19.404us         0.67%      19.404us       0.809us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         8.38%     240.995us         8.38%     240.995us       5.021us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.19%       5.330us         0.19%       5.330us       5.330us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     991.709us      1060.94%     991.709us     991.709us             1  
+                                            torch_eager        10.56%     336.649us        99.82%       3.183ms       3.183ms       0.000us         0.00%      94.755us      94.755us             1  
+                                              aten::mul         5.20%     165.794us         8.73%     278.295us      11.596us      48.674us        52.07%      48.674us       2.028us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      48.674us        52.07%      48.674us       2.028us            24  
+                                            aten::copy_         3.76%     119.863us        72.07%       2.298ms     127.674us      30.622us        32.76%      31.902us       1.772us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      22.878us        24.47%      22.878us       1.907us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      14.179us        15.17%      14.179us       1.182us            12  
+                                            aten::clone         0.88%      28.161us        69.55%       2.218ms     369.616us       0.000us         0.00%       9.024us       1.504us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.744us         8.28%       7.744us       1.291us             6  
+                                              aten::sub         1.28%      40.920us         2.05%      65.511us      10.918us       7.138us         7.64%       7.138us       1.190us             6  
+                                              aten::add         1.05%      33.330us         1.81%      57.620us       9.603us       7.041us         7.53%       7.041us       1.173us             6  
+                                Activity Buffer Request        55.60%       1.773ms        55.60%       1.773ms       1.773ms       1.280us         1.37%       1.280us       1.280us             1  
+                                    aten::empty_strided         1.06%      33.640us         1.06%      33.640us       5.607us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        10.74%     342.585us        10.74%     342.585us      57.097us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         2.14%      68.349us         2.66%      84.959us       3.540us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.52%      16.610us         0.52%      16.610us       0.692us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         7.03%     224.072us         7.03%     224.072us       4.668us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.18%       5.590us         0.18%       5.590us       5.590us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.876ms
-Self CUDA time total: 94.046us
+Self CPU time total: 3.189ms
+Self CUDA time total: 93.475us
 
 
 
@@ -4384,27 +4166,27 @@ PROFILE TRACE: torch_eager | cuda_B1_S512_H8_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     913.335us       900.40%     913.335us     913.335us             1  
-                                            torch_eager        10.58%     290.726us        99.81%       2.742ms       2.742ms       0.000us         0.00%     102.781us     102.781us             1  
-                                              aten::mul         5.30%     145.663us         9.31%     255.637us      10.652us      52.735us        51.99%      52.735us       2.197us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      52.735us        51.99%      52.735us       2.197us            24  
-                                            aten::copy_         3.74%     102.751us        70.53%       1.937ms     107.622us      32.638us        32.18%      33.982us       1.888us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      24.735us        24.38%      24.735us       2.061us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      16.064us        15.84%      16.064us       1.339us            12  
-                                            aten::clone         0.88%      24.121us        67.96%       1.867ms     311.110us       0.000us         0.00%       9.247us       1.541us             6  
-                                              aten::sub         1.29%      35.411us         2.16%      59.202us       9.867us       8.033us         7.92%       8.033us       1.339us             6  
-                                              aten::add         1.13%      30.931us         1.93%      52.952us       8.825us       8.031us         7.92%       8.031us       1.339us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.903us         7.79%       7.903us       1.317us             6  
-                                Activity Buffer Request        52.85%       1.452ms        52.85%       1.452ms       1.452ms       1.344us         1.32%       1.344us       1.344us             1  
-                                    aten::empty_strided         1.21%      33.351us         1.21%      33.351us       5.559us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        11.71%     321.577us        11.71%     321.577us      53.596us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         2.55%      69.990us         3.22%      88.522us       3.688us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.67%      18.532us         0.67%      18.532us       0.772us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         7.90%     216.969us         7.90%     216.969us       4.520us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.19%       5.091us         0.19%       5.091us       5.091us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     941.177us       926.36%     941.177us     941.177us             1  
+                                            torch_eager         9.56%     295.804us        99.83%       3.088ms       3.088ms       0.000us         0.00%     102.911us     102.911us             1  
+                                              aten::mul         5.03%     155.643us         8.60%     265.986us      11.083us      52.802us        51.97%      52.802us       2.200us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      52.802us        51.97%      52.802us       2.200us            24  
+                                            aten::copy_         3.66%     113.330us        73.34%       2.269ms     126.052us      32.447us        31.94%      33.759us       1.876us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      24.703us        24.31%      24.703us       2.059us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      16.350us        16.09%      16.350us       1.363us            12  
+                                            aten::clone         0.71%      21.820us        70.53%       2.182ms     363.694us       0.000us         0.00%       9.056us       1.509us             6  
+                                              aten::sub         1.30%      40.120us         2.07%      63.950us      10.658us       8.223us         8.09%       8.223us       1.370us             6  
+                                              aten::add         1.17%      36.201us         1.90%      58.931us       9.822us       8.127us         8.00%       8.127us       1.355us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.744us         7.62%       7.744us       1.291us             6  
+                                Activity Buffer Request        57.23%       1.771ms        57.23%       1.771ms       1.771ms       1.312us         1.29%       1.312us       1.312us             1  
+                                    aten::empty_strided         0.98%      30.371us         0.98%      30.371us       5.062us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        10.40%     321.885us        10.40%     321.885us      53.647us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         2.12%      65.592us         2.67%      82.622us       3.443us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.55%      17.030us         0.55%      17.030us       0.710us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         7.11%     219.985us         7.11%     219.985us       4.583us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.17%       5.340us         0.17%       5.340us       5.340us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.747ms
-Self CUDA time total: 101.437us
+Self CPU time total: 3.094ms
+Self CUDA time total: 101.599us
 
 
 
@@ -4414,27 +4196,27 @@ PROFILE TRACE: torch_eager | cuda_B1_S512_H32_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     929.433us       768.61%     929.433us     929.433us             1  
-                                            torch_eager        10.84%     297.701us        99.80%       2.742ms       2.742ms       0.000us         0.00%     122.716us     122.716us             1  
-                                              aten::mul         5.42%     148.850us         9.41%     258.632us      10.776us      62.014us        51.28%      62.014us       2.584us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      62.014us        51.28%      62.014us       2.584us            24  
-                                            aten::copy_         3.77%     103.682us        70.14%       1.927ms     107.043us      39.328us        32.52%      41.120us       2.284us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      28.800us        23.82%      28.800us       2.400us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      19.582us        16.19%      19.582us       1.632us            12  
-                                            aten::clone         0.88%      24.131us        67.45%       1.853ms     308.828us       0.000us         0.00%      12.320us       2.053us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      10.528us         8.71%      10.528us       1.755us             6  
-                                              aten::sub         1.29%      35.482us         2.16%      59.433us       9.905us       9.792us         8.10%       9.792us       1.632us             6  
-                                              aten::add         1.13%      31.104us         1.94%      53.172us       8.862us       9.790us         8.10%       9.790us       1.632us             6  
-                                Activity Buffer Request        52.94%       1.454ms        52.94%       1.454ms       1.454ms       1.792us         1.48%       1.792us       1.792us             1  
-                                    aten::empty_strided         1.18%      32.542us         1.18%      32.542us       5.424us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        11.19%     307.407us        11.19%     307.407us      51.235us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         2.56%      70.268us         3.25%      89.361us       3.723us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.70%      19.093us         0.70%      19.093us       0.796us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         7.91%     217.262us         7.91%     217.262us       4.526us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.20%       5.370us         0.20%       5.370us       5.370us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     943.963us       782.64%     943.963us     943.963us             1  
+                                            torch_eager         9.85%     301.136us        99.82%       3.051ms       3.051ms       0.000us         0.00%     122.468us     122.468us             1  
+                                              aten::mul         5.14%     157.189us         8.67%     264.988us      11.041us      61.985us        51.39%      61.985us       2.583us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      61.985us        51.39%      61.985us       2.583us            24  
+                                            aten::copy_         3.53%     107.981us        72.58%       2.218ms     123.247us      39.362us        32.64%      41.218us       2.290us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      28.802us        23.88%      28.802us       2.400us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      19.265us        15.97%      19.265us       1.605us            12  
+                                            aten::clone         0.97%      29.629us        70.14%       2.144ms     357.356us       0.000us         0.00%      12.416us       2.069us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      10.560us         8.76%      10.560us       1.760us             6  
+                                              aten::add         1.14%      34.930us         1.90%      58.161us       9.693us       9.633us         7.99%       9.633us       1.606us             6  
+                                              aten::sub         1.25%      38.210us         2.05%      62.510us      10.418us       9.632us         7.99%       9.632us       1.605us             6  
+                                Activity Buffer Request        57.00%       1.742ms        57.00%       1.742ms       1.742ms       1.856us         1.54%       1.856us       1.856us             1  
+                                    aten::empty_strided         1.01%      31.021us         1.01%      31.021us       5.170us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        10.03%     306.454us        10.03%     306.454us      51.076us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         2.23%      68.242us         2.79%      85.430us       3.560us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.56%      17.188us         0.56%      17.188us       0.716us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         7.10%     217.131us         7.10%     217.131us       4.524us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.18%       5.390us         0.18%       5.390us       5.390us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.747ms
-Self CUDA time total: 120.924us
+Self CPU time total: 3.057ms
+Self CUDA time total: 120.612us
 
 
 
@@ -4444,27 +4226,27 @@ PROFILE TRACE: torch_eager | cuda_B1_S512_H32_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     942.082us       549.37%     942.082us     942.082us             1  
-                                            torch_eager        20.10%     308.752us        99.67%       1.531ms       1.531ms       0.000us         0.00%     174.365us     174.365us             1  
-                                              aten::mul         9.79%     150.414us        16.96%     260.516us      10.855us      89.056us        51.93%      89.056us       3.711us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      89.056us        51.93%      89.056us       3.711us            24  
-                                            aten::copy_         6.91%     106.224us        46.22%     710.060us      39.448us      57.503us        33.53%      60.383us       3.355us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      40.511us        23.62%      40.511us       3.376us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      24.926us        14.54%      24.926us       2.077us            12  
-                                            aten::clone         1.37%      21.029us        40.87%     627.796us     104.633us       0.000us         0.00%      19.872us       3.312us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      16.992us         9.91%      16.992us       2.832us             6  
-                                              aten::sub         2.26%      34.730us         3.83%      58.781us       9.797us      12.479us         7.28%      12.479us       2.080us             6  
-                                              aten::add         2.00%      30.683us         3.45%      52.973us       8.829us      12.447us         7.26%      12.447us       2.075us             6  
-                                Activity Buffer Request        16.15%     248.056us        16.15%     248.056us     248.056us       2.880us         1.68%       2.880us       2.880us             1  
-                                    aten::empty_strided         2.04%      31.392us         2.04%      31.392us       5.232us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        18.97%     291.479us        18.97%     291.479us      48.580us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.49%      68.986us         5.70%      87.586us       3.649us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         1.21%      18.600us         1.21%      18.600us       0.775us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel        14.37%     220.744us        14.37%     220.744us       4.599us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.33%       5.080us         0.33%       5.080us       5.080us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     928.245us       538.18%     928.245us     928.245us             1  
+                                            torch_eager        19.14%     292.425us        99.66%       1.523ms       1.523ms       0.000us         0.00%     175.325us     175.325us             1  
+                                              aten::mul        10.16%     155.270us        17.20%     262.742us      10.948us      89.630us        51.97%      89.630us       3.735us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      89.630us        51.97%      89.630us       3.735us            24  
+                                            aten::copy_         6.82%     104.170us        46.76%     714.441us      39.691us      57.920us        33.58%      60.768us       3.376us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      40.768us        23.64%      40.768us       3.397us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      24.927us        14.45%      24.927us       2.077us            12  
+                                            aten::clone         1.34%      20.471us        41.24%     630.180us     105.030us       0.000us         0.00%      20.000us       3.333us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      17.152us         9.94%      17.152us       2.859us             6  
+                                              aten::sub         2.56%      39.072us         4.07%      62.112us      10.352us      12.480us         7.24%      12.480us       2.080us             6  
+                                              aten::add         2.20%      33.610us         3.65%      55.810us       9.302us      12.447us         7.22%      12.447us       2.075us             6  
+                                Activity Buffer Request        16.69%     254.944us        16.69%     254.944us     254.944us       2.848us         1.65%       2.848us       2.848us             1  
+                                    aten::empty_strided         2.04%      31.181us         2.04%      31.181us       5.197us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        19.06%     291.294us        19.06%     291.294us      48.549us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         4.37%      66.700us         5.47%      83.522us       3.480us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         1.10%      16.822us         1.10%      16.822us       0.701us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel        14.19%     216.745us        14.19%     216.745us       4.516us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.34%       5.240us         0.34%       5.240us       5.240us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.536ms
-Self CUDA time total: 171.485us
+Self CPU time total: 1.528ms
+Self CUDA time total: 172.477us
 
 
 
@@ -4474,27 +4256,27 @@ PROFILE TRACE: torch_eager | cuda_B1_S2048_H8_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     906.096us       748.31%     906.096us     906.096us             1  
-                                            torch_eager        18.91%     280.775us        99.66%       1.480ms       1.480ms       0.000us         0.00%     122.910us     122.910us             1  
-                                              aten::mul        10.01%     148.664us        17.45%     259.167us      10.799us      62.174us        51.35%      62.174us       2.591us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      62.174us        51.35%      62.174us       2.591us            24  
-                                            aten::copy_         6.88%     102.100us        46.50%     690.526us      38.363us      39.392us        32.53%      41.216us       2.290us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      28.800us        23.78%      28.800us       2.400us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      19.520us        16.12%      19.520us       1.627us            12  
-                                            aten::clone         1.45%      21.579us        41.36%     614.176us     102.363us       0.000us         0.00%      12.416us       2.069us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      10.592us         8.75%      10.592us       1.765us             6  
-                                              aten::sub         2.32%      34.432us         3.90%      57.973us       9.662us       9.760us         8.06%       9.760us       1.627us             6  
-                                              aten::add         2.12%      31.432us         3.61%      53.552us       8.925us       9.760us         8.06%       9.760us       1.627us             6  
-                                Activity Buffer Request        17.05%     253.136us        17.05%     253.136us     253.136us       1.824us         1.51%       1.824us       1.824us             1  
-                                    aten::empty_strided         2.06%      30.533us         2.06%      30.533us       5.089us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        18.50%     274.717us        18.50%     274.717us      45.786us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.53%      67.311us         5.78%      85.812us       3.575us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         1.25%      18.501us         1.25%      18.501us       0.771us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel        14.60%     216.737us        14.60%     216.737us       4.515us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.34%       4.981us         0.34%       4.981us       4.981us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     923.899us       767.46%     923.899us     923.899us             1  
+                                            torch_eager        19.14%     287.798us        99.65%       1.499ms       1.499ms       0.000us         0.00%     122.144us     122.144us             1  
+                                              aten::mul        10.49%     157.698us        17.70%     266.255us      11.094us      61.982us        51.49%      61.982us       2.583us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      61.982us        51.49%      61.982us       2.583us            24  
+                                            aten::copy_         6.99%     105.118us        46.36%     697.187us      38.733us      39.264us        32.62%      41.024us       2.279us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      28.832us        23.95%      28.832us       2.403us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      19.138us        15.90%      19.138us       1.595us            12  
+                                            aten::clone         1.32%      19.822us        40.79%     613.519us     102.253us       0.000us         0.00%      12.192us       2.032us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      10.432us         8.67%      10.432us       1.739us             6  
+                                              aten::sub         2.51%      37.801us         4.08%      61.341us      10.224us       9.570us         7.95%       9.570us       1.595us             6  
+                                              aten::add         2.16%      32.471us         3.63%      54.661us       9.110us       9.568us         7.95%       9.568us       1.595us             6  
+                                Activity Buffer Request        16.71%     251.314us        16.71%     251.314us     251.314us       1.760us         1.46%       1.760us       1.760us             1  
+                                    aten::empty_strided         2.00%      30.060us         2.00%      30.060us       5.010us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        18.58%     279.394us        18.58%     279.394us      46.566us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         4.31%      64.750us         5.43%      81.609us       3.400us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         1.12%      16.859us         1.12%      16.859us       0.702us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel        14.34%     215.648us        14.34%     215.648us       4.493us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.35%       5.220us         0.35%       5.220us       5.220us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.485ms
-Self CUDA time total: 121.086us
+Self CPU time total: 1.504ms
+Self CUDA time total: 120.384us
 
 
 
@@ -4504,27 +4286,27 @@ PROFILE TRACE: torch_eager | cuda_B1_S2048_H8_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     954.294us       555.32%     954.294us     954.294us             1  
-                                            torch_eager        11.21%     307.269us        99.82%       2.735ms       2.735ms       0.000us         0.00%     174.694us     174.694us             1  
-                                              aten::mul         5.59%     153.258us         9.69%     265.580us      11.066us      89.476us        52.07%      89.476us       3.728us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      89.476us        52.07%      89.476us       3.728us            24  
-                                            aten::copy_         3.78%     103.631us        69.46%       1.903ms     105.735us      57.505us        33.46%      60.353us       3.353us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      40.545us        23.59%      40.545us       3.379us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      24.865us        14.47%      24.865us       2.072us            12  
-                                            aten::clone         0.89%      24.491us        66.72%       1.828ms     304.733us       0.000us         0.00%      19.808us       3.301us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      16.960us         9.87%      16.960us       2.827us             6  
-                                              aten::add         1.15%      31.480us         1.96%      53.761us       8.960us      12.448us         7.24%      12.448us       2.075us             6  
-                                              aten::sub         1.31%      35.801us         2.17%      59.462us       9.910us      12.417us         7.23%      12.417us       2.070us             6  
-                                Activity Buffer Request        53.91%       1.477ms        53.91%       1.477ms       1.477ms       2.848us         1.66%       2.848us       2.848us             1  
-                                    aten::empty_strided         1.13%      30.930us         1.13%      30.930us       5.155us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         9.51%     260.666us         9.51%     260.666us      43.444us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         2.58%      70.761us         3.30%      90.449us       3.769us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.72%      19.688us         0.72%      19.688us       0.820us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         8.03%     220.086us         8.03%     220.086us       4.585us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.18%       5.030us         0.18%       5.030us       5.030us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     943.259us       547.68%     943.259us     943.259us             1  
+                                            torch_eager         9.82%     293.988us        99.82%       2.988ms       2.988ms       0.000us         0.00%     175.075us     175.075us             1  
+                                              aten::mul         5.17%     154.631us         8.81%     263.742us      10.989us      89.536us        51.99%      89.536us       3.731us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      89.536us        51.99%      89.536us       3.731us            24  
+                                            aten::copy_         3.66%     109.570us        72.53%       2.171ms     120.590us      57.795us        33.56%      60.643us       3.369us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      40.835us        23.71%      40.835us       3.403us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      24.896us        14.46%      24.896us       2.075us            12  
+                                            aten::clone         0.74%      22.030us        69.74%       2.087ms     347.874us       0.000us         0.00%      19.808us       3.301us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      16.960us         9.85%      16.960us       2.827us             6  
+                                              aten::add         1.10%      32.890us         1.87%      55.840us       9.307us      12.481us         7.25%      12.481us       2.080us             6  
+                                              aten::sub         1.28%      38.273us         2.11%      63.142us      10.524us      12.415us         7.21%      12.415us       2.069us             6  
+                                Activity Buffer Request        58.02%       1.736ms        58.02%       1.736ms       1.736ms       2.848us         1.65%       2.848us       2.848us             1  
+                                    aten::empty_strided         1.00%      30.050us         1.00%      30.050us       5.008us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         8.83%     264.325us         8.83%     264.325us      44.054us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         2.36%      70.650us         2.95%      88.161us       3.673us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.59%      17.511us         0.59%      17.511us       0.730us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         7.26%     217.282us         7.26%     217.282us       4.527us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.18%       5.289us         0.18%       5.289us       5.289us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.740ms
-Self CUDA time total: 171.846us
+Self CPU time total: 2.993ms
+Self CUDA time total: 172.227us
 
 
 
@@ -4534,27 +4316,27 @@ PROFILE TRACE: torch_eager | cuda_B1_S2048_H32_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     917.943us       324.46%     917.943us     917.943us             1  
-                                            torch_eager        18.90%     277.703us        99.65%       1.464ms       1.464ms       0.000us         0.00%     301.376us     301.376us             1  
-                                              aten::mul         9.84%     144.586us        17.44%     256.139us      10.672us     132.736us        46.92%     132.736us       5.531us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     132.736us        46.92%     132.736us       5.531us            24  
-                                            aten::copy_         7.06%     103.765us        45.63%     670.307us      37.239us     109.119us        38.57%     127.583us       7.088us            18  
-                                            aten::clone         1.58%      23.262us        40.78%     599.096us      99.849us       0.000us         0.00%      70.336us      11.723us             6  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      57.247us        20.23%      57.247us       4.771us            12  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      51.872us        18.34%      51.872us       8.645us             6  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      41.057us        14.51%      41.057us       3.421us            12  
-                                              aten::add         2.13%      31.271us         3.65%      53.632us       8.939us      20.545us         7.26%      20.545us       3.424us             6  
-                                              aten::sub         2.39%      35.109us         4.06%      59.711us       9.952us      20.512us         7.25%      20.512us       3.419us             6  
-                                Activity Buffer Request        16.07%     236.106us        16.07%     236.106us     236.106us      18.464us         6.53%      18.464us      18.464us             1  
-                                    aten::empty_strided         2.35%      34.500us         2.35%      34.500us       5.750us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        18.36%     269.767us        18.36%     269.767us      44.961us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.78%      70.183us         6.04%      88.753us       3.698us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         1.26%      18.570us         1.26%      18.570us       0.774us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel        14.92%     219.185us        14.92%     219.185us       4.566us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.35%       5.090us         0.35%       5.090us       5.090us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     922.006us       322.11%     922.006us     922.006us             1  
+                                            torch_eager        19.42%     278.764us        99.64%       1.431ms       1.431ms       0.000us         0.00%     304.543us     304.543us             1  
+                                              aten::mul        10.68%     153.400us        18.09%     259.803us      10.825us     134.112us        46.85%     134.112us       5.588us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     134.112us        46.85%     134.112us       5.588us            24  
+                                            aten::copy_         7.65%     109.831us        44.83%     643.670us      35.759us     111.232us        38.86%     129.536us       7.196us            18  
+                                            aten::clone         1.43%      20.539us        38.82%     557.349us      92.892us       0.000us         0.00%      72.160us      12.027us             6  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      57.376us        20.04%      57.376us       4.781us            12  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      53.856us        18.82%      53.856us       8.976us             6  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      40.895us        14.29%      40.895us       3.408us            12  
+                                              aten::sub         2.68%      38.501us         4.30%      61.692us      10.282us      20.543us         7.18%      20.543us       3.424us             6  
+                                              aten::add         2.29%      32.829us         3.81%      54.730us       9.122us      20.352us         7.11%      20.352us       3.392us             6  
+                                Activity Buffer Request        16.08%     230.904us        16.08%     230.904us     230.904us      18.304us         6.39%      18.304us      18.304us             1  
+                                    aten::empty_strided         2.06%      29.601us         2.06%      29.601us       4.933us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        16.83%     241.674us        16.83%     241.674us      40.279us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         4.51%      64.754us         5.69%      81.743us       3.406us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         1.18%      16.989us         1.18%      16.989us       0.708us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel        14.82%     212.756us        14.82%     212.756us       4.432us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.36%       5.240us         0.36%       5.240us       5.240us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.469ms
-Self CUDA time total: 282.912us
+Self CPU time total: 1.436ms
+Self CUDA time total: 286.239us
 
 
 
@@ -4564,27 +4346,27 @@ PROFILE TRACE: torch_eager | cuda_B1_S2048_H32_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     931.832us       165.35%     931.832us     931.832us             1  
-                                            torch_eager        19.27%     283.137us        99.64%       1.464ms       1.464ms       0.000us         0.00%     587.261us     587.261us             1  
-                                            aten::copy_         7.04%     103.435us        44.90%     659.587us      36.644us     272.511us        48.36%     296.223us      16.457us            18  
-                                              aten::mul        10.36%     152.225us        18.18%     267.110us      11.130us     224.829us        39.90%     224.829us       9.368us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     224.829us        39.90%     224.829us       9.368us            24  
-                                            aten::clone         1.47%      21.550us        39.53%     580.673us      96.779us       0.000us         0.00%     205.855us      34.309us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us     182.143us        32.32%     182.143us      30.357us             6  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      90.368us        16.04%      90.368us       7.531us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      66.209us        11.75%      66.209us       5.517us            12  
-                                              aten::sub         2.39%      35.041us         4.07%      59.831us       9.972us      33.760us         5.99%      33.760us       5.627us             6  
-                                              aten::add         2.15%      31.591us         3.70%      54.401us       9.067us      32.449us         5.76%      32.449us       5.408us             6  
-                                Activity Buffer Request        16.23%     238.406us        16.23%     238.406us     238.406us      23.712us         4.21%      23.712us      23.712us             1  
-                                    aten::empty_strided         2.04%      29.960us         2.04%      29.960us       4.993us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        17.39%     255.475us        17.39%     255.475us      42.579us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.73%      69.441us         6.00%      88.092us       3.670us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         1.27%      18.651us         1.27%      18.651us       0.777us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel        15.30%     224.756us        15.30%     224.756us       4.682us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.36%       5.280us         0.36%       5.280us       5.280us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     970.352us       169.72%     970.352us     970.352us             1  
+                                            torch_eager        19.50%     289.365us        99.64%       1.478ms       1.478ms       0.000us         0.00%     595.480us     595.480us             1  
+                                            aten::copy_         7.05%     104.551us        43.31%     642.598us      35.700us     273.596us        47.85%     297.340us      16.519us            18  
+                                              aten::mul        11.63%     172.532us        19.46%     288.666us      12.028us     232.863us        40.73%     232.863us       9.703us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     232.863us        40.73%     232.863us       9.703us            24  
+                                            aten::clone         1.45%      21.521us        37.67%     558.878us      93.146us       0.000us         0.00%     205.949us      34.325us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us     182.205us        31.87%     182.205us      30.367us             6  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      91.391us        15.98%      91.391us       7.616us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      65.277us        11.42%      65.277us       5.440us            12  
+                                              aten::sub         2.70%      40.111us         4.36%      64.701us      10.784us      32.768us         5.73%      32.768us       5.461us             6  
+                                              aten::add         2.31%      34.320us         3.88%      57.510us       9.585us      32.509us         5.69%      32.509us       5.418us             6  
+                                Activity Buffer Request        17.48%     259.324us        17.48%     259.324us     259.324us      23.744us         4.15%      23.744us      23.744us             1  
+                                    aten::empty_strided         2.00%      29.720us         2.00%      29.720us       4.953us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        14.68%     217.742us        14.68%     217.742us      36.290us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         4.50%      66.694us         5.68%      84.252us       3.511us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         1.18%      17.558us         1.18%      17.558us       0.732us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel        15.16%     224.895us        15.16%     224.895us       4.685us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.36%       5.340us         0.36%       5.340us       5.340us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.469ms
-Self CUDA time total: 563.549us
+Self CPU time total: 1.484ms
+Self CUDA time total: 571.736us
 
 
 
@@ -4594,27 +4376,27 @@ PROFILE TRACE: torch_eager | cuda_B2_S128_H8_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     948.157us      1025.28%     948.157us     948.157us             1  
-                                            torch_eager        11.31%     303.890us        99.80%       2.681ms       2.681ms       0.000us         0.00%      93.597us      93.597us             1  
-                                              aten::mul         5.70%     153.152us         9.94%     267.009us      11.125us      49.696us        53.74%      49.696us       2.071us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      49.696us        53.74%      49.696us       2.071us            24  
-                                            aten::copy_         3.75%     100.883us        69.10%       1.857ms     103.143us      29.375us        31.76%      30.494us       1.694us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      22.592us        24.43%      22.592us       1.883us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      13.407us        14.50%      13.407us       1.117us            12  
-                                            aten::clone         0.85%      22.792us        66.32%       1.782ms     296.986us       0.000us         0.00%       7.902us       1.317us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       6.783us         7.33%       6.783us       1.131us             6  
-                                              aten::sub         1.31%      35.191us         2.17%      58.341us       9.724us       6.720us         7.27%       6.720us       1.120us             6  
-                                              aten::add         1.15%      30.820us         1.98%      53.181us       8.863us       6.687us         7.23%       6.687us       1.114us             6  
-                                Activity Buffer Request        53.95%       1.449ms        53.95%       1.449ms       1.449ms       1.119us         1.21%       1.119us       1.119us             1  
-                                    aten::empty_strided         1.15%      30.830us         1.15%      30.830us       5.138us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         9.13%     245.326us         9.13%     245.326us      40.888us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         2.61%      70.171us         3.31%      88.830us       3.701us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.69%      18.659us         0.69%      18.659us       0.777us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         8.20%     220.298us         8.20%     220.298us       4.590us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.20%       5.250us         0.20%       5.250us       5.250us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     936.155us      1011.59%     936.155us     936.155us             1  
+                                            torch_eager         9.66%     281.404us        99.82%       2.908ms       2.908ms       0.000us         0.00%      93.663us      93.663us             1  
+                                              aten::mul         5.48%     159.764us         9.36%     272.564us      11.357us      49.568us        53.56%      49.568us       2.065us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      49.568us        53.56%      49.568us       2.065us            24  
+                                            aten::copy_         3.70%     107.711us        72.25%       2.105ms     116.944us      29.407us        31.78%      30.527us       1.696us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      22.591us        24.41%      22.591us       1.883us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      13.568us        14.66%      13.568us       1.131us            12  
+                                            aten::clone         0.74%      21.551us        69.34%       2.020ms     336.695us       0.000us         0.00%       7.936us       1.323us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       6.816us         7.37%       6.816us       1.136us             6  
+                                              aten::sub         1.31%      38.128us         2.13%      61.912us      10.319us       6.815us         7.36%       6.815us       1.136us             6  
+                                              aten::add         1.08%      31.450us         1.84%      53.600us       8.933us       6.753us         7.30%       6.753us       1.126us             6  
+                                Activity Buffer Request        59.75%       1.741ms        59.75%       1.741ms       1.741ms       1.120us         1.21%       1.120us       1.120us             1  
+                                    aten::empty_strided         1.04%      30.170us         1.04%      30.170us       5.028us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         6.73%     196.044us         6.73%     196.044us      32.674us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         2.24%      65.300us         2.82%      82.022us       3.418us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.57%      16.722us         0.57%      16.722us       0.697us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         7.53%     219.305us         7.53%     219.305us       4.569us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.18%       5.160us         0.18%       5.160us       5.160us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.687ms
-Self CUDA time total: 92.478us
+Self CPU time total: 2.913ms
+Self CUDA time total: 92.543us
 
 
 
@@ -4624,27 +4406,27 @@ PROFILE TRACE: torch_eager | cuda_B2_S128_H8_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     924.823us       959.84%     924.823us     924.823us             1  
-                                            torch_eager        19.47%     279.525us        99.65%       1.430ms       1.430ms       0.000us         0.00%      97.664us      97.664us             1  
-                                              aten::mul        10.27%     147.364us        19.04%     273.370us      11.390us      51.165us        53.10%      51.165us       2.132us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      51.165us        53.10%      51.165us       2.132us            24  
-                                            aten::copy_         7.14%     102.519us        43.74%     627.869us      34.882us      30.913us        32.08%      32.225us       1.790us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      23.040us        23.91%      23.040us       1.920us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      14.274us        14.81%      14.274us       1.189us            12  
-                                            aten::clone         1.45%      20.838us        38.33%     550.144us      91.691us       0.000us         0.00%       9.185us       1.531us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.873us         8.17%       7.873us       1.312us             6  
-                                              aten::add         2.18%      31.279us         3.75%      53.900us       8.983us       7.137us         7.41%       7.137us       1.189us             6  
-                                              aten::sub         2.45%      35.101us         4.11%      58.931us       9.822us       7.137us         7.41%       7.137us       1.189us             6  
-                                Activity Buffer Request        15.34%     220.215us        15.34%     220.215us     220.215us       1.312us         1.36%       1.312us       1.312us             1  
-                                    aten::empty_strided         2.15%      30.891us         2.15%      30.891us       5.148us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        17.11%     245.545us        17.11%     245.545us      40.924us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.62%      66.322us         5.93%      85.082us       3.545us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         1.31%      18.760us         1.31%      18.760us       0.782us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel        16.17%     232.047us        16.17%     232.047us       4.834us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.35%       5.041us         0.35%       5.041us       5.041us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     918.262us       956.86%     918.262us     918.262us             1  
+                                            torch_eager        20.02%     274.163us        99.62%       1.364ms       1.364ms       0.000us         0.00%      97.279us      97.279us             1  
+                                              aten::mul        11.52%     157.766us        19.39%     265.646us      11.069us      51.167us        53.32%      51.167us       2.132us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      51.167us        53.32%      51.167us       2.132us            24  
+                                            aten::copy_         7.76%     106.268us        42.02%     575.576us      31.976us      30.720us        32.01%      32.033us       1.780us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      22.912us        23.88%      22.912us       1.909us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      14.079us        14.67%      14.079us       1.173us            12  
+                                            aten::clone         1.48%      20.322us        36.02%     493.298us      82.216us       0.000us         0.00%       9.121us       1.520us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.808us         8.14%       7.808us       1.301us             6  
+                                              aten::sub         2.81%      38.541us         4.49%      61.481us      10.247us       7.072us         7.37%       7.072us       1.179us             6  
+                                              aten::add         2.42%      33.131us         4.04%      55.302us       9.217us       7.007us         7.30%       7.007us       1.168us             6  
+                                Activity Buffer Request        16.17%     221.544us        16.17%     221.544us     221.544us       1.313us         1.37%       1.313us       1.313us             1  
+                                    aten::empty_strided         2.33%      31.950us         2.33%      31.950us       5.325us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        13.69%     187.513us        13.69%     187.513us      31.252us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         4.61%      63.101us         5.84%      79.961us       3.332us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         1.23%      16.860us         1.23%      16.860us       0.702us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel        15.57%     213.242us        15.57%     213.242us       4.443us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.38%       5.270us         0.38%       5.270us       5.270us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.435ms
-Self CUDA time total: 96.352us
+Self CPU time total: 1.370ms
+Self CUDA time total: 95.966us
 
 
 
@@ -4654,27 +4436,27 @@ PROFILE TRACE: torch_eager | cuda_B2_S128_H32_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     915.886us       880.13%     915.886us     915.886us             1  
-                                            torch_eager        19.45%     278.057us        99.65%       1.425ms       1.425ms       0.000us         0.00%     105.374us     105.374us             1  
-                                              aten::mul        10.44%     149.250us        18.09%     258.645us      10.777us      55.325us        53.17%      55.325us       2.305us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      55.325us        53.17%      55.325us       2.305us            24  
-                                            aten::copy_         7.22%     103.283us        44.53%     636.707us      35.373us      32.575us        31.30%      33.887us       1.883us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      24.703us        23.74%      24.703us       2.059us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      16.162us        15.53%      16.162us       1.347us            12  
-                                            aten::clone         1.49%      21.291us        38.97%     557.204us      92.867us       0.000us         0.00%       9.184us       1.531us             6  
-                                              aten::sub         2.42%      34.610us         4.09%      58.491us       9.749us       8.096us         7.78%       8.096us       1.349us             6  
-                                              aten::add         2.18%      31.210us         3.76%      53.710us       8.952us       8.066us         7.75%       8.066us       1.344us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.872us         7.56%       7.872us       1.312us             6  
-                                Activity Buffer Request        15.88%     227.005us        15.88%     227.005us     227.005us       1.312us         1.26%       1.312us       1.312us             1  
-                                    aten::empty_strided         2.12%      30.341us         2.12%      30.341us       5.057us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        17.11%     244.667us        17.11%     244.667us      40.778us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.81%      68.755us         6.12%      87.484us       3.645us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         1.31%      18.729us         1.31%      18.729us       0.780us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel        15.21%     217.528us        15.21%     217.528us       4.532us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.35%       5.011us         0.35%       5.011us       5.011us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     929.528us       892.96%     929.528us     929.528us             1  
+                                            torch_eager        20.25%     278.528us        99.63%       1.370ms       1.370ms       0.000us         0.00%     105.439us     105.439us             1  
+                                              aten::mul        11.59%     159.422us        19.60%     269.583us      11.233us      55.326us        53.15%      55.326us       2.305us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      55.326us        53.15%      55.326us       2.305us            24  
+                                            aten::copy_         7.64%     105.130us        41.59%     572.021us      31.779us      32.351us        31.08%      33.695us       1.872us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      24.607us        23.64%      24.607us       2.051us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      16.418us        15.77%      16.418us       1.368us            12  
+                                            aten::clone         1.49%      20.431us        35.49%     488.057us      81.343us       0.000us         0.00%       9.088us       1.515us             6  
+                                              aten::sub         2.60%      35.723us         4.36%      59.953us       9.992us       8.258us         7.93%       8.258us       1.376us             6  
+                                              aten::add         2.46%      33.770us         4.07%      55.940us       9.323us       8.160us         7.84%       8.160us       1.360us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.744us         7.44%       7.744us       1.291us             6  
+                                Activity Buffer Request        16.10%     221.454us        16.10%     221.454us     221.454us       1.344us         1.29%       1.344us       1.344us             1  
+                                    aten::empty_strided         2.25%      30.990us         2.25%      30.990us       5.165us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        13.30%     182.863us        13.30%     182.863us      30.477us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         4.81%      66.212us         6.02%      82.825us       3.451us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         1.21%      16.613us         1.21%      16.613us       0.692us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel        15.93%     219.135us        15.93%     219.135us       4.565us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.37%       5.090us         0.37%       5.090us       5.090us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.430ms
-Self CUDA time total: 104.062us
+Self CPU time total: 1.375ms
+Self CUDA time total: 104.095us
 
 
 
@@ -4684,27 +4466,27 @@ PROFILE TRACE: torch_eager | cuda_B2_S128_H32_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     926.227us       747.17%     926.227us     926.227us             1  
-                                            torch_eager        10.87%     288.725us        99.79%       2.651ms       2.651ms       0.000us         0.00%     125.755us     125.755us             1  
-                                              aten::mul         5.66%     150.315us         9.84%     261.507us      10.896us      65.119us        52.53%      65.119us       2.713us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      65.119us        52.53%      65.119us       2.713us            24  
-                                            aten::copy_         3.77%     100.152us        69.45%       1.845ms     102.495us      39.455us        31.83%      41.246us       2.291us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      28.928us        23.34%      28.928us       2.411us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      19.390us        15.64%      19.390us       1.616us            12  
-                                            aten::clone         0.89%      23.522us        66.73%       1.773ms     295.426us       0.000us         0.00%      12.318us       2.053us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      10.527us         8.49%      10.527us       1.755us             6  
-                                              aten::add         1.16%      30.840us         2.00%      53.221us       8.870us       9.759us         7.87%       9.759us       1.626us             6  
-                                              aten::sub         1.31%      34.853us         2.22%      58.863us       9.811us       9.631us         7.77%       9.631us       1.605us             6  
-                                Activity Buffer Request        54.50%       1.448ms        54.50%       1.448ms       1.448ms       1.791us         1.44%       1.791us       1.791us             1  
-                                    aten::empty_strided         1.16%      30.740us         1.16%      30.740us       5.123us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         8.93%     237.245us         8.93%     237.245us      39.541us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         2.65%      70.502us         3.36%      89.223us       3.718us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.70%      18.721us         0.70%      18.721us       0.780us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         8.19%     217.516us         8.19%     217.516us       4.532us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.21%       5.590us         0.21%       5.590us       5.590us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     943.134us       762.57%     943.134us     943.134us             1  
+                                            torch_eager         9.91%     288.756us        99.81%       2.907ms       2.907ms       0.000us         0.00%     125.503us     125.503us             1  
+                                              aten::mul         5.47%     159.428us         9.14%     266.247us      11.094us      65.088us        52.63%      65.088us       2.712us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      65.088us        52.63%      65.088us       2.712us            24  
+                                            aten::copy_         3.82%     111.411us        72.08%       2.100ms     116.650us      39.391us        31.85%      41.215us       2.290us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      28.799us        23.29%      28.799us       2.400us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      19.200us        15.52%      19.200us       1.600us            12  
+                                            aten::clone         0.71%      20.821us        69.14%       2.014ms     335.649us       0.000us         0.00%      12.416us       2.069us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      10.592us         8.56%      10.592us       1.765us             6  
+                                              aten::sub         1.35%      39.440us         2.20%      63.980us      10.663us       9.632us         7.79%       9.632us       1.605us             6  
+                                              aten::add         1.16%      33.802us         1.92%      55.961us       9.327us       9.568us         7.74%       9.568us       1.595us             6  
+                                Activity Buffer Request        59.81%       1.742ms        59.81%       1.742ms       1.742ms       1.824us         1.47%       1.824us       1.824us             1  
+                                    aten::empty_strided         1.06%      30.871us         1.06%      30.871us       5.145us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         6.32%     184.202us         6.32%     184.202us      30.700us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         2.20%      64.120us         2.78%      80.888us       3.370us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.58%      16.768us         0.58%      16.768us       0.699us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         7.39%     215.298us         7.39%     215.298us       4.485us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.19%       5.660us         0.19%       5.660us       5.660us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.656ms
-Self CUDA time total: 123.964us
+Self CPU time total: 2.913ms
+Self CUDA time total: 123.679us
 
 
 
@@ -4714,27 +4496,27 @@ PROFILE TRACE: torch_eager | cuda_B2_S512_H8_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     925.174us       889.06%     925.174us     925.174us             1  
-                                            torch_eager        20.56%     293.458us        99.64%       1.423ms       1.423ms       0.000us         0.00%     105.438us     105.438us             1  
-                                              aten::mul        10.42%     148.708us        18.32%     261.500us      10.896us      55.264us        53.11%      55.264us       2.303us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      55.264us        53.11%      55.264us       2.303us            24  
-                                            aten::copy_         7.08%     101.081us        43.33%     618.656us      34.370us      32.670us        31.39%      34.046us       1.891us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      24.703us        23.74%      24.703us       2.059us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      16.128us        15.50%      16.128us       1.344us            12  
-                                            aten::clone         1.49%      21.220us        38.03%     542.913us      90.485us       0.000us         0.00%       9.343us       1.557us             6  
-                                              aten::sub         2.38%      33.992us         4.03%      57.481us       9.580us       8.064us         7.75%       8.064us       1.344us             6  
-                                              aten::add         2.21%      31.510us         3.80%      54.250us       9.042us       8.064us         7.75%       8.064us       1.344us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.967us         7.66%       7.967us       1.328us             6  
-                                Activity Buffer Request        14.99%     214.036us        14.99%     214.036us     214.036us       1.376us         1.32%       1.376us       1.376us             1  
-                                    aten::empty_strided         2.13%      30.461us         2.13%      30.461us       5.077us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        17.05%     243.458us        17.05%     243.458us      40.576us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.68%      66.831us         5.99%      85.500us       3.562us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         1.31%      18.669us         1.31%      18.669us       0.778us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel        15.35%     219.102us        15.35%     219.102us       4.565us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.36%       5.101us         0.36%       5.101us       5.101us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     926.451us       888.37%     926.451us     926.451us             1  
+                                            torch_eager        20.56%     277.090us        99.61%       1.342ms       1.342ms       0.000us         0.00%     105.599us     105.599us             1  
+                                              aten::mul        11.75%     158.363us        19.88%     267.883us      11.162us      55.423us        53.14%      55.423us       2.309us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      55.423us        53.14%      55.423us       2.309us            24  
+                                            aten::copy_         7.94%     107.035us        40.62%     547.383us      30.410us      32.352us        31.02%      33.664us       1.870us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      24.640us        23.63%      24.640us       2.053us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      16.512us        15.83%      16.512us       1.376us            12  
+                                            aten::clone         1.47%      19.840us        34.29%     462.099us      77.016us       0.000us         0.00%       9.024us       1.504us             6  
+                                              aten::sub         2.93%      39.461us         4.68%      63.054us      10.509us       8.287us         7.95%       8.287us       1.381us             6  
+                                              aten::add         2.50%      33.680us         4.16%      56.100us       9.350us       8.225us         7.89%       8.225us       1.371us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us       7.712us         7.39%       7.712us       1.285us             6  
+                                Activity Buffer Request        14.74%     198.654us        14.74%     198.654us     198.654us       1.312us         1.26%       1.312us       1.312us             1  
+                                    aten::empty_strided         2.26%      30.481us         2.26%      30.481us       5.080us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        13.39%     180.523us        13.39%     180.523us      30.087us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         4.73%      63.708us         5.98%      80.630us       3.360us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         1.26%      16.922us         1.26%      16.922us       0.705us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel        16.08%     216.704us        16.08%     216.704us       4.515us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.39%       5.231us         0.39%       5.231us       5.231us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.428ms
-Self CUDA time total: 104.062us
+Self CPU time total: 1.348ms
+Self CUDA time total: 104.287us
 
 
 
@@ -4744,27 +4526,27 @@ PROFILE TRACE: torch_eager | cuda_B2_S512_H8_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     901.909us       727.35%     901.909us     901.909us             1  
-                                            torch_eager        19.87%     274.810us        99.60%       1.377ms       1.377ms       0.000us         0.00%     125.791us     125.791us             1  
-                                              aten::mul        10.85%     149.967us        18.79%     259.807us      10.825us      65.086us        52.49%      65.086us       2.712us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      65.086us        52.49%      65.086us       2.712us            24  
-                                            aten::copy_         7.46%     103.216us        42.83%     592.168us      32.898us      39.518us        31.87%      41.310us       2.295us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      28.862us        23.28%      28.862us       2.405us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      19.395us        15.64%      19.395us       1.616us            12  
-                                            aten::clone         1.61%      22.200us        37.56%     519.385us      86.564us       0.000us         0.00%      12.448us       2.075us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      10.656us         8.59%      10.656us       1.776us             6  
-                                              aten::add         2.23%      30.899us         3.81%      52.660us       8.777us       9.730us         7.85%       9.730us       1.622us             6  
-                                              aten::sub         2.44%      33.801us         4.13%      57.151us       9.525us       9.665us         7.79%       9.665us       1.611us             6  
-                                Activity Buffer Request        13.62%     188.345us        13.62%     188.345us     188.345us       1.792us         1.45%       1.792us       1.792us             1  
-                                    aten::empty_strided         2.34%      32.371us         2.34%      32.371us       5.395us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        17.39%     240.467us        17.39%     240.467us      40.078us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.87%      67.397us         6.22%      86.038us       3.585us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         1.35%      18.641us         1.35%      18.641us       0.777us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel        15.56%     215.091us        15.56%     215.091us       4.481us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.40%       5.540us         0.40%       5.540us       5.540us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     931.662us       754.64%     931.662us     931.662us             1  
+                                            torch_eager        20.88%     278.302us        99.60%       1.328ms       1.328ms       0.000us         0.00%     125.281us     125.281us             1  
+                                              aten::mul        11.71%     156.112us        20.55%     273.936us      11.414us      65.153us        52.77%      65.153us       2.715us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      65.153us        52.77%      65.153us       2.715us            24  
+                                            aten::copy_         7.95%     105.951us        39.52%     526.779us      29.265us      39.169us        31.73%      40.993us       2.277us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      28.737us        23.28%      28.737us       2.395us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      19.135us        15.50%      19.135us       1.595us            12  
+                                            aten::clone         1.44%      19.200us        33.27%     443.406us      73.901us       0.000us         0.00%      12.256us       2.043us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      10.432us         8.45%      10.432us       1.739us             6  
+                                              aten::sub         2.81%      37.440us         4.58%      61.110us      10.185us       9.632us         7.80%       9.632us       1.605us             6  
+                                              aten::add         2.52%      33.611us         4.17%      55.611us       9.268us       9.503us         7.70%       9.503us       1.584us             6  
+                                Activity Buffer Request        13.21%     176.083us        13.21%     176.083us     176.083us       1.824us         1.48%       1.824us       1.824us             1  
+                                    aten::empty_strided         2.29%      30.570us         2.29%      30.570us       5.095us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        13.82%     184.192us        13.82%     184.192us      30.699us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         4.90%      65.274us         6.16%      82.123us       3.422us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         1.26%      16.849us         1.26%      16.849us       0.702us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel        16.81%     224.047us        16.81%     224.047us       4.668us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.40%       5.310us         0.40%       5.310us       5.310us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.383ms
-Self CUDA time total: 123.999us
+Self CPU time total: 1.333ms
+Self CUDA time total: 123.457us
 
 
 
@@ -4774,27 +4556,27 @@ PROFILE TRACE: torch_eager | cuda_B2_S512_H32_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     944.661us       533.26%     944.661us     944.661us             1  
-                                            torch_eager        10.70%     284.298us        99.79%       2.652ms       2.652ms       0.000us         0.00%     180.029us     180.029us             1  
-                                              aten::mul         6.06%     161.074us        10.27%     272.980us      11.374us      94.781us        53.50%      94.781us       3.949us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      94.781us        53.50%      94.781us       3.949us            24  
-                                            aten::copy_         3.97%     105.392us        69.06%       1.835ms     101.961us      57.664us        32.55%      60.545us       3.364us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      40.607us        22.92%      40.607us       3.384us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      24.703us        13.94%      24.703us       2.059us            12  
-                                            aten::clone         0.89%      23.759us        66.19%       1.759ms     293.179us       0.000us         0.00%      19.938us       3.323us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      17.057us         9.63%      17.057us       2.843us             6  
-                                              aten::sub         1.37%      36.511us         2.33%      61.971us      10.329us      12.383us         6.99%      12.383us       2.064us             6  
-                                              aten::add         1.17%      31.070us         2.01%      53.400us       8.900us      12.320us         6.95%      12.320us       2.053us             6  
-                                Activity Buffer Request        53.91%       1.433ms        53.91%       1.433ms       1.433ms       2.881us         1.63%       2.881us       2.881us             1  
-                                    aten::empty_strided         1.17%      31.132us         1.17%      31.132us       5.189us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         8.85%     235.245us         8.85%     235.245us      39.208us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         2.64%      70.123us         3.36%      89.202us       3.717us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.72%      19.079us         0.72%      19.079us       0.795us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         8.35%     221.788us         8.35%     221.788us       4.621us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.21%       5.460us         0.21%       5.460us       5.460us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     944.092us       532.26%     944.092us     944.092us             1  
+                                            torch_eager         9.66%     282.874us        99.81%       2.921ms       2.921ms       0.000us         0.00%     180.253us     180.253us             1  
+                                              aten::mul         5.51%     161.402us         9.28%     271.603us      11.317us      95.040us        53.58%      95.040us       3.960us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      95.040us        53.58%      95.040us       3.960us            24  
+                                            aten::copy_         3.62%     106.065us        72.07%       2.109ms     117.193us      57.663us        32.51%      60.543us       3.364us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      40.703us        22.95%      40.703us       3.392us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      24.670us        13.91%      24.670us       2.056us            12  
+                                            aten::clone         0.77%      22.428us        69.22%       2.026ms     337.680us       0.000us         0.00%      19.840us       3.307us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      16.960us         9.56%      16.960us       2.827us             6  
+                                              aten::add         1.16%      34.010us         1.95%      57.150us       9.525us      12.383us         6.98%      12.383us       2.064us             6  
+                                              aten::sub         1.32%      38.563us         2.15%      62.972us      10.495us      12.287us         6.93%      12.287us       2.048us             6  
+                                Activity Buffer Request        59.97%       1.755ms        59.97%       1.755ms       1.755ms       2.880us         1.62%       2.880us       2.880us             1  
+                                    aten::empty_strided         1.05%      30.691us         1.05%      30.691us       5.115us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         6.31%     184.633us         6.31%     184.633us      30.772us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         2.32%      67.977us         2.88%      84.170us       3.507us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.55%      16.193us         0.55%      16.193us       0.675us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         7.56%     221.262us         7.56%     221.262us       4.610us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.19%       5.669us         0.19%       5.669us       5.669us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.657ms
-Self CUDA time total: 177.148us
+Self CPU time total: 2.927ms
+Self CUDA time total: 177.373us
 
 
 
@@ -4804,27 +4586,27 @@ PROFILE TRACE: torch_eager | cuda_B2_S512_H32_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     954.138us       321.69%     954.138us     954.138us             1  
-                                            torch_eager        11.45%     309.471us        99.80%       2.697ms       2.697ms       0.000us         0.00%     313.854us     313.854us             1  
-                                              aten::mul         5.62%     151.933us         9.84%     265.955us      11.081us     144.896us        48.85%     144.896us       6.037us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     144.896us        48.85%     144.896us       6.037us            24  
-                                            aten::copy_         3.99%     107.722us        68.69%       1.856ms     103.120us     111.039us        37.44%     128.287us       7.127us            18  
-                                            aten::clone         1.05%      28.369us        65.82%       1.779ms     296.444us       0.000us         0.00%      70.944us      11.824us             6  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      57.343us        19.33%      57.343us       4.779us            12  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      53.696us        18.10%      53.696us       8.949us             6  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      40.671us        13.71%      40.671us       3.389us            12  
-                                              aten::sub         1.32%      35.620us         2.23%      60.211us      10.035us      20.448us         6.89%      20.448us       3.408us             6  
-                                              aten::add         1.16%      31.420us         1.99%      53.831us       8.972us      20.223us         6.82%      20.223us       3.371us             6  
-                                Activity Buffer Request        53.66%       1.450ms        53.66%       1.450ms       1.450ms      17.248us         5.82%      17.248us      17.248us             1  
-                                    aten::empty_strided         1.25%      33.832us         1.25%      33.832us       5.639us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync         8.57%     231.556us         8.57%     231.556us      38.593us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         2.58%      69.773us         3.29%      88.953us       3.706us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.71%      19.180us         0.71%      19.180us       0.799us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel         8.44%     228.015us         8.44%     228.015us       4.750us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.20%       5.370us         0.20%       5.370us       5.370us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     956.029us       320.35%     956.029us     956.029us             1  
+                                            torch_eager        10.28%     306.488us        99.82%       2.977ms       2.977ms       0.000us         0.00%     316.194us     316.194us             1  
+                                              aten::mul         5.10%     152.001us         8.95%     266.845us      11.119us     146.560us        49.11%     146.560us       6.107us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     146.560us        49.11%     146.560us       6.107us            24  
+                                            aten::copy_         3.72%     110.901us        71.64%       2.137ms     118.718us     110.754us        37.11%     128.514us       7.140us            18  
+                                            aten::clone         0.97%      28.901us        68.99%       2.058ms     342.957us       0.000us         0.00%      70.944us      11.824us             6  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      57.570us        19.29%      57.570us       4.797us            12  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      53.184us        17.82%      53.184us       8.864us             6  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      41.120us        13.78%      41.120us       3.427us            12  
+                                              aten::add         1.16%      34.740us         1.93%      57.500us       9.583us      20.641us         6.92%      20.641us       3.440us             6  
+                                              aten::sub         1.34%      39.998us         2.18%      65.101us      10.850us      20.479us         6.86%      20.479us       3.413us             6  
+                                Activity Buffer Request        59.58%       1.777ms        59.58%       1.777ms       1.777ms      17.760us         5.95%      17.760us      17.760us             1  
+                                    aten::empty_strided         1.05%      31.260us         1.05%      31.260us       5.210us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         6.26%     186.663us         6.26%     186.663us      31.111us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         2.24%      66.809us         2.82%      84.238us       3.510us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.58%      17.429us         0.58%      17.429us       0.726us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         7.54%     224.919us         7.54%     224.919us       4.686us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.18%       5.469us         0.18%       5.469us       5.469us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.702ms
-Self CUDA time total: 296.606us
+Self CPU time total: 2.983ms
+Self CUDA time total: 298.434us
 
 
 
@@ -4834,27 +4616,27 @@ PROFILE TRACE: torch_eager | cuda_B2_S2048_H8_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     930.130us       525.53%     930.130us     930.130us             1  
-                                            torch_eager        19.64%     282.826us        99.65%       1.435ms       1.435ms       0.000us         0.00%     179.836us     179.836us             1  
-                                              aten::mul        10.48%     150.844us        18.43%     265.387us      11.058us      94.845us        53.59%      94.845us       3.952us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      94.845us        53.59%      94.845us       3.952us            24  
-                                            aten::copy_         8.38%     120.684us        44.09%     634.887us      35.272us      57.502us        32.49%      60.350us       3.353us            18  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      40.478us        22.87%      40.478us       3.373us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      24.641us        13.92%      24.641us       2.053us            12  
-                                            aten::clone         1.49%      21.461us        38.48%     554.053us      92.342us       0.000us         0.00%      19.872us       3.312us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      17.024us         9.62%      17.024us       2.837us             6  
-                                              aten::sub         2.41%      34.731us         4.09%      58.881us       9.813us      12.353us         6.98%      12.353us       2.059us             6  
-                                              aten::add         2.13%      30.662us         3.72%      53.511us       8.919us      12.288us         6.94%      12.288us       2.048us             6  
-                                Activity Buffer Request        15.30%     220.275us        15.30%     220.275us     220.275us       2.848us         1.61%       2.848us       2.848us             1  
-                                    aten::empty_strided         2.11%      30.450us         2.11%      30.450us       5.075us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        15.99%     230.296us        15.99%     230.296us      38.383us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.74%      68.240us         6.08%      87.483us       3.645us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         1.34%      19.243us         1.34%      19.243us       0.802us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel        15.64%     225.174us        15.64%     225.174us       4.691us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.35%       5.110us         0.35%       5.110us       5.110us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     916.392us       515.61%     916.392us     916.392us             1  
+                                            torch_eager        19.58%     274.201us        99.60%       1.394ms       1.394ms       0.000us         0.00%     180.610us     180.610us             1  
+                                              aten::mul        11.24%     157.371us        18.87%     264.183us      11.008us      95.074us        53.49%      95.074us       3.961us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      95.074us        53.49%      95.074us       3.961us            24  
+                                            aten::copy_         7.77%     108.775us        43.49%     608.863us      33.826us      57.825us        32.54%      60.705us       3.373us            18  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      40.897us        23.01%      40.897us       3.408us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      24.831us        13.97%      24.831us       2.069us            12  
+                                            aten::clone         1.40%      19.580us        37.38%     523.368us      87.228us       0.000us         0.00%      19.808us       3.301us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      16.928us         9.52%      16.928us       2.821us             6  
+                                              aten::add         2.38%      33.360us         4.00%      56.040us       9.340us      12.416us         6.99%      12.416us       2.069us             6  
+                                              aten::sub         2.76%      38.582us         4.39%      61.472us      10.245us      12.415us         6.99%      12.415us       2.069us             6  
+                                Activity Buffer Request        18.14%     253.955us        18.14%     253.955us     253.955us       2.880us         1.62%       2.880us       2.880us             1  
+                                    aten::empty_strided         2.13%      29.860us         2.13%      29.860us       4.977us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        13.38%     187.273us        13.38%     187.273us      31.212us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         4.53%      63.391us         5.73%      80.293us       3.346us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         1.21%      16.902us         1.21%      16.902us       0.704us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel        15.09%     211.242us        15.09%     211.242us       4.401us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.40%       5.600us         0.40%       5.600us       5.600us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.440ms
-Self CUDA time total: 176.988us
+Self CPU time total: 1.400ms
+Self CUDA time total: 177.730us
 
 
 
@@ -4864,27 +4646,27 @@ PROFILE TRACE: torch_eager | cuda_B2_S2048_H8_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     931.347us       313.60%     931.347us     931.347us             1  
-                                            torch_eager        20.13%     283.358us        99.65%       1.403ms       1.403ms       0.000us         0.00%     314.679us     314.679us             1  
-                                              aten::mul        10.72%     150.883us        18.79%     264.457us      11.019us     145.371us        48.95%     145.371us       6.057us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     145.371us        48.95%     145.371us       6.057us            24  
-                                            aten::copy_         7.40%     104.164us        42.97%     604.868us      33.604us     110.845us        37.32%     128.541us       7.141us            18  
-                                            aten::clone         1.53%      21.600us        37.15%     522.944us      87.157us       0.000us         0.00%      71.357us      11.893us             6  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      57.184us        19.25%      57.184us       4.765us            12  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      53.661us        18.07%      53.661us       8.944us             6  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      40.767us        13.73%      40.767us       3.397us            12  
-                                              aten::add         2.28%      32.151us         3.88%      54.682us       9.114us      20.446us         6.88%      20.446us       3.408us             6  
-                                              aten::sub         2.39%      33.622us         4.06%      57.171us       9.528us      20.321us         6.84%      20.321us       3.387us             6  
-                                Activity Buffer Request        14.77%     207.975us        14.77%     207.975us     207.975us      17.696us         5.96%      17.696us      17.696us             1  
-                                    aten::empty_strided         2.15%      30.270us         2.15%      30.270us       5.045us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        16.22%     228.377us        16.22%     228.377us      38.063us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.75%      66.830us         6.13%      86.290us       3.595us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         1.38%      19.460us         1.38%      19.460us       0.811us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel        15.91%     224.006us        15.91%     224.006us       4.667us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.35%       4.971us         0.35%       4.971us       4.971us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     934.618us       312.71%     934.618us     934.618us             1  
+                                            torch_eager        20.60%     280.895us        99.62%       1.358ms       1.358ms       0.000us         0.00%     316.921us     316.921us             1  
+                                              aten::mul        11.57%     157.759us        19.61%     267.373us      11.141us     146.460us        49.00%     146.460us       6.102us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     146.460us        49.00%     146.460us       6.102us            24  
+                                            aten::copy_         8.07%     110.072us        41.19%     561.700us      31.206us     111.966us        37.46%     130.013us       7.223us            18  
+                                            aten::clone         1.51%      20.600us        34.77%     474.096us      79.016us       0.000us         0.00%      72.670us      12.112us             6  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      57.343us        19.19%      57.343us       4.779us            12  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us      54.623us        18.28%      54.623us       9.104us             6  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      40.448us        13.53%      40.448us       3.371us            12  
+                                              aten::add         2.59%      35.260us         4.22%      57.590us       9.598us      20.288us         6.79%      20.288us       3.381us             6  
+                                              aten::sub         2.60%      35.410us         4.30%      58.621us       9.770us      20.160us         6.75%      20.160us       3.360us             6  
+                                Activity Buffer Request        14.73%     200.853us        14.73%     200.853us     200.853us      18.047us         6.04%      18.047us      18.047us             1  
+                                    aten::empty_strided         2.18%      29.660us         2.18%      29.660us       4.943us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        13.85%     188.823us        13.85%     188.823us      31.471us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         4.75%      64.754us         6.01%      81.922us       3.413us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         1.26%      17.168us         1.26%      17.168us       0.715us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel        15.92%     217.107us        15.92%     217.107us       4.523us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.38%       5.180us         0.38%       5.180us       5.180us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.408ms
-Self CUDA time total: 296.983us
+Self CPU time total: 1.364ms
+Self CUDA time total: 298.874us
 
 
 
@@ -4894,27 +4676,27 @@ PROFILE TRACE: torch_eager | cuda_B2_S2048_H32_D64_R32
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     931.511us       159.85%     931.511us     931.511us             1  
-                                            torch_eager        19.89%     283.237us        99.62%       1.419ms       1.419ms       0.000us         0.00%     606.457us     606.457us             1  
-                                            aten::copy_         7.21%     102.593us        43.52%     619.697us      34.428us     267.708us        45.94%     291.419us      16.190us            18  
-                                              aten::mul        10.56%     150.425us        18.55%     264.165us      11.007us     249.406us        42.80%     249.406us      10.392us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     249.406us        42.80%     249.406us      10.392us            24  
-                                            aten::clone         1.52%      21.631us        38.04%     541.603us      90.267us       0.000us         0.00%     201.277us      33.546us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us     177.566us        30.47%     177.566us      29.594us             6  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      90.142us        15.47%      90.142us       7.512us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      65.632us        11.26%      65.632us       5.469us            12  
-                                              aten::add         2.16%      30.762us         3.77%      53.662us       8.944us      32.832us         5.63%      32.832us       5.472us             6  
-                                              aten::sub         2.53%      36.013us         4.23%      60.192us      10.032us      32.800us         5.63%      32.800us       5.467us             6  
-                                Activity Buffer Request        14.90%     212.145us        14.90%     212.145us     212.145us      23.711us         4.07%      23.711us      23.711us             1  
-                                    aten::empty_strided         2.14%      30.440us         2.14%      30.440us       5.073us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        16.99%     241.846us        16.99%     241.846us      40.308us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         4.71%      67.093us         6.00%      85.482us       3.562us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         1.29%      18.389us         1.29%      18.389us       0.766us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel        15.73%     223.932us        15.73%     223.932us       4.665us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize         0.38%       5.360us         0.38%       5.360us       5.360us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us     956.919us       161.50%     956.919us     956.919us             1  
+                                            torch_eager        21.30%     289.504us        99.57%       1.353ms       1.353ms       0.000us         0.00%     616.281us     616.281us             1  
+                                            aten::copy_         7.84%     106.532us        38.89%     528.548us      29.364us     278.013us        46.92%     301.788us      16.766us            18  
+                                              aten::mul        11.95%     162.407us        20.79%     282.469us      11.770us     248.703us        41.97%     248.703us      10.363us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     248.703us        41.97%     248.703us      10.363us            24  
+                                            aten::clone         1.53%      20.799us        32.73%     444.735us      74.123us       0.000us         0.00%     210.204us      35.034us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us     186.429us        31.46%     186.429us      31.072us             6  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      91.584us        15.46%      91.584us       7.632us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      65.790us        11.10%      65.790us       5.483us            12  
+                                              aten::add         2.44%      33.161us         4.08%      55.501us       9.250us      32.927us         5.56%      32.927us       5.488us             6  
+                                              aten::sub         2.95%      40.030us         4.74%      64.440us      10.740us      32.863us         5.55%      32.863us       5.477us             6  
+                                Activity Buffer Request        13.07%     177.663us        13.07%     177.663us     177.663us      23.775us         4.01%      23.775us      23.775us             1  
+                                    aten::empty_strided         2.15%      29.270us         2.15%      29.270us       4.878us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync        13.63%     185.172us        13.63%     185.172us      30.862us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         4.83%      65.662us         6.08%      82.660us       3.444us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         1.25%      16.998us         1.25%      16.998us       0.708us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel        16.63%     225.993us        16.63%     225.993us       4.708us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize         0.43%       5.780us         0.43%       5.780us       5.780us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 1.424ms
-Self CUDA time total: 582.746us
+Self CPU time total: 1.359ms
+Self CUDA time total: 592.506us
 
 
 
@@ -4924,59 +4706,105 @@ PROFILE TRACE: torch_eager | cuda_B2_S2048_H32_D128_R64
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                    Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-                                            torch_eager        13.84%     306.170us        64.60%       1.429ms       1.429ms       0.000us         0.00%       1.835ms       1.835ms             1  
-                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us       1.808ms       102.17%       1.808ms       1.808ms             1  
-                                            aten::copy_         5.17%     114.346us        26.90%     594.995us      33.055us     791.984us        44.77%     858.095us      47.672us            18  
-                                              aten::mul         6.78%     150.032us        12.17%     269.044us      11.210us     828.790us        46.85%     828.790us      34.533us            24  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     828.790us        46.85%     828.790us      34.533us            24  
-                                            aten::clone         1.04%      23.090us        22.74%     502.934us      83.822us       0.000us         0.00%     626.230us     104.372us             6  
-                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us     560.119us        31.66%     560.119us      93.353us             6  
-void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     231.865us        13.11%     231.865us      19.322us            12  
-void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     148.413us         8.39%     148.413us      12.368us            12  
-                                              aten::sub         1.69%      37.309us         2.75%      60.900us      10.150us      90.142us         5.10%      90.142us      15.024us             6  
-                                Activity Buffer Request         8.38%     185.324us         8.38%     185.324us     185.324us      66.111us         3.74%      66.111us      66.111us             1  
-                                              aten::add         1.41%      31.181us         2.49%      55.022us       9.170us      58.271us         3.29%      58.271us       9.712us             6  
-                                    aten::empty_strided         1.45%      31.982us         1.45%      31.982us       5.330us       0.000us         0.00%       0.000us       0.000us             6  
-                                        cudaMemcpyAsync        10.29%     227.584us        10.29%     227.584us      37.931us       0.000us         0.00%       0.000us       0.000us             6  
-                                            aten::slice         3.11%      68.695us         3.96%      87.553us       3.648us       0.000us         0.00%       0.000us       0.000us            24  
-                                       aten::as_strided         0.85%      18.858us         0.85%      18.858us       0.786us       0.000us         0.00%       0.000us       0.000us            24  
-                                       cudaLaunchKernel        10.59%     234.185us        10.59%     234.185us       4.879us       0.000us         0.00%       0.000us       0.000us            48  
-                                  cudaDeviceSynchronize        35.40%     782.770us        35.40%     782.770us     782.770us       0.000us         0.00%       0.000us       0.000us             1  
+                                            torch_eager        12.69%     276.287us        61.52%       1.340ms       1.340ms       0.000us         0.00%       1.863ms       1.863ms             1  
+                                            torch_eager         0.00%       0.000us         0.00%       0.000us       0.000us       1.835ms       102.22%       1.835ms       1.835ms             1  
+                                            aten::copy_         5.01%     109.060us        24.98%     544.137us      30.230us     806.007us        44.89%     873.590us      48.533us            18  
+                                              aten::mul         7.11%     154.844us        12.06%     262.604us      10.942us     842.615us        46.93%     842.615us      35.109us            24  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     842.615us        46.93%     842.615us      35.109us            24  
+                                            aten::clone         1.01%      22.000us        21.12%     459.916us      76.653us       0.000us         0.00%     622.361us     103.727us             6  
+                         Memcpy DtoD (Device -> Device)         0.00%       0.000us         0.00%       0.000us       0.000us     554.778us        30.90%     554.778us      92.463us             6  
+void at::native::elementwise_kernel<128, 4, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     251.229us        13.99%     251.229us      20.936us            12  
+void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     146.939us         8.18%     146.939us      12.245us            12  
+                                              aten::sub         1.90%      41.421us         3.00%      65.411us      10.902us      88.573us         4.93%      88.573us      14.762us             6  
+                                Activity Buffer Request         8.49%     184.983us         8.49%     184.983us     184.983us      67.583us         3.76%      67.583us      67.583us             1  
+                                              aten::add         1.54%      33.561us         2.59%      56.461us       9.410us      58.366us         3.25%      58.366us       9.728us             6  
+                                    aten::empty_strided         1.42%      30.960us         1.42%      30.960us       5.160us       0.000us         0.00%       0.000us       0.000us             6  
+                                        cudaMemcpyAsync         8.70%     189.543us         8.70%     189.543us      31.591us       0.000us         0.00%       0.000us       0.000us             6  
+                                            aten::slice         2.99%      65.113us         3.77%      82.061us       3.419us       0.000us         0.00%       0.000us       0.000us            24  
+                                       aten::as_strided         0.78%      16.948us         0.78%      16.948us       0.706us       0.000us         0.00%       0.000us       0.000us            24  
+                                       cudaLaunchKernel         9.88%     215.201us         9.88%     215.201us       4.483us       0.000us         0.00%       0.000us       0.000us            48  
+                                  cudaDeviceSynchronize        38.48%     838.063us        38.48%     838.063us     838.063us       0.000us         0.00%       0.000us       0.000us             1  
 -------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
-Self CPU time total: 2.212ms
-Self CUDA time total: 1.769ms
+Self CPU time total: 2.178ms
+Self CUDA time total: 1.796ms
 
 
 impl                     wl                  p50(ms)  ok
-torch_eager              cuda_B1_S128_H32_D128_R64     0.21  True
-torch_eager              cuda_B1_S128_H32_D64_R32     0.22  True
-torch_eager              cuda_B1_S128_H8_D128_R64     0.22  True
-torch_eager              cuda_B1_S128_H8_D64_R32     0.17  True
-torch_eager              cuda_B1_S2048_H32_D128_R64     0.22  True
-torch_eager              cuda_B1_S2048_H32_D64_R32     0.21  True
-torch_eager              cuda_B1_S2048_H8_D128_R64     0.21  True
+torch_eager              cuda_B1_S128_H32_D128_R64     0.22  True
+torch_eager              cuda_B1_S128_H32_D64_R32     0.23  True
+torch_eager              cuda_B1_S128_H8_D128_R64     0.23  True
+torch_eager              cuda_B1_S128_H8_D64_R32     0.18  True
+torch_eager              cuda_B1_S2048_H32_D128_R64     0.23  True
+torch_eager              cuda_B1_S2048_H32_D64_R32     0.22  True
+torch_eager              cuda_B1_S2048_H8_D128_R64     0.22  True
 torch_eager              cuda_B1_S2048_H8_D64_R32     0.22  True
 torch_eager              cuda_B1_S512_H32_D128_R64     0.22  True
-torch_eager              cuda_B1_S512_H32_D64_R32     0.21  True
-torch_eager              cuda_B1_S512_H8_D128_R64     0.21  True
-torch_eager              cuda_B1_S512_H8_D64_R32     0.21  True
-torch_eager              cuda_B2_S128_H32_D128_R64     0.21  True
+torch_eager              cuda_B1_S512_H32_D64_R32     0.22  True
+torch_eager              cuda_B1_S512_H8_D128_R64     0.23  True
+torch_eager              cuda_B1_S512_H8_D64_R32     0.23  True
+torch_eager              cuda_B2_S128_H32_D128_R64     0.22  True
 torch_eager              cuda_B2_S128_H32_D64_R32     0.22  True
-torch_eager              cuda_B2_S128_H8_D128_R64     0.21  True
+torch_eager              cuda_B2_S128_H8_D128_R64     0.22  True
 torch_eager              cuda_B2_S128_H8_D64_R32     0.22  True
-torch_eager              cuda_B2_S2048_H32_D128_R64     0.64  True
+torch_eager              cuda_B2_S2048_H32_D128_R64     0.65  True
 torch_eager              cuda_B2_S2048_H32_D64_R32     0.23  True
 torch_eager              cuda_B2_S2048_H8_D128_R64     0.22  True
 torch_eager              cuda_B2_S2048_H8_D64_R32     0.22  True
 torch_eager              cuda_B2_S512_H32_D128_R64     0.22  True
-torch_eager              cuda_B2_S512_H32_D64_R32     0.22  True
-torch_eager              cuda_B2_S512_H8_D128_R64     0.21  True
-torch_eager              cuda_B2_S512_H8_D64_R32     0.21  True
+torch_eager              cuda_B2_S512_H32_D64_R32     0.23  True
+torch_eager              cuda_B2_S512_H8_D128_R64     0.22  True
+torch_eager              cuda_B2_S512_H8_D64_R32     0.23  True
 
▶ UV Install Logs
diff --git a/rotary/index.html b/rotary/index.html index 633090ff5b9faf18aceca8d5ed9307277253f7a1..4f5e8ac6442391b00b79ef9df4bab278fb32299a 100644 --- a/rotary/index.html +++ b/rotary/index.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: 
var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); 
border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, -:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } 
+:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { 
font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } 
-.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: 
var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens */ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] 
.cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: 
var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - .uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: 
var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s 
ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 
0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code 
{ - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ 
+[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: #008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: 
#767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ 
+[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm 
{ color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ 
+[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* 
Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { @@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font 
size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { 
opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
diff --git a/rotary/results/artifacts/combine/latency.svg b/rotary/results/artifacts/combine/latency.svg index 36f9217b1247fd55602a048202775fbf3d19cd24..c55182f3c42cf6272b118c30e08ae838ff8b6108 100644 --- a/rotary/results/artifacts/combine/latency.svg +++ b/rotary/results/artifacts/combine/latency.svg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1df6ff7a8f4a24eba95824695c07fcf25601f7f648a0a0773f7d1bc7119d9fd2 -size 37849 +oid sha256:0a4f1b049a53cce2974edf15a23c4345c4b61ed101112698c1678c755429abdb +size 37863 diff --git a/rotary/results/combined_results.html b/rotary/results/combined_results.html index 28fa630c24a01c9c2557497761cffaee1e2da610..e2efc2f488fab6169c686e3eb4f6d73a7e868a22 100644 --- a/rotary/results/combined_results.html +++ b/rotary/results/combined_results.html @@ -57,8 +57,12 @@ // For HTML files, navigate within iframe if (fullPath.endsWith('.html') || fullPath.endsWith('/')) { const pathParts = fullPath.split('/').filter(p => p); - const targetFile = pathParts[pathParts.length - 1] || 'index.html'; - window.location.href = targetFile; + // remove "#/" prefix if present + let iframePath = fullPath; + if (iframePath.startsWith('#/')) { + iframePath = iframePath.slice(2); + } + window.location.href = "/" + iframePath; } else { // For non-HTML files (raw .py, etc), open directly window.open(href, '_blank'); @@ -99,16 +103,16 @@ --bg-error: #fdf2f2; --bg-artifact: #e6f3ff; --bg-artifact-hover: #d0e7ff; - + --text-primary: #333; --text-secondary: #656d76; --text-error: #c53030; --text-link: #0969da; - + --border-primary: #e1e5e9; --border-error: #e53e3e; --border-cell-failed: #d73a49; - + --shadow: rgba(0, 0, 0, 0.1); } @@ -120,32 +124,26 @@ --bg-error: #1a0f0f; --bg-artifact: #151515; --bg-artifact-hover: #1a1a1a; - + --text-primary: #e0e0e0; --text-secondary: #888888; --text-error: #ff6b6b; --text-link: #64b5f6; - + --border-primary: #2a2a2a; --border-error: #ff6b6b; --border-cell-failed: #ff6b6b; - + --shadow: rgba(255, 255, 
255, 0.05); } - /* Monocolor UI theme: black/white background, all text/borders single blue */ -:root[data-ui="monocolor"] { - --mono-color: #0a66ff; -} - +:root[data-ui="monocolor"] { --mono-color: #0a66ff; } :root[data-ui="monocolor"][data-theme="light"] { --bg-primary: #ffffff; } - :root[data-ui="monocolor"][data-theme="dark"] { --bg-primary: #000000; } - :root[data-ui="monocolor"] { --bg-secondary: var(--bg-primary); --bg-tertiary: var(--bg-primary); @@ -165,76 +163,25 @@ --shadow: none; } - -:root[data-ui="monocolor"] a { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] a { color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button, :root[data-ui="monocolor"] .theme-toggle, :root[data-ui="monocolor"] .reset-toggle, -:root[data-ui="monocolor"] .back-button { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .back-button { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } :root[data-ui="monocolor"] .menu-button:hover, :root[data-ui="monocolor"] .theme-toggle:hover, :root[data-ui="monocolor"] .reset-toggle:hover, -:root[data-ui="monocolor"] .back-button:hover { - background: var(--bg-primary); - color: var(--mono-color); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .menu-dropdown { - background: var(--bg-primary); - border-color: var(--mono-color); - box-shadow: none; -} - -:root[data-ui="monocolor"] .menu-item { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .system-info { - background: var(--bg-primary); - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .cell { - border-color: var(--mono-color); - background: var(--bg-primary); -} - -:root[data-ui="monocolor"] .cell-header { - background: var(--bg-primary); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact { - background: var(--bg-primary); - 
border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .artifact:hover { - background: var(--bg-primary); -} - +:root[data-ui="monocolor"] .back-button:hover { background: var(--bg-primary); color: var(--mono-color); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .menu-dropdown { background: var(--bg-primary); border-color: var(--mono-color); box-shadow: none; } +:root[data-ui="monocolor"] .menu-item { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .system-info { background: var(--bg-primary); border-color: var(--mono-color); } +:root[data-ui="monocolor"] .cell { border-color: var(--mono-color); background: var(--bg-primary); } +:root[data-ui="monocolor"] .cell-header { background: var(--bg-primary); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .artifact:hover { background: var(--bg-primary); } :root[data-ui="monocolor"] .artifact-preview img, -:root[data-ui="monocolor"] .artifact-preview svg { - border-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .status-widget { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .artifact-preview svg { border-color: var(--mono-color); } +:root[data-ui="monocolor"] .status-widget { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } :root[data-ui="monocolor"] .minimap, :root[data-ui="monocolor"] .file-explorer, :root[data-ui="monocolor"] .tools-widget { @@ -242,54 +189,23 @@ border-color: var(--mono-color); color: var(--mono-color); } - :root[data-ui="monocolor"] .cell-code { background: var(--bg-primary); border-bottom-color: var(--mono-color); } - :root[data-ui="monocolor"] .tools-title, :root[data-ui="monocolor"] .file-explorer-section-title, 
-:root[data-ui="monocolor"] .minimap-title { - color: var(--mono-color); - border-bottom-color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button { - background: var(--bg-primary); - border-color: var(--mono-color); - color: var(--mono-color); -} - -:root[data-ui="monocolor"] .tool-button.active { - border-color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-title { color: var(--mono-color); border-bottom-color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button { background: var(--bg-primary); border-color: var(--mono-color); color: var(--mono-color); } +:root[data-ui="monocolor"] .tool-button.active { border-color: var(--mono-color); } :root[data-ui="monocolor"] .file-explorer-item, -:root[data-ui="monocolor"] .minimap-item { - color: var(--mono-color); -} - +:root[data-ui="monocolor"] .minimap-item { color: var(--mono-color); } /* Force Pygments code to mono blue on mono bg */ -:root[data-ui="monocolor"] .highlight { - background: var(--bg-primary) !important; - color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight { background: var(--bg-primary) !important; color: var(--mono-color) !important; } :root[data-ui="monocolor"] .highlight *, -:root[data-ui="monocolor"] .highlight .hll { - color: var(--mono-color) !important; - background: transparent !important; - border-color: var(--mono-color) !important; -} - +:root[data-ui="monocolor"] .highlight .hll { color: var(--mono-color) !important; background: transparent !important; border-color: var(--mono-color) !important; } /* Default code font + metrics (overridable via frontmatter) */ -:root { - --code-font-size: 0.95rem; - --code-line-height: 1.5; - --code-pad-y: 0.75rem; -} - +:root { --code-font-size: 0.95rem; --code-line-height: 1.5; --code-pad-y: 0.75rem; } /* Minimal UI theme overrides base variables for a flatter, 90s look */ :root[data-ui="none"] { --bg-primary: #ffffff; @@ -311,11 +227,9 @@ --shadow: none; } - html { overscroll-behavior: 
none; } - body { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; line-height: 1.4; @@ -327,7 +241,6 @@ body { transition: background-color 0.2s ease, color 0.2s ease; overscroll-behavior: none; } - /* Minimal "none" UI theme overrides */ :root[data-ui="none"] body { font-family: 'Times New Roman', Times, serif; @@ -351,11 +264,7 @@ body { gap: 0.25rem; z-index: 1000; } - -.controls-buttons { - display: flex; - gap: 0.5rem; -} +.controls-buttons { display: flex; gap: 0.5rem; } .menu-button { position: relative; @@ -369,7 +278,6 @@ body { font-size: 0.9rem; user-select: none; } - /* Keep default control styling when widgets are enabled, even in minimal UI */ :root[data-ui="none"][data-widgets="on"] .menu-button, :root[data-ui="none"][data-widgets="on"] .theme-toggle, @@ -384,7 +292,6 @@ body { color: var(--text-primary); background: var(--bg-tertiary); } - /* Controls state indicator (top-right) */ /* Status widget (bottom-right) */ .status-widget { @@ -401,17 +308,8 @@ body { color: var(--text-secondary); z-index: 100; } - -.status-widget strong { - color: var(--text-primary); -} - -:root[data-ui="none"][data-widgets="on"] .status-widget { - background: #f6f6f6; - border-color: #ccc; - color: #222; -} - +.status-widget strong { color: var(--text-primary); } +:root[data-ui="none"][data-widgets="on"] .status-widget { background: #f6f6f6; border-color: #ccc; color: #222; } :root[data-ui="none"][data-widgets="on"] .menu-button:hover, :root[data-ui="none"][data-widgets="on"] .theme-toggle:hover, :root[data-ui="none"][data-widgets="on"] .reset-toggle:hover, @@ -437,12 +335,7 @@ body { z-index: 1001; margin-top: 4px; } - -:root[data-ui="none"][data-widgets="on"] .menu-dropdown { - background: #ffffff; - border: 1px solid #cccccc; - box-shadow: none; -} +:root[data-ui="none"][data-widgets="on"] .menu-dropdown { background: #ffffff; border: 1px solid #cccccc; box-shadow: none; } .menu-button.active .menu-dropdown { 
opacity: 1; @@ -459,11 +352,7 @@ body { border-bottom: 1px solid var(--border-primary); cursor: pointer; } - -:root[data-ui="none"] .menu-item { - color: #000; - border-bottom: 1px solid #eee; -} +:root[data-ui="none"] .menu-item { color: #000; border-bottom: 1px solid #eee; } .menu-item:last-child { border-bottom: none; @@ -527,8 +416,7 @@ body { font-family: monospace; } -.theme-toggle, -.reset-toggle { +.theme-toggle, .reset-toggle { background: var(--bg-secondary); border: 1px solid var(--border-primary); border-radius: 2px; @@ -543,8 +431,7 @@ body { letter-spacing: 0; } -.theme-toggle:hover, -.reset-toggle:hover { +.theme-toggle:hover, .reset-toggle:hover { background: var(--bg-tertiary); border-color: var(--text-secondary); color: var(--text-primary); @@ -566,20 +453,16 @@ body { opacity: 0.9; transition: opacity 0.2s ease; } - /* Hide widgets and controls when disabled via frontmatter */ :root[data-widgets="off"] .controls, :root[data-widgets="off"] .minimap, :root[data-widgets="off"] .file-explorer, :root[data-widgets="off"] .tools-widget, -:root[data-widgets="off"] .status-widget { - display: none !important; -} +:root[data-widgets="off"] .status-widget { display: none !important; } .file-explorer { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -603,18 +486,15 @@ body { left: 0; width: 100vw; height: 100vh; - z-index: 80; - /* under widgets (100) and controls (1000) */ + z-index: 80; /* under widgets (100) and controls (1000) */ display: block; - pointer-events: none; - /* enabled only when a tool is active */ + pointer-events: none; /* enabled only when a tool is active */ } /* Tools widget */ .tools-widget { position: fixed; - bottom: 20px; - /* default; JS will stack */ + bottom: 20px; /* default; JS will stack */ right: 20px; left: auto; top: auto; @@ -627,7 +507,6 @@ body { z-index: 100; opacity: 0.95; } - .tools-title { font-weight: bold; color: 
var(--text-secondary); @@ -637,13 +516,7 @@ body { cursor: grab; user-select: none; } - -.tools-row { - display: flex; - gap: 0.4rem; - flex-wrap: wrap; -} - +.tools-row { display: flex; gap: 0.4rem; flex-wrap: wrap; } .tool-button { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -655,19 +528,10 @@ body { font-size: 0.75rem; user-select: none; } +.tool-button:hover { color: var(--text-primary); } +.tool-button.active { color: var(--text-primary); border-color: var(--text-secondary); background: var(--bg-secondary); } -.tool-button:hover { - color: var(--text-primary); -} - -.tool-button.active { - color: var(--text-primary); - border-color: var(--text-secondary); - background: var(--bg-secondary); -} - -.minimap:hover, -.file-explorer:hover { +.minimap:hover, .file-explorer:hover { opacity: 1; } @@ -677,8 +541,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -707,29 +570,12 @@ body { font-weight: normal; } -.minimap-heading.h1 { - padding-left: 0.5rem; -} - -.minimap-heading.h2 { - padding-left: 1rem; -} - -.minimap-heading.h3 { - padding-left: 1.5rem; -} - -.minimap-heading.h4 { - padding-left: 2rem; -} - -.minimap-heading.h5 { - padding-left: 2.5rem; -} - -.minimap-heading.h6 { - padding-left: 3rem; -} +.minimap-heading.h1 { padding-left: 0.5rem; } +.minimap-heading.h2 { padding-left: 1rem; } +.minimap-heading.h3 { padding-left: 1.5rem; } +.minimap-heading.h4 { padding-left: 2rem; } +.minimap-heading.h5 { padding-left: 2.5rem; } +.minimap-heading.h6 { padding-left: 3rem; } .minimap-cell { color: var(--text-link); @@ -747,8 +593,7 @@ body { margin-bottom: 0.5rem; padding-bottom: 0.25rem; border-bottom: 1px solid var(--border-primary); - cursor: grab; - /* drag handle */ + cursor: grab; /* drag handle */ user-select: none; } @@ -792,10 +637,7 @@ body { /* Hide widgets on smaller screens 
*/ @media (max-width: 768px) { - - .minimap, - .file-explorer, - .tools-widget { + .minimap, .file-explorer, .tools-widget { display: none; } } @@ -807,13 +649,7 @@ body { overflow: hidden; background: var(--bg-secondary); } - -:root[data-ui="none"] .cell { - margin: 1em 0; - border: none; - background: transparent; -} - +:root[data-ui="none"] .cell { margin: 1em 0; border: none; background: transparent; } .cell-header { background: var(--bg-secondary); padding: 0.5rem 1rem; @@ -821,72 +657,39 @@ body { font-family: inherit; font-size: 0.85rem; } - -:root[data-ui="none"] .cell-header { - background: transparent; - border: none; - padding: 0; - font-weight: bold; -} - -:root[data-ui="none"] .cell-content { - padding: 0; -} - +:root[data-ui="none"] .cell-header { background: transparent; border: none; padding: 0; font-weight: bold; } +:root[data-ui="none"] .cell-content { padding: 0; } :root[data-ui="none"] .copy-button, :root[data-ui="none"] .collapse-indicators, :root[data-ui="none"] .cell-meta, -:root[data-ui="none"] .cell-outputs-header { - display: none !important; -} - -:root[data-ui="none"] pre, -:root[data-ui="none"] code { - font-family: Menlo, Monaco, 'Courier New', monospace; -} - -:root[data-ui="none"] .code-content pre { - background: #f9f9f9; - border: 1px solid #ddd; - padding: 8px; -} - -:root[data-ui="none"] .output { - background: transparent; - border: none; - padding: 0.25em 0; -} - -color: var(--text-secondary); -cursor: pointer; -user-select: none; -transition: background-color 0.2s ease; +:root[data-ui="none"] .cell-outputs-header { display: none !important; } +:root[data-ui="none"] pre, :root[data-ui="none"] code { font-family: Menlo, Monaco, 'Courier New', monospace; } +:root[data-ui="none"] .code-content pre { background: #f9f9f9; border: 1px solid #ddd; padding: 8px; } +:root[data-ui="none"] .output { background: transparent; border: none; padding: 0.25em 0; } + color: var(--text-secondary); + cursor: pointer; + user-select: none; + 
transition: background-color 0.2s ease; } - .cell-header:hover { background: var(--bg-tertiary); } - .collapse-indicators { color: var(--text-secondary); font-size: 0.8rem; opacity: 0.7; } - .collapse-indicators span:hover { color: var(--text-primary); opacity: 1; } - .cell-code { display: block; background: var(--bg-code); } - .cell-code.collapsed { display: none; } - .cell-code pre { margin: 0; padding: 0.75rem; @@ -894,17 +697,14 @@ transition: background-color 0.2s ease; overflow-x: auto; color: var(--text-primary); } - .cell-output { padding: 0.75rem; /* background: var(--bg-primary); */ background: var(--bg-secondary); } - .cell-output.collapsed { display: none; } - .cell-stdout { background: var(--bg-tertiary); padding: 0.75rem; @@ -925,21 +725,15 @@ transition: background-color 0.2s ease; color: var(--text-primary); /* key bits */ - overflow: auto; - /* show scrollbars when needed */ - max-width: 100%; - /* respects whatever layout width you give it */ + overflow: auto; /* show scrollbars when needed */ + max-width: 100%; /* respects whatever layout width you give it */ } .cell-stdout .stdout-text { - margin: 0; - /* reset pre default margin */ - white-space: pre; - /* keep line breaks, NO wrapping */ - display: inline-block; - /* shrink-to-content */ - min-width: max-content; - /* allow very long lines to define intrinsic width */ + margin: 0; /* reset pre default margin */ + white-space: pre; /* keep line breaks, NO wrapping */ + display: inline-block; /* shrink-to-content */ + min-width: max-content; /* allow very long lines to define intrinsic width */ font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; tab-size: 2; } @@ -954,11 +748,9 @@ transition: background-color 0.2s ease; color: var(--text-error); white-space: pre-wrap; } - .uv-install-logs { margin: 0.5rem 0; } - .uv-logs-header { cursor: pointer; padding: 0.75rem; @@ -968,7 +760,6 @@ transition: background-color 0.2s ease; color: var(--text-secondary); user-select: none; } - 
.uv-logs-content { background: var(--bg-secondary); padding: 1rem; @@ -979,17 +770,14 @@ transition: background-color 0.2s ease; color: var(--text-secondary); overflow-x: auto; } - .cell-artifacts { margin: 1rem 0; } - .cell-artifacts h4 { margin: 0 0 0.5rem 0; color: var(--text-secondary); font-size: 0.9rem; } - .artifact { display: inline-block; background: var(--bg-artifact); @@ -1003,22 +791,18 @@ transition: background-color 0.2s ease; transition: background-color 0.2s ease; border: 1px solid var(--border-primary); } - .artifact:hover { background: var(--bg-artifact-hover); } - .artifact-preview { margin-top: 1rem; } - .artifact-preview img { max-width: 100%; height: auto; border: 1px solid var(--border-primary); border-radius: 1px; } - .artifact-preview svg { max-width: 100%; height: auto; @@ -1026,33 +810,27 @@ transition: background-color 0.2s ease; border-radius: 1px; display: block; } - /* Style SVG text elements */ .artifact-preview svg g { fill: var(--text-primary) !important; } - /* Auto-theme SVG elements */ .artifact-preview svg { background: transparent; } - /* Invert SVG images in dark mode */ :root[data-theme="dark"] .artifact-preview img[src$=".svg"] { filter: invert(0.9) hue-rotate(180deg); } - /* Keep SVG images readable in monocolor mode */ :root[data-ui="monocolor"] .artifact-preview img[src$=".svg"] { filter: none; } - /* CSV table styling */ .artifact-csv { margin-top: 1rem; overflow-x: auto; } - .csv-table { width: 100%; border-collapse: collapse; @@ -1061,24 +839,20 @@ transition: background-color 0.2s ease; border: 1px solid var(--border-primary); border-radius: 1px; } - .csv-table th, .csv-table td { padding: 0.5rem 0.75rem; text-align: left; border: 1px solid var(--border-primary); } - .csv-table th { background: var(--bg-tertiary); font-weight: 600; color: var(--text-primary); } - .csv-table tbody tr:hover { background: var(--bg-artifact-hover); } - .artifact-csv-error { margin-top: 1rem; padding: 1rem; @@ -1087,27 +861,22 @@ 
transition: background-color 0.2s ease; border: 1px solid var(--border-error); border-radius: 1px; } - .cell-failed { border-color: var(--border-cell-failed); } - .cell-failed .cell-header { background: var(--bg-error); color: var(--text-error); } - .cell-commented { opacity: 0.6; border-style: dashed; } - .cell-commented .cell-header { background: var(--bg-secondary); color: var(--text-secondary); font-style: italic; } - .run-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1119,17 +888,14 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .run-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .run-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1141,24 +907,20 @@ transition: background-color 0.2s ease; font-family: inherit; margin-left: 4px; } - .copy-btn:hover { color: var(--text-primary); background: var(--bg-primary); } - .copy-btn:disabled { opacity: 0.6; cursor: not-allowed; } - .copy-btn.copied { color: #4caf50; background: var(--bg-primary); border-color: #4caf50; transition: all 0.2s ease; } - .raw-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1172,13 +934,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .raw-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .github-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1192,13 +952,11 @@ transition: background-color 0.2s ease; text-decoration: none; display: inline-block; } - .github-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .hf-btn { background: var(--bg-tertiary); border: 1px solid var(--border-primary); @@ -1212,18 +970,15 @@ transition: background-color 0.2s ease; text-decoration: none; display: 
inline-block; } - .hf-btn:hover { color: var(--text-primary); background: var(--bg-primary); text-decoration: none; } - .output-stale { opacity: 0.5; position: relative; } - .output-stale::after { content: '⏳ updating...'; position: absolute; @@ -1236,77 +991,41 @@ transition: background-color 0.2s ease; color: var(--text-secondary); border: 1px solid var(--border-primary); } - -h1, -h2, -h3, -h4, -h5, -h6 { +h1, h2, h3, h4, h5, h6 { margin-top: 1.5rem; margin-bottom: 0.75rem; color: var(--text-primary); } - h1 { margin-top: 0; margin-bottom: 1rem; } - p { margin: 0.75rem 0; color: var(--text-primary); } - a { color: var(--text-link); } - img { max-width: 100%; height: auto; border-radius: 1px; box-shadow: none; } - -pre, -code { +pre, code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace; font-size: var(--code-font-size); } - -.code-wrap { - position: relative; -} - -.code-line-highlight { - display: none; - position: absolute; - left: 0; - right: 0; - height: 1.5em; - background: rgba(255, 235, 170, 0.35); - pointer-events: none; - border-left: 3px solid #f4c542; -} - -.line-number { - cursor: pointer; - text-decoration: none; - color: var(--text-secondary); - padding: 0 0.25rem; -} - -.line-number.selected { - background: rgba(255, 235, 170, 0.4); - color: var(--text-primary); -} +.code-wrap { position: relative; } +.code-line-highlight { display: none; position: absolute; left: 0; right: 0; height: 1.5em; background: rgba(255, 235, 170, 0.35); pointer-events: none; border-left: 3px solid #f4c542; } +.line-number { cursor: pointer; text-decoration: none; color: var(--text-secondary); padding: 0 0.25rem; } +.line-number.selected { background: rgba(255, 235, 170, 0.4); color: var(--text-primary); } /* Line numbers */ .highlight-with-lines { display: flex; } - .line-numbers { background: var(--bg-tertiary); padding: var(--code-pad-y) 0.5rem; @@ -1318,21 +1037,14 @@ code { text-align: right; border-right: 
1px solid var(--border-primary); } - .line-numbers .line-number { display: block; line-height: var(--code-line-height); } - .highlight-with-lines .highlight { flex: 1; } - -.highlight .hll { - background-color: transparent; -} - -/* don't conflict with our highlight */ +.highlight .hll { background-color: transparent; } /* don't conflict with our highlight */ .highlight pre { white-space: pre; margin: 0; @@ -1344,37 +1056,177 @@ code { .cell-code.collapsed { display: none; } - .cell-code.expanded { display: block; } - { - % if config.collapse_code % -} - -.cell-code { - display: none; -} - - { - % else % -} - .cell-code { display: block; border-bottom: 1px solid var(--border-primary); } - { - % endif % -} - { - { - pygments_css - } -} +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="light"] .highlight .hll { background-color: #ffffcc } +[data-theme="light"] .highlight { background: #f8f8f8; } +[data-theme="light"] .highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +[data-theme="light"] .highlight .err { border: 1px solid #F00 } /* Error */ +[data-theme="light"] .highlight .k { color: #008000; font-weight: bold } /* Keyword */ +[data-theme="light"] .highlight .o { color: #666 } /* Operator */ +[data-theme="light"] .highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +[data-theme="light"] .highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +[data-theme="light"] .highlight .cp { color: #9C6500 } /* Comment.Preproc */ +[data-theme="light"] .highlight .cpf { color: #3D7B7B; font-style: 
italic } /* Comment.PreprocFile */ +[data-theme="light"] .highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +[data-theme="light"] .highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */ +[data-theme="light"] .highlight .gd { color: #A00000 } /* Generic.Deleted */ +[data-theme="light"] .highlight .ge { font-style: italic } /* Generic.Emph */ +[data-theme="light"] .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="light"] .highlight .gr { color: #E40000 } /* Generic.Error */ +[data-theme="light"] .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +[data-theme="light"] .highlight .gi { color: #008400 } /* Generic.Inserted */ +[data-theme="light"] .highlight .go { color: #717171 } /* Generic.Output */ +[data-theme="light"] .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +[data-theme="light"] .highlight .gs { font-weight: bold } /* Generic.Strong */ +[data-theme="light"] .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +[data-theme="light"] .highlight .gt { color: #04D } /* Generic.Traceback */ +[data-theme="light"] .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +[data-theme="light"] .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +[data-theme="light"] .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +[data-theme="light"] .highlight .kp { color: #008000 } /* Keyword.Pseudo */ +[data-theme="light"] .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +[data-theme="light"] .highlight .kt { color: #B00040 } /* Keyword.Type */ +[data-theme="light"] .highlight .m { color: #666 } /* Literal.Number */ +[data-theme="light"] .highlight .s { color: #BA2121 } /* Literal.String */ +[data-theme="light"] .highlight .na { color: #687822 } /* Name.Attribute */ +[data-theme="light"] .highlight .nb { color: 
#008000 } /* Name.Builtin */ +[data-theme="light"] .highlight .nc { color: #00F; font-weight: bold } /* Name.Class */ +[data-theme="light"] .highlight .no { color: #800 } /* Name.Constant */ +[data-theme="light"] .highlight .nd { color: #A2F } /* Name.Decorator */ +[data-theme="light"] .highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +[data-theme="light"] .highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +[data-theme="light"] .highlight .nf { color: #00F } /* Name.Function */ +[data-theme="light"] .highlight .nl { color: #767600 } /* Name.Label */ +[data-theme="light"] .highlight .nn { color: #00F; font-weight: bold } /* Name.Namespace */ +[data-theme="light"] .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +[data-theme="light"] .highlight .nv { color: #19177C } /* Name.Variable */ +[data-theme="light"] .highlight .ow { color: #A2F; font-weight: bold } /* Operator.Word */ +[data-theme="light"] .highlight .w { color: #BBB } /* Text.Whitespace */ +[data-theme="light"] .highlight .mb { color: #666 } /* Literal.Number.Bin */ +[data-theme="light"] .highlight .mf { color: #666 } /* Literal.Number.Float */ +[data-theme="light"] .highlight .mh { color: #666 } /* Literal.Number.Hex */ +[data-theme="light"] .highlight .mi { color: #666 } /* Literal.Number.Integer */ +[data-theme="light"] .highlight .mo { color: #666 } /* Literal.Number.Oct */ +[data-theme="light"] .highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +[data-theme="light"] .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +[data-theme="light"] .highlight .sc { color: #BA2121 } /* Literal.String.Char */ +[data-theme="light"] .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +[data-theme="light"] .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +[data-theme="light"] .highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +[data-theme="light"] .highlight .se { color: #AA5D1F; 
font-weight: bold } /* Literal.String.Escape */ +[data-theme="light"] .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +[data-theme="light"] .highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +[data-theme="light"] .highlight .sx { color: #008000 } /* Literal.String.Other */ +[data-theme="light"] .highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +[data-theme="light"] .highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +[data-theme="light"] .highlight .ss { color: #19177C } /* Literal.String.Symbol */ +[data-theme="light"] .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +[data-theme="light"] .highlight .fm { color: #00F } /* Name.Function.Magic */ +[data-theme="light"] .highlight .vc { color: #19177C } /* Name.Variable.Class */ +[data-theme="light"] .highlight .vg { color: #19177C } /* Name.Variable.Global */ +[data-theme="light"] .highlight .vi { color: #19177C } /* Name.Variable.Instance */ +[data-theme="light"] .highlight .vm { color: #19177C } /* Name.Variable.Magic */ +[data-theme="light"] .highlight .il { color: #666 } /* Literal.Number.Integer.Long */ + +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +[data-theme="dark"] .highlight .hll { background-color: #49483e } +[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ 
+[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +[data-theme="dark"] .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +[data-theme="dark"] 
.highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +[data-theme="dark"] .highlight .mf { color: #AE81FF } /* 
Literal.Number.Float */ +[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ /* Ensure our code metrics override Pygments defaults */ .highlight pre { 
@@ -1386,76 +1238,23 @@ code { font-family: 'Cascadia Mono', 'Cascadia Code', 'JetBrains Mono', 'SF Mono', Monaco, 'Consolas', monospace !important; border: none; } - -.line-numbers { - line-height: var(--code-line-height) !important; -} - -.line-numbers .line-number { - line-height: var(--code-line-height) !important; -} +.line-numbers { line-height: var(--code-line-height) !important; } +.line-numbers .line-number { line-height: var(--code-line-height) !important; } /* Custom CSS from frontmatter */ - { - { - config.custom_css - } -} - - { - # Override code font size from frontmatter (accept number as px) # -} - - { - % if config.code_font_size is not none % -} - - { - % if config.code_font_size is string % -} - -:root { - --code-font-size: { - { - config.code_font_size - } - } - ; -} - - { - % else % -} -:root { - --code-font-size: { - { - config.code_font_size - } - } - px; -} - { - % endif % -} - - { - % endif % -} /* Cursor for tools */ -body[data-tool="arrow"] .main-content { +body[data-tool="arrow"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, crosshair; } - -body[data-tool="pen"] .main-content { +body[data-tool="pen"] .main-content { cursor: url('data:image/svg+xml;utf8,') 4 20, pointer; } - -body[data-tool="eraser"] .main-content { +body[data-tool="eraser"] .main-content { cursor: url('data:image/svg+xml;utf8,') 12 12, auto; } @@ -1468,14 +1267,12 @@ body[data-tool="eraser"] .main-content { text-transform: uppercase; letter-spacing: 0.5px; } - .color-row { display: grid; grid-template-columns: repeat(6, 1fr); gap: 0.25rem; margin-bottom: 0.5rem; } - .color-swatch { width: 18px; height: 18px; @@ -1485,17 +1282,14 @@ body[data-tool="eraser"] .main-content { transition: all 0.2s ease; position: relative; } - .color-swatch:hover { transform: scale(1.1); border-color: var(--text-secondary); } - .color-swatch.selected { border-color: var(--text-primary); box-shadow: 0 0 0 2px var(--text-link); } - .color-swatch.selected::after { content: '✓'; 
position: absolute; @@ -1507,7 +1301,6 @@ body[data-tool="eraser"] .main-content { font-weight: bold; text-shadow: 1px 1px 1px black; } - .color-input { width: 24px; height: 24px; @@ -1519,7 +1312,6 @@ body[data-tool="eraser"] .main-content { grid-column: span 2; justify-self: center; } - .color-input:hover { border-color: var(--text-secondary); } @@ -1531,7 +1323,6 @@ body[data-tool="eraser"] .main-content { gap: 0.5rem; margin-top: 0.75rem; } - .thickness-slider { flex: 1; -webkit-appearance: none; @@ -1543,11 +1334,9 @@ body[data-tool="eraser"] .main-content { opacity: 0.7; transition: opacity 0.2s; } - .thickness-slider:hover { opacity: 1; } - .thickness-slider::-webkit-slider-thumb { -webkit-appearance: none; appearance: none; @@ -1557,7 +1346,6 @@ body[data-tool="eraser"] .main-content { border-radius: 50%; cursor: pointer; } - .thickness-slider::-moz-range-thumb { width: 12px; height: 12px; @@ -1566,7 +1354,6 @@ body[data-tool="eraser"] .main-content { cursor: pointer; border: none; } - .thickness-value { font-size: 0.7rem; color: var(--text-secondary); @@ -1592,18 +1379,18 @@ body[data-tool="eraser"] .main-content { } @keyframes spin { - to { - transform: rotate(360deg); - } + to { transform: rotate(360deg); } } .loading-skeleton { display: inline-block; background: var(--bg-tertiary); - background: linear-gradient(90deg, - var(--bg-tertiary) 25%, - var(--bg-secondary) 50%, - var(--bg-tertiary) 75%); + background: linear-gradient( + 90deg, + var(--bg-tertiary) 25%, + var(--bg-secondary) 50%, + var(--bg-tertiary) 75% + ); background-size: 200% 100%; animation: loading-shimmer 2s ease-in-out infinite; border-radius: 2px; @@ -1613,13 +1400,8 @@ body[data-tool="eraser"] .main-content { } @keyframes loading-shimmer { - 0% { - background-position: -200% 0; - } - - 100% { - background-position: 200% 0; - } + 0% { background-position: -200% 0; } + 100% { background-position: 200% 0; } } /* Loading state for cell output */ @@ -4092,7 +3874,7 @@ 
body[data-tool="eraser"] .main-content {
Generated on:
- Linux x86_64 | Linux-5.10.244-240.970.amzn2.x86_64-x86_64-with-glibc2.35 + Linux x86_64 | Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35
@@ -4107,7 +3889,7 @@ body[data-tool="eraser"] .main-content { - 2025-10-31T20:14:10.200761 + 2025-11-10T22:11:51.846305 image/svg+xml @@ -4451,109 +4233,109 @@ body[data-tool="eraser"] .main-content { - + - + - 0.1 + 0.1 - + - + - 0.2 + 0.2 - + - + - 0.3 + 0.3 - + - + - 0.4 + 0.4 - + - + - 0.5 + 0.5 - + - + - 0.6 + 0.6 - + - + - 0.7 + 0.7 - + - + - 0.8 + 0.8 @@ -4561,67 +4343,67 @@ body[data-tool="eraser"] .main-content { - + - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + - + - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + @@ -4679,7 +4461,7 @@ body[data-tool="eraser"] .main-content { ▼ output ▶ uv-logs | -Cell: combine | 4.46s +Cell: combine | 4.57s | Raw @@ -4769,10 +4551,10 @@ impl wl p50(ms) ok hf_kernels_rotary cuda_B1_S128_H32_D128_R64 0.09 True hf_kernels_rotary cuda_B1_S128_H32_D64_R32 0.09 True hf_kernels_rotary cuda_B1_S128_H8_D128_R64 0.09 True -hf_kernels_rotary cuda_B1_S128_H8_D64_R32 0.08 True +hf_kernels_rotary cuda_B1_S128_H8_D64_R32 0.07 True hf_kernels_rotary cuda_B1_S2048_H32_D128_R64 0.26 True -hf_kernels_rotary cuda_B1_S2048_H32_D64_R32 0.10 True -hf_kernels_rotary cuda_B1_S2048_H8_D128_R64 0.10 True +hf_kernels_rotary cuda_B1_S2048_H32_D64_R32 0.09 True +hf_kernels_rotary cuda_B1_S2048_H8_D128_R64 0.09 True hf_kernels_rotary cuda_B1_S2048_H8_D64_R32 0.09 True hf_kernels_rotary cuda_B1_S512_H32_D128_R64 0.09 True hf_kernels_rotary cuda_B1_S512_H32_D64_R32 0.09 True @@ -4783,37 +4565,37 @@ hf_kernels_rotary cuda_B2_S128_H32_D64_R32 0.09 True hf_kernels_rotary cuda_B2_S128_H8_D128_R64 0.09 True hf_kernels_rotary cuda_B2_S128_H8_D64_R32 0.09 True hf_kernels_rotary cuda_B2_S2048_H32_D128_R64 0.85 True -hf_kernels_rotary cuda_B2_S2048_H32_D64_R32 0.27 True +hf_kernels_rotary cuda_B2_S2048_H32_D64_R32 0.26 True hf_kernels_rotary cuda_B2_S2048_H8_D128_R64 0.09 True hf_kernels_rotary cuda_B2_S2048_H8_D64_R32 0.09 True hf_kernels_rotary cuda_B2_S512_H32_D128_R64 
0.09 True hf_kernels_rotary cuda_B2_S512_H32_D64_R32 0.09 True hf_kernels_rotary cuda_B2_S512_H8_D128_R64 0.09 True hf_kernels_rotary cuda_B2_S512_H8_D64_R32 0.09 True -torch_eager cuda_B1_S128_H32_D128_R64 0.21 True -torch_eager cuda_B1_S128_H32_D64_R32 0.22 True -torch_eager cuda_B1_S128_H8_D128_R64 0.22 True -torch_eager cuda_B1_S128_H8_D64_R32 0.17 True -torch_eager cuda_B1_S2048_H32_D128_R64 0.22 True -torch_eager cuda_B1_S2048_H32_D64_R32 0.21 True -torch_eager cuda_B1_S2048_H8_D128_R64 0.21 True +torch_eager cuda_B1_S128_H32_D128_R64 0.22 True +torch_eager cuda_B1_S128_H32_D64_R32 0.23 True +torch_eager cuda_B1_S128_H8_D128_R64 0.23 True +torch_eager cuda_B1_S128_H8_D64_R32 0.18 True +torch_eager cuda_B1_S2048_H32_D128_R64 0.23 True +torch_eager cuda_B1_S2048_H32_D64_R32 0.22 True +torch_eager cuda_B1_S2048_H8_D128_R64 0.22 True torch_eager cuda_B1_S2048_H8_D64_R32 0.22 True torch_eager cuda_B1_S512_H32_D128_R64 0.22 True -torch_eager cuda_B1_S512_H32_D64_R32 0.21 True -torch_eager cuda_B1_S512_H8_D128_R64 0.21 True -torch_eager cuda_B1_S512_H8_D64_R32 0.21 True -torch_eager cuda_B2_S128_H32_D128_R64 0.21 True +torch_eager cuda_B1_S512_H32_D64_R32 0.22 True +torch_eager cuda_B1_S512_H8_D128_R64 0.23 True +torch_eager cuda_B1_S512_H8_D64_R32 0.23 True +torch_eager cuda_B2_S128_H32_D128_R64 0.22 True torch_eager cuda_B2_S128_H32_D64_R32 0.22 True -torch_eager cuda_B2_S128_H8_D128_R64 0.21 True +torch_eager cuda_B2_S128_H8_D128_R64 0.22 True torch_eager cuda_B2_S128_H8_D64_R32 0.22 True -torch_eager cuda_B2_S2048_H32_D128_R64 0.64 True +torch_eager cuda_B2_S2048_H32_D128_R64 0.65 True torch_eager cuda_B2_S2048_H32_D64_R32 0.23 True torch_eager cuda_B2_S2048_H8_D128_R64 0.22 True torch_eager cuda_B2_S2048_H8_D64_R32 0.22 True torch_eager cuda_B2_S512_H32_D128_R64 0.22 True -torch_eager cuda_B2_S512_H32_D64_R32 0.22 True -torch_eager cuda_B2_S512_H8_D128_R64 0.21 True -torch_eager cuda_B2_S512_H8_D64_R32 0.21 True +torch_eager cuda_B2_S512_H32_D64_R32 0.23 True 
+torch_eager cuda_B2_S512_H8_D128_R64 0.22 True +torch_eager cuda_B2_S512_H8_D64_R32 0.23 True GENERATING COMBINED VISUALIZATION @@ -4833,7 +4615,7 @@ Implementations included:
▶ UV Install Logs
@@ -4846,7 +4628,7 @@ Installed 37 packages in 229ms - 2025-10-31T20:14:10.200761 + 2025-11-10T22:11:51.846305 image/svg+xml @@ -5190,109 +4972,109 @@ Installed 37 packages in 229ms - + - + - 0.1 + 0.1 - + - + - 0.2 + 0.2 - + - + - 0.3 + 0.3 - + - + - 0.4 + 0.4 - + - + - 0.5 + 0.5 - + - + - 0.6 + 0.6 - + - + - 0.7 + 0.7 - + - + - 0.8 + 0.8 @@ -5300,67 +5082,67 @@ Installed 37 packages in 229ms - + - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + - + - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + +