File size: 8,626 Bytes
d8c3a70
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
{"ts": "2025-11-10T21:59:28Z", "run": "1939dc0ee47a4164bf38304335c67bc8", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B1_S512_E2", "batch": 1, "seq_len": 512, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 2, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 153.23935800000754, "p50": 154.66906200003905, "p90": 155.3045599999905, "mean": 154.4065966000062, "iqr": 1.9825210000021798, "raw_times": [154.66906200003905, 153.23935800000754, 153.3220389999883, 155.3045599999905, 155.4979640000056], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 150.5313740000247, "peak_bytes": 416866816, "ok": true, "absmax": 2.765655517578125e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 2.765655517578125e-05, "mae": 2.0696452338597737e-06, "mse": 7.332408985538663e-12, "ref": "naive_moe"}, "err": null}
{"ts": "2025-11-10T21:59:51Z", "run": "1939dc0ee47a4164bf38304335c67bc8", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B1_S512_E4", "batch": 1, "seq_len": 512, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 4, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 200.93769199996814, "p50": 201.49722299998984, "p90": 202.5282779999884, "mean": 202.0041708000008, "iqr": 1.4469799999687893, "raw_times": [201.08129800001961, 203.97636300003796, 200.93769199996814, 201.49722299998984, 202.5282779999884], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 201.3829520000172, "peak_bytes": 632035840, "ok": true, "absmax": 1.621246337890625e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 1.621246337890625e-05, "mae": 9.61917862696282e-07, "mse": 1.59423277530657e-12, "ref": "naive_moe"}, "err": null}
{"ts": "2025-11-10T22:00:35Z", "run": "1939dc0ee47a4164bf38304335c67bc8", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B1_S1024_E2", "batch": 1, "seq_len": 1024, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 2, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 367.22704099997827, "p50": 367.62146799998163, "p90": 367.7445199999738, "mean": 367.9209119999882, "iqr": 0.4843269999810218, "raw_times": [369.7513380000146, 367.2601929999928, 367.62146799998163, 367.7445199999738, 367.22704099997827], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 368.07921899998064, "peak_bytes": 643844608, "ok": true, "absmax": 2.6226043701171875e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 2.6226043701171875e-05, "mae": 2.0501920516835526e-06, "mse": 7.1848811622476916e-12, "ref": "naive_moe"}, "err": null}
{"ts": "2025-11-10T22:01:22Z", "run": "1939dc0ee47a4164bf38304335c67bc8", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B1_S1024_E4", "batch": 1, "seq_len": 1024, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 4, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 392.9537719999985, "p50": 394.19261099999403, "p90": 394.552635000025, "mean": 394.1458786000112, "iqr": 1.3762300000053074, "raw_times": [393.1764050000197, 394.19261099999403, 392.9537719999985, 394.552635000025, 395.85397000001876], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 399.8835020000229, "peak_bytes": 823386112, "ok": true, "absmax": 1.3589859008789062e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 1.3589859008789062e-05, "mae": 9.400179123986163e-07, "mse": 1.5130355735665235e-12, "ref": "naive_moe"}, "err": null}
{"ts": "2025-11-10T22:02:51Z", "run": "1939dc0ee47a4164bf38304335c67bc8", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_S512_E2", "batch": 4, "seq_len": 512, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 2, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 734.9415649999855, "p50": 736.2597970000024, "p90": 736.4179590000504, "mean": 736.8552042000147, "iqr": 0.5320090000395794, "raw_times": [735.8859500000108, 736.4179590000504, 734.9415649999855, 736.2597970000024, 740.7707500000242], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 738.5199589999729, "peak_bytes": 1036112384, "ok": true, "absmax": 3.2901763916015625e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 3.2901763916015625e-05, "mae": 2.0572656467265915e-06, "mse": 7.247809123700488e-12, "ref": "naive_moe"}, "err": null}
{"ts": "2025-11-10T22:04:32Z", "run": "1939dc0ee47a4164bf38304335c67bc8", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_S512_E4", "batch": 4, "seq_len": 512, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 4, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 795.7670519999738, "p50": 798.8816239999323, "p90": 799.2389810000304, "mean": 798.3748011999751, "iqr": 0.5543240000633887, "raw_times": [798.684656999967, 798.8816239999323, 799.3016919999718, 799.2389810000304, 795.7670519999738], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 796.8497300000763, "peak_bytes": 1235263488, "ok": true, "absmax": 1.430511474609375e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 1.430511474609375e-05, "mae": 9.400343401466671e-07, "mse": 1.5107844445957919e-12, "ref": "naive_moe"}, "err": null}
{"ts": "2025-11-10T22:07:29Z", "run": "1939dc0ee47a4164bf38304335c67bc8", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_S1024_E2", "batch": 4, "seq_len": 1024, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 2, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1474.9918590000561, "p50": 1483.5365430000138, "p90": 1484.378332999995, "mean": 1483.3181600000216, "iqr": 3.7910559999545512, "raw_times": [1480.5872770000406, 1474.9918590000561, 1484.378332999995, 1483.5365430000138, 1493.0967880000026], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1492.4540110000635, "peak_bytes": 1861947904, "ok": true, "absmax": 2.6226043701171875e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 2.6226043701171875e-05, "mae": 2.060702854578267e-06, "mse": 7.262949790198814e-12, "ref": "naive_moe"}, "err": null}
{"ts": "2025-11-10T22:10:52Z", "run": "1939dc0ee47a4164bf38304335c67bc8", "impl": "binned_torch", "tags": {"family": "pytorch", "backend": "eager"}, "wl": {"name": "cuda_B4_S1024_E4", "batch": 4, "seq_len": 1024, "hidden_dim": 2880, "expert_dim": 5760, "num_experts": 4, "top_k": 2, "dtype": "float32", "device": "cuda"}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L40S", "sm": "8.9", "py": "3.11.14", "plat": "Linux-6.12.53-69.119.amzn2023.x86_64-x86_64-with-glibc2.35"}, "lat_ms": {"p10": 1594.949616000008, "p50": 1601.9022579999955, "p90": 1602.6959760000636, "mean": 1600.7068320000144, "iqr": 2.6664300000902585, "raw_times": [1601.9022579999955, 1600.0295459999734, 1594.949616000008, 1602.6959760000636, 1603.9567640000314], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 1575.0532499999963, "peak_bytes": 2062163968, "ok": true, "absmax": 1.5974044799804688e-05, "corr": {"ok": true, "rtol": 0.01, "atol": 0.01, "absmax": 1.5974044799804688e-05, "mae": 9.529014732834185e-07, "mse": 1.5621694476192216e-12, "ref": "naive_moe"}, "err": null}