4 changes: 2 additions & 2 deletions problems/amd/eval.py
@@ -349,10 +349,10 @@ def main():
     with mp_context.Pool(1) as pool:
         if mode == "test":
             return run_testing(logger, pool, tests)
-        if mode == "benchmark":
+        if mode == "private":
             return run_benchmarking(logger, pool, tests)

-        if mode == "leaderboard":
+        if mode == "public":
             # warmup
             run_single_benchmark(pool, tests[0], False, 100, 1e7)
             logger.log("benchmark-count", len(tests))
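Every harness in this change applies the same rename: the "benchmark" mode becomes "private" and the "leaderboard" mode becomes "public", while the dispatch logic around the strings stays untouched. Below is a minimal, self-contained sketch of the resulting mode dispatch; the stub functions and the command-line handling are illustrative assumptions, not the repository's actual run_testing / run_benchmarking code.

# Hedged sketch of the renamed mode dispatch (assumed CLI handling and stub
# helpers; not the repository's real eval.py code).
import sys


def run_testing():
    print("running correctness tests")


def run_benchmarking():
    print("running private benchmark suite")


def run_public_benchmarks():
    print("running public leaderboard benchmarks")


def main(mode: str) -> None:
    if mode == "test":
        return run_testing()
    if mode == "private":        # was "benchmark" before this change
        return run_benchmarking()
    if mode == "public":         # was "leaderboard" before this change
        return run_public_benchmarks()
    raise ValueError(f"unknown mode: {mode!r}")


if __name__ == "__main__":
    main(sys.argv[1] if len(sys.argv) > 1 else "test")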
6 changes: 3 additions & 3 deletions problems/amd/mla-decode/eval.py
@@ -294,10 +294,10 @@ def main():
     if mode == "test":
         return run_testing(logger, tests)
-
-    if mode == "benchmark":
+    if mode == "private":
         return run_benchmarking(logger, tests)
-    if mode == "leaderboard":
+
+    if mode == "public":
         warm_up(tests[0])
         result = benchmark(tests[-1], True, 100, 30e9)
         if isinstance(result, Stats):
4 changes: 2 additions & 2 deletions problems/amd_distributed/eval.py
@@ -546,10 +546,10 @@ def main():
     with mp_context.Pool(n_gpus) as pool:
         if mode == "test":
             return run_testing(logger, pool, tests)
-        if mode == "benchmark":
+        if mode == "private":
             return run_benchmarking(logger, pool, tests)

-        if mode == "leaderboard":
+        if mode == "public":
             # warmup
             run_single_benchmark(pool, tests[0], False, 100, 1e7)
             logger.log("benchmark-count", len(tests))
4 changes: 2 additions & 2 deletions problems/bioml/trimul/eval.py
@@ -352,10 +352,10 @@ def main():
     with mp_context.Pool(1) as pool:
         if mode == "test":
             return run_testing(logger, pool, tests)
-        if mode == "benchmark":
+        if mode == "private":
             return run_benchmarking(logger, pool, tests)

-        if mode == "leaderboard":
+        if mode == "public":
             # warmup
             run_single_benchmark(pool, tests[0], False, 100, 1e7)
             logger.log("benchmark-count", len(tests))
4 changes: 2 additions & 2 deletions problems/nvidia/eval.py
@@ -449,10 +449,10 @@ def main():
     with mp_context.Pool(1) as pool:
         if mode == "test":
             return run_testing(logger, pool, tests)
-        if mode == "benchmark":
+        if mode == "private":
             return run_benchmarking(logger, pool, tests)

-        if mode == "leaderboard":
+        if mode == "public":
             run_single_benchmark(pool, tests[0], False, 1000, 5e8)
             logger.log("benchmark-count", len(tests))
             passed = True
4 changes: 2 additions & 2 deletions problems/nvidia/eval_better_bench.py
@@ -472,10 +472,10 @@ def main():
     with mp_context.Pool(1, initializer=_init_worker) as pool:
         if mode == "test":
             return run_testing(logger, pool, tests)
-        if mode == "benchmark":
+        if mode == "private":
             return run_benchmarking(logger, pool, tests)

-        if mode == "leaderboard":
+        if mode == "public":
             # Warmup all test shapes to ensure consistent benchmarking
             for test in tests:
                 run_single_benchmark(pool, test, False, 1000, 5e8)
4 changes: 2 additions & 2 deletions problems/nvidia/eval_better_bench_grouped_gemm.py
@@ -491,10 +491,10 @@ def main():
     with mp_context.Pool(1, initializer=_init_worker) as pool:
         if mode == "test":
             return run_testing(logger, pool, tests)
-        if mode == "benchmark":
+        if mode == "private":
             return run_benchmarking(logger, pool, tests)

-        if mode == "leaderboard":
+        if mode == "public":
             # Warmup all test shapes to ensure consistent benchmarking
             for test in tests:
                 run_single_benchmark(pool, test, False, 50, 5e8)
4 changes: 2 additions & 2 deletions problems/nvidia/nvfp4_gemm/eval.py
@@ -452,10 +452,10 @@ def build_test_string(tests: list[dict]):
     with mp_context.Pool(1) as pool:
         if mode == "test":
             return run_testing(logger, pool, tests)
-        if mode == "benchmark":
+        if mode == "private":
             return run_benchmarking(logger, pool, tests)

-        if mode == "leaderboard":
+        if mode == "public":
             # Step 1: Compile kernel once (outside of timing)
             logger.log("compile", "start")
             compile_success, compile_error = pool.apply(_compile_kernel_once)
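The nvfp4_gemm harness differs slightly from the others: in the "public" branch it compiles the kernel once in the worker process, outside of any timing, and only then benchmarks. Below is a minimal, self-contained sketch of that compile-once-then-measure pattern; _compile_kernel_once and _timed_run are stand-ins for the repository's real helpers, and the sizes are arbitrary.

# Hedged sketch of the compile-once pattern: the expensive build step runs a
# single time in the worker via pool.apply(), and only already-compiled work
# is timed afterwards. The helpers below are stubs, not the real eval.py code.
import multiprocessing as mp
import time


def _compile_kernel_once():
    time.sleep(0.5)            # stands in for an expensive compilation step
    return True, None          # (success, error message)


def _timed_run(size):
    start = time.perf_counter()
    sum(range(size))           # stands in for launching the compiled kernel
    return time.perf_counter() - start


def main():
    ctx = mp.get_context("spawn")
    with ctx.Pool(1) as pool:
        ok, err = pool.apply(_compile_kernel_once)
        if not ok:
            raise RuntimeError(f"compilation failed: {err}")
        # Only the runs below are measured; compilation stays out of the timing.
        for size in (10_000, 100_000):
            print(size, pool.apply(_timed_run, (size,)))


if __name__ == "__main__":
    main()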
4 changes: 2 additions & 2 deletions problems/nvidia/nvfp4_group_gemm/eval.py
@@ -392,10 +392,10 @@ def main():
     with mp_context.Pool(1) as pool:
         if mode == "test":
             return run_testing(logger, pool, tests)
-        if mode == "benchmark":
+        if mode == "private":
             return run_benchmarking(logger, pool, tests)

-        if mode == "leaderboard":
+        if mode == "public":
             # warmup
             run_single_benchmark(pool, tests[0], False, 100, 1e7)
             logger.log("benchmark-count", len(tests))
6 changes: 3 additions & 3 deletions problems/pmpp/eval.py
@@ -246,10 +246,10 @@ def main():
     if mode == "test":
         return run_testing(logger, tests)
-
-    if mode == "benchmark":
+    if mode == "private":
         return run_benchmarking(logger, tests)
-    if mode == "leaderboard":
+
+    if mode == "public":
         warm_up(tests[0])
         result = benchmark(tests[-1], True, 100, 30e9)
         if isinstance(result, Stats):
4 changes: 2 additions & 2 deletions problems/pmpp_v2/eval.py
@@ -343,10 +343,10 @@ def main():
     with mp_context.Pool(1) as pool:
         if mode == "test":
             return run_testing(logger, pool, tests)
-        if mode == "benchmark":
+        if mode == "private":
             return run_benchmarking(logger, pool, tests)

-        if mode == "leaderboard":
+        if mode == "public":
             # warmup
             run_single_benchmark(pool, tests[0], False, 100, 1e7)
             logger.log("benchmark-count", len(tests))