From 61c5e7f20b65705921d9b50a68a68c86540b2c1e Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Thu, 19 Feb 2026 14:22:56 -0800 Subject: [PATCH 1/4] init --- perf-changelog.yaml | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index e169bb419..eae61bff5 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -671,3 +671,42 @@ - "Environment: VLLM_ROCM_USE_AITER=1" - "TP=2 and TP=4, concurrency 4-64 for 1k1k, 1k8k, and 8k1k sequence lengths" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/755 + +- config-keys: + # NVIDIA single-node + - dsr1-fp4-b200-sglang + - dsr1-fp4-b200-trt + - dsr1-fp4-b200-trt-mtp + - dsr1-fp8-b200-sglang + - dsr1-fp8-b200-sglang-mtp + - dsr1-fp8-b200-trt + - dsr1-fp8-b200-trt-mtp + - dsr1-fp8-h200-sglang + - dsr1-fp8-h200-trt + - dsr1-fp8-h200-trt-mtp + - gptoss-fp4-b200-trt + - gptoss-fp4-b200-vllm + - gptoss-fp4-h100-vllm + - gptoss-fp4-h200-trt + - gptoss-fp4-h200-vllm + - qwen3.5-bf16-b200-sglang + # AMD single-node + - dsr1-fp4-mi355x-sglang + - dsr1-fp4-mi355x-atom + - dsr1-fp4-mi355x-atom-mtp + - dsr1-fp8-mi300x-sglang + - dsr1-fp8-mi325x-sglang + - dsr1-fp8-mi355x-sglang + - dsr1-fp8-mi355x-atom + - dsr1-fp8-mi355x-atom-mtp + - gptoss-fp4-mi300x-vllm + - gptoss-fp4-mi325x-vllm + - gptoss-fp4-mi355x-vllm + - gptoss-fp4-mi355x-atom + - kimik2.5-int4-mi355x-vllm + - minimaxm2.5-fp8-mi355x-vllm + - qwen3.5-bf16-mi355x-sglang + description: + - Rerun GSM8k eval results to all single-node scenarios + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/763 + evals-only: true From 173c687bc3449cc3a0354fc05900b95abfd466b3 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Sun, 1 Mar 2026 15:04:38 -0800 Subject: [PATCH 2/4] newline --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index bf80ab1a4..b0f77bc2b 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -767,4 +767,4 @@ description: - GSM8k eval results for minimax h200 pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/838 - evals-only: true \ No newline at end of file + evals-only: true From 24d641f578b6898b0443f3652668b626d6fd937b Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Sun, 1 Mar 2026 16:23:25 -0800 Subject: [PATCH 3/4] tmp fallback --- benchmarks/benchmark_lib.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmarks/benchmark_lib.sh b/benchmarks/benchmark_lib.sh index 84213b00f..36489fe5a 100644 --- a/benchmarks/benchmark_lib.sh +++ b/benchmarks/benchmark_lib.sh @@ -470,6 +470,8 @@ def _le_parse_generations(outputs, **kwargs): tmp[idx] = content except Exception: tmp = [""] + if not tmp: + tmp = [""] res.extend(tmp) return res From d8ae4d9ec076d5b4764533dc843a5ba738591f28 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Sun, 1 Mar 2026 17:27:45 -0800 Subject: [PATCH 4/4] lower conc --- benchmarks/single_node/minimaxm2.5_fp8_h200.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/single_node/minimaxm2.5_fp8_h200.sh b/benchmarks/single_node/minimaxm2.5_fp8_h200.sh index 9c8f1b271..34834c503 100755 --- a/benchmarks/single_node/minimaxm2.5_fp8_h200.sh +++ b/benchmarks/single_node/minimaxm2.5_fp8_h200.sh @@ -49,7 +49,7 @@ run_benchmark_serving \ # After throughput, run evaluation only if RUN_EVAL is true if [ "${RUN_EVAL}" = "true" ]; then - run_eval --framework lm-eval --port "$PORT" --concurrent-requests $CONC + run_eval --framework lm-eval --port "$PORT" --concurrent-requests 8 append_lm_eval_summary fi set +x