diff --git a/benchmarks/benchmark_lib.sh b/benchmarks/benchmark_lib.sh index 84213b00f..36489fe5a 100644 --- a/benchmarks/benchmark_lib.sh +++ b/benchmarks/benchmark_lib.sh @@ -470,6 +470,8 @@ def _le_parse_generations(outputs, **kwargs): tmp[idx] = content except Exception: tmp = [""] + if not tmp: + tmp = [""] res.extend(tmp) return res diff --git a/benchmarks/single_node/minimaxm2.5_fp8_h200.sh b/benchmarks/single_node/minimaxm2.5_fp8_h200.sh index 9c8f1b271..34834c503 100755 --- a/benchmarks/single_node/minimaxm2.5_fp8_h200.sh +++ b/benchmarks/single_node/minimaxm2.5_fp8_h200.sh @@ -49,7 +49,7 @@ run_benchmark_serving \ # After throughput, run evaluation only if RUN_EVAL is true if [ "${RUN_EVAL}" = "true" ]; then - run_eval --framework lm-eval --port "$PORT" --concurrent-requests $CONC + run_eval --framework lm-eval --port "$PORT" --concurrent-requests 8 append_lm_eval_summary fi set +x diff --git a/perf-changelog.yaml b/perf-changelog.yaml index da2c82b7e..d73149c97 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -790,3 +790,10 @@ - "Image: vllm/vllm-openai-rocm:v0.15.1" - "TP=8, concurrency 4-64 for 1k1k, 1k8k, and 8k1k sequence lengths" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/825 + +- config-keys: + - minimaxm2.5-fp8-h200-vllm + description: + - GSM8k eval results for MiniMax M2.5 FP8 on H200 + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/838 + evals-only: true