From 58e333c74dc1f591af1e1bfab4203499a598abcb Mon Sep 17 00:00:00 2001
From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com>
Date: Thu, 26 Feb 2026 16:03:13 +0000
Subject: [PATCH 1/2] Update NVIDIA single-node DSR1 SGLang images from
 v0.5.6-v0.5.8 to v0.5.9
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- dsr1-fp4-b200-sglang: v0.5.6-cu129-amd64 → v0.5.9-cu129-amd64
- dsr1-fp8-b200-sglang: v0.5.6-cu129-amd64 → v0.5.9-cu129-amd64
- dsr1-fp8-b200-sglang-mtp: v0.5.8-cu130-amd64 → v0.5.9-cu130-amd64
- dsr1-fp8-h200-sglang: v0.5.7-cu129-amd64 → v0.5.9-cu129-amd64
- Fix deprecated SGL_ENABLE_JIT_DEEPGEMM → SGLANG_ENABLE_JIT_DEEPGEMM
  in dsr1_fp8_b200.sh
- Update perf-changelog.yaml with change details

Closes #813

Co-authored-by: Cameron Quilici <cquil11@users.noreply.github.com>
---
 .github/configs/nvidia-master.yaml      |  8 ++++----
 benchmarks/single_node/dsr1_fp8_b200.sh |  2 +-
 perf-changelog.yaml                     | 17 ++++++++++++++++-
 3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index b4084e5e4..178694a78 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -1650,7 +1650,7 @@ dsr1-fp8-b300-dynamo-trt:
         dp-attn: true
 
 dsr1-fp4-b200-sglang:
-  image: lmsysorg/sglang:v0.5.6-cu129-amd64
+  image: lmsysorg/sglang:v0.5.9-cu129-amd64
   model: nvidia/DeepSeek-R1-0528-FP4-V2
   model-prefix: dsr1
   runner: b200
@@ -1760,7 +1760,7 @@ dsr1-fp4-b200-trt-mtp:
     - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 256, spec-decoding: mtp }
 
 dsr1-fp8-b200-sglang:
-  image: lmsysorg/sglang:v0.5.6-cu129-amd64
+  image: lmsysorg/sglang:v0.5.9-cu129-amd64
   model: deepseek-ai/DeepSeek-R1-0528
   model-prefix: dsr1
   runner: b200
@@ -1827,7 +1827,7 @@ kimik2.5-int4-b200-vllm:
     - { tp: 8, conc-start: 4, conc-end: 64 }
 
 dsr1-fp8-b200-sglang-mtp:
-  image: lmsysorg/sglang:v0.5.8-cu130-amd64
+  image: lmsysorg/sglang:v0.5.9-cu130-amd64
   model: deepseek-ai/DeepSeek-R1-0528
   model-prefix: dsr1
   runner: b200
@@ -1906,7 +1906,7 @@ dsr1-fp8-b200-trt-mtp:
     - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
 
 dsr1-fp8-h200-sglang:
-  image: lmsysorg/sglang:v0.5.7-cu129-amd64
+  image: lmsysorg/sglang:v0.5.9-cu129-amd64
   model: deepseek-ai/DeepSeek-R1-0528
   model-prefix: dsr1
   runner: h200
diff --git a/benchmarks/single_node/dsr1_fp8_b200.sh b/benchmarks/single_node/dsr1_fp8_b200.sh
index 015f45f30..5d1088b5e 100644
--- a/benchmarks/single_node/dsr1_fp8_b200.sh
+++ b/benchmarks/single_node/dsr1_fp8_b200.sh
@@ -20,7 +20,7 @@ nvidia-smi
 
 hf download "$MODEL"
 
-export SGL_ENABLE_JIT_DEEPGEMM=false
+export SGLANG_ENABLE_JIT_DEEPGEMM=false
 export SGLANG_ENABLE_FLASHINFER_GEMM=true
 SERVER_LOG=/workspace/server.log
 PORT=${PORT:-8888}
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 9c4c9e438..731ea5f61 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -733,4 +733,19 @@
     - "Extend concurrency range to conc-end: 256 across all sequence lengths (1k1k, 1k8k, 8k1k)"
     - "Fix MTP 1k8k conc-start from 256 to 4 to enable full concurrency sweep"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/699
-  
+
+- config-keys:
+    - dsr1-fp4-b200-sglang
+    - dsr1-fp8-b200-sglang
+    - dsr1-fp8-b200-sglang-mtp
+    - dsr1-fp8-h200-sglang
+  description:
+    - "Update SGLang image to v0.5.9 for NVIDIA single-node DeepSeek R1 configs"
+    - "dsr1-fp4-b200-sglang: v0.5.6-cu129-amd64 → v0.5.9-cu129-amd64"
+    - "dsr1-fp8-b200-sglang: v0.5.6-cu129-amd64 → v0.5.9-cu129-amd64"
+    - "dsr1-fp8-b200-sglang-mtp: v0.5.8-cu130-amd64 → v0.5.9-cu130-amd64"
+    - "dsr1-fp8-h200-sglang: v0.5.7-cu129-amd64 → v0.5.9-cu129-amd64"
+    - "Fix deprecated SGL_ENABLE_JIT_DEEPGEMM → SGLANG_ENABLE_JIT_DEEPGEMM in dsr1_fp8_b200.sh"
+    - "SGLang 0.5.9 gains: Flashinfer 0.6.3, TRT-LLM NSA kernels for Blackwell, SpecV2 GC bug fix, MoE fused kernel optimizations"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/XXX
+

From 150f85de1e0b65c94a0b4e4266511765125fc9a7 Mon Sep 17 00:00:00 2001
From: Cameron Quilici <cjquilici@gmail.com>
Date: Thu, 26 Feb 2026 10:07:00 -0600
Subject: [PATCH 2/2] Set perf-changelog.yaml pr-link to PR #814

---
 perf-changelog.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 731ea5f61..9feb2499f 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -747,5 +747,5 @@
     - "dsr1-fp8-h200-sglang: v0.5.7-cu129-amd64 → v0.5.9-cu129-amd64"
     - "Fix deprecated SGL_ENABLE_JIT_DEEPGEMM → SGLANG_ENABLE_JIT_DEEPGEMM in dsr1_fp8_b200.sh"
     - "SGLang 0.5.9 gains: Flashinfer 0.6.3, TRT-LLM NSA kernels for Blackwell, SpecV2 GC bug fix, MoE fused kernel optimizations"
-  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/XXX
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/814