Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/configs/nvidia-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1650,7 +1650,7 @@ dsr1-fp8-b300-dynamo-trt:
dp-attn: true

dsr1-fp4-b200-sglang:
image: lmsysorg/sglang:v0.5.6-cu129-amd64
image: lmsysorg/sglang:v0.5.9-cu129-amd64
model: nvidia/DeepSeek-R1-0528-FP4-V2
model-prefix: dsr1
runner: b200
Expand Down Expand Up @@ -1760,7 +1760,7 @@ dsr1-fp4-b200-trt-mtp:
- { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 256, spec-decoding: mtp }

dsr1-fp8-b200-sglang:
image: lmsysorg/sglang:v0.5.6-cu129-amd64
image: lmsysorg/sglang:v0.5.9-cu129-amd64
model: deepseek-ai/DeepSeek-R1-0528
model-prefix: dsr1
runner: b200
Expand Down Expand Up @@ -1827,7 +1827,7 @@ kimik2.5-int4-b200-vllm:
- { tp: 8, conc-start: 4, conc-end: 64 }

dsr1-fp8-b200-sglang-mtp:
image: lmsysorg/sglang:v0.5.8-cu130-amd64
image: lmsysorg/sglang:v0.5.9-cu130-amd64
model: deepseek-ai/DeepSeek-R1-0528
model-prefix: dsr1
runner: b200
Expand Down Expand Up @@ -1906,7 +1906,7 @@ dsr1-fp8-b200-trt-mtp:
- { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }

dsr1-fp8-h200-sglang:
image: lmsysorg/sglang:v0.5.7-cu129-amd64
image: lmsysorg/sglang:v0.5.9-cu129-amd64
model: deepseek-ai/DeepSeek-R1-0528
model-prefix: dsr1
runner: h200
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/single_node/dsr1_fp8_b200.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ nvidia-smi

hf download "$MODEL"

export SGL_ENABLE_JIT_DEEPGEMM=false
export SGLANG_ENABLE_JIT_DEEPGEMM=false
export SGLANG_ENABLE_FLASHINFER_GEMM=true
SERVER_LOG=/workspace/server.log
PORT=${PORT:-8888}
Expand Down
17 changes: 16 additions & 1 deletion perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -733,4 +733,19 @@
- "Extend concurrency range to conc-end: 256 across all sequence lengths (1k1k, 1k8k, 8k1k)"
- "Fix MTP 1k8k conc-start from 256 to 4 to enable full concurrency sweep"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/699


- config-keys:
- dsr1-fp4-b200-sglang
- dsr1-fp8-b200-sglang
- dsr1-fp8-b200-sglang-mtp
- dsr1-fp8-h200-sglang
description:
- "Update SGLang image to v0.5.9 for NVIDIA single-node DeepSeek R1 configs"
- "dsr1-fp4-b200-sglang: v0.5.6-cu129-amd64 → v0.5.9-cu129-amd64"
- "dsr1-fp8-b200-sglang: v0.5.6-cu129-amd64 → v0.5.9-cu129-amd64"
- "dsr1-fp8-b200-sglang-mtp: v0.5.8-cu130-amd64 → v0.5.9-cu130-amd64"
- "dsr1-fp8-h200-sglang: v0.5.7-cu129-amd64 → v0.5.9-cu129-amd64"
- "Rename deprecated env var SGL_ENABLE_JIT_DEEPGEMM to SGLANG_ENABLE_JIT_DEEPGEMM in benchmarks/single_node/dsr1_fp8_b200.sh"
- "SGLang 0.5.9 gains: FlashInfer 0.6.3, TRT-LLM NSA kernels for Blackwell, SpecV2 GC bug fix, MoE fused kernel optimizations"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/814

Loading