From 8ce74552d254c85110d003f1b67b547d8b0e68d4 Mon Sep 17 00:00:00 2001 From: wufeisheng Date: Tue, 10 Feb 2026 18:59:20 +0800 Subject: [PATCH] fix tbo --- .../model_executor/layers/moe/fused_moe_deepgemm_backend.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py b/fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py index e0ad590e412..b4d6b62a264 100644 --- a/fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py +++ b/fastdeploy/model_executor/layers/moe/fused_moe_deepgemm_backend.py @@ -354,7 +354,6 @@ def apply_ep_prefill( ffn_out, m_indices, ) - del permute_input # swiglu ffn_out = paddle.incubate.nn.functional.swiglu(ffn_out, None) @@ -373,7 +372,6 @@ def apply_ep_prefill( ) ffn_in_x_scale_tensor = ffn_in_x_scale_tensor.T[: ffn_in_x.shape[0]] - del ffn_out ffn_out = paddle.empty( (token_all_num, getattr(layer, self.added_weight_attrs[1]).shape[1]), dtype=paddle.bfloat16, @@ -385,7 +383,6 @@ def apply_ep_prefill( ffn_out, m_indices, ) - del ffn_in_x # prmt back per rank tmp_ffn_out = fastdeploy.model_executor.ops.gpu.ep_moe_expert_combine( @@ -397,7 +394,6 @@ def apply_ep_prefill( False, # norm_topk_prob 1.0, ) - del ffn_out else: tmp_ffn_out = paddle.empty([0, hidden_size], paddle.bfloat16)