From 91603002c5aade70536d2ba7a39233ca0697a699 Mon Sep 17 00:00:00 2001
From: Min Guo
Date: Tue, 5 May 2026 14:08:13 -0700
Subject: [PATCH] Revert the code change that blocks QNN lowering

Summary:
ScalelessRMSNorm blocks model lowering; revert to torch.nn.RMSNorm, since
ScalelessRMSNorm blocks QNN lowering.

Differential Revision: D103916912
---
 examples/models/llama/static_attention.py | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/examples/models/llama/static_attention.py b/examples/models/llama/static_attention.py
index 72ce31438d6..07d2cf98aec 100644
--- a/examples/models/llama/static_attention.py
+++ b/examples/models/llama/static_attention.py
@@ -898,18 +898,11 @@ def _init_wo(self, config: ModelArgs) -> None:
 
     def _init_qk_norms(self, config: ModelArgs, is_kv_shared_layer: bool) -> None:
         if self.use_qk_norm:
-            if getattr(config, "qk_norm_affine", True):
-                self.q_norm = torch.nn.RMSNorm(self.head_dim, config.norm_eps)
-                if is_kv_shared_layer:
-                    self.k_norm = nn.Identity()
-                else:
-                    self.k_norm = torch.nn.RMSNorm(self.head_dim, config.norm_eps)
+            self.q_norm = torch.nn.RMSNorm(self.head_dim, config.norm_eps)
+            if is_kv_shared_layer:
+                self.k_norm = nn.Identity()
             else:
-                self.q_norm = ScalelessRMSNorm(self.head_dim, eps=config.norm_eps)
-                if is_kv_shared_layer:
-                    self.k_norm = nn.Identity()
-                else:
-                    self.k_norm = ScalelessRMSNorm(self.head_dim, eps=config.norm_eps)
+                self.k_norm = torch.nn.RMSNorm(self.head_dim, config.norm_eps)
         else:
             self.q_norm = torch.nn.Identity()
             self.k_norm = torch.nn.Identity()