From 401af46161fd53e83fbd470f7ef4bd9bf74538ed Mon Sep 17 00:00:00 2001
From: Manuel Candales <mcandales@meta.com>
Date: Thu, 5 Feb 2026 14:45:34 -0500
Subject: [PATCH 1/3] Metal tests: make linear_filter raise on invalid int4 shapes

[ghstack-poisoned]
---
 backends/apple/metal/tests/test_modules.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/backends/apple/metal/tests/test_modules.py b/backends/apple/metal/tests/test_modules.py
index 8e0c4965170..4abec8f7fe3 100644
--- a/backends/apple/metal/tests/test_modules.py
+++ b/backends/apple/metal/tests/test_modules.py
@@ -694,12 +694,18 @@ def quantize_model(model: nn.Module, qlinear: str, qlinear_group_size: int = 32)
     else:
         raise ValueError(f"Unsupported linear quantization config '{qlinear}'.")
 
-    def linear_filter(module, fqn):
-        if isinstance(module, torch.nn.Linear):
-            # Check if hidden dimension is divisible by group size
-            return qlinear_group_size == 0 or (
-                module.weight.shape[1] % qlinear_group_size == 0
-            )
+    def linear_filter(m, fqn):
+        if isinstance(m, torch.nn.Linear):
+            if qlinear_group_size == 0:
+                raise ValueError(
+                    f"Invalid group_size=0 for Metal int4 quantization (layer: {fqn})"
+                )
+            if m.weight.shape[1] % 8 != 0:
+                raise ValueError(
+                    f"Metal int4 quantization requires weight dimension K to be multiple of 8. "
+                    f"Layer {fqn} has weight shape {m.weight.shape} (K={m.weight.shape[1]})"
+                )
+            return True
         return False
 
     quantize_(model, linear_config, filter_fn=linear_filter)

From 87f15295607efce8d9d712a5dd715ecc4871a8aa Mon Sep 17 00:00:00 2001
From: Manuel Candales <mcandales@meta.com>
Date: Thu, 5 Feb 2026 15:05:24 -0500
Subject: [PATCH 2/3] Metal tests: require K % group_size == 0; use K=32 in qmv tests

[ghstack-poisoned]
---
 backends/apple/metal/tests/test_modules.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/backends/apple/metal/tests/test_modules.py b/backends/apple/metal/tests/test_modules.py
index 4abec8f7fe3..73ba3a8ed65 100644
--- a/backends/apple/metal/tests/test_modules.py
+++ b/backends/apple/metal/tests/test_modules.py
@@ -287,7 +287,7 @@ def forward(self, x: torch.Tensor):
 class LinearInt4_QMV_IMPL_small_odd(nn.Module):
     def __init__(self):
         super().__init__()
-        self.linear = nn.Linear(8, 3, bias=True)
+        self.linear = nn.Linear(32, 3, bias=True)
 
     def forward(self, x: torch.Tensor):
         return self.linear(x)
@@ -295,7 +295,7 @@ def forward(self, x: torch.Tensor):
 
 MODULE_REGISTRY["linear_int4_qmv_impl_small_odd"] = {
     "model_class": LinearInt4_QMV_IMPL_small_odd,
-    "input_shapes": [(1, 8)],
+    "input_shapes": [(1, 32)],
     "description": "Linear int4 quantization dispatching to qmv_impl",
     "qlinear": "fpa4w",
     "qlinear_group_size": 32,
@@ -312,7 +312,7 @@ def forward(self, x: torch.Tensor):
 class LinearInt4_QMV_IMPL_small_even(nn.Module):
     def __init__(self):
         super().__init__()
-        self.linear = nn.Linear(8, 10, bias=True)
+        self.linear = nn.Linear(32, 10, bias=True)
 
     def forward(self, x: torch.Tensor):
         return self.linear(x)
@@ -320,7 +320,7 @@ def forward(self, x: torch.Tensor):
 
 MODULE_REGISTRY["linear_int4_qmv_impl_small_even"] = {
     "model_class": LinearInt4_QMV_IMPL_small_even,
-    "input_shapes": [(1, 8)],
+    "input_shapes": [(1, 32)],
     "description": "Linear int4 quantization dispatching to qmv_impl",
     "qlinear": "fpa4w",
     "qlinear_group_size": 32,
@@ -700,10 +700,10 @@ def linear_filter(m, fqn):
                 raise ValueError(
                     f"Invalid group_size=0 for Metal int4 quantization (layer: {fqn})"
                 )
-            if m.weight.shape[1] % 8 != 0:
+            if m.weight.shape[1] % qlinear_group_size != 0:
                 raise ValueError(
-                    f"Metal int4 quantization requires weight dimension K to be multiple of 8. "
-                    f"Layer {fqn} has weight shape {m.weight.shape} (K={m.weight.shape[1]})"
+                    f"Metal int4 quantization requires weight dimension (K) to be multiple of group_size. "
+                    f"Layer {fqn} has weight shape {m.weight.shape} (K={m.weight.shape[1]}, group_size={qlinear_group_size})" # noqa: E501
                 )
             return True
         return False

From cf89a2b06298ff114ac67949d7889723ef08aab1 Mon Sep 17 00:00:00 2001
From: Manuel Candales <mcandales@meta.com>
Date: Thu, 5 Feb 2026 15:15:49 -0500
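Subject: [PATCH 3/3] Metal tests: drop group_size == 0 check from linear_filter

Note: with the explicit check removed, qlinear_group_size == 0 reaching
linear_filter now fails with ZeroDivisionError from the modulo instead of
a ValueError, unless it is rejected before the filter runs.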

[ghstack-poisoned]
---
 backends/apple/metal/tests/test_modules.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/backends/apple/metal/tests/test_modules.py b/backends/apple/metal/tests/test_modules.py
index 73ba3a8ed65..403ce355381 100644
--- a/backends/apple/metal/tests/test_modules.py
+++ b/backends/apple/metal/tests/test_modules.py
@@ -696,14 +696,10 @@ def quantize_model(model: nn.Module, qlinear: str, qlinear_group_size: int = 32)
 
     def linear_filter(m, fqn):
         if isinstance(m, torch.nn.Linear):
-            if qlinear_group_size == 0:
-                raise ValueError(
-                    f"Invalid group_size=0 for Metal int4 quantization (layer: {fqn})"
-                )
             if m.weight.shape[1] % qlinear_group_size != 0:
                 raise ValueError(
                     f"Metal int4 quantization requires weight dimension (K) to be multiple of group_size. "
-                    f"Layer {fqn} has weight shape {m.weight.shape} (K={m.weight.shape[1]}, group_size={qlinear_group_size})" # noqa: E501
+                    f"Layer {fqn} has weight shape {m.weight.shape} (K={m.weight.shape[1]}, group_size={qlinear_group_size})"  # noqa: E501
                 )
             return True
         return False