From 401af46161fd53e83fbd470f7ef4bd9bf74538ed Mon Sep 17 00:00:00 2001 From: Manuel Candales Date: Thu, 5 Feb 2026 14:45:34 -0500 Subject: [PATCH 1/3] Update [ghstack-poisoned] --- backends/apple/metal/tests/test_modules.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/backends/apple/metal/tests/test_modules.py b/backends/apple/metal/tests/test_modules.py index 8e0c4965170..4abec8f7fe3 100644 --- a/backends/apple/metal/tests/test_modules.py +++ b/backends/apple/metal/tests/test_modules.py @@ -694,12 +694,18 @@ def quantize_model(model: nn.Module, qlinear: str, qlinear_group_size: int = 32) else: raise ValueError(f"Unsupported linear quantization config '{qlinear}'.") - def linear_filter(module, fqn): - if isinstance(module, torch.nn.Linear): - # Check if hidden dimension is divisible by group size - return qlinear_group_size == 0 or ( - module.weight.shape[1] % qlinear_group_size == 0 - ) + def linear_filter(m, fqn): + if isinstance(m, torch.nn.Linear): + if qlinear_group_size == 0: + raise ValueError( + f"Invalid group_size=0 for Metal int4 quantization (layer: {fqn})" + ) + if m.weight.shape[1] % 8 != 0: + raise ValueError( + f"Metal int4 quantization requires weight dimension K to be multiple of 8. " + f"Layer {fqn} has weight shape {m.weight.shape} (K={m.weight.shape[1]})" + ) + return True return False quantize_(model, linear_config, filter_fn=linear_filter) From 87f15295607efce8d9d712a5dd715ecc4871a8aa Mon Sep 17 00:00:00 2001 From: Manuel Candales Date: Thu, 5 Feb 2026 15:05:24 -0500 Subject: [PATCH 2/3] Update [ghstack-poisoned] --- backends/apple/metal/tests/test_modules.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/backends/apple/metal/tests/test_modules.py b/backends/apple/metal/tests/test_modules.py index 4abec8f7fe3..73ba3a8ed65 100644 --- a/backends/apple/metal/tests/test_modules.py +++ b/backends/apple/metal/tests/test_modules.py @@ -287,7 +287,7 @@ def forward(self, x: torch.Tensor): class LinearInt4_QMV_IMPL_small_odd(nn.Module): def __init__(self): super().__init__() - self.linear = nn.Linear(8, 3, bias=True) + self.linear = nn.Linear(32, 3, bias=True) def forward(self, x: torch.Tensor): return self.linear(x) @@ -295,7 +295,7 @@ def forward(self, x: torch.Tensor): MODULE_REGISTRY["linear_int4_qmv_impl_small_odd"] = { "model_class": LinearInt4_QMV_IMPL_small_odd, - "input_shapes": [(1, 8)], + "input_shapes": [(1, 32)], "description": "Linear int4 quantization dispatching to qmv_impl", "qlinear": "fpa4w", "qlinear_group_size": 32, @@ -312,7 +312,7 @@ def forward(self, x: torch.Tensor): class LinearInt4_QMV_IMPL_small_even(nn.Module): def __init__(self): super().__init__() - self.linear = nn.Linear(8, 10, bias=True) + self.linear = nn.Linear(32, 10, bias=True) def forward(self, x: torch.Tensor): return self.linear(x) @@ -320,7 +320,7 @@ def forward(self, x: torch.Tensor): MODULE_REGISTRY["linear_int4_qmv_impl_small_even"] = { "model_class": LinearInt4_QMV_IMPL_small_even, - "input_shapes": [(1, 8)], + "input_shapes": [(1, 32)], "description": "Linear int4 quantization dispatching to qmv_impl", "qlinear": "fpa4w", "qlinear_group_size": 32, @@ -700,10 +700,10 @@ def linear_filter(m, fqn): raise ValueError( f"Invalid group_size=0 for Metal int4 quantization (layer: {fqn})" ) - if m.weight.shape[1] % 8 != 0: + if m.weight.shape[1] % qlinear_group_size != 0: raise ValueError( - f"Metal int4 quantization requires weight dimension K to be multiple of 8. " - f"Layer {fqn} has weight shape {m.weight.shape} (K={m.weight.shape[1]})" + f"Metal int4 quantization requires weight dimension (K) to be multiple of group_size. " + f"Layer {fqn} has weight shape {m.weight.shape} (K={m.weight.shape[1]}, group_size={qlinear_group_size})" # noqa: E501 ) return True return False From cf89a2b06298ff114ac67949d7889723ef08aab1 Mon Sep 17 00:00:00 2001 From: Manuel Candales Date: Thu, 5 Feb 2026 15:15:49 -0500 Subject: [PATCH 3/3] Update [ghstack-poisoned] --- backends/apple/metal/tests/test_modules.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/backends/apple/metal/tests/test_modules.py b/backends/apple/metal/tests/test_modules.py index 73ba3a8ed65..403ce355381 100644 --- a/backends/apple/metal/tests/test_modules.py +++ b/backends/apple/metal/tests/test_modules.py @@ -696,14 +696,10 @@ def quantize_model(model: nn.Module, qlinear: str, qlinear_group_size: int = 32) def linear_filter(m, fqn): if isinstance(m, torch.nn.Linear): - if qlinear_group_size == 0: - raise ValueError( - f"Invalid group_size=0 for Metal int4 quantization (layer: {fqn})" - ) if m.weight.shape[1] % qlinear_group_size != 0: raise ValueError( f"Metal int4 quantization requires weight dimension (K) to be multiple of group_size. " - f"Layer {fqn} has weight shape {m.weight.shape} (K={m.weight.shape[1]}, group_size={qlinear_group_size})" # noqa: E501 + f"Layer {fqn} has weight shape {m.weight.shape} (K={m.weight.shape[1]}, group_size={qlinear_group_size})" # noqa: E501 ) return True return False