diff --git a/src/twinkle/sampler/vllm_sampler/vllm_sampler.py b/src/twinkle/sampler/vllm_sampler/vllm_sampler.py index 915e012f..62f15630 100644 --- a/src/twinkle/sampler/vllm_sampler/vllm_sampler.py +++ b/src/twinkle/sampler/vllm_sampler/vllm_sampler.py @@ -122,8 +122,7 @@ def __init__(self, model_id: str, engine_args: Dict[str, Any] = None, device_mes # fix: On NPU, monkey_patch_model can trigger Triton compatibility errors and abort sampler init. # fix: Explicitly skip this patch on NPU and keep it for non-NPU paths only. # NPU platform may trigger triton errors with monkey_patch_model - if Platform.get_platform().device_prefix() != 'npu': - self._run_in_loop(self.engine.engine.collective_rpc('monkey_patch_model')) + self._run_in_loop(self.engine.engine.collective_rpc('monkey_patch_model')) VLLMLoraWeights()(self)