diff --git a/mellea/backends/huggingface.py b/mellea/backends/huggingface.py index 84507299e..732b5aa70 100644 --- a/mellea/backends/huggingface.py +++ b/mellea/backends/huggingface.py @@ -48,6 +48,7 @@ ) from ..core.base import AbstractMelleaTool from ..formatters import ChatFormatter, TemplateFormatter, granite as granite_formatters +from ..formatters.granite.base.util import _GuidanceLogitsProcessor from ..helpers import message_to_openai_message, messages_to_docs, send_to_queue from ..stdlib.components import Intrinsic, Message from ..stdlib.requirements import ALoraRequirement, LLMaJRequirement @@ -159,65 +160,6 @@ def _cleanup_kv_cache(cache_info: HFAloraCacheInfo) -> None: torch.cuda.empty_cache() -# modified from VLLM v0.9.2 code base -# https://github.com/vllm-project/vllm/blob/v0.9.2/vllm/model_executor/guided_decoding/guidance_logits_processors.py -class _GuidanceLogitsProcessor: - def __init__(self, grammar: str, ll_tokenizer: llguidance.LLTokenizer) -> None: - self.grammar = grammar - self.vocab_size: int = ll_tokenizer.vocab_size - self.ll_tokenizer: llguidance.LLTokenizer = ll_tokenizer - self.ll_matchers: list[llguidance.LLMatcher] = [] - self.bitmasks: list[torch.Tensor] = [] - self.new_sampling: bool = False - self.batch_size: int = -1 - - def __call__( - self, batch_input_ids: torch.Tensor, batch_scores: torch.Tensor - ) -> torch.Tensor: - i_batch, _ = batch_input_ids.shape - s_batch, _ = batch_scores.shape - assert i_batch == s_batch - - # s_batch, s_vocab = batch_scores.shape - # assert s_vocab == self.vocab_size - # - # NOTE: somehow, this does not hold. s_vocab is not same as either of - # * self._tokenizer._tokenizer.get_vocab_size(with_added_tokens=True) == self.vocab_size == ll_tokenizer.vocab_size - # * self._tokenizer._tokenizer.get_vocab_size(with_added_tokens=False) - - if self.batch_size != i_batch: - self.batch_size = i_batch - self.bitmasks = [ - llguidance.torch.allocate_token_bitmask(1, self.vocab_size) # type: ignore[attr-defined] - for _ in range(self.batch_size) - ] - - self.ll_matchers = [ - llguidance.LLMatcher(self.ll_tokenizer, self.grammar) - for _ in range(self.batch_size) - ] - - for input_ids, scores, ll_matcher, bitmask in zip( - batch_input_ids, batch_scores, self.ll_matchers, self.bitmasks - ): - if self.new_sampling and len(input_ids) > 0: - ll_matcher.consume_token( # type: ignore[attr-defined] - input_ids.tolist()[-1] - ) - err = ll_matcher.get_error() # type: ignore[attr-defined] - if err: - MelleaLogger.get_logger().warning("Error in LLMatcher: %s", err) - - llguidance.torch.fill_next_token_bitmask(ll_matcher, bitmask, 0) - llguidance.torch.apply_token_bitmask_inplace( - scores, bitmask.to(scores.device) - ) # type: ignore[attr-defined] - - self.new_sampling = True - - return batch_scores - - class LocalHFBackend(FormatterBackend, AdapterMixin): """The LocalHFBackend uses Huggingface's transformers library for inference, and uses a Formatter to convert `Component`s into prompts. This backend also supports Activated LoRAs (ALoras)](https://arxiv.org/pdf/2504.12397). @@ -620,7 +562,10 @@ async def _generate_from_intrinsic( generate_input, other_input = ( granite_formatters.base.util.chat_completion_request_to_transformers_inputs( # type: ignore - rewritten, self._tokenizer, self._model + rewritten, + self._tokenizer, + self._model, + ll_tokenizer=self._llguidance_tokenizer, ) ) diff --git a/mellea/formatters/granite/base/util.py b/mellea/formatters/granite/base/util.py index 79af3ff76..62314408b 100644 --- a/mellea/formatters/granite/base/util.py +++ b/mellea/formatters/granite/base/util.py @@ -17,7 +17,11 @@ # Third Party import pydantic +from ....core.utils import MelleaLogger + if TYPE_CHECKING: + import llguidance + import torch from transformers import PreTrainedModel, PreTrainedTokenizerBase # First Party @@ -112,11 +116,68 @@ def load_transformers_lora(local_or_remote_path: str) -> tuple: return model, tokenizer +# Modified from VLLM v0.9.2 code base +# https://github.com/vllm-project/vllm/blob/v0.9.2/vllm/model_executor/guided_decoding/guidance_logits_processors.py +class _GuidanceLogitsProcessor: + """A HuggingFace logits processor that enforces an llguidance grammar.""" + + def __init__(self, grammar: str, ll_tokenizer: llguidance.LLTokenizer) -> None: + """Initialize the processor with a compiled grammar and an llguidance tokenizer.""" + self.grammar = grammar + self.vocab_size: int = ll_tokenizer.vocab_size + self.ll_tokenizer: llguidance.LLTokenizer = ll_tokenizer + self.ll_matchers: list[llguidance.LLMatcher] = [] + self.bitmasks: list[torch.Tensor] = [] + self.new_sampling: bool = False + self.batch_size: int = -1 + + def __call__( + self, batch_input_ids: torch.Tensor, batch_scores: torch.Tensor + ) -> torch.Tensor: + """Apply the grammar's allowed-token bitmask to ``batch_scores`` in place.""" + with import_optional("llguidance"): + import llguidance + import llguidance.torch + + i_batch, _ = batch_input_ids.shape + s_batch, _ = batch_scores.shape + assert i_batch == s_batch + + if self.batch_size != i_batch: + self.batch_size = i_batch + self.bitmasks = [ + llguidance.torch.allocate_token_bitmask(1, self.vocab_size) # type: ignore[attr-defined] + for _ in range(self.batch_size) + ] + self.ll_matchers = [ + llguidance.LLMatcher(self.ll_tokenizer, self.grammar) + for _ in range(self.batch_size) + ] + + for input_ids, scores, ll_matcher, bitmask in zip( + batch_input_ids, batch_scores, self.ll_matchers, self.bitmasks + ): + if self.new_sampling and len(input_ids) > 0: + ll_matcher.consume_token(input_ids.tolist()[-1]) # type: ignore[attr-defined] + err = ll_matcher.get_error() # type: ignore[attr-defined] + if err: + MelleaLogger.get_logger().warning("Error in LLMatcher: %s", err) + + llguidance.torch.fill_next_token_bitmask(ll_matcher, bitmask, 0) + llguidance.torch.apply_token_bitmask_inplace( # type: ignore[attr-defined] + scores, bitmask.to(scores.device) + ) + + self.new_sampling = True + return batch_scores + + def chat_completion_request_to_transformers_inputs( request: dict, tokenizer: PreTrainedTokenizerBase | None = None, model: PreTrainedModel | None = None, constrained_decoding_prefix: str | None = None, + ll_tokenizer: llguidance.LLTokenizer | None = None, ) -> tuple[dict, dict]: """Translate an OpenAI-style chat completion request. @@ -125,11 +186,15 @@ def chat_completion_request_to_transformers_inputs( Args: request: Request as parsed JSON or equivalent dataclass. - tokenizer: HuggingFace tokenizer for the model. Only required if the request - uses constrained decoding. - model: HuggingFace model object. Only required if the request uses constrained - decoding. + tokenizer: HuggingFace tokenizer. Required for constrained decoding unless + ``ll_tokenizer`` is provided, and required when ``constrained_decoding_prefix`` + is set. + model: HuggingFace model object. Used for ``model.device`` placement and only + required when ``constrained_decoding_prefix`` is set. constrained_decoding_prefix: Optional generation prefix to append to the prompt. + ll_tokenizer: Pre-built ``llguidance.LLTokenizer``. Only used when the request + uses constrained decoding; if not provided, one is constructed from + ``tokenizer``. Pass an existing instance to avoid the construction cost. Returns: Tuple of ``(generate_input, other_input)`` where ``generate_input`` contains @@ -137,12 +202,12 @@ def chat_completion_request_to_transformers_inputs( additional parameters for ``generate_with_transformers``. Raises: - ImportError: If ``torch``, ``transformers``, or ``xgrammar`` packages + ImportError: If ``torch``, ``transformers``, or ``llguidance`` packages are not installed (the latter only when constrained decoding is used). TypeError: If ``tokenizer.apply_chat_template()`` returns an unexpected type. ValueError: If padding or end-of-sequence token IDs cannot be determined from the tokenizer, or if a constrained-decoding request is made - without passing a ``tokenizer`` or ``model`` argument. + without the required ``tokenizer``/``ll_tokenizer``/``model`` arguments. """ with import_optional("torch"): # Third Party @@ -191,7 +256,8 @@ def chat_completion_request_to_transformers_inputs( # generate() will fail with many different creative error messages if tokens aren't # on the right device. - input_tokens = input_tokens.to(model.device) # type: ignore[union-attr] + if model is not None: + input_tokens = input_tokens.to(model.device) generate_input["input_tokens"] = input_tokens # The generate() method sometimes needs to know what is the integer ID @@ -234,38 +300,40 @@ def chat_completion_request_to_transformers_inputs( ): # Constrained decoding in Hugging Face requires using a third-party library # to create a callback function to be invoked from inside generate() - with import_optional("xgrammar"): + with import_optional("llguidance"): # Third Party - import xgrammar as xgr # type: ignore[import-not-found] - if tokenizer is None: - raise ValueError( - "Request specifies constrained decoding, but no " - "tokenizer object was passed to this function." - ) - if model is None: + import llguidance + import llguidance.hf + if tokenizer is None and ll_tokenizer is None: raise ValueError( - "Request specifies constrained decoding, but no " - "tokenizer object was passed to this function." + "Request specifies constrained decoding, but neither a " + "tokenizer nor an ll_tokenizer was passed to this function." ) - # Different parts of a Hugging Face model will have different opinions about - # the number of tokens in the tokenizer's vocabulary, because of course they do. - # Gather together all the possibilities and pick the biggest one. - vocab_size = max(tokenizer.vocab_size, len(tokenizer), model.vocab_size) - - tokenizer_info = xgr.TokenizerInfo.from_huggingface( - tokenizer, vocab_size=vocab_size - ) - grammar_compiler = xgr.GrammarCompiler(tokenizer_info) - compiled_grammar = grammar_compiler.compile_json_schema( + if ll_tokenizer is None: + # HF model components disagree on vocab size (resized embeddings, added + # special tokens, etc.). Pass the maximum so the bitmask covers every + # token id the model can emit. llguidance defaults to the tokenizer's + # value when n_vocab is None, which can be smaller than model.vocab_size. + n_vocab = max(tokenizer.vocab_size, len(tokenizer)) # type: ignore[union-attr,arg-type] + if model is not None: + n_vocab = max(n_vocab, model.vocab_size) + ll_tokenizer = llguidance.hf.from_tokenizer(tokenizer, n_vocab=n_vocab) # type: ignore[arg-type] + + grammar = llguidance.LLMatcher.grammar_from_json_schema( request["extra_body"]["structured_outputs"]["json"] ) - logits_processor = xgr.contrib.hf.LogitsProcessor(compiled_grammar) + logits_processor = _GuidanceLogitsProcessor(grammar, ll_tokenizer) # The "logits_processor" argument to generate() must be a list. generate_input["logits_processor"] = [logits_processor] # type: ignore[assignment] if constrained_decoding_prefix is not None: + if tokenizer is None or model is None: + raise ValueError( + "constrained_decoding_prefix requires both a tokenizer " + "and a model to be passed to this function." + ) # Some models generate boilerplate before getting to the place where the # logits processor should activate. Append that boilerplate to the prompt, # since the logits processor we just created will diff --git a/pyproject.toml b/pyproject.toml index 35823fb9c..652124920 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,6 @@ hf = [ "peft>=0.18.1", # Native aLoRA support added in PEFT 0.18.0 "transformers>=4.53.2,<5", "trl==0.19.1", - "xgrammar==0.1.33", # Necessary for granite_common intrinsics. Pinned due to Issue 990. "huggingface-hub>=0.33.4", ] diff --git a/uv.lock b/uv.lock index cfd231228..5985c9612 100644 --- a/uv.lock +++ b/uv.lock @@ -3387,7 +3387,6 @@ all = [ { name = "trl" }, { name = "typer" }, { name = "uvicorn" }, - { name = "xgrammar" }, ] backends = [ { name = "accelerate" }, @@ -3401,7 +3400,6 @@ backends = [ { name = "peft" }, { name = "transformers" }, { name = "trl" }, - { name = "xgrammar" }, ] cli = [ { name = "typer" }, @@ -3424,7 +3422,6 @@ hf = [ { name = "peft" }, { name = "transformers" }, { name = "trl" }, - { name = "xgrammar" }, ] hooks = [ { name = "cpex" }, @@ -3584,7 +3581,6 @@ requires-dist = [ { name = "trl", marker = "extra == 'hf'", specifier = "==0.19.1" }, { name = "typer", marker = "extra == 'cli'" }, { name = "uvicorn", marker = "extra == 'server'" }, - { name = "xgrammar", marker = "extra == 'hf'", specifier = "==0.1.33" }, ] provides-extras = ["hf", "litellm", "watsonx", "tools", "telemetry", "docling", "granite-retriever", "cli", "server", "sandbox", "switch", "backends", "hooks", "all"] @@ -8023,47 +8019,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1a/c7/8528ac2dfa2c1e6708f647df7ae144ead13f0a31146f43c7264b4942bf12/wrapt-2.1.2-py3-none-any.whl", hash = "sha256:b8fd6fa2b2c4e7621808f8c62e8317f4aae56e59721ad933bac5239d913cf0e8", size = 43993, upload-time = "2026-03-06T02:53:12.905Z" }, ] -[[package]] -name = "xgrammar" -version = "0.1.33" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, - { name = "pydantic", version = "2.12.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' and python_full_version < '3.14'" }, - { name = "pydantic", version = "2.13.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12' or python_full_version >= '3.14'" }, - { name = "torch" }, - { name = "transformers" }, - { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/db/43/e5dfddb1d2a4fccf3e3a88f103e88698cdefc3182f4e169a359ffe1c1794/xgrammar-0.1.33.tar.gz", hash = "sha256:8dbe5fc3d76651ab1fac7a68fc2a118b885fa0ec7189927fb6e0dce0081aea99", size = 2398956, upload-time = "2026-03-27T10:16:36.582Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7d/df/695172c6e16e3145ebeffadf7045d1b43d874990da19c7519b01c49ef45a/xgrammar-0.1.33-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:b5f0bbabe128ff5c985b77b4315d25f19a3c7247a0847708a4a484ae6214041a", size = 22766437, upload-time = "2026-03-27T10:14:49.587Z" }, - { url = "https://files.pythonhosted.org/packages/a0/de/14ab62bfa6035d0ad276f10f0795fb957cfafb0e3ebc77e87ef36befc461/xgrammar-0.1.33-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:08c7befefb38c89bf368c26a8e75d00204f03fc303eed61ca570dd3f568d9ead", size = 22703373, upload-time = "2026-03-27T10:14:53.501Z" }, - { url = "https://files.pythonhosted.org/packages/4b/16/f8297e0e3b468636d8e0190002badfe4a6d8d1c2af295fea2d164e7b5a8a/xgrammar-0.1.33-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a5f561e676df8c9e941c7a2f6df9612bbf645bf1fc714b4a9282cf75cff532f8", size = 42132308, upload-time = "2026-03-27T10:14:58.545Z" }, - { url = "https://files.pythonhosted.org/packages/12/e0/629b892a3810446097635dd1be7e4d977107c42232efb229d70e5c827227/xgrammar-0.1.33-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3bc9151d9f0d05862c253998c533f04c000273f57180fb6a4e3623e321fd47db", size = 42204526, upload-time = "2026-03-27T10:15:03.299Z" }, - { url = "https://files.pythonhosted.org/packages/29/f5/aee458b54919ef989ca21d4a39721a51b8a3cba37614148b863968ef5c8c/xgrammar-0.1.33-cp311-cp311-win_amd64.whl", hash = "sha256:27f0cf751b9130805c7db745a7abb86f05228d58523d8388b0b970cada6dee0a", size = 7222644, upload-time = "2026-03-27T10:15:06.366Z" }, - { url = "https://files.pythonhosted.org/packages/2d/5f/9a1ebc9505392ff626b9ba8fca54d46bdba454af80551169676ee5cd27d4/xgrammar-0.1.33-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:16d05f8f9df9852f2055e112adf2ce22440062979bc3dc66869a0b7c2f93eb0a", size = 22765695, upload-time = "2026-03-27T10:15:09.38Z" }, - { url = "https://files.pythonhosted.org/packages/82/82/7081feb505873238583a003f790b10ce84d66ab3a8e8e244e8c1c4729d70/xgrammar-0.1.33-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0bb9e0ed6748b8e82d4569be4ebc8c9e60694369295c4fed0ebaa4bab4aa4eb4", size = 22702262, upload-time = "2026-03-27T10:15:12.9Z" }, - { url = "https://files.pythonhosted.org/packages/4e/04/43d4baca876f5ae1b45897ec30a59801a2da37f16da1fcd85f9555e4c125/xgrammar-0.1.33-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c803e60d791854c5d1f271ece7e1f34d73c82dd4a8b2a06b7af5331482a78ac", size = 42133168, upload-time = "2026-03-27T10:15:16.994Z" }, - { url = "https://files.pythonhosted.org/packages/f0/a8/672833a3cff027253793aa999401d8364896ebf396967e475c7a878b895f/xgrammar-0.1.33-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52b8eaa533282a0efb0835db6998ae72e7b3c7875d7a52e360ffebff9b78c30a", size = 42205803, upload-time = "2026-03-27T10:15:21.599Z" }, - { url = "https://files.pythonhosted.org/packages/04/38/3fd9f21b101871b4b7f86ee2e15fe6d0cb61a3753f18b391bdee22c74810/xgrammar-0.1.33-cp312-cp312-win_amd64.whl", hash = "sha256:94fea66b41feb28be7e91f95f078986cbc850f42f7adb2d8987634eadf1fb94b", size = 7222161, upload-time = "2026-03-27T10:15:24.636Z" }, - { url = "https://files.pythonhosted.org/packages/dc/b1/cce9f6d12b9de0db8b86401ea739fe79ac555f3da56e47faa5b874d41e42/xgrammar-0.1.33-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e5b46b922fb04fd1848198da5273ddc20f16693fba5871bac1837f1c90f59584", size = 22702353, upload-time = "2026-03-27T10:15:27.203Z" }, - { url = "https://files.pythonhosted.org/packages/6b/55/4d186d4065f645a051be992919c51aaf96cfa8a32f7ecc8512a6e41f969f/xgrammar-0.1.33-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f7eec984a20fd54d4c79536d99e2515bac54bd4e1380162fa047f5ff45bdf6d8", size = 42133430, upload-time = "2026-03-27T10:15:31.409Z" }, - { url = "https://files.pythonhosted.org/packages/2b/ca/db765035b3bb1854bdb833c118e0f09dacc623ce5e867466d63610d635fa/xgrammar-0.1.33-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d705f62d91a3675997a81d09aa371c375d7793ce1021aff7b7ed5a92021c7379", size = 42206830, upload-time = "2026-03-27T10:15:35.574Z" }, - { url = "https://files.pythonhosted.org/packages/f5/17/635fc8933b35f24d0749fe177209abb5b526c99a2d098abb71c0e601f356/xgrammar-0.1.33-cp313-cp313-win_amd64.whl", hash = "sha256:2c626de8f503858efa28cab099cbb1719c4926af4250e8dea8efddfa2c6b6c91", size = 7222102, upload-time = "2026-03-27T10:15:38.617Z" }, - { url = "https://files.pythonhosted.org/packages/0e/fd/ec456e86c2d3f1c01addac8123765ee3b77c1aa1c62298f0f0fdf57d5499/xgrammar-0.1.33-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:59b9c3bb8cc0c5c82155cdc4ff0fd9f7b82348346d9e11307d5df2d3cc81eb3b", size = 22765922, upload-time = "2026-03-27T10:15:41.617Z" }, - { url = "https://files.pythonhosted.org/packages/ac/24/8afe2a6dd42f1d1ac2f702c9fdc757e724f4af4c6eae5508e063acfb70a9/xgrammar-0.1.33-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a55912a1d7dcd3d2134617083c28cffe97ba4eaafff39bd41ca28a5ffda66073", size = 22702579, upload-time = "2026-03-27T10:15:45.14Z" }, - { url = "https://files.pythonhosted.org/packages/b0/79/8fbd675aa49b180d0912aeb90fa72dca9bb1f476724f76d3097561cca161/xgrammar-0.1.33-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bba458ffe06b3774be3a24eaf58dc217eec3a781ba41340c2eecf76aa9347aa3", size = 42133038, upload-time = "2026-03-27T10:15:48.98Z" }, - { url = "https://files.pythonhosted.org/packages/fa/c5/64558fd11130624267f788be5d665f898f627b87c6916b523c6e0d4cebf9/xgrammar-0.1.33-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:776a15eaadda463987fba97d8a07b60c262c96353d800fc8639efedb57b7cbbb", size = 42206382, upload-time = "2026-03-27T10:15:53.458Z" }, - { url = "https://files.pythonhosted.org/packages/9c/37/921445f60d8e8cba7caa006d7583e8a059b07e6d7eb5c9dc150c2219415a/xgrammar-0.1.33-cp314-cp314-win_amd64.whl", hash = "sha256:247dec78e11f5c361f7f3f2bc571574c118fc88045bf13fc792b6432fc32bc68", size = 7312992, upload-time = "2026-03-27T10:15:56.274Z" }, - { url = "https://files.pythonhosted.org/packages/a2/31/fb51cd12733e53c3832f6cfadf5f958414102c80c477beb8b233bd4324f4/xgrammar-0.1.33-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:915451b9c86c840e44500782696650ef5535eab87662d748d649f0569d84c322", size = 22768311, upload-time = "2026-03-27T10:15:59.231Z" }, - { url = "https://files.pythonhosted.org/packages/6e/1e/f3de2066ac889a8b4f4589b617364a07a46aea40097c8e0abd62b61f72a4/xgrammar-0.1.33-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c98e2c89bb5efa31e5f3648159dc3477dde51bbf3da8138d6b6fdc49c8fddd44", size = 22705773, upload-time = "2026-03-27T10:16:02.918Z" }, - { url = "https://files.pythonhosted.org/packages/70/fb/523113e066b74428b843e66baed815671faa1dd366a2967b687498aa8cba/xgrammar-0.1.33-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51e52aef50c2d91122a23ce67f7b187fc6caffa620b7412fd3a5eebb00a29377", size = 42134611, upload-time = "2026-03-27T10:16:07.459Z" }, - { url = "https://files.pythonhosted.org/packages/c5/07/6ea6bf8efff3c29c07f594f1e8665dc3ed43abdad86a6a27da9a3ddcbbef/xgrammar-0.1.33-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:24cbb91580da8ac6c86de0464339c1ca1899fb0032d604175bdb384c1a13b9b7", size = 42197758, upload-time = "2026-03-27T10:16:12.504Z" }, - { url = "https://files.pythonhosted.org/packages/e3/05/813842e384723c636ad61b6902117dd689d8c04a8d34e0da91b35fbb9f8b/xgrammar-0.1.33-cp314-cp314t-win_amd64.whl", hash = "sha256:ae8877dc35cbdf07b23ce5584e093cca36ed72ad35798d4ce6ed858cd3b19fea", size = 7317563, upload-time = "2026-03-27T10:16:15.774Z" }, -] - [[package]] name = "xlsxwriter" version = "3.2.9"