From 4e84accc6a52bbe6728ee1730abfb7c1766157a0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 May 2026 10:20:12 +0000 Subject: [PATCH 1/4] Bump the cargo group across 1 directory with 2 updates Bumps the cargo group with 2 updates in the / directory: [rb-sys](https://github.com/oxidize-rb/rb-sys) and [tiktoken-rs](https://github.com/zurawiki/tiktoken-rs). Updates `rb-sys` from 0.9.124 to 0.9.127 - [Release notes](https://github.com/oxidize-rb/rb-sys/releases) - [Commits](https://github.com/oxidize-rb/rb-sys/compare/v0.9.124...v0.9.127) Updates `tiktoken-rs` from 0.9.1 to 0.11.0 - [Release notes](https://github.com/zurawiki/tiktoken-rs/releases) - [Commits](https://github.com/zurawiki/tiktoken-rs/compare/v0.9.1...v0.11.0) --- updated-dependencies: - dependency-name: rb-sys dependency-version: 0.9.127 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: cargo - dependency-name: tiktoken-rs dependency-version: 0.11.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: cargo ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 54 +++++++++++++++++------------------- ext/tiktoken_ruby/Cargo.toml | 4 +-- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7cacffe..cbfdcfd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "base64" @@ -25,38 +25,36 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bindgen" -version = "0.69.5" +version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ "bitflags", "cexpr", "clang-sys", "itertools", - "lazy_static", - "lazycell", "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 2.1.2", "shlex", "syn", ] [[package]] name = "bit-set" -version = "0.5.3" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" dependencies = [ "bit-vec", ] [[package]] name = "bit-vec" -version = "0.6.3" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" [[package]] name = "bitflags" @@ -109,9 +107,9 @@ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "fancy-regex" -version = "0.13.0" 
+version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" +checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8" dependencies = [ "bit-set", "regex-automata", @@ -139,12 +137,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - [[package]] name = "libc" version = "0.2.177" @@ -226,18 +218,18 @@ dependencies = [ [[package]] name = "rb-sys" -version = "0.9.124" +version = "0.9.127" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85c4188462601e2aa1469def389c17228566f82ea72f137ed096f21591bc489" +checksum = "d7d7c9560fe42dcffa576941394075f18a17dce89fcf718a2fa90b7dc2134d12" dependencies = [ "rb-sys-build", ] [[package]] name = "rb-sys-build" -version = "0.9.124" +version = "0.9.127" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "568068db4102230882e6d4ae8de6632e224ca75fe5970f6e026a04e91ed635d3" +checksum = "f1688e8f32967ba48c89e4dfa283b57f901075f542fc7ee9c3d7c5f9091ca1d9" dependencies = [ "bindgen", "lazy_static", @@ -256,9 +248,9 @@ checksum = "08f8d2924cf136a1315e2b4c7460a39f62ef11ee5d522df9b2750fab55b868b6" [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -289,6 +281,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" + [[package]] name = "seq-macro" version = "0.3.6" @@ -349,9 +347,9 @@ dependencies = [ [[package]] name = "tiktoken-rs" -version = "0.9.1" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a19830747d9034cd9da43a60eaa8e552dfda7712424aebf187b7a60126bae0d" +checksum = "fac4a168cfc1d8ed65bf17a6ee0843ad9a68f863c63c0fb2fa7eab67838782ee" dependencies = [ "anyhow", "base64", @@ -359,7 +357,7 @@ dependencies = [ "fancy-regex", "lazy_static", "regex", - "rustc-hash", + "rustc-hash 1.1.0", ] [[package]] diff --git a/ext/tiktoken_ruby/Cargo.toml b/ext/tiktoken_ruby/Cargo.toml index 604c8f5..004fea7 100644 --- a/ext/tiktoken_ruby/Cargo.toml +++ b/ext/tiktoken_ruby/Cargo.toml @@ -11,5 +11,5 @@ crate-type = ["cdylib"] [dependencies] magnus = { version = "0.8.2" } -rb-sys = { version = "0.9.124", features = ["stable-api-compiled-fallback"] } -tiktoken-rs = { version = "0.9.0" } +rb-sys = { version = "0.9.127", features = ["stable-api-compiled-fallback"] } +tiktoken-rs = { version = "0.11.0" } From 6e201b0a8e3ac830d6ad9188de0a0c97bfefa14f Mon Sep 17 00:00:00 2001 From: "Garen J. Torikian" Date: Fri, 1 May 2026 09:35:48 -0400 Subject: [PATCH 2/4] Bump rb_sys gem to 0.9.127 The cargo group bump pulled in tiktoken-rs 0.11.0, whose Cargo metadata output trips the YAML-based parser in rb_sys 0.9.124 (`metadata must be a Hash`). rb_sys 0.9.125+ switched to JSON.parse and 0.9.127 ensures UTF-8 encoding, fixing `rake build` on CI. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- Gemfile.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index a6ffceb..5c4b58f 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -24,9 +24,9 @@ GEM rake (13.3.1) rake-compiler (1.3.1) rake - rake-compiler-dock (1.11.0) - rb_sys (0.9.124) - rake-compiler-dock (= 1.11.0) + rake-compiler-dock (1.12.0) + rb_sys (0.9.127) + rake-compiler-dock (= 1.12.0) regexp_parser (2.11.3) rspec (3.13.2) rspec-core (~> 3.13.0) From cd51302c15f76c515efc32d9bf26929a37453886 Mon Sep 17 00:00:00 2001 From: "Garen J. Torikian" Date: Fri, 1 May 2026 09:47:21 -0400 Subject: [PATCH 3/4] Adapt to tiktoken-rs 0.10+ decode signature tiktoken-rs 0.10 changed CoreBPE::decode to take &[Rank] instead of Vec<Rank> (zurawiki/tiktoken-rs#146). Pass a slice borrow instead of moving the Vec out via mem::take. Co-Authored-By: Claude Opus 4.7 (1M context) --- ext/tiktoken_ruby/src/core_bpe_wrapper.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/tiktoken_ruby/src/core_bpe_wrapper.rs b/ext/tiktoken_ruby/src/core_bpe_wrapper.rs index ff14a4d..c054ff6 100644 --- a/ext/tiktoken_ruby/src/core_bpe_wrapper.rs +++ b/ext/tiktoken_ruby/src/core_bpe_wrapper.rs @@ -60,7 +60,7 @@ unsafe extern "C" fn encode_special_without_gvl(data: *mut c_void) -> *mut c_voi unsafe extern "C" fn decode_without_gvl(data: *mut c_void) -> *mut c_void { let data = &mut *(data as *mut DecodeData); let core_bpe = &*data.core_bpe; - data.result = core_bpe.decode(std::mem::take(&mut data.ids)).map_err(|e| e.to_string()); + data.result = core_bpe.decode(&data.ids).map_err(|e| e.to_string()); std::ptr::null_mut() } From 757a8dc66dcddb5535ff413ac6e62f8cf23f5dfc Mon Sep 17 00:00:00 2001 From: "Garen J. 
Torikian" Date: Fri, 1 May 2026 09:51:37 -0400 Subject: [PATCH 4/4] Add gpt-5/gpt-5.x and codex-mini model mappings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror tiktoken-rs 0.11.0 additions (zurawiki/tiktoken-rs#156, #159): exact `gpt-5`, prefix `gpt-5.` (covers gpt-5.4, gpt-5.4-mini, gpt-5.3-codex, gpt-5.2-pro, etc.), and `codex-mini` prefix (codex-mini, codex-mini-latest). All resolve to o200k_base. Context-size fixes upstream (o1-mini/chatgpt-4o/gpt-4.5) don't apply here — this gem only exposes encoding lookup, not get_context_size. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/tiktoken_ruby.rb | 3 +++ spec/tiktoken_ruby_spec.rb | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/lib/tiktoken_ruby.rb b/lib/tiktoken_ruby.rb index 31c5465..0b1db52 100644 --- a/lib/tiktoken_ruby.rb +++ b/lib/tiktoken_ruby.rb @@ -79,6 +79,7 @@ def list_model_names o3: "o200k_base", "o4-mini": "o200k_base", # chat + "gpt-5": "o200k_base", "gpt-4.1": "o200k_base", "chatgpt-4o": "o200k_base", "gpt-4o": "o200k_base", @@ -136,7 +137,9 @@ def list_model_names "o3-": "o200k_base", "o4-": "o200k_base", # chat + "gpt-5.": "o200k_base", # e.g., gpt-5.4, gpt-5.4-mini, gpt-5.3-codex, gpt-5.2-pro "gpt-5-": "o200k_base", + "codex-mini": "o200k_base", # e.g., codex-mini, codex-mini-latest "gpt-4.5-": "o200k_base", "gpt-4.1-": "o200k_base", "chatgpt-4o-": "o200k_base", diff --git a/spec/tiktoken_ruby_spec.rb b/spec/tiktoken_ruby_spec.rb index cdf7f7f..e0c7b18 100644 --- a/spec/tiktoken_ruby_spec.rb +++ b/spec/tiktoken_ruby_spec.rb @@ -21,6 +21,21 @@ expect(Tiktoken.encoding_for_model("o3")).to be_a(Tiktoken::Encoding) end + { + "gpt-5" => :o200k_base, + "gpt-5-mini" => :o200k_base, + "gpt-5.4" => :o200k_base, + "gpt-5.4-mini" => :o200k_base, + "gpt-5.3-codex" => :o200k_base, + "gpt-5.2-pro" => :o200k_base, + "codex-mini" => :o200k_base, + "codex-mini-latest" => :o200k_base + }.each do |model, expected| + it "maps #{model} to 
#{expected}" do + expect(Tiktoken.encoding_for_model(model)).to eq(Tiktoken.get_encoding(expected)) + end + end + it "fails gracefully when getting an encoding for an unknown model" do expect(Tiktoken.encoding_for_model("bad-model-name")).to be_nil end