Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 26 additions & 28 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ GEM
rake (13.3.1)
rake-compiler (1.3.1)
rake
rake-compiler-dock (1.11.0)
rb_sys (0.9.124)
rake-compiler-dock (= 1.11.0)
rake-compiler-dock (1.12.0)
rb_sys (0.9.127)
rake-compiler-dock (= 1.12.0)
regexp_parser (2.11.3)
rspec (3.13.2)
rspec-core (~> 3.13.0)
Expand Down
4 changes: 2 additions & 2 deletions ext/tiktoken_ruby/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ crate-type = ["cdylib"]

[dependencies]
magnus = { version = "0.8.2" }
rb-sys = { version = "0.9.124", features = ["stable-api-compiled-fallback"] }
tiktoken-rs = { version = "0.9.0" }
rb-sys = { version = "0.9.127", features = ["stable-api-compiled-fallback"] }
tiktoken-rs = { version = "0.11.0" }
2 changes: 1 addition & 1 deletion ext/tiktoken_ruby/src/core_bpe_wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ unsafe extern "C" fn encode_special_without_gvl(data: *mut c_void) -> *mut c_voi
// C-ABI callback taking and returning an untyped pointer. Judging by its name it is
// intended to run while the Ruby GVL is released; the call site is not visible here.
// `data` is reinterpreted as `*mut DecodeData`. The decode outcome is stored in
// `data.result`, with any error converted to its string form. Always returns null.
unsafe extern "C" fn decode_without_gvl(data: *mut c_void) -> *mut c_void {
// The pointer is cast without any check, so the caller must pass a valid DecodeData.
let data = &mut *(data as *mut DecodeData);
let core_bpe = &*data.core_bpe;
// NOTE(review): the next two assignments look like the removed and the added line of a
// one-line diff hunk rendered without -/+ markers (first: ids moved out via mem::take and
// passed by value; second: ids passed by reference). Presumably only one of them exists
// in the real file at any revision — confirm against the repository.
data.result = core_bpe.decode(std::mem::take(&mut data.ids)).map_err(|e| e.to_string());
data.result = core_bpe.decode(&data.ids).map_err(|e| e.to_string());
std::ptr::null_mut()
}

Expand Down
3 changes: 3 additions & 0 deletions lib/tiktoken_ruby.rb
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def list_model_names
o3: "o200k_base",
"o4-mini": "o200k_base",
# chat
"gpt-5": "o200k_base",
"gpt-4.1": "o200k_base",
"chatgpt-4o": "o200k_base",
"gpt-4o": "o200k_base",
Expand Down Expand Up @@ -136,7 +137,9 @@ def list_model_names
"o3-": "o200k_base",
"o4-": "o200k_base",
# chat
"gpt-5.": "o200k_base", # e.g., gpt-5.4, gpt-5.4-mini, gpt-5.3-codex, gpt-5.2-pro
"gpt-5-": "o200k_base",
"codex-mini": "o200k_base", # e.g., codex-mini, codex-mini-latest
"gpt-4.5-": "o200k_base",
"gpt-4.1-": "o200k_base",
"chatgpt-4o-": "o200k_base",
Expand Down
15 changes: 15 additions & 0 deletions spec/tiktoken_ruby_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,21 @@
expect(Tiktoken.encoding_for_model("o3")).to be_a(Tiktoken::Encoding)
end

# Table-driven examples: for every model name in the table, encoding_for_model must
# return a value equal (`eq`) to what get_encoding returns for the listed encoding.
# The table covers the exact "gpt-5" name, dashed and dotted "gpt-5" variants, and
# "codex-mini" with and without a suffix.
{
"gpt-5" => :o200k_base,
"gpt-5-mini" => :o200k_base,
"gpt-5.4" => :o200k_base,
"gpt-5.4-mini" => :o200k_base,
"gpt-5.3-codex" => :o200k_base,
"gpt-5.2-pro" => :o200k_base,
"codex-mini" => :o200k_base,
"codex-mini-latest" => :o200k_base
}.each do |model, expected|
# One generated example per table row; the description names the model and encoding.
it "maps #{model} to #{expected}" do
expect(Tiktoken.encoding_for_model(model)).to eq(Tiktoken.get_encoding(expected))
end
end

# An unrecognised model name is expected to produce nil from encoding_for_model;
# a raised error would fail this example.
it "fails gracefully when getting an encoding for an unknown model" do
expect(Tiktoken.encoding_for_model("bad-model-name")).to be_nil
end
Expand Down
Loading