From 07fd5b5540bf22e6e240e1c99d673b9ba4f43d3e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 16 May 2026 19:42:26 +0000 Subject: [PATCH 1/5] Initial plan From 005f3e1c0482efce317aee6b19fe29f28ab0378a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 16 May 2026 20:20:25 +0000 Subject: [PATCH 2/5] Add test case for Unicode special case mappings in intrinsic types Agent-Logs-Url: https://github.com/microsoft/typescript-go/sessions/7ac37be7-07a1-49c6-be87-36aa6ffa208f Co-authored-by: jakebailey <5341706+jakebailey@users.noreply.github.com> --- ...rinsicTypesUnicodeSpecialCasing.errors.txt | 63 +++++++++++++++ .../intrinsicTypesUnicodeSpecialCasing.js | 53 +++++++++++++ ...intrinsicTypesUnicodeSpecialCasing.symbols | 79 +++++++++++++++++++ .../intrinsicTypesUnicodeSpecialCasing.types | 71 +++++++++++++++++ .../intrinsicTypesUnicodeSpecialCasing.ts | 37 +++++++++ 5 files changed, 303 insertions(+) create mode 100644 testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.errors.txt create mode 100644 testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.js create mode 100644 testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.symbols create mode 100644 testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.types create mode 100644 testdata/tests/cases/compiler/intrinsicTypesUnicodeSpecialCasing.ts diff --git a/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.errors.txt b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.errors.txt new file mode 100644 index 0000000000..6efc0a0ec4 --- /dev/null +++ b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.errors.txt @@ -0,0 +1,63 @@ +intrinsicTypesUnicodeSpecialCasing.ts(7,7): error TS2322: Type '"SS"' is not assignable to type '"ß"'. +intrinsicTypesUnicodeSpecialCasing.ts(11,7): error TS2322: Type '"i̇"' is not assignable to type '"i"'. +intrinsicTypesUnicodeSpecialCasing.ts(15,7): error TS2322: Type '"FI"' is not assignable to type '"fi"'. +intrinsicTypesUnicodeSpecialCasing.ts(19,7): error TS2322: Type '"FL"' is not assignable to type '"fl"'. +intrinsicTypesUnicodeSpecialCasing.ts(23,7): error TS2322: Type '"FF"' is not assignable to type '"ff"'. +intrinsicTypesUnicodeSpecialCasing.ts(27,7): error TS2322: Type '"SStest"' is not assignable to type '"ßtest"'. +intrinsicTypesUnicodeSpecialCasing.ts(31,7): error TS2322: Type '"i̇SPANYOL"' is not assignable to type '"iSPANYOL"'. +intrinsicTypesUnicodeSpecialCasing.ts(35,7): error TS2322: Type '"STRASSE"' is not assignable to type '"STRAßE"'. + + +==== intrinsicTypesUnicodeSpecialCasing.ts (8 errors) ==== + // Test Unicode special case mappings for intrinsic string types + // These characters have 1:many case mappings that Go's strings.ToUpper/ToLower + // don't handle, but JavaScript's toUpperCase()/toLowerCase() do. + + // ß (U+00DF) uppercases to "SS" in JavaScript + type T1 = Uppercase<"ß">; + const t1: T1 = "SS"; + ~~ +!!! error TS2322: Type '"SS"' is not assignable to type '"ß"'. + + // İ (U+0130) lowercases to "i̇" (i + combining dot above U+0307) in JavaScript + type T2 = Lowercase<"İ">; + const t2: T2 = "i\u0307"; + ~~ +!!! error TS2322: Type '"i̇"' is not assignable to type '"i"'. + + // Ligatures: fi (U+FB01) uppercases to "FI" + type T3 = Uppercase<"fi">; + const t3: T3 = "FI"; + ~~ +!!! error TS2322: Type '"FI"' is not assignable to type '"fi"'. + + // fl (U+FB02) uppercases to "FL" + type T4 = Uppercase<"fl">; + const t4: T4 = "FL"; + ~~ +!!! error TS2322: Type '"FL"' is not assignable to type '"fl"'. + + // ff (U+FB00) uppercases to "FF" + type T5 = Uppercase<"ff">; + const t5: T5 = "FF"; + ~~ +!!! error TS2322: Type '"FF"' is not assignable to type '"ff"'. + + // Capitalize should only affect first character + type T6 = Capitalize<"ßtest">; + const t6: T6 = "SStest"; + ~~ +!!! error TS2322: Type '"SStest"' is not assignable to type '"ßtest"'. + + // Uncapitalize with İ + type T7 = Uncapitalize<"İSPANYOL">; + const t7: T7 = "i\u0307SPANYOL"; + ~~ +!!! error TS2322: Type '"i̇SPANYOL"' is not assignable to type '"iSPANYOL"'. + + // Mixed string with special characters + type T8 = Uppercase<"straße">; + const t8: T8 = "STRASSE"; + ~~ +!!! error TS2322: Type '"STRASSE"' is not assignable to type '"STRAßE"'. + \ No newline at end of file diff --git a/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.js b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.js new file mode 100644 index 0000000000..e820c17a57 --- /dev/null +++ b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.js @@ -0,0 +1,53 @@ +//// [tests/cases/compiler/intrinsicTypesUnicodeSpecialCasing.ts] //// + +//// [intrinsicTypesUnicodeSpecialCasing.ts] +// Test Unicode special case mappings for intrinsic string types +// These characters have 1:many case mappings that Go's strings.ToUpper/ToLower +// don't handle, but JavaScript's toUpperCase()/toLowerCase() do. + +// ß (U+00DF) uppercases to "SS" in JavaScript +type T1 = Uppercase<"ß">; +const t1: T1 = "SS"; + +// İ (U+0130) lowercases to "i̇" (i + combining dot above U+0307) in JavaScript +type T2 = Lowercase<"İ">; +const t2: T2 = "i\u0307"; + +// Ligatures: fi (U+FB01) uppercases to "FI" +type T3 = Uppercase<"fi">; +const t3: T3 = "FI"; + +// fl (U+FB02) uppercases to "FL" +type T4 = Uppercase<"fl">; +const t4: T4 = "FL"; + +// ff (U+FB00) uppercases to "FF" +type T5 = Uppercase<"ff">; +const t5: T5 = "FF"; + +// Capitalize should only affect first character +type T6 = Capitalize<"ßtest">; +const t6: T6 = "SStest"; + +// Uncapitalize with İ +type T7 = Uncapitalize<"İSPANYOL">; +const t7: T7 = "i\u0307SPANYOL"; + +// Mixed string with special characters +type T8 = Uppercase<"straße">; +const t8: T8 = "STRASSE"; + + +//// [intrinsicTypesUnicodeSpecialCasing.js] +"use strict"; +// Test Unicode special case mappings for intrinsic string types +// These characters have 1:many case mappings that Go's strings.ToUpper/ToLower +// don't handle, but JavaScript's toUpperCase()/toLowerCase() do. +const t1 = "SS"; +const t2 = "i\u0307"; +const t3 = "FI"; +const t4 = "FL"; +const t5 = "FF"; +const t6 = "SStest"; +const t7 = "i\u0307SPANYOL"; +const t8 = "STRASSE"; diff --git a/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.symbols b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.symbols new file mode 100644 index 0000000000..9ce7ca0f50 --- /dev/null +++ b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.symbols @@ -0,0 +1,79 @@ +//// [tests/cases/compiler/intrinsicTypesUnicodeSpecialCasing.ts] //// + +=== intrinsicTypesUnicodeSpecialCasing.ts === +// Test Unicode special case mappings for intrinsic string types +// These characters have 1:many case mappings that Go's strings.ToUpper/ToLower +// don't handle, but JavaScript's toUpperCase()/toLowerCase() do. + +// ß (U+00DF) uppercases to "SS" in JavaScript +type T1 = Uppercase<"ß">; +>T1 : Symbol(T1, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 0, 0)) +>Uppercase : Symbol(Uppercase, Decl(lib.es5.d.ts, --, --)) + +const t1: T1 = "SS"; +>t1 : Symbol(t1, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 6, 5)) +>T1 : Symbol(T1, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 0, 0)) + +// İ (U+0130) lowercases to "i̇" (i + combining dot above U+0307) in JavaScript +type T2 = Lowercase<"İ">; +>T2 : Symbol(T2, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 6, 20)) +>Lowercase : Symbol(Lowercase, Decl(lib.es5.d.ts, --, --)) + +const t2: T2 = "i\u0307"; +>t2 : Symbol(t2, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 10, 5)) +>T2 : Symbol(T2, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 6, 20)) + +// Ligatures: fi (U+FB01) uppercases to "FI" +type T3 = Uppercase<"fi">; +>T3 : Symbol(T3, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 10, 25)) +>Uppercase : Symbol(Uppercase, Decl(lib.es5.d.ts, --, --)) + +const t3: T3 = "FI"; +>t3 : Symbol(t3, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 14, 5)) +>T3 : Symbol(T3, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 10, 25)) + +// fl (U+FB02) uppercases to "FL" +type T4 = Uppercase<"fl">; +>T4 : Symbol(T4, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 14, 20)) +>Uppercase : Symbol(Uppercase, Decl(lib.es5.d.ts, --, --)) + +const t4: T4 = "FL"; +>t4 : Symbol(t4, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 18, 5)) +>T4 : Symbol(T4, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 14, 20)) + +// ff (U+FB00) uppercases to "FF" +type T5 = Uppercase<"ff">; +>T5 : Symbol(T5, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 18, 20)) +>Uppercase : Symbol(Uppercase, Decl(lib.es5.d.ts, --, --)) + +const t5: T5 = "FF"; +>t5 : Symbol(t5, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 22, 5)) +>T5 : Symbol(T5, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 18, 20)) + +// Capitalize should only affect first character +type T6 = Capitalize<"ßtest">; +>T6 : Symbol(T6, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 22, 20)) +>Capitalize : Symbol(Capitalize, Decl(lib.es5.d.ts, --, --)) + +const t6: T6 = "SStest"; +>t6 : Symbol(t6, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 26, 5)) +>T6 : Symbol(T6, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 22, 20)) + +// Uncapitalize with İ +type T7 = Uncapitalize<"İSPANYOL">; +>T7 : Symbol(T7, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 26, 24)) +>Uncapitalize : Symbol(Uncapitalize, Decl(lib.es5.d.ts, --, --)) + +const t7: T7 = "i\u0307SPANYOL"; +>t7 : Symbol(t7, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 30, 5)) +>T7 : Symbol(T7, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 26, 24)) + +// Mixed string with special characters +type T8 = Uppercase<"straße">; +>T8 : Symbol(T8, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 30, 32)) +>Uppercase : Symbol(Uppercase, Decl(lib.es5.d.ts, --, --)) + +const t8: T8 = "STRASSE"; +>t8 : Symbol(t8, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 34, 5)) +>T8 : Symbol(T8, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 30, 32)) + diff --git a/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.types b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.types new file mode 100644 index 0000000000..914c7b14cb --- /dev/null +++ b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.types @@ -0,0 +1,71 @@ +//// [tests/cases/compiler/intrinsicTypesUnicodeSpecialCasing.ts] //// + +=== intrinsicTypesUnicodeSpecialCasing.ts === +// Test Unicode special case mappings for intrinsic string types +// These characters have 1:many case mappings that Go's strings.ToUpper/ToLower +// don't handle, but JavaScript's toUpperCase()/toLowerCase() do. + +// ß (U+00DF) uppercases to "SS" in JavaScript +type T1 = Uppercase<"ß">; +>T1 : "ß" + +const t1: T1 = "SS"; +>t1 : "ß" +>"SS" : "SS" + +// İ (U+0130) lowercases to "i̇" (i + combining dot above U+0307) in JavaScript +type T2 = Lowercase<"İ">; +>T2 : "i" + +const t2: T2 = "i\u0307"; +>t2 : "i" +>"i\u0307" : "i̇" + +// Ligatures: fi (U+FB01) uppercases to "FI" +type T3 = Uppercase<"fi">; +>T3 : "fi" + +const t3: T3 = "FI"; +>t3 : "fi" +>"FI" : "FI" + +// fl (U+FB02) uppercases to "FL" +type T4 = Uppercase<"fl">; +>T4 : "fl" + +const t4: T4 = "FL"; +>t4 : "fl" +>"FL" : "FL" + +// ff (U+FB00) uppercases to "FF" +type T5 = Uppercase<"ff">; +>T5 : "ff" + +const t5: T5 = "FF"; +>t5 : "ff" +>"FF" : "FF" + +// Capitalize should only affect first character +type T6 = Capitalize<"ßtest">; +>T6 : "ßtest" + +const t6: T6 = "SStest"; +>t6 : "ßtest" +>"SStest" : "SStest" + +// Uncapitalize with İ +type T7 = Uncapitalize<"İSPANYOL">; +>T7 : "iSPANYOL" + +const t7: T7 = "i\u0307SPANYOL"; +>t7 : "iSPANYOL" +>"i\u0307SPANYOL" : "i̇SPANYOL" + +// Mixed string with special characters +type T8 = Uppercase<"straße">; +>T8 : "STRAßE" + +const t8: T8 = "STRASSE"; +>t8 : "STRAßE" +>"STRASSE" : "STRASSE" + diff --git a/testdata/tests/cases/compiler/intrinsicTypesUnicodeSpecialCasing.ts b/testdata/tests/cases/compiler/intrinsicTypesUnicodeSpecialCasing.ts new file mode 100644 index 0000000000..d1a50f563f --- /dev/null +++ b/testdata/tests/cases/compiler/intrinsicTypesUnicodeSpecialCasing.ts @@ -0,0 +1,37 @@ +// @strict: true + +// Test Unicode special case mappings for intrinsic string types +// These characters have 1:many case mappings that Go's strings.ToUpper/ToLower +// don't handle, but JavaScript's toUpperCase()/toLowerCase() do. + +// ß (U+00DF) uppercases to "SS" in JavaScript +type T1 = Uppercase<"ß">; +const t1: T1 = "SS"; + +// İ (U+0130) lowercases to "i̇" (i + combining dot above U+0307) in JavaScript +type T2 = Lowercase<"İ">; +const t2: T2 = "i\u0307"; + +// Ligatures: fi (U+FB01) uppercases to "FI" +type T3 = Uppercase<"fi">; +const t3: T3 = "FI"; + +// fl (U+FB02) uppercases to "FL" +type T4 = Uppercase<"fl">; +const t4: T4 = "FL"; + +// ff (U+FB00) uppercases to "FF" +type T5 = Uppercase<"ff">; +const t5: T5 = "FF"; + +// Capitalize should only affect first character +type T6 = Capitalize<"ßtest">; +const t6: T6 = "SStest"; + +// Uncapitalize with İ +type T7 = Uncapitalize<"İSPANYOL">; +const t7: T7 = "i\u0307SPANYOL"; + +// Mixed string with special characters +type T8 = Uppercase<"straße">; +const t8: T8 = "STRASSE"; From 51f195607db9da2278e0e0e801a576bfff260e5c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 16 May 2026 20:31:10 +0000 Subject: [PATCH 3/5] Fix Uppercase/Lowercase intrinsic types to apply Unicode special case mappings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement JavaScript-compatible full Unicode case mapping for intrinsic string types (Uppercase, Lowercase, Capitalize, Uncapitalize). Go's strings.ToUpper/ToLower use simple case mapping (1:1), while JavaScript's toUpperCase()/toLowerCase() use full Unicode case mapping from SpecialCasing.txt where a single character can map to multiple characters. Key examples fixed: - Uppercase<"ß"> now correctly resolves to "SS" (not "ß") - Lowercase<"İ"> now correctly resolves to "i̇" (not "i") - Uppercase<"fi"> now correctly resolves to "FI" (not "fi") Fixes #3489 Agent-Logs-Url: https://github.com/microsoft/typescript-go/sessions/7ac37be7-07a1-49c6-be87-36aa6ffa208f Co-authored-by: jakebailey <5341706+jakebailey@users.noreply.github.com> --- internal/checker/checker.go | 11 +- internal/checker/stringcase.go | 220 ++++++++++++++++++ ...rinsicTypesUnicodeSpecialCasing.errors.txt | 63 ----- .../intrinsicTypesUnicodeSpecialCasing.types | 32 +-- 4 files changed, 240 insertions(+), 86 deletions(-) create mode 100644 internal/checker/stringcase.go delete mode 100644 testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.errors.txt diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 020fe61193..995d9f1523 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -11,7 +11,6 @@ import ( "strings" "sync" "sync/atomic" - "unicode/utf8" "github.com/microsoft/typescript-go/internal/ast" "github.com/microsoft/typescript-go/internal/binder" @@ -28873,15 +28872,13 @@ func (c *Checker) getStringMappingType(symbol *ast.Symbol, t *Type) *Type { func applyStringMapping(symbol *ast.Symbol, str string) string { switch intrinsicTypeKinds[symbol.Name] { case IntrinsicTypeKindUppercase: - return strings.ToUpper(str) + return toUpperCase(str) case IntrinsicTypeKindLowercase: - return strings.ToLower(str) + return toLowerCase(str) case IntrinsicTypeKindCapitalize: - _, size := utf8.DecodeRuneInString(str) - return strings.ToUpper(str[:size]) + str[size:] + return toUpperCaseFirstRune(str) case IntrinsicTypeKindUncapitalize: - _, size := utf8.DecodeRuneInString(str) - return strings.ToLower(str[:size]) + str[size:] + return toLowerCaseFirstRune(str) } return str } diff --git a/internal/checker/stringcase.go b/internal/checker/stringcase.go new file mode 100644 index 0000000000..bec2e0b5e5 --- /dev/null +++ b/internal/checker/stringcase.go @@ -0,0 +1,220 @@ +package checker + +import ( + "strings" + "unicode" + "unicode/utf8" +) + +// toUpperCase converts a string to uppercase using the full Unicode case mapping, +// matching JavaScript's String.prototype.toUpperCase() behavior. Unlike Go's +// strings.ToUpper which uses simple case mapping (1:1), this function handles +// special case mappings where a single character maps to multiple characters +// (e.g., 'ß' → "SS"). +func toUpperCase(s string) string { + // Fast path: check if any special casing characters exist + hasSpecial := false + for _, r := range s { + if _, ok := upperSpecialCasings[r]; ok { + hasSpecial = true + break + } + } + if !hasSpecial { + return strings.ToUpper(s) + } + + var b strings.Builder + b.Grow(len(s)) + for _, r := range s { + if mapped, ok := upperSpecialCasings[r]; ok { + b.WriteString(mapped) + } else { + b.WriteRune(unicode.ToUpper(r)) + } + } + return b.String() +} + +// toLowerCase converts a string to lowercase using the full Unicode case mapping, +// matching JavaScript's String.prototype.toLowerCase() behavior. +func toLowerCase(s string) string { + // Fast path: check if any special casing characters exist + hasSpecial := false + for _, r := range s { + if _, ok := lowerSpecialCasings[r]; ok { + hasSpecial = true + break + } + } + if !hasSpecial { + return strings.ToLower(s) + } + + var b strings.Builder + b.Grow(len(s)) + for _, r := range s { + if mapped, ok := lowerSpecialCasings[r]; ok { + b.WriteString(mapped) + } else { + b.WriteRune(unicode.ToLower(r)) + } + } + return b.String() +} + +// toUpperCaseFirstRune converts the first rune to uppercase using full Unicode +// case mapping, returning the result and the byte size of the original first rune. +func toUpperCaseFirstRune(s string) string { + r, size := utf8.DecodeRuneInString(s) + if r == utf8.RuneError { + return s + } + if mapped, ok := upperSpecialCasings[r]; ok { + return mapped + s[size:] + } + return strings.ToUpper(s[:size]) + s[size:] +} + +// toLowerCaseFirstRune converts the first rune to lowercase using full Unicode +// case mapping. +func toLowerCaseFirstRune(s string) string { + r, size := utf8.DecodeRuneInString(s) + if r == utf8.RuneError { + return s + } + if mapped, ok := lowerSpecialCasings[r]; ok { + return mapped + s[size:] + } + return strings.ToLower(s[:size]) + s[size:] +} + +// upperSpecialCasings contains unconditional special case mappings for toUpperCase +// from Unicode SpecialCasing.txt. These are cases where a single code point maps +// to multiple code points when uppercased, matching JavaScript's behavior. +var upperSpecialCasings = map[rune]string{ + // Latin + 0x00DF: "SS", // ß → SS + 0x0149: "\u02BCN", // ʼn → ʼN + 0x01F0: "J\u030C", // ǰ → J̌ + + // Greek + 0x0390: "\u0399\u0308\u0301", // ΐ → Ϊ́ + 0x03B0: "\u03A5\u0308\u0301", // ΰ → Ϋ́ + + // Armenian + 0x0587: "\u0535\u0552", // և → ԵՒ + + // Latin extended + 0x1E96: "H\u0331", // ẖ → H̱ + 0x1E97: "T\u0308", // ẗ → T̈ + 0x1E98: "W\u030A", // ẘ → W̊ + 0x1E99: "Y\u030A", // ẙ → Y̊ + 0x1E9A: "A\u02BE", // ẚ → Aʾ + + // Greek extended + 0x1F50: "\u03A5\u0313", // ὐ → Υ̓ + 0x1F52: "\u03A5\u0313\u0300", // ὒ → Υ̓̀ + 0x1F54: "\u03A5\u0313\u0301", // ὔ → Υ̓́ + 0x1F56: "\u03A5\u0313\u0342", // ὖ → Υ̓͂ + + // Greek extended - with iota subscript (prosgegrammeni) + 0x1F80: "\u1F08\u0399", // ᾀ + 0x1F81: "\u1F09\u0399", // ᾁ + 0x1F82: "\u1F0A\u0399", // ᾂ + 0x1F83: "\u1F0B\u0399", // ᾃ + 0x1F84: "\u1F0C\u0399", // ᾄ + 0x1F85: "\u1F0D\u0399", // ᾅ + 0x1F86: "\u1F0E\u0399", // ᾆ + 0x1F87: "\u1F0F\u0399", // ᾇ + 0x1F88: "\u1F08\u0399", // ᾈ + 0x1F89: "\u1F09\u0399", // ᾉ + 0x1F8A: "\u1F0A\u0399", // ᾊ + 0x1F8B: "\u1F0B\u0399", // ᾋ + 0x1F8C: "\u1F0C\u0399", // ᾌ + 0x1F8D: "\u1F0D\u0399", // ᾍ + 0x1F8E: "\u1F0E\u0399", // ᾎ + 0x1F8F: "\u1F0F\u0399", // ᾏ + 0x1F90: "\u1F28\u0399", // ᾐ + 0x1F91: "\u1F29\u0399", // ᾑ + 0x1F92: "\u1F2A\u0399", // ᾒ + 0x1F93: "\u1F2B\u0399", // ᾓ + 0x1F94: "\u1F2C\u0399", // ᾔ + 0x1F95: "\u1F2D\u0399", // ᾕ + 0x1F96: "\u1F2E\u0399", // ᾖ + 0x1F97: "\u1F2F\u0399", // ᾗ + 0x1F98: "\u1F28\u0399", // ᾘ + 0x1F99: "\u1F29\u0399", // ᾙ + 0x1F9A: "\u1F2A\u0399", // ᾚ + 0x1F9B: "\u1F2B\u0399", // ᾛ + 0x1F9C: "\u1F2C\u0399", // ᾜ + 0x1F9D: "\u1F2D\u0399", // ᾝ + 0x1F9E: "\u1F2E\u0399", // ᾞ + 0x1F9F: "\u1F2F\u0399", // ᾟ + 0x1FA0: "\u1F68\u0399", // ᾠ + 0x1FA1: "\u1F69\u0399", // ᾡ + 0x1FA2: "\u1F6A\u0399", // ᾢ + 0x1FA3: "\u1F6B\u0399", // ᾣ + 0x1FA4: "\u1F6C\u0399", // ᾤ + 0x1FA5: "\u1F6D\u0399", // ᾥ + 0x1FA6: "\u1F6E\u0399", // ᾦ + 0x1FA7: "\u1F6F\u0399", // ᾧ + 0x1FA8: "\u1F68\u0399", // ᾨ + 0x1FA9: "\u1F69\u0399", // ᾩ + 0x1FAA: "\u1F6A\u0399", // ᾪ + 0x1FAB: "\u1F6B\u0399", // ᾫ + 0x1FAC: "\u1F6C\u0399", // ᾬ + 0x1FAD: "\u1F6D\u0399", // ᾭ + 0x1FAE: "\u1F6E\u0399", // ᾮ + 0x1FAF: "\u1F6F\u0399", // ᾯ + 0x1FB2: "\u1FBA\u0399", // ᾲ + 0x1FB3: "\u0391\u0399", // ᾳ + 0x1FB4: "\u0386\u0399", // ᾴ + 0x1FB6: "\u0391\u0342", // ᾶ + 0x1FB7: "\u0391\u0342\u0399", // ᾷ + 0x1FBC: "\u0391\u0399", // ᾼ + 0x1FC2: "\u1FCA\u0399", // ῂ + 0x1FC3: "\u0397\u0399", // ῃ + 0x1FC4: "\u0389\u0399", // ῄ + 0x1FC6: "\u0397\u0342", // ῆ + 0x1FC7: "\u0397\u0342\u0399", // ῇ + 0x1FCC: "\u0397\u0399", // ῌ + 0x1FD2: "\u0399\u0308\u0300", // ῒ + 0x1FD3: "\u0399\u0308\u0301", // ΐ + 0x1FD6: "\u0399\u0342", // ῖ + 0x1FD7: "\u0399\u0308\u0342", // ῗ + 0x1FE2: "\u03A5\u0308\u0300", // ῢ + 0x1FE3: "\u03A5\u0308\u0301", // ΰ + 0x1FE4: "\u03A1\u0313", // ῤ + 0x1FE6: "\u03A5\u0342", // ῦ + 0x1FE7: "\u03A5\u0308\u0342", // ῧ + 0x1FF2: "\u1FFA\u0399", // ῲ + 0x1FF3: "\u03A9\u0399", // ῳ + 0x1FF4: "\u038F\u0399", // ῴ + 0x1FF6: "\u03A9\u0342", // ῶ + 0x1FF7: "\u03A9\u0342\u0399", // ῷ + 0x1FFC: "\u03A9\u0399", // ῼ + + // Latin ligatures + 0xFB00: "FF", // ff → FF + 0xFB01: "FI", // fi → FI + 0xFB02: "FL", // fl → FL + 0xFB03: "FFI", // ffi → FFI + 0xFB04: "FFL", // ffl → FFL + 0xFB05: "ST", // ſt → ST + 0xFB06: "ST", // st → ST + + // Armenian ligatures + 0xFB13: "\u0544\u0546", // ﬓ → ՄՆ + 0xFB14: "\u0544\u0535", // ﬔ → ՄԵ + 0xFB15: "\u0544\u053B", // ﬕ → ՄԻ + 0xFB16: "\u054E\u0546", // ﬖ → ՎՆ + 0xFB17: "\u0544\u053D", // ﬗ → ՄԽ +} + +// lowerSpecialCasings contains unconditional special case mappings for toLowerCase +// from Unicode SpecialCasing.txt. There is only one unconditional special lower-case +// mapping in Unicode. +var lowerSpecialCasings = map[rune]string{ + 0x0130: "i\u0307", // İ → i + combining dot above +} diff --git a/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.errors.txt b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.errors.txt deleted file mode 100644 index 6efc0a0ec4..0000000000 --- a/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.errors.txt +++ /dev/null @@ -1,63 +0,0 @@ -intrinsicTypesUnicodeSpecialCasing.ts(7,7): error TS2322: Type '"SS"' is not assignable to type '"ß"'. -intrinsicTypesUnicodeSpecialCasing.ts(11,7): error TS2322: Type '"i̇"' is not assignable to type '"i"'. -intrinsicTypesUnicodeSpecialCasing.ts(15,7): error TS2322: Type '"FI"' is not assignable to type '"fi"'. -intrinsicTypesUnicodeSpecialCasing.ts(19,7): error TS2322: Type '"FL"' is not assignable to type '"fl"'. -intrinsicTypesUnicodeSpecialCasing.ts(23,7): error TS2322: Type '"FF"' is not assignable to type '"ff"'. -intrinsicTypesUnicodeSpecialCasing.ts(27,7): error TS2322: Type '"SStest"' is not assignable to type '"ßtest"'. -intrinsicTypesUnicodeSpecialCasing.ts(31,7): error TS2322: Type '"i̇SPANYOL"' is not assignable to type '"iSPANYOL"'. -intrinsicTypesUnicodeSpecialCasing.ts(35,7): error TS2322: Type '"STRASSE"' is not assignable to type '"STRAßE"'. - - -==== intrinsicTypesUnicodeSpecialCasing.ts (8 errors) ==== - // Test Unicode special case mappings for intrinsic string types - // These characters have 1:many case mappings that Go's strings.ToUpper/ToLower - // don't handle, but JavaScript's toUpperCase()/toLowerCase() do. - - // ß (U+00DF) uppercases to "SS" in JavaScript - type T1 = Uppercase<"ß">; - const t1: T1 = "SS"; - ~~ -!!! error TS2322: Type '"SS"' is not assignable to type '"ß"'. - - // İ (U+0130) lowercases to "i̇" (i + combining dot above U+0307) in JavaScript - type T2 = Lowercase<"İ">; - const t2: T2 = "i\u0307"; - ~~ -!!! error TS2322: Type '"i̇"' is not assignable to type '"i"'. - - // Ligatures: fi (U+FB01) uppercases to "FI" - type T3 = Uppercase<"fi">; - const t3: T3 = "FI"; - ~~ -!!! error TS2322: Type '"FI"' is not assignable to type '"fi"'. - - // fl (U+FB02) uppercases to "FL" - type T4 = Uppercase<"fl">; - const t4: T4 = "FL"; - ~~ -!!! error TS2322: Type '"FL"' is not assignable to type '"fl"'. - - // ff (U+FB00) uppercases to "FF" - type T5 = Uppercase<"ff">; - const t5: T5 = "FF"; - ~~ -!!! error TS2322: Type '"FF"' is not assignable to type '"ff"'. - - // Capitalize should only affect first character - type T6 = Capitalize<"ßtest">; - const t6: T6 = "SStest"; - ~~ -!!! error TS2322: Type '"SStest"' is not assignable to type '"ßtest"'. - - // Uncapitalize with İ - type T7 = Uncapitalize<"İSPANYOL">; - const t7: T7 = "i\u0307SPANYOL"; - ~~ -!!! error TS2322: Type '"i̇SPANYOL"' is not assignable to type '"iSPANYOL"'. - - // Mixed string with special characters - type T8 = Uppercase<"straße">; - const t8: T8 = "STRASSE"; - ~~ -!!! error TS2322: Type '"STRASSE"' is not assignable to type '"STRAßE"'. - \ No newline at end of file diff --git a/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.types b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.types index 914c7b14cb..e8b7342e2b 100644 --- a/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.types +++ b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.types @@ -7,65 +7,65 @@ // ß (U+00DF) uppercases to "SS" in JavaScript type T1 = Uppercase<"ß">; ->T1 : "ß" +>T1 : "SS" const t1: T1 = "SS"; ->t1 : "ß" +>t1 : "SS" >"SS" : "SS" // İ (U+0130) lowercases to "i̇" (i + combining dot above U+0307) in JavaScript type T2 = Lowercase<"İ">; ->T2 : "i" +>T2 : "i̇" const t2: T2 = "i\u0307"; ->t2 : "i" +>t2 : "i̇" >"i\u0307" : "i̇" // Ligatures: fi (U+FB01) uppercases to "FI" type T3 = Uppercase<"fi">; ->T3 : "fi" +>T3 : "FI" const t3: T3 = "FI"; ->t3 : "fi" +>t3 : "FI" >"FI" : "FI" // fl (U+FB02) uppercases to "FL" type T4 = Uppercase<"fl">; ->T4 : "fl" +>T4 : "FL" const t4: T4 = "FL"; ->t4 : "fl" +>t4 : "FL" >"FL" : "FL" // ff (U+FB00) uppercases to "FF" type T5 = Uppercase<"ff">; ->T5 : "ff" +>T5 : "FF" const t5: T5 = "FF"; ->t5 : "ff" +>t5 : "FF" >"FF" : "FF" // Capitalize should only affect first character type T6 = Capitalize<"ßtest">; ->T6 : "ßtest" +>T6 : "SStest" const t6: T6 = "SStest"; ->t6 : "ßtest" +>t6 : "SStest" >"SStest" : "SStest" // Uncapitalize with İ type T7 = Uncapitalize<"İSPANYOL">; ->T7 : "iSPANYOL" +>T7 : "i̇SPANYOL" const t7: T7 = "i\u0307SPANYOL"; ->t7 : "iSPANYOL" +>t7 : "i̇SPANYOL" >"i\u0307SPANYOL" : "i̇SPANYOL" // Mixed string with special characters type T8 = Uppercase<"straße">; ->T8 : "STRAßE" +>T8 : "STRASSE" const t8: T8 = "STRASSE"; ->t8 : "STRAßE" +>t8 : "STRASSE" >"STRASSE" : "STRASSE" From e3ee0940b8eaf72b14259694bfad555bdd7059c8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 16 May 2026 21:04:03 +0000 Subject: [PATCH 4/5] Refactor stringcase.go to use golang.org/x/text/cases instead of hand-rolled table Replace the manually maintained Unicode SpecialCasing.txt table with golang.org/x/text/cases package (already a project dependency), which provides full Unicode case mapping that matches JavaScript's behavior and stays up to date automatically via the x/text module. Agent-Logs-Url: https://github.com/microsoft/typescript-go/sessions/47f91f8b-f932-4212-8285-794b2ee100e8 Co-authored-by: jakebailey <5341706+jakebailey@users.noreply.github.com> --- internal/checker/stringcase.go | 221 ++++----------------------------- 1 file changed, 27 insertions(+), 194 deletions(-) diff --git a/internal/checker/stringcase.go b/internal/checker/stringcase.go index bec2e0b5e5..caa5a00549 100644 --- a/internal/checker/stringcase.go +++ b/internal/checker/stringcase.go @@ -1,220 +1,53 @@ package checker import ( - "strings" - "unicode" "unicode/utf8" + + "golang.org/x/text/cases" + "golang.org/x/text/language" +) + +// upperCaser and lowerCaser use golang.org/x/text/cases with the "und" (undetermined) +// locale to perform full Unicode case mapping, matching JavaScript's +// String.prototype.toUpperCase() / toLowerCase() behavior. +// Unlike Go's strings.ToUpper/ToLower which use simple case mapping (1:1), +// these handle special case mappings where a single character maps to multiple +// characters (e.g., 'ß' → "SS", 'İ' → "i̇"). +// The mapping tables come from Unicode's SpecialCasing.txt and are kept up to +// date via the golang.org/x/text module. +var ( + upperCaser = cases.Upper(language.Und) + lowerCaser = cases.Lower(language.Und) ) // toUpperCase converts a string to uppercase using the full Unicode case mapping, -// matching JavaScript's String.prototype.toUpperCase() behavior. Unlike Go's -// strings.ToUpper which uses simple case mapping (1:1), this function handles -// special case mappings where a single character maps to multiple characters -// (e.g., 'ß' → "SS"). +// matching JavaScript's String.prototype.toUpperCase() behavior. func toUpperCase(s string) string { - // Fast path: check if any special casing characters exist - hasSpecial := false - for _, r := range s { - if _, ok := upperSpecialCasings[r]; ok { - hasSpecial = true - break - } - } - if !hasSpecial { - return strings.ToUpper(s) - } - - var b strings.Builder - b.Grow(len(s)) - for _, r := range s { - if mapped, ok := upperSpecialCasings[r]; ok { - b.WriteString(mapped) - } else { - b.WriteRune(unicode.ToUpper(r)) - } - } - return b.String() + return upperCaser.String(s) } // toLowerCase converts a string to lowercase using the full Unicode case mapping, // matching JavaScript's String.prototype.toLowerCase() behavior. func toLowerCase(s string) string { - // Fast path: check if any special casing characters exist - hasSpecial := false - for _, r := range s { - if _, ok := lowerSpecialCasings[r]; ok { - hasSpecial = true - break - } - } - if !hasSpecial { - return strings.ToLower(s) - } - - var b strings.Builder - b.Grow(len(s)) - for _, r := range s { - if mapped, ok := lowerSpecialCasings[r]; ok { - b.WriteString(mapped) - } else { - b.WriteRune(unicode.ToLower(r)) - } - } - return b.String() + return lowerCaser.String(s) } // toUpperCaseFirstRune converts the first rune to uppercase using full Unicode -// case mapping, returning the result and the byte size of the original first rune. +// case mapping, leaving the rest of the string unchanged. func toUpperCaseFirstRune(s string) string { - r, size := utf8.DecodeRuneInString(s) - if r == utf8.RuneError { + if s == "" { return s } - if mapped, ok := upperSpecialCasings[r]; ok { - return mapped + s[size:] - } - return strings.ToUpper(s[:size]) + s[size:] + _, size := utf8.DecodeRuneInString(s) + return upperCaser.String(s[:size]) + s[size:] } // toLowerCaseFirstRune converts the first rune to lowercase using full Unicode -// case mapping. +// case mapping, leaving the rest of the string unchanged. func toLowerCaseFirstRune(s string) string { - r, size := utf8.DecodeRuneInString(s) - if r == utf8.RuneError { + if s == "" { return s } - if mapped, ok := lowerSpecialCasings[r]; ok { - return mapped + s[size:] - } - return strings.ToLower(s[:size]) + s[size:] -} - -// upperSpecialCasings contains unconditional special case mappings for toUpperCase -// from Unicode SpecialCasing.txt. These are cases where a single code point maps -// to multiple code points when uppercased, matching JavaScript's behavior. -var upperSpecialCasings = map[rune]string{ - // Latin - 0x00DF: "SS", // ß → SS - 0x0149: "\u02BCN", // ʼn → ʼN - 0x01F0: "J\u030C", // ǰ → J̌ - - // Greek - 0x0390: "\u0399\u0308\u0301", // ΐ → Ϊ́ - 0x03B0: "\u03A5\u0308\u0301", // ΰ → Ϋ́ - - // Armenian - 0x0587: "\u0535\u0552", // և → ԵՒ - - // Latin extended - 0x1E96: "H\u0331", // ẖ → H̱ - 0x1E97: "T\u0308", // ẗ → T̈ - 0x1E98: "W\u030A", // ẘ → W̊ - 0x1E99: "Y\u030A", // ẙ → Y̊ - 0x1E9A: "A\u02BE", // ẚ → Aʾ - - // Greek extended - 0x1F50: "\u03A5\u0313", // ὐ → Υ̓ - 0x1F52: "\u03A5\u0313\u0300", // ὒ → Υ̓̀ - 0x1F54: "\u03A5\u0313\u0301", // ὔ → Υ̓́ - 0x1F56: "\u03A5\u0313\u0342", // ὖ → Υ̓͂ - - // Greek extended - with iota subscript (prosgegrammeni) - 0x1F80: "\u1F08\u0399", // ᾀ - 0x1F81: "\u1F09\u0399", // ᾁ - 0x1F82: "\u1F0A\u0399", // ᾂ - 0x1F83: "\u1F0B\u0399", // ᾃ - 0x1F84: "\u1F0C\u0399", // ᾄ - 0x1F85: "\u1F0D\u0399", // ᾅ - 0x1F86: "\u1F0E\u0399", // ᾆ - 0x1F87: "\u1F0F\u0399", // ᾇ - 0x1F88: "\u1F08\u0399", // ᾈ - 0x1F89: "\u1F09\u0399", // ᾉ - 0x1F8A: "\u1F0A\u0399", // ᾊ - 0x1F8B: "\u1F0B\u0399", // ᾋ - 0x1F8C: "\u1F0C\u0399", // ᾌ - 0x1F8D: "\u1F0D\u0399", // ᾍ - 0x1F8E: "\u1F0E\u0399", // ᾎ - 0x1F8F: "\u1F0F\u0399", // ᾏ - 0x1F90: "\u1F28\u0399", // ᾐ - 0x1F91: "\u1F29\u0399", // ᾑ - 0x1F92: "\u1F2A\u0399", // ᾒ - 0x1F93: "\u1F2B\u0399", // ᾓ - 0x1F94: "\u1F2C\u0399", // ᾔ - 0x1F95: "\u1F2D\u0399", // ᾕ - 0x1F96: "\u1F2E\u0399", // ᾖ - 0x1F97: "\u1F2F\u0399", // ᾗ - 0x1F98: "\u1F28\u0399", // ᾘ - 0x1F99: "\u1F29\u0399", // ᾙ - 0x1F9A: "\u1F2A\u0399", // ᾚ - 0x1F9B: "\u1F2B\u0399", // ᾛ - 0x1F9C: "\u1F2C\u0399", // ᾜ - 0x1F9D: "\u1F2D\u0399", // ᾝ - 0x1F9E: "\u1F2E\u0399", // ᾞ - 0x1F9F: "\u1F2F\u0399", // ᾟ - 0x1FA0: "\u1F68\u0399", // ᾠ - 0x1FA1: "\u1F69\u0399", // ᾡ - 0x1FA2: "\u1F6A\u0399", // ᾢ - 0x1FA3: "\u1F6B\u0399", // ᾣ - 0x1FA4: "\u1F6C\u0399", // ᾤ - 0x1FA5: "\u1F6D\u0399", // ᾥ - 0x1FA6: "\u1F6E\u0399", // ᾦ - 0x1FA7: "\u1F6F\u0399", // ᾧ - 0x1FA8: "\u1F68\u0399", // ᾨ - 0x1FA9: "\u1F69\u0399", // ᾩ - 0x1FAA: "\u1F6A\u0399", // ᾪ - 0x1FAB: "\u1F6B\u0399", // ᾫ - 0x1FAC: "\u1F6C\u0399", // ᾬ - 0x1FAD: "\u1F6D\u0399", // ᾭ - 0x1FAE: "\u1F6E\u0399", // ᾮ - 0x1FAF: "\u1F6F\u0399", // ᾯ - 0x1FB2: "\u1FBA\u0399", // ᾲ - 0x1FB3: "\u0391\u0399", // ᾳ - 0x1FB4: "\u0386\u0399", // ᾴ - 0x1FB6: "\u0391\u0342", // ᾶ - 0x1FB7: "\u0391\u0342\u0399", // ᾷ - 0x1FBC: "\u0391\u0399", // ᾼ - 0x1FC2: "\u1FCA\u0399", // ῂ - 0x1FC3: "\u0397\u0399", // ῃ - 0x1FC4: "\u0389\u0399", // ῄ - 0x1FC6: "\u0397\u0342", // ῆ - 0x1FC7: "\u0397\u0342\u0399", // ῇ - 0x1FCC: "\u0397\u0399", // ῌ - 0x1FD2: "\u0399\u0308\u0300", // ῒ - 0x1FD3: "\u0399\u0308\u0301", // ΐ - 0x1FD6: "\u0399\u0342", // ῖ - 0x1FD7: "\u0399\u0308\u0342", // ῗ - 0x1FE2: "\u03A5\u0308\u0300", // ῢ - 0x1FE3: "\u03A5\u0308\u0301", // ΰ - 0x1FE4: "\u03A1\u0313", // ῤ - 0x1FE6: "\u03A5\u0342", // ῦ - 0x1FE7: "\u03A5\u0308\u0342", // ῧ - 0x1FF2: "\u1FFA\u0399", // ῲ - 0x1FF3: "\u03A9\u0399", // ῳ - 0x1FF4: "\u038F\u0399", // ῴ - 0x1FF6: "\u03A9\u0342", // ῶ - 0x1FF7: "\u03A9\u0342\u0399", // ῷ - 0x1FFC: "\u03A9\u0399", // ῼ - - // Latin ligatures - 0xFB00: "FF", // ff → FF - 0xFB01: "FI", // fi → FI - 0xFB02: "FL", // fl → FL - 0xFB03: "FFI", // ffi → FFI - 0xFB04: "FFL", // ffl → FFL - 0xFB05: "ST", // ſt → ST - 0xFB06: "ST", // st → ST - - // Armenian ligatures - 0xFB13: "\u0544\u0546", // ﬓ → ՄՆ - 0xFB14: "\u0544\u0535", // ﬔ → ՄԵ - 0xFB15: "\u0544\u053B", // ﬕ → ՄԻ - 0xFB16: "\u054E\u0546", // ﬖ → ՎՆ - 0xFB17: "\u0544\u053D", // ﬗ → ՄԽ -} - -// lowerSpecialCasings contains unconditional special case mappings for toLowerCase -// from Unicode SpecialCasing.txt. There is only one unconditional special lower-case -// mapping in Unicode. -var lowerSpecialCasings = map[rune]string{ - 0x0130: "i\u0307", // İ → i + combining dot above + _, size := utf8.DecodeRuneInString(s) + return lowerCaser.String(s[:size]) + s[size:] } From 53de6b08dc7eac25ab53221abb021a65f5ce24bb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 16 May 2026 21:26:55 +0000 Subject: [PATCH 5/5] Add ASCII fast paths and lazy init for Unicode case mapping ASCII strings use strings.ToUpper/ToLower directly, avoiding x/text/cases overhead. The x/text casers are initialized lazily via sync.OnceValue so tables are only loaded when non-ASCII input is encountered. Agent-Logs-Url: https://github.com/microsoft/typescript-go/sessions/76d200c6-8d8e-4f2c-875d-f8712343859f Co-authored-by: jakebailey <5341706+jakebailey@users.noreply.github.com> --- internal/checker/stringcase.go | 76 ++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 18 deletions(-) diff --git a/internal/checker/stringcase.go b/internal/checker/stringcase.go index caa5a00549..c45d33b48b 100644 --- a/internal/checker/stringcase.go +++ b/internal/checker/stringcase.go @@ -1,35 +1,57 @@ package checker import ( + "strings" + "sync" "unicode/utf8" "golang.org/x/text/cases" "golang.org/x/text/language" ) -// upperCaser and lowerCaser use golang.org/x/text/cases with the "und" (undetermined) -// locale to perform full Unicode case mapping, matching JavaScript's -// String.prototype.toUpperCase() / toLowerCase() behavior. -// Unlike Go's strings.ToUpper/ToLower which use simple case mapping (1:1), -// these handle special case mappings where a single character maps to multiple -// characters (e.g., 'ß' → "SS", 'İ' → "i̇"). -// The mapping tables come from Unicode's SpecialCasing.txt and are kept up to -// date via the golang.org/x/text module. +// upperCaser and lowerCaser are lazily initialized via sync.OnceValue to avoid +// paying the cost of loading golang.org/x/text/cases tables unless non-ASCII +// input is actually encountered. var ( - upperCaser = cases.Upper(language.Und) - lowerCaser = cases.Lower(language.Und) + upperCaser = sync.OnceValue(func() *cases.Caser { + c := cases.Upper(language.Und) + return &c + }) + lowerCaser = sync.OnceValue(func() *cases.Caser { + c := cases.Lower(language.Und) + return &c + }) ) -// toUpperCase converts a string to uppercase using the full Unicode case mapping, +// isASCII reports whether s contains only ASCII bytes. +func isASCII(s string) bool { + for i := range len(s) { + if s[i] > 0x7F { + return false + } + } + return true +} + +// toUpperCase converts a string to uppercase using full Unicode case mapping, // matching JavaScript's String.prototype.toUpperCase() behavior. +// ASCII strings use a fast path; non-ASCII strings fall through to +// golang.org/x/text/cases which handles special case mappings where a single +// character maps to multiple characters (e.g., 'ß' → "SS"). func toUpperCase(s string) string { - return upperCaser.String(s) + if isASCII(s) { + return strings.ToUpper(s) + } + return upperCaser().String(s) } -// toLowerCase converts a string to lowercase using the full Unicode case mapping, +// toLowerCase converts a string to lowercase using full Unicode case mapping, // matching JavaScript's String.prototype.toLowerCase() behavior. func toLowerCase(s string) string { - return lowerCaser.String(s) + if isASCII(s) { + return strings.ToLower(s) + } + return lowerCaser().String(s) } // toUpperCaseFirstRune converts the first rune to uppercase using full Unicode @@ -38,8 +60,17 @@ func toUpperCaseFirstRune(s string) string { if s == "" { return s } - _, size := utf8.DecodeRuneInString(s) - return upperCaser.String(s[:size]) + s[size:] + r, size := utf8.DecodeRuneInString(s) + if r < 0x80 { + // ASCII fast path: single byte uppercase + if r >= 'a' && r <= 'z' { + b := []byte(s) + b[0] -= 'a' - 'A' + return string(b) + } + return s + } + return upperCaser().String(s[:size]) + s[size:] } // toLowerCaseFirstRune converts the first rune to lowercase using full Unicode @@ -48,6 +79,15 @@ func toLowerCaseFirstRune(s string) string { if s == "" { return s } - _, size := utf8.DecodeRuneInString(s) - return lowerCaser.String(s[:size]) + s[size:] + r, size := utf8.DecodeRuneInString(s) + if r < 0x80 { + // ASCII fast path: single byte lowercase + if r >= 'A' && r <= 'Z' { + b := []byte(s) + b[0] += 'a' - 'A' + return string(b) + } + return s + } + return lowerCaser().String(s[:size]) + s[size:] }