diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 020fe61193..995d9f1523 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -11,7 +11,6 @@ import ( "strings" "sync" "sync/atomic" - "unicode/utf8" "github.com/microsoft/typescript-go/internal/ast" "github.com/microsoft/typescript-go/internal/binder" @@ -28873,15 +28872,13 @@ func (c *Checker) getStringMappingType(symbol *ast.Symbol, t *Type) *Type { func applyStringMapping(symbol *ast.Symbol, str string) string { switch intrinsicTypeKinds[symbol.Name] { case IntrinsicTypeKindUppercase: - return strings.ToUpper(str) + return toUpperCase(str) case IntrinsicTypeKindLowercase: - return strings.ToLower(str) + return toLowerCase(str) case IntrinsicTypeKindCapitalize: - _, size := utf8.DecodeRuneInString(str) - return strings.ToUpper(str[:size]) + str[size:] + return toUpperCaseFirstRune(str) case IntrinsicTypeKindUncapitalize: - _, size := utf8.DecodeRuneInString(str) - return strings.ToLower(str[:size]) + str[size:] + return toLowerCaseFirstRune(str) } return str } diff --git a/internal/checker/stringcase.go b/internal/checker/stringcase.go new file mode 100644 index 0000000000..c45d33b48b --- /dev/null +++ b/internal/checker/stringcase.go @@ -0,0 +1,93 @@ +package checker + +import ( + "strings" + "sync" + "unicode/utf8" + + "golang.org/x/text/cases" + "golang.org/x/text/language" +) + +// upperCaser and lowerCaser are lazily initialized via sync.OnceValue to avoid +// paying the cost of loading golang.org/x/text/cases tables unless non-ASCII +// input is actually encountered. +var ( + upperCaser = sync.OnceValue(func() *cases.Caser { + c := cases.Upper(language.Und) + return &c + }) + lowerCaser = sync.OnceValue(func() *cases.Caser { + c := cases.Lower(language.Und) + return &c + }) +) + +// isASCII reports whether s contains only ASCII bytes. +func isASCII(s string) bool { + for i := range len(s) { + if s[i] > 0x7F { + return false + } + } + return true +} + +// toUpperCase converts a string to uppercase using full Unicode case mapping, +// matching JavaScript's String.prototype.toUpperCase() behavior. +// ASCII strings use a fast path; non-ASCII strings fall through to +// golang.org/x/text/cases which handles special case mappings where a single +// character maps to multiple characters (e.g., 'ß' → "SS"). +func toUpperCase(s string) string { + if isASCII(s) { + return strings.ToUpper(s) + } + return upperCaser().String(s) +} + +// toLowerCase converts a string to lowercase using full Unicode case mapping, +// matching JavaScript's String.prototype.toLowerCase() behavior. +func toLowerCase(s string) string { + if isASCII(s) { + return strings.ToLower(s) + } + return lowerCaser().String(s) +} + +// toUpperCaseFirstRune converts the first rune to uppercase using full Unicode +// case mapping, leaving the rest of the string unchanged. +func toUpperCaseFirstRune(s string) string { + if s == "" { + return s + } + r, size := utf8.DecodeRuneInString(s) + if r < 0x80 { + // ASCII fast path: single byte uppercase + if r >= 'a' && r <= 'z' { + b := []byte(s) + b[0] -= 'a' - 'A' + return string(b) + } + return s + } + return upperCaser().String(s[:size]) + s[size:] +} + +// toLowerCaseFirstRune converts the first rune to lowercase using full Unicode +// case mapping, leaving the rest of the string unchanged. +func toLowerCaseFirstRune(s string) string { + if s == "" { + return s + } + r, size := utf8.DecodeRuneInString(s) + if r < 0x80 { + // ASCII fast path: single byte lowercase + if r >= 'A' && r <= 'Z' { + b := []byte(s) + b[0] += 'a' - 'A' + return string(b) + } + return s + } + return lowerCaser().String(s[:size]) + s[size:] +} diff --git a/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.js b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.js new file mode 100644 index 0000000000..e820c17a57 --- /dev/null +++ b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.js @@ -0,0 +1,53 @@ +//// [tests/cases/compiler/intrinsicTypesUnicodeSpecialCasing.ts] //// + +//// [intrinsicTypesUnicodeSpecialCasing.ts] +// Test Unicode special case mappings for intrinsic string types +// These characters have 1:many case mappings that Go's strings.ToUpper/ToLower +// don't handle, but JavaScript's toUpperCase()/toLowerCase() do. + +// ß (U+00DF) uppercases to "SS" in JavaScript +type T1 = Uppercase<"ß">; +const t1: T1 = "SS"; + +// İ (U+0130) lowercases to "i̇" (i + combining dot above U+0307) in JavaScript +type T2 = Lowercase<"İ">; +const t2: T2 = "i\u0307"; + +// Ligatures: fi (U+FB01) uppercases to "FI" +type T3 = Uppercase<"fi">; +const t3: T3 = "FI"; + +// fl (U+FB02) uppercases to "FL" +type T4 = Uppercase<"fl">; +const t4: T4 = "FL"; + +// ff (U+FB00) uppercases to "FF" +type T5 = Uppercase<"ff">; +const t5: T5 = "FF"; + +// Capitalize should only affect first character +type T6 = Capitalize<"ßtest">; +const t6: T6 = "SStest"; + +// Uncapitalize with İ +type T7 = Uncapitalize<"İSPANYOL">; +const t7: T7 = "i\u0307SPANYOL"; + +// Mixed string with special characters +type T8 = Uppercase<"straße">; +const t8: T8 = "STRASSE"; + + +//// [intrinsicTypesUnicodeSpecialCasing.js] +"use strict"; +// Test Unicode special case mappings for intrinsic string types +// These characters have 1:many case mappings that Go's strings.ToUpper/ToLower +// don't handle, but JavaScript's toUpperCase()/toLowerCase() do. +const t1 = "SS"; +const t2 = "i\u0307"; +const t3 = "FI"; +const t4 = "FL"; +const t5 = "FF"; +const t6 = "SStest"; +const t7 = "i\u0307SPANYOL"; +const t8 = "STRASSE"; diff --git a/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.symbols b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.symbols new file mode 100644 index 0000000000..9ce7ca0f50 --- /dev/null +++ b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.symbols @@ -0,0 +1,79 @@ +//// [tests/cases/compiler/intrinsicTypesUnicodeSpecialCasing.ts] //// + +=== intrinsicTypesUnicodeSpecialCasing.ts === +// Test Unicode special case mappings for intrinsic string types +// These characters have 1:many case mappings that Go's strings.ToUpper/ToLower +// don't handle, but JavaScript's toUpperCase()/toLowerCase() do. + +// ß (U+00DF) uppercases to "SS" in JavaScript +type T1 = Uppercase<"ß">; +>T1 : Symbol(T1, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 0, 0)) +>Uppercase : Symbol(Uppercase, Decl(lib.es5.d.ts, --, --)) + +const t1: T1 = "SS"; +>t1 : Symbol(t1, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 6, 5)) +>T1 : Symbol(T1, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 0, 0)) + +// İ (U+0130) lowercases to "i̇" (i + combining dot above U+0307) in JavaScript +type T2 = Lowercase<"İ">; +>T2 : Symbol(T2, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 6, 20)) +>Lowercase : Symbol(Lowercase, Decl(lib.es5.d.ts, --, --)) + +const t2: T2 = "i\u0307"; +>t2 : Symbol(t2, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 10, 5)) +>T2 : Symbol(T2, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 6, 20)) + +// Ligatures: fi (U+FB01) uppercases to "FI" +type T3 = Uppercase<"fi">; +>T3 : Symbol(T3, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 10, 25)) +>Uppercase : Symbol(Uppercase, Decl(lib.es5.d.ts, --, --)) + +const t3: T3 = "FI"; +>t3 : Symbol(t3, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 14, 5)) +>T3 : Symbol(T3, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 10, 25)) + +// fl (U+FB02) uppercases to "FL" +type T4 = Uppercase<"fl">; +>T4 : Symbol(T4, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 14, 20)) +>Uppercase : Symbol(Uppercase, Decl(lib.es5.d.ts, --, --)) + +const t4: T4 = "FL"; +>t4 : Symbol(t4, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 18, 5)) +>T4 : Symbol(T4, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 14, 20)) + +// ff (U+FB00) uppercases to "FF" +type T5 = Uppercase<"ff">; +>T5 : Symbol(T5, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 18, 20)) +>Uppercase : Symbol(Uppercase, Decl(lib.es5.d.ts, --, --)) + +const t5: T5 = "FF"; +>t5 : Symbol(t5, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 22, 5)) +>T5 : Symbol(T5, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 18, 20)) + +// Capitalize should only affect first character +type T6 = Capitalize<"ßtest">; +>T6 : Symbol(T6, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 22, 20)) +>Capitalize : Symbol(Capitalize, Decl(lib.es5.d.ts, --, --)) + +const t6: T6 = "SStest"; +>t6 : Symbol(t6, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 26, 5)) +>T6 : Symbol(T6, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 22, 20)) + +// Uncapitalize with İ +type T7 = Uncapitalize<"İSPANYOL">; +>T7 : Symbol(T7, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 26, 24)) +>Uncapitalize : Symbol(Uncapitalize, Decl(lib.es5.d.ts, --, --)) + +const t7: T7 = "i\u0307SPANYOL"; +>t7 : Symbol(t7, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 30, 5)) +>T7 : Symbol(T7, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 26, 24)) + +// Mixed string with special characters +type T8 = Uppercase<"straße">; +>T8 : Symbol(T8, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 30, 32)) +>Uppercase : Symbol(Uppercase, Decl(lib.es5.d.ts, --, --)) + +const t8: T8 = "STRASSE"; +>t8 : Symbol(t8, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 34, 5)) +>T8 : Symbol(T8, Decl(intrinsicTypesUnicodeSpecialCasing.ts, 30, 32)) + diff --git a/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.types b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.types new file mode 100644 index 0000000000..e8b7342e2b --- /dev/null +++ b/testdata/baselines/reference/compiler/intrinsicTypesUnicodeSpecialCasing.types @@ -0,0 +1,71 @@ +//// [tests/cases/compiler/intrinsicTypesUnicodeSpecialCasing.ts] //// + +=== intrinsicTypesUnicodeSpecialCasing.ts === +// Test Unicode special case mappings for intrinsic string types +// These characters have 1:many case mappings that Go's strings.ToUpper/ToLower +// don't handle, but JavaScript's toUpperCase()/toLowerCase() do. + +// ß (U+00DF) uppercases to "SS" in JavaScript +type T1 = Uppercase<"ß">; +>T1 : "SS" + +const t1: T1 = "SS"; +>t1 : "SS" +>"SS" : "SS" + +// İ (U+0130) lowercases to "i̇" (i + combining dot above U+0307) in JavaScript +type T2 = Lowercase<"İ">; +>T2 : "i̇" + +const t2: T2 = "i\u0307"; +>t2 : "i̇" +>"i\u0307" : "i̇" + +// Ligatures: fi (U+FB01) uppercases to "FI" +type T3 = Uppercase<"fi">; +>T3 : "FI" + +const t3: T3 = "FI"; +>t3 : "FI" +>"FI" : "FI" + +// fl (U+FB02) uppercases to "FL" +type T4 = Uppercase<"fl">; +>T4 : "FL" + +const t4: T4 = "FL"; +>t4 : "FL" +>"FL" : "FL" + +// ff (U+FB00) uppercases to "FF" +type T5 = Uppercase<"ff">; +>T5 : "FF" + +const t5: T5 = "FF"; +>t5 : "FF" +>"FF" : "FF" + +// Capitalize should only affect first character +type T6 = Capitalize<"ßtest">; +>T6 : "SStest" + +const t6: T6 = "SStest"; +>t6 : "SStest" +>"SStest" : "SStest" + +// Uncapitalize with İ +type T7 = Uncapitalize<"İSPANYOL">; +>T7 : "i̇SPANYOL" + +const t7: T7 = "i\u0307SPANYOL"; +>t7 : "i̇SPANYOL" +>"i\u0307SPANYOL" : "i̇SPANYOL" + +// Mixed string with special characters +type T8 = Uppercase<"straße">; +>T8 : "STRASSE" + +const t8: T8 = "STRASSE"; +>t8 : "STRASSE" +>"STRASSE" : "STRASSE" + diff --git a/testdata/tests/cases/compiler/intrinsicTypesUnicodeSpecialCasing.ts b/testdata/tests/cases/compiler/intrinsicTypesUnicodeSpecialCasing.ts new file mode 100644 index 0000000000..d1a50f563f --- /dev/null +++ b/testdata/tests/cases/compiler/intrinsicTypesUnicodeSpecialCasing.ts @@ -0,0 +1,37 @@ +// @strict: true + +// Test Unicode special case mappings for intrinsic string types +// These characters have 1:many case mappings that Go's strings.ToUpper/ToLower +// don't handle, but JavaScript's toUpperCase()/toLowerCase() do. + +// ß (U+00DF) uppercases to "SS" in JavaScript +type T1 = Uppercase<"ß">; +const t1: T1 = "SS"; + +// İ (U+0130) lowercases to "i̇" (i + combining dot above U+0307) in JavaScript +type T2 = Lowercase<"İ">; +const t2: T2 = "i\u0307"; + +// Ligatures: fi (U+FB01) uppercases to "FI" +type T3 = Uppercase<"fi">; +const t3: T3 = "FI"; + +// fl (U+FB02) uppercases to "FL" +type T4 = Uppercase<"fl">; +const t4: T4 = "FL"; + +// ff (U+FB00) uppercases to "FF" +type T5 = Uppercase<"ff">; +const t5: T5 = "FF"; + +// Capitalize should only affect first character +type T6 = Capitalize<"ßtest">; +const t6: T6 = "SStest"; + +// Uncapitalize with İ +type T7 = Uncapitalize<"İSPANYOL">; +const t7: T7 = "i\u0307SPANYOL"; + +// Mixed string with special characters +type T8 = Uppercase<"straße">; +const t8: T8 = "STRASSE";