@@ -6,18 +6,75 @@ import { monoStringToString, utf16ToStringLoop, stringToUTF16 } from "../strings
66import { MonoObject , MonoObjectRef , MonoString , MonoStringRef } from "../types/internal" ;
77import { Int32Ptr } from "../types/emscripten" ;
88import { wrap_error_root , wrap_no_error_root } from "../invoke-js" ;
9+ import { localHeapViewU16 , setU16_local } from "../memory" ;
10+
11+ const SURROGATE_HIGHER_START = "\uD800" ;
12+ const SURROGATE_HIGHER_END = "\uDBFF" ;
13+ const SURROGATE_LOWER_START = "\uDC00" ;
14+ const SURROGATE_LOWER_END = "\uDFFF" ;
915
1016export function mono_wasm_change_case_invariant ( src : number , srcLength : number , dst : number , dstLength : number , toUpper : number , is_exception : Int32Ptr , ex_address : MonoObjectRef ) : void {
1117 const exceptionRoot = mono_wasm_new_external_root < MonoObject > ( ex_address ) ;
1218 try {
1319 const input = utf16ToStringLoop ( src , src + 2 * srcLength ) ;
14- let result = toUpper ? input . toUpperCase ( ) : input . toLowerCase ( ) ;
20+ const result = toUpper ? input . toUpperCase ( ) : input . toLowerCase ( ) ;
1521 // Unicode defines some codepoints which expand into multiple codepoints,
1622 // originally we do not support this expansion
17- if ( result . length > dstLength )
18- result = input ;
19- stringToUTF16 ( dst , dst + 2 * dstLength , result ) ;
20- wrap_no_error_root ( is_exception , exceptionRoot ) ;
23+ if ( result . length <= dstLength )
24+ {
25+ stringToUTF16 ( dst , dst + 2 * dstLength , result ) ;
26+ wrap_no_error_root ( is_exception , exceptionRoot ) ;
27+ return ;
28+ }
29+
30+ // workaround to maintain the ICU-like behavior
31+ const heapI16 = localHeapViewU16 ( ) ;
32+ let jump = 1 ;
33+ if ( toUpper )
34+ {
35+ for ( let i = 0 ; i < input . length ; i += jump )
36+ {
37+ // surrogate parts have to enter ToUpper/ToLower together to give correct output
38+ if ( isSurrogate ( input , i ) )
39+ {
40+ jump = 2 ;
41+ const surrogate = input . substring ( i , i + 2 ) ;
42+ const upperSurrogate = surrogate . toUpperCase ( ) ;
43+ const appendedSurrogate = upperSurrogate . length > 2 ? surrogate : upperSurrogate ;
44+ appendSurrogateToMemory ( heapI16 , dst , appendedSurrogate , i ) ;
45+
46+ }
47+ else
48+ {
49+ jump = 1 ;
50+ const upperChar = input [ i ] . toUpperCase ( ) ;
51+ const appendedChar = upperChar . length > 1 ? input [ i ] : upperChar ;
52+ setU16_local ( heapI16 , dst + i * 2 , appendedChar . charCodeAt ( 0 ) ) ;
53+ }
54+ }
55+ }
56+ else
57+ {
58+ for ( let i = 0 ; i < input . length ; i += jump )
59+ {
60+ if ( isSurrogate ( input , i ) )
61+ {
62+ jump = 2 ;
63+ const surrogate = input . substring ( i , i + 2 ) ;
64+ const upperSurrogate = surrogate . toLowerCase ( ) ;
65+ const appendedSurrogate = upperSurrogate . length > 2 ? surrogate : upperSurrogate ;
66+ appendSurrogateToMemory ( heapI16 , dst , appendedSurrogate , i ) ;
67+
68+ }
69+ else
70+ {
71+ jump = 1 ;
72+ const upperChar = input [ i ] . toLowerCase ( ) ;
73+ const appendedChar = upperChar . length > 1 ? input [ i ] : upperChar ;
74+ setU16_local ( heapI16 , dst + i * 2 , appendedChar . charCodeAt ( 0 ) ) ;
75+ }
76+ }
77+ }
2178 }
2279 catch ( ex : any ) {
2380 wrap_error_root ( is_exception , ex , exceptionRoot ) ;
@@ -35,11 +92,62 @@ export function mono_wasm_change_case(culture: MonoStringRef, src: number, srcLe
3592 if ( ! cultureName )
3693 throw new Error ( "Cannot change case, the culture name is null." ) ;
3794 const input = utf16ToStringLoop ( src , src + 2 * srcLength ) ;
38- let result = toUpper ? input . toLocaleUpperCase ( cultureName ) : input . toLocaleLowerCase ( cultureName ) ;
39- if ( result . length > dstLength )
40- result = input ;
95+ const result = toUpper ? input . toLocaleUpperCase ( cultureName ) : input . toLocaleLowerCase ( cultureName ) ;
96+
97+ if ( result . length <= input . length )
98+ {
99+ stringToUTF16 ( dst , dst + 2 * dstLength , result ) ;
100+ wrap_no_error_root ( is_exception , exceptionRoot ) ;
101+ return ;
102+ }
103+ // workaround to maintain the ICU-like behavior
104+ const heapI16 = localHeapViewU16 ( ) ;
105+ let jump = 1 ;
106+ if ( toUpper )
107+ {
108+ for ( let i = 0 ; i < input . length ; i += jump )
109+ {
110+ // surrogate parts have to enter ToUpper/ToLower together to give correct output
111+ if ( isSurrogate ( input , i ) )
112+ {
113+ jump = 2 ;
114+ const surrogate = input . substring ( i , i + 2 ) ;
115+ const upperSurrogate = surrogate . toLocaleUpperCase ( cultureName ) ;
116+ const appendedSurrogate = upperSurrogate . length > 2 ? surrogate : upperSurrogate ;
117+ appendSurrogateToMemory ( heapI16 , dst , appendedSurrogate , i ) ;
41118
42- stringToUTF16 ( dst , dst + 2 * dstLength , result ) ;
119+ }
120+ else
121+ {
122+ jump = 1 ;
123+ const upperChar = input [ i ] . toLocaleUpperCase ( cultureName ) ;
124+ const appendedChar = upperChar . length > 1 ? input [ i ] : upperChar ;
125+ setU16_local ( heapI16 , dst + i * 2 , appendedChar . charCodeAt ( 0 ) ) ;
126+ }
127+ }
128+ }
129+ else
130+ {
131+ for ( let i = 0 ; i < input . length ; i += jump )
132+ {
133+ // surrogate parts have to enter ToUpper/ToLower together to give correct output
134+ if ( isSurrogate ( input , i ) )
135+ {
136+ jump = 2 ;
137+ const surrogate = input . substring ( i , i + 2 ) ;
138+ const upperSurrogate = surrogate . toLocaleLowerCase ( cultureName ) ;
139+ const appendedSurrogate = upperSurrogate . length > 2 ? surrogate : upperSurrogate ;
140+ appendSurrogateToMemory ( heapI16 , dst , appendedSurrogate , i ) ;
141+ }
142+ else
143+ {
144+ jump = 1 ;
145+ const lowerChar = input [ i ] . toLocaleLowerCase ( cultureName ) ;
146+ const appendedChar = lowerChar . length > 1 ? input [ i ] : lowerChar ;
147+ setU16_local ( heapI16 , dst + i * 2 , appendedChar . charCodeAt ( 0 ) ) ;
148+ }
149+ }
150+ }
43151 wrap_no_error_root ( is_exception , exceptionRoot ) ;
44152 }
45153 catch ( ex : any ) {
@@ -49,4 +157,19 @@ export function mono_wasm_change_case(culture: MonoStringRef, src: number, srcLe
49157 cultureRoot . release ( ) ;
50158 exceptionRoot . release ( ) ;
51159 }
52- }
160+ }
161+
/**
 * Reports whether a UTF-16 surrogate pair begins at `startIdx` in `str`.
 *
 * The code unit at `startIdx` must lie in the high-surrogate range
 * (SURROGATE_HIGHER_START..SURROGATE_HIGHER_END), a following code unit must
 * exist, and that following unit must lie in the low-surrogate range
 * (SURROGATE_LOWER_START..SURROGATE_LOWER_END).
 *
 * @param str      string to inspect
 * @param startIdx index of the candidate high surrogate
 * @returns true only when both halves of a pair are present at startIdx
 */
function isSurrogate(str: string, startIdx: number): boolean {
    const lead = str[startIdx];
    // An out-of-range index yields undefined, which fails both comparisons.
    if (!(SURROGATE_HIGHER_START <= lead && lead <= SURROGATE_HIGHER_END)) {
        return false;
    }
    // A high surrogate in the last position has no partner.
    if (!(startIdx + 1 < str.length)) {
        return false;
    }
    const trail = str[startIdx + 1];
    return SURROGATE_LOWER_START <= trail && trail <= SURROGATE_LOWER_END;
}
170+
/**
 * Stores the first two UTF-16 code units of `surrogate` through
 * `setU16_local`, at the positions for indices `idx` and `idx + 1`
 * relative to `dst`.
 *
 * Each index is scaled by 2 before being added to `dst`, i.e. one 16-bit
 * code unit per two address units.
 * NOTE(review): presumably `dst` is a byte offset into the heap backing
 * `heapI16` - confirm against `setU16_local` in "../memory".
 *
 * @param heapI16   16-bit heap view handed to setU16_local
 * @param dst       base address of the destination buffer
 * @param surrogate string whose code units 0 and 1 are written
 * @param idx       code-unit index of the first unit within the destination
 */
function appendSurrogateToMemory(heapI16: Uint16Array, dst: number, surrogate: string, idx: number) {
    const firstUnitAddress = dst + idx * 2;
    const secondUnitAddress = dst + (idx + 1) * 2;
    setU16_local(heapI16, firstUnitAddress, surrogate.charCodeAt(0));
    setU16_local(heapI16, secondUnitAddress, surrogate.charCodeAt(1));
}
0 commit comments