Skip to content

Commit 67b1ede

Browse files
authored
[OSX] HybridGlobalization Implement casing functions (#87919)
Implement GlobalizationNative_ChangeCaseNative and GlobalizationNative_ChangeCaseInvariantNative for OSX
1 parent c88b377 commit 67b1ede

File tree

15 files changed

+242
-28
lines changed

15 files changed

+242
-28
lines changed

docs/design/features/globalization-hybrid-mode.md

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -408,4 +408,34 @@ Affected public APIs:
408408
- CompareInfo.GetSortKeyLength
409409
- CompareInfo.GetHashCode
410410

411-
Apple Native API does not have an equivalent, so they throw `PlatformNotSupportedException`.
411+
Apple Native API does not have an equivalent, so they throw `PlatformNotSupportedException`.
412+
413+
414+
## Case change
415+
416+
Affected public APIs:
417+
- TextInfo.ToLower,
418+
- TextInfo.ToUpper
419+
420+
The following Apple native functions are used:
421+
- [uppercaseString](https://developer.apple.com/documentation/foundation/nsstring/1409855-uppercasestring)
422+
- [lowercaseString](https://developer.apple.com/documentation/foundation/nsstring/1408467-lowercasestring)
423+
- [uppercaseStringWithLocale](https://developer.apple.com/documentation/foundation/nsstring/1413316-uppercasestringwithlocale?language=objc)
424+
- [lowercaseStringWithLocale](https://developer.apple.com/documentation/foundation/nsstring/1417298-lowercasestringwithlocale?language=objc)
425+
426+
Behavioural changes compared to ICU
427+
428+
- Final sigma behavior correction:
429+
430+
ICU-based case change does not respect the final-sigma rule, but hybrid mode does, so "ΒΌΛΟΣ" -> "βόλος", not "βόλοσ".
431+
432+
- The following cases throw an exception because the destination buffer is insufficiently sized:
433+
434+
- Capitalizing the German letter ß (sharp S) gives SS when using Apple native functions.
435+
436+
- Capitalizing ligatures gives a different result on Apple platforms, e.g. "\uFB00" (ff) is uppercased to "FF".
437+
438+
- Capitalizing "\u0149" (ʼn) on Apple platforms returns the combination of "\u02BC" (ʼ) and "N", i.e. "ʼN".
439+
440+
441+
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Runtime.InteropServices;
5+
6+
internal static partial class Interop
{
    internal static partial class Globalization
    {
        /// <summary>
        /// P/Invoke declaration for the native <c>GlobalizationNative_ChangeCaseNative</c> export,
        /// which changes the case of a UTF-16 buffer for a named locale.
        /// </summary>
        /// <param name="localeName">Name of the culture whose casing rules should be applied.</param>
        /// <param name="lNameLen">Length of <paramref name="localeName"/>.</param>
        /// <param name="src">Pointer to the source characters.</param>
        /// <param name="srcLen">Length of the source buffer (presumably in chars; confirm against the native side).</param>
        /// <param name="dstBuffer">Pointer to the buffer that receives the result.</param>
        /// <param name="dstBufferCapacity">Capacity of <paramref name="dstBuffer"/>.</param>
        /// <param name="bToUpper"><c>true</c> to upper-case, <c>false</c> to lower-case.</param>
        /// <returns>A status code; the caller compares it against <see cref="ResultCode"/> values.</returns>
        [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ChangeCaseNative", StringMarshalling = StringMarshalling.Utf16)]
        internal static unsafe partial int ChangeCaseNative(string localeName, int lNameLen, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, [MarshalAs(UnmanagedType.Bool)] bool bToUpper);

        /// <summary>
        /// P/Invoke declaration for the native <c>GlobalizationNative_ChangeCaseInvariantNative</c> export,
        /// the locale-less counterpart of <see cref="ChangeCaseNative"/>.
        /// </summary>
        /// <param name="src">Pointer to the source characters.</param>
        /// <param name="srcLen">Length of the source buffer (presumably in chars; confirm against the native side).</param>
        /// <param name="dstBuffer">Pointer to the buffer that receives the result.</param>
        /// <param name="dstBufferCapacity">Capacity of <paramref name="dstBuffer"/>.</param>
        /// <param name="bToUpper"><c>true</c> to upper-case, <c>false</c> to lower-case.</param>
        /// <returns>A status code; the caller compares it against <see cref="ResultCode"/> values.</returns>
        // StringMarshalling is Utf16 to agree with ChangeCaseNative above: both entry points
        // exchange UTF-16 char buffers, and this signature has no string parameter that
        // would call for UTF-8 marshalling.
        [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_ChangeCaseInvariantNative", StringMarshalling = StringMarshalling.Utf16)]
        internal static unsafe partial int ChangeCaseInvariantNative(char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, [MarshalAs(UnmanagedType.Bool)] bool bToUpper);
    }
}

src/libraries/Common/src/Interop/Interop.ResultCode.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ internal enum ResultCode
1111
Success = 0,
1212
UnknownError = 1,
1313
InsufficientBuffer = 2,
14-
OutOfMemory = 3
14+
OutOfMemory = 3,
15+
InvalidCodePoint = 4,
1516
}
1617
}
1718
}

src/libraries/System.Globalization/tests/Hybrid/System.Globalization.IOS.Tests.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,5 +38,6 @@
3838
<Compile Include="..\CompareInfo\CompareInfoTests.LastIndexOf.cs" />
3939
<Compile Include="..\CompareInfo\CompareInfoTests.IsPrefix.cs" />
4040
<Compile Include="..\CompareInfo\CompareInfoTests.IsSuffix.cs" />
41+
<Compile Include="..\System\Globalization\TextInfoTests.cs" />
4142
</ItemGroup>
4243
</Project>

src/libraries/System.Globalization/tests/System/Globalization/TextInfoTests.cs

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -274,9 +274,9 @@ public static IEnumerable<object[]> ToLower_TestData()
274274
// we also don't preform.
275275
// Greek Capital Letter Sigma (does not case to U+03C2 with "final sigma" rule).
276276
yield return new object[] { cultureName, "\u03A3", "\u03C3" };
277-
if (PlatformDetection.IsHybridGlobalizationOnBrowser)
277+
if (PlatformDetection.IsHybridGlobalizationOnBrowser || PlatformDetection.IsHybridGlobalizationOnOSX)
278278
{
279-
// JS is using "final sigma" rule correctly - it's costly to unify it with ICU's behavior
279+
// JS and Apple platforms are using "final sigma" rule correctly - it's costly to unify it with ICU's behavior
280280
yield return new object[] { cultureName, "O\u03A3", "o\u03C2" };
281281
}
282282
else
@@ -396,23 +396,29 @@ public static IEnumerable<object[]> ToUpper_TestData()
396396
// RAINBOW (outside the BMP and does not case)
397397
yield return new object[] { cultureName, "\U0001F308", "\U0001F308" };
398398

399-
// Unicode defines some codepoints which expand into multiple codepoints
400-
// when cased (see SpecialCasing.txt from UNIDATA for some examples). We have never done
401-
// these sorts of expansions, since it would cause string lengths to change when cased,
402-
// which is non-intuitive. In addition, there are some context sensitive mappings which
403-
// we also don't preform.
404-
// es-zed does not case to SS when uppercased.
405-
yield return new object[] { cultureName, "\u00DF", "\u00DF" };
406-
yield return new object[] { cultureName, "stra\u00DFe", "STRA\u00DFE" };
407-
if (!PlatformDetection.IsNlsGlobalization)
408-
yield return new object[] { cultureName, "st\uD801\uDC37ra\u00DFe", "ST\uD801\uDC0FRA\u00DFE" };
409-
410-
// Ligatures do not expand when cased.
411-
yield return new object[] { cultureName, "\uFB00", "\uFB00" };
412-
413-
// Precomposed character with no uppercase variant, we don't want to "decompose" this
414-
// as part of casing.
415-
yield return new object[] { cultureName, "\u0149", "\u0149" };
399+
if (!PlatformDetection.IsHybridGlobalizationOnOSX)
400+
{
401+
// Unicode defines some codepoints which expand into multiple codepoints
402+
// when cased (see SpecialCasing.txt from UNIDATA for some examples). We have never done
403+
// these sorts of expansions, since it would cause string lengths to change when cased,
404+
// which is non-intuitive. In addition, there are some context sensitive mappings which
405+
// we also don't perform.
406+
// es-zed does not case to SS when uppercased.
407+
// on OSX, capitalizing the German letter ß (sharp S) gives SS
408+
yield return new object[] { cultureName, "\u00DF", "\u00DF" };
409+
yield return new object[] { cultureName, "stra\u00DFe", "STRA\u00DFE" };
410+
if (!PlatformDetection.IsNlsGlobalization)
411+
yield return new object[] { cultureName, "st\uD801\uDC37ra\u00DFe", "ST\uD801\uDC0FRA\u00DFE" };
412+
413+
// Ligatures do not expand when cased.
414+
// on OSX, this is uppercased to "FF"
415+
yield return new object[] { cultureName, "\uFB00", "\uFB00" };
416+
417+
// Precomposed character with no uppercase variant, we don't want to "decompose" this
418+
// as part of casing.
419+
// on OSX, this is uppercased to "ʼN"
420+
yield return new object[] { cultureName, "\u0149", "\u0149" };
421+
}
416422
}
417423

418424
// Turkish i

src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,7 @@
389389
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.cs" />
390390
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.Icu.cs" />
391391
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.Nls.cs" />
392+
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.OSX.cs" Condition="'$(IsOSXLike)' == 'true'" />
392393
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TextInfo.WebAssembly.cs" Condition="'$(TargetsBrowser)' == 'true'" />
393394
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\ThaiBuddhistCalendar.cs" />
394395
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\TimeSpanFormat.cs" />
@@ -1273,6 +1274,9 @@
12731274
<Compile Include="$(CommonPath)Interop\Interop.Casing.cs">
12741275
<Link>Common\Interop\Interop.Casing.cs</Link>
12751276
</Compile>
1277+
<Compile Include="$(CommonPath)Interop\Interop.Casing.OSX.cs" Condition="'$(IsOSXLike)' == 'true'">
1278+
<Link>Common\Interop\Interop.Casing.OSX.cs</Link>
1279+
</Compile>
12761280
<Compile Include="$(CommonPath)Interop\Interop.Collation.cs">
12771281
<Link>Common\Interop\Interop.Collation.cs</Link>
12781282
</Compile>
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Diagnostics;
5+
6+
namespace System.Globalization
{
    public partial class TextInfo
    {
        /// <summary>
        /// Changes the case of <paramref name="src"/> into <paramref name="dstBuffer"/> by calling
        /// the native globalization library. Only expected to run in hybrid globalization mode.
        /// </summary>
        /// <param name="src">Pointer to the source characters.</param>
        /// <param name="srcLen">Length of the source buffer.</param>
        /// <param name="dstBuffer">Pointer to the buffer that receives the result.</param>
        /// <param name="dstBufferCapacity">Capacity of <paramref name="dstBuffer"/>.</param>
        /// <param name="toUpper">Forwarded to the native call as its to-upper flag.</param>
        /// <exception cref="Exception">The native call returned anything other than <c>Success</c>.</exception>
        internal unsafe void ChangeCaseNative(char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, bool toUpper)
        {
            Debug.Assert(!GlobalizationMode.Invariant);
            Debug.Assert(!GlobalizationMode.UseNls);
            Debug.Assert(GlobalizationMode.Hybrid);

            // An empty culture name selects the locale-less native entry point;
            // otherwise the culture name is handed to the locale-aware one.
            int status = HasEmptyCultureName
                ? Interop.Globalization.ChangeCaseInvariantNative(src, srcLen, dstBuffer, dstBufferCapacity, toUpper)
                : Interop.Globalization.ChangeCaseNative(_cultureName, _cultureName.Length, src, srcLen, dstBuffer, dstBufferCapacity, toUpper);

            if (status == (int)Interop.Globalization.ResultCode.Success)
            {
                return;
            }

            // Translate the native status code into the message carried by the exception.
            string message;
            if (status == (int)Interop.Globalization.ResultCode.InvalidCodePoint)
            {
                message = "Invalid code point while case changing";
            }
            else if (status == (int)Interop.Globalization.ResultCode.InsufficientBuffer)
            {
                message = "Insufficiently sized destination buffer";
            }
            else
            {
                message = "Exception occurred while case changing";
            }

            throw new Exception(message);
        }
    }
}
}

src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,12 @@ private unsafe void ChangeCaseCore(char* src, int srcLen, char* dstBuffer, int d
692692
JsChangeCase(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
693693
return;
694694
}
695+
#elif TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
696+
if (GlobalizationMode.Hybrid)
697+
{
698+
ChangeCaseNative(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
699+
return;
700+
}
695701
#endif
696702
IcuChangeCase(src, srcLen, dstBuffer, dstBufferCapacity, bToUpper);
697703
}

src/mono/mono/mini/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ if(HAVE_SYS_ICU)
7171
set(icu_shim_sources_base
7272
${icu_shim_sources_base}
7373
pal_locale.m
74-
pal_collation.m)
74+
pal_collation.m
75+
pal_casing.m)
7576
endif()
7677

7778
addprefix(icu_shim_sources "${ICU_SHIM_PATH}" "${icu_shim_sources_base}")

src/native/libs/System.Globalization.Native/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ else()
9393
endif()
9494

9595
if (CLR_CMAKE_TARGET_APPLE)
96-
set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} pal_locale.m pal_collation.m)
96+
set(NATIVEGLOBALIZATION_SOURCES ${NATIVEGLOBALIZATION_SOURCES} pal_locale.m pal_collation.m pal_casing.m)
9797
endif()
9898

9999
# time zone names are filtered out of icu data for the browser and associated functionality is disabled

0 commit comments

Comments
 (0)