Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit a59ead8

Browse files
filipnavarajkotas
authored andcommitted
[System.Globalization.Native] Fix small issues in CloneCollatorWithOptions and GetCollatorFromSortHandle (#24100)
* Fix allocation size calculation when resizing array Allocate initial array using malloc to avoid needlessly zeroing it Reinstate optimization for empty array returned from GetCustomRules lost in #22378 * Avoid resizing arrays in GetCustomRules by preallocating the maximum size we can consume (5648 bytes) * Avoid creating a binary search tree for something that could be easily stored as 32-entry lookup table * Remove obsolete comment
1 parent dea615b commit a59ead8

File tree

1 file changed

+41
-83
lines changed

1 file changed

+41
-83
lines changed

src/corefx/System.Globalization.Native/pal_collation.c

Lines changed: 41 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ const int32_t CompareOptionsIgnoreNonSpace = 0x2;
2222
const int32_t CompareOptionsIgnoreSymbols = 0x4;
2323
const int32_t CompareOptionsIgnoreKanaType = 0x8;
2424
const int32_t CompareOptionsIgnoreWidth = 0x10;
25+
const int32_t CompareOptionsMask = 0x1f;
2526
// const int32_t CompareOptionsStringSort = 0x20000000;
2627
// ICU's default is to use "StringSort", i.e. nonalphanumeric symbols come before alphanumeric.
2728
// When StringSort is not specified (.NET's default), the sort order will be different between
@@ -41,21 +42,11 @@ typedef struct { int32_t key; UCollator* UCollator; } TCollatorMap;
4142
*/
4243
struct SortHandle
4344
{
44-
UCollator* regular;
4545
pthread_mutex_t collatorsLockObject;
46-
void* collatorsPerOptionRoot;
46+
UCollator* collatorsPerOption[CompareOptionsMask + 1];
4747
};
4848

49-
typedef struct { UChar* items; size_t capacity; size_t size; } UCharList;
50-
51-
static int TreeComparer(const void* left, const void* right)
52-
{
53-
const TCollatorMap* leftMap = left;
54-
const TCollatorMap* rightMap = right;
55-
if (leftMap->key < rightMap->key) return -1;
56-
if (leftMap->key > rightMap->key) return 1;
57-
return 0;
58-
}
49+
typedef struct { UChar* items; size_t size; } UCharList;
5950

6051
// Hiragana character range
6152
const UChar hiraganaStart = 0x3041;
@@ -137,24 +128,6 @@ static int IsHalfFullHigherSymbol(UChar character)
137128
|| (0xff61 <= character && character <= 0xff65);
138129
}
139130

140-
static int AddItem(UCharList* list, const UChar item)
141-
{
142-
size_t size = list->size++;
143-
if (size >= list->capacity)
144-
{
145-
list->capacity *= 2;
146-
UChar* ptr = (UChar*)realloc(list->items, list->capacity * sizeof(UChar*));
147-
if (ptr == NULL)
148-
{
149-
return FALSE;
150-
}
151-
list->items = ptr;
152-
}
153-
154-
list->items[size] = item;
155-
return TRUE;
156-
}
157-
158131
/*
159132
Gets a string of custom collation rules, if necessary.
160133
@@ -184,19 +157,19 @@ static UCharList* GetCustomRules(int32_t options, UColAttributeValue strength, i
184157
}
185158

186159
// If we need to create customRules, the KanaType custom rule will be 88 kana characters * 4 = 352 chars long
187-
// and the Width custom rule will be at least 215 halfwidth characters * 4 = 860 chars long.
188-
// Use 512 as the starting size, so the customRules won't have to grow if we are just
189-
// doing the KanaType custom rule.
190-
customRules->capacity = 512;
191-
customRules->items = calloc(customRules->capacity, sizeof(UChar));
160+
// and the Width custom rule will be at most 212 halfwidth characters * 5 = 1060 chars long.
161+
int capacity =
162+
((needsIgnoreKanaTypeCustomRule || needsNotIgnoreKanaTypeCustomRule) ? 4 * (hiraganaEnd - hiraganaStart + 1) : 0) +
163+
((needsIgnoreWidthCustomRule || needsNotIgnoreWidthCustomRule) ? 5 * g_HalfFullCharsLength : 0);
164+
165+
UChar* items;
166+
customRules->items = items = malloc(capacity * sizeof(UChar));
192167
if (customRules->items == NULL)
193168
{
194169
free(customRules);
195170
return NULL;
196171
}
197172

198-
customRules->size = 0;
199-
200173
if (needsIgnoreKanaTypeCustomRule || needsNotIgnoreKanaTypeCustomRule)
201174
{
202175
UChar compareChar = needsIgnoreKanaTypeCustomRule ? '=' : '<';
@@ -206,15 +179,11 @@ static UCharList* GetCustomRules(int32_t options, UColAttributeValue strength, i
206179
// Hiragana is the range 3041 to 3096 & 309D & 309E
207180
if (hiraganaChar <= 0x3096 || hiraganaChar >= 0x309D) // characters between 3096 and 309D are not mapped to katakana
208181
{
209-
if(!(AddItem(customRules, '&') &&
210-
AddItem(customRules, hiraganaChar) &&
211-
AddItem(customRules, compareChar) &&
212-
AddItem(customRules, hiraganaChar + hiraganaToKatakanaOffset)))
213-
{
214-
free(customRules->items);
215-
free(customRules);
216-
return NULL;
217-
}
182+
assert(items - customRules->items <= capacity - 4);
183+
*(items++) = '&';
184+
*(items++) = hiraganaChar;
185+
*(items++) = compareChar;
186+
*(items++) = hiraganaChar + hiraganaToKatakanaOffset;
218187
}
219188
}
220189
}
@@ -237,20 +206,21 @@ static UCharList* GetCustomRules(int32_t options, UColAttributeValue strength, i
237206
// this character is a symbol, and if so skip it
238207
if (!(isIgnoreSymbols && needsNotIgnoreWidthCustomRule && (needsEscape || IsHalfFullHigherSymbol(higherChar))))
239208
{
240-
if(!(AddItem(customRules, '&') &&
241-
(!needsEscape || AddItem(customRules, '\\')) &&
242-
AddItem(customRules, lowerChar) &&
243-
AddItem(customRules, compareChar) &&
244-
AddItem(customRules, higherChar)))
209+
assert(items - customRules->items <= capacity - 5);
210+
*(items++) = '&';
211+
if (needsEscape)
245212
{
246-
free(customRules->items);
247-
free(customRules);
248-
return NULL;
213+
*(items++) = '\\';
249214
}
215+
*(items++) = lowerChar;
216+
*(items++) = compareChar;
217+
*(items++) = higherChar;
250218
}
251219
}
252220
}
253221

222+
customRules->size = items - customRules->items;
223+
254224
return customRules;
255225
}
256226

@@ -280,7 +250,7 @@ UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options,
280250

281251
UCollator* pClonedCollator;
282252
UCharList* customRules = GetCustomRules(options, strength, isIgnoreSymbols);
283-
if (customRules == NULL)
253+
if (customRules == NULL || customRules->size == 0)
284254
{
285255
pClonedCollator = ucol_safeClone(pCollator, NULL, NULL, pErr);
286256
}
@@ -305,8 +275,8 @@ UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options,
305275

306276
pClonedCollator = ucol_openRules(completeRules, completeRulesLength, UCOL_DEFAULT, strength, NULL, pErr);
307277
free(completeRules);
308-
free(customRules);
309278
}
279+
free(customRules);
310280

311281
if (isIgnoreSymbols)
312282
{
@@ -371,9 +341,9 @@ void CreateSortHandle(SortHandle** ppSortHandle)
371341
return;
372342
}
373343

374-
(*ppSortHandle)->collatorsPerOptionRoot = NULL;
375-
int result = pthread_mutex_init(&(*ppSortHandle)->collatorsLockObject, NULL);
344+
memset(*ppSortHandle, 0, sizeof(SortHandle));
376345

346+
int result = pthread_mutex_init(&(*ppSortHandle)->collatorsLockObject, NULL);
377347
if (result != 0)
378348
{
379349
assert(FALSE && "Unexpected pthread_mutex_init return value.");
@@ -392,7 +362,7 @@ ResultCode GlobalizationNative_GetSortHandle(const char* lpLocaleName, SortHandl
392362

393363
UErrorCode err = U_ZERO_ERROR;
394364

395-
(*ppSortHandle)->regular = ucol_open(lpLocaleName, &err);
365+
(*ppSortHandle)->collatorsPerOption[0] = ucol_open(lpLocaleName, &err);
396366

397367
if (U_FAILURE(err))
398368
{
@@ -406,15 +376,13 @@ ResultCode GlobalizationNative_GetSortHandle(const char* lpLocaleName, SortHandl
406376

407377
void GlobalizationNative_CloseSortHandle(SortHandle* pSortHandle)
408378
{
409-
ucol_close(pSortHandle->regular);
410-
pSortHandle->regular = NULL;
411-
412-
while (pSortHandle->collatorsPerOptionRoot != NULL)
379+
for (int i = 0; i <= CompareOptionsMask; i++)
413380
{
414-
TCollatorMap* data = *(TCollatorMap **)pSortHandle->collatorsPerOptionRoot;
415-
tdelete(data, &pSortHandle->collatorsPerOptionRoot, TreeComparer);
416-
ucol_close(data->UCollator);
417-
free(data);
381+
if (pSortHandle->collatorsPerOption[i] != NULL)
382+
{
383+
ucol_close(pSortHandle->collatorsPerOption[i]);
384+
pSortHandle->collatorsPerOption[i] = NULL;
385+
}
418386
}
419387

420388
pthread_mutex_destroy(&pSortHandle->collatorsLockObject);
@@ -427,7 +395,7 @@ const UCollator* GetCollatorFromSortHandle(SortHandle* pSortHandle, int32_t opti
427395
UCollator* pCollator;
428396
if (options == 0)
429397
{
430-
pCollator = pSortHandle->regular;
398+
pCollator = pSortHandle->collatorsPerOption[0];
431399
}
432400
else
433401
{
@@ -437,22 +405,12 @@ const UCollator* GetCollatorFromSortHandle(SortHandle* pSortHandle, int32_t opti
437405
assert(FALSE && "Unexpected pthread_mutex_lock return value.");
438406
}
439407

440-
TCollatorMap* map = (TCollatorMap*)malloc(sizeof(TCollatorMap));
441-
map->key = options;
442-
// tfind on glibc is significantly faster than tsearch and we expect
443-
// to hit the cache here often so it's benefitial to prefer lookup time
444-
// over addition time
445-
void* entry = tfind(map, &pSortHandle->collatorsPerOptionRoot, TreeComparer);
446-
if (entry == NULL)
447-
{
448-
pCollator = CloneCollatorWithOptions(pSortHandle->regular, options, pErr);
449-
map->UCollator = pCollator;
450-
tsearch(map, &pSortHandle->collatorsPerOptionRoot, TreeComparer);
451-
}
452-
else
408+
options &= CompareOptionsMask;
409+
pCollator = pSortHandle->collatorsPerOption[options];
410+
if (pCollator == NULL)
453411
{
454-
free(map);
455-
pCollator = (*(TCollatorMap**)entry)->UCollator;
412+
pCollator = CloneCollatorWithOptions(pSortHandle->collatorsPerOption[0], options, pErr);
413+
pSortHandle->collatorsPerOption[options] = pCollator;
456414
}
457415

458416
pthread_mutex_unlock(&pSortHandle->collatorsLockObject);

0 commit comments

Comments
 (0)