Skip to content

Commit 613b166

Browse files
committed
More wrong encoding conversions
1 parent ca6fecd commit 613b166

20 files changed

+276
-250
lines changed

ext/mbstring/mbstring.c

Lines changed: 54 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -323,7 +323,7 @@ static const sapi_post_entry mbstr_post_entries[] = {
323323
};
324324
/* }}} */
325325

326-
static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name) {
326+
static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name, const uint32_t arg_num) {
327327
if (encoding_name) {
328328
const mbfl_encoding *encoding;
329329
zend_string *last_encoding_name = MBSTRG(last_used_encoding_name);
@@ -334,7 +334,8 @@ static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name) {
334334

335335
encoding = mbfl_name2encoding(ZSTR_VAL(encoding_name));
336336
if (!encoding) {
337-
php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", ZSTR_VAL(encoding_name));
337+
zend_argument_value_error(arg_num, "must be a valid encoding, encoding \"%s\" is unknown",
338+
ZSTR_VAL(encoding_name));
338339
return NULL;
339340
}
340341

@@ -1891,9 +1892,9 @@ PHP_FUNCTION(mb_str_split)
18911892
string.val = (unsigned char *) ZSTR_VAL(str);
18921893
string.len = ZSTR_LEN(str);
18931894
string.no_language = MBSTRG(language);
1894-
string.encoding = php_mb_get_encoding(encoding);
1895+
string.encoding = php_mb_get_encoding(encoding, 3);
18951896
if (!string.encoding) {
1896-
RETURN_FALSE;
1897+
RETURN_THROWS();
18971898
}
18981899

18991900
p = ZSTR_VAL(str); /* string cursor pointer */
@@ -2021,9 +2022,9 @@ PHP_FUNCTION(mb_strlen)
20212022
string.val = (unsigned char *) str;
20222023
string.len = str_len;
20232024
string.no_language = MBSTRG(language);
2024-
string.encoding = php_mb_get_encoding(enc_name);
2025+
string.encoding = php_mb_get_encoding(enc_name, 2);
20252026
if (!string.encoding) {
2026-
RETURN_FALSE;
2027+
RETURN_THROWS();
20272028
}
20282029

20292030
n = mbfl_strlen(&string);
@@ -2066,9 +2067,9 @@ PHP_FUNCTION(mb_strpos)
20662067
}
20672068

20682069
haystack.no_language = needle.no_language = MBSTRG(language);
2069-
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2070+
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
20702071
if (!haystack.encoding) {
2071-
RETURN_FALSE;
2072+
RETURN_THROWS();
20722073
}
20732074

20742075
n = mbfl_strpos(&haystack, &needle, offset, reverse);
@@ -2094,9 +2095,9 @@ PHP_FUNCTION(mb_strrpos)
20942095
}
20952096

20962097
haystack.no_language = needle.no_language = MBSTRG(language);
2097-
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2098+
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
20982099
if (!haystack.encoding) {
2099-
RETURN_FALSE;
2100+
RETURN_THROWS();
21002101
}
21012102

21022103
n = mbfl_strpos(&haystack, &needle, offset, 1);
@@ -2125,7 +2126,7 @@ PHP_FUNCTION(mb_stripos)
21252126

21262127
enc = php_mb_get_encoding(from_encoding);
21272128
if (!enc) {
2128-
RETURN_FALSE;
2129+
RETURN_THROWS();
21292130
}
21302131

21312132
n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc);
@@ -2155,7 +2156,7 @@ PHP_FUNCTION(mb_strripos)
21552156

21562157
enc = php_mb_get_encoding(from_encoding);
21572158
if (!enc) {
2158-
RETURN_FALSE;
2159+
RETURN_THROWS();
21592160
}
21602161

21612162
n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc);
@@ -2183,9 +2184,9 @@ PHP_FUNCTION(mb_strstr)
21832184
}
21842185

21852186
haystack.no_language = needle.no_language = MBSTRG(language);
2186-
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2187+
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
21872188
if (!haystack.encoding) {
2188-
RETURN_FALSE;
2189+
RETURN_THROWS();
21892190
}
21902191

21912192
n = mbfl_strpos(&haystack, &needle, 0, 0);
@@ -2229,9 +2230,9 @@ PHP_FUNCTION(mb_strrchr)
22292230
}
22302231

22312232
haystack.no_language = needle.no_language = MBSTRG(language);
2232-
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2233+
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
22332234
if (!haystack.encoding) {
2234-
RETURN_FALSE;
2235+
RETURN_THROWS();
22352236
}
22362237

22372238
n = mbfl_strpos(&haystack, &needle, 0, 1);
@@ -2275,9 +2276,9 @@ PHP_FUNCTION(mb_stristr)
22752276
}
22762277

22772278
haystack.no_language = needle.no_language = MBSTRG(language);
2278-
haystack.encoding = needle.encoding = php_mb_get_encoding(from_encoding);
2279+
haystack.encoding = needle.encoding = php_mb_get_encoding(from_encoding, 4);
22792280
if (!haystack.encoding) {
2280-
RETURN_FALSE;
2281+
RETURN_THROWS();
22812282
}
22822283

22832284
n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, needle.encoding);
@@ -2321,9 +2322,9 @@ PHP_FUNCTION(mb_strrichr)
23212322
}
23222323

23232324
haystack.no_language = needle.no_language = MBSTRG(language);
2324-
haystack.encoding = needle.encoding = php_mb_get_encoding(from_encoding);
2325+
haystack.encoding = needle.encoding = php_mb_get_encoding(from_encoding, 4);
23252326
if (!haystack.encoding) {
2326-
RETURN_FALSE;
2327+
RETURN_THROWS();
23272328
}
23282329

23292330
n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, needle.encoding);
@@ -2366,9 +2367,9 @@ PHP_FUNCTION(mb_substr_count)
23662367
}
23672368

23682369
haystack.no_language = needle.no_language = MBSTRG(language);
2369-
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name);
2370+
haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 3);
23702371
if (!haystack.encoding) {
2371-
RETURN_FALSE;
2372+
RETURN_THROWS();
23722373
}
23732374

23742375
if (needle.len == 0) {
@@ -2402,9 +2403,9 @@ PHP_FUNCTION(mb_substr)
24022403
}
24032404

24042405
string.no_language = MBSTRG(language);
2405-
string.encoding = php_mb_get_encoding(encoding);
2406+
string.encoding = php_mb_get_encoding(encoding, 4);
24062407
if (!string.encoding) {
2407-
RETURN_FALSE;
2408+
RETURN_THROWS();
24082409
}
24092410

24102411
string.val = (unsigned char *)str;
@@ -2465,9 +2466,9 @@ PHP_FUNCTION(mb_strcut)
24652466
}
24662467

24672468
string.no_language = MBSTRG(language);
2468-
string.encoding = php_mb_get_encoding(encoding);
2469+
string.encoding = php_mb_get_encoding(encoding, 4);
24692470
if (!string.encoding) {
2470-
RETURN_FALSE;
2471+
RETURN_THROWS();
24712472
}
24722473

24732474
if (len_is_null) {
@@ -2522,12 +2523,9 @@ PHP_FUNCTION(mb_strwidth)
25222523
}
25232524

25242525
string.no_language = MBSTRG(language);
2525-
// TODO CHECK THIS WHAT THE FUCK IT DOES
2526-
string.encoding = php_mb_get_encoding(enc_name);
2526+
string.encoding = php_mb_get_encoding(enc_name, 2);
25272527
if (!string.encoding) {
2528-
//zend_argument_value_error(1, "must be a valid encoding, encoding \"%s\" is unknown", name);
2529-
//RETURN_THROWS();
2530-
RETURN_FALSE;
2528+
RETURN_THROWS();
25312529
}
25322530

25332531
n = mbfl_strwidth(&string);
@@ -2554,9 +2552,9 @@ PHP_FUNCTION(mb_strimwidth)
25542552
}
25552553

25562554
string.no_language = marker.no_language = MBSTRG(language);
2557-
string.encoding = marker.encoding = php_mb_get_encoding(encoding);
2555+
string.encoding = marker.encoding = php_mb_get_encoding(encoding, 5);
25582556
if (!string.encoding) {
2559-
RETURN_FALSE;
2557+
RETURN_THROWS();
25602558
}
25612559

25622560
string.val = (unsigned char *)str;
@@ -2788,9 +2786,9 @@ PHP_FUNCTION(mb_convert_encoding)
27882786
RETURN_THROWS();
27892787
}
27902788

2791-
to_encoding = php_mb_get_encoding(to_encoding_name);
2789+
to_encoding = php_mb_get_encoding(to_encoding_name, 2);
27922790
if (!to_encoding) {
2793-
RETURN_FALSE;
2791+
RETURN_THROWS();
27942792
}
27952793

27962794
if (Z_TYPE_P(input) != IS_STRING && Z_TYPE_P(input) != IS_ARRAY) {
@@ -2881,9 +2879,9 @@ PHP_FUNCTION(mb_convert_case)
28812879
RETURN_THROWS();
28822880
}
28832881

2884-
enc = php_mb_get_encoding(from_encoding);
2882+
enc = php_mb_get_encoding(from_encoding, 3);
28852883
if (!enc) {
2886-
return;
2884+
RETURN_THROWS();
28872885
}
28882886

28892887
if (case_mode < 0 || case_mode > PHP_UNICODE_CASE_MODE_MAX) {
@@ -2918,9 +2916,9 @@ PHP_FUNCTION(mb_strtoupper)
29182916
RETURN_THROWS();
29192917
}
29202918

2921-
enc = php_mb_get_encoding(from_encoding);
2919+
enc = php_mb_get_encoding(from_encoding, 2);
29222920
if (!enc) {
2923-
RETURN_FALSE;
2921+
RETURN_THROWS();
29242922
}
29252923

29262924
newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
@@ -2952,9 +2950,9 @@ PHP_FUNCTION(mb_strtolower)
29522950
RETURN_THROWS();
29532951
}
29542952

2955-
enc = php_mb_get_encoding(from_encoding);
2953+
enc = php_mb_get_encoding(from_encoding, 2);
29562954
if (!enc) {
2957-
RETURN_FALSE;
2955+
RETURN_THROWS();
29582956
}
29592957

29602958
newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
@@ -3253,9 +3251,9 @@ PHP_FUNCTION(mb_convert_kana)
32533251

32543252
/* encoding */
32553253
string.no_language = MBSTRG(language);
3256-
string.encoding = php_mb_get_encoding(encname);
3254+
string.encoding = php_mb_get_encoding(encname, 3);
32573255
if (!string.encoding) {
3258-
RETURN_FALSE;
3256+
RETURN_THROWS();
32593257
}
32603258

32613259
ret = mbfl_ja_jp_hantozen(&string, &result, opt);
@@ -4380,14 +4378,15 @@ PHP_FUNCTION(mb_check_encoding)
43804378
/* }}} */
43814379

43824380

4383-
static inline zend_long php_mb_ord(const char *str, size_t str_len, zend_string *enc_name)
4381+
static inline zend_long php_mb_ord(const char *str, size_t str_len, zend_string *enc_name,
4382+
const uint32_t enc_name_arg_num)
43844383
{
43854384
const mbfl_encoding *enc;
43864385
enum mbfl_no_encoding no_enc;
43874386

4388-
enc = php_mb_get_encoding(enc_name);
4387+
enc = php_mb_get_encoding(enc_name, enc_name_arg_num);
43894388
if (!enc) {
4390-
return -1;
4389+
return -2;
43914390
}
43924391

43934392
no_enc = enc->no_encoding;
@@ -4446,9 +4445,12 @@ PHP_FUNCTION(mb_ord)
44464445
Z_PARAM_STR(enc)
44474446
ZEND_PARSE_PARAMETERS_END();
44484447

4449-
cp = php_mb_ord(str, str_len, enc);
4448+
cp = php_mb_ord(str, str_len, enc, 2);
44504449

44514450
if (0 > cp) {
4451+
if (cp == -2) {
4452+
RETURN_THROWS();
4453+
}
44524454
RETURN_FALSE;
44534455
}
44544456

@@ -4457,15 +4459,15 @@ PHP_FUNCTION(mb_ord)
44574459
/* }}} */
44584460

44594461

4460-
static inline zend_string *php_mb_chr(zend_long cp, zend_string *enc_name)
4462+
static inline zend_string *php_mb_chr(zend_long cp, zend_string *enc_name, uint32_t enc_name_arg_num)
44614463
{
44624464
const mbfl_encoding *enc;
44634465
enum mbfl_no_encoding no_enc;
44644466
zend_string *ret;
44654467
char* buf;
44664468
size_t buf_len;
44674469

4468-
enc = php_mb_get_encoding(enc_name);
4470+
enc = php_mb_get_encoding(enc_name, enc_name_arg_num);
44694471
if (!enc) {
44704472
return NULL;
44714473
}
@@ -4554,7 +4556,7 @@ PHP_FUNCTION(mb_chr)
45544556
Z_PARAM_STR(enc)
45554557
ZEND_PARSE_PARAMETERS_END();
45564558

4557-
ret = php_mb_chr(cp, enc);
4559+
ret = php_mb_chr(cp, enc, 2);
45584560
if (ret == NULL) {
45594561
RETURN_FALSE;
45604562
}
@@ -4586,9 +4588,9 @@ PHP_FUNCTION(mb_scrub)
45864588
Z_PARAM_STR(enc_name)
45874589
ZEND_PARSE_PARAMETERS_END();
45884590

4589-
enc = php_mb_get_encoding(enc_name);
4591+
enc = php_mb_get_encoding(enc_name, 2);
45904592
if (!enc) {
4591-
RETURN_FALSE;
4593+
RETURN_THROWS();
45924594
}
45934595

45944596
ret = php_mb_scrub(str, str_len, enc, &ret_len);

0 commit comments

Comments
 (0)