Skip to content

Major overhaul of mbstring (part 4) #6430

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions ext/mbstring/libmbfl/filters/emoji2uni.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ static const unsigned short mb_tbl_code2uni_docomo1[] = { // 0x28c2 - 0x29db
0xf4ba, 0xf303, 0xEE1E, 0xEE1F,
0xEE20, 0xf51c, 0xf51b, 0xf51a,
0x23f0, 0xEE21, 0xEE22, 0xEE23,
0xEE24, 0xEE25, 0xEE26, 0xEE27,
0xEE28, 0xEE29, 0xEE2A, 0xEE2B,
0xEE2C, 0xEE2D, 0xEE2E, 0xEE2F,
0xEE24, 0xEE25, 0x25EA, 0x25A0,
0x25BF, 0xEE29, 0xEE2A, 0xEE2B,
0x2020, 0xEE2D, 0xEE2E, 0xEE2F,
0xEE30, 0xEE31, 0xEE32, 0xEE33,
0xf4f2, 0xf4e9, 0xf4e0, 0xEE10,
0xEE11, 0x2709, 0xEE12, 0xEE13,
Expand Down
17 changes: 2 additions & 15 deletions ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c
Original file line number Diff line number Diff line change
Expand Up @@ -383,9 +383,8 @@ mbfl_filt_conv_jis_ms_wchar(int c, mbfl_convert_filter *filter)
int
mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter)
{
int c1, s;
int s = 0;

s = 0;
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
Expand All @@ -408,20 +407,12 @@ mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter)

/* do some transliteration */
if (s <= 0) {
c1 = c & ~MBFL_WCSPLANE_MASK;
if (c1 == MBFL_WCSPLANE_JIS0208) {
s = c & MBFL_WCSPLANE_MASK;
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
s = c & MBFL_WCSPLANE_MASK;
s |= 0x8080;
} else if (c == 0xa5) { /* YEN SIGN */
if (c == 0xa5) { /* YEN SIGN */
s = 0x1005c;
} else if (c == 0x203e) { /* OVER LINE */
s = 0x1007e;
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
s = 0x2140;
} else if (c == 0xff5e) { /* FULLWIDTH TILDE */
s = 0x2141;
} else if (c == 0x2225) { /* PARALLEL TO */
s = 0x2142;
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
Expand Down Expand Up @@ -636,8 +627,6 @@ mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter)
s = 0x1007e;
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
s = 0x2140;
} else if (c == 0xff5e) { /* FULLWIDTH TILDE */
s = 0x2141;
} else if (c == 0x2225) { /* PARALLEL TO */
s = 0x2142;
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
Expand Down Expand Up @@ -780,8 +769,6 @@ mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter)
s = 0x1007e;
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
s = 0x2140;
} else if (c == 0xff5e) { /* FULLWIDTH TILDE */
s = 0x2141;
} else if (c == 0x2225) { /* PARALLEL TO */
s = 0x2142;
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
Expand Down
66 changes: 26 additions & 40 deletions ext/mbstring/libmbfl/filters/mbfilter_cp51932.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
#include "unicode_table_jis.h"
#include "cp932_table.h"

static int mbfl_filt_conv_cp51932_wchar_flush(mbfl_convert_filter *filter);

static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
Expand Down Expand Up @@ -72,7 +74,7 @@ const struct mbfl_convert_vtbl vtbl_cp51932_wchar = {
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_cp51932_wchar,
mbfl_filt_conv_common_flush,
mbfl_filt_conv_cp51932_wchar_flush,
NULL,
};

Expand Down Expand Up @@ -105,17 +107,15 @@ mbfl_filt_conv_cp51932_wchar(int c, mbfl_convert_filter *filter)

switch (filter->status) {
case 0:
if (c >= 0 && c < 0x80) { /* latin */
if (c >= 0 && c < 0x80) { /* latin */
CK((*filter->output_function)(c, filter->data));
} else if (c > 0xa0 && c < 0xff) { /* CP932 first char */
} else if (c >= 0xA1 && c <= 0xFE) { /* CP932, first byte */
filter->status = 1;
filter->cache = c;
} else if (c == 0x8e) { /* kana first char */
} else if (c == 0x8e) { /* kana first char */
filter->status = 2;
} else {
w = c & MBFL_WCSGROUP_MASK;
w |= MBFL_WCSGROUP_THROUGH;
CK((*filter->output_function)(w, filter->data));
CK((*filter->output_function)(c | MBFL_WCSGROUP_THROUGH, filter->data));
}
break;

Expand Down Expand Up @@ -152,17 +152,11 @@ mbfl_filt_conv_cp51932_wchar(int c, mbfl_convert_filter *filter)
}
}
if (w <= 0) {
w = ((c1 & 0x7f) << 8) | (c & 0x7f);
w &= MBFL_WCSPLANE_MASK;
w |= MBFL_WCSPLANE_WINCP932;
w = ((c1 & 0x7f) << 8) | (c & 0x7f) | MBFL_WCSPLANE_WINCP932;
}
CK((*filter->output_function)(w, filter->data));
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
CK((*filter->output_function)(c, filter->data));
} else {
w = (c1 << 8) | c;
w &= MBFL_WCSGROUP_MASK;
w |= MBFL_WCSGROUP_THROUGH;
w = (c1 << 8) | c | MBFL_WCSGROUP_THROUGH;
CK((*filter->output_function)(w, filter->data));
}
break;
Expand All @@ -172,12 +166,8 @@ mbfl_filt_conv_cp51932_wchar(int c, mbfl_convert_filter *filter)
if (c > 0xa0 && c < 0xe0) {
w = 0xfec0 + c;
CK((*filter->output_function)(w, filter->data));
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
CK((*filter->output_function)(c, filter->data));
} else {
w = 0x8e00 | c;
w &= MBFL_WCSGROUP_MASK;
w |= MBFL_WCSGROUP_THROUGH;
w = 0x8e00 | c | MBFL_WCSGROUP_THROUGH;
CK((*filter->output_function)(w, filter->data));
}
break;
Expand All @@ -190,6 +180,20 @@ mbfl_filt_conv_cp51932_wchar(int c, mbfl_convert_filter *filter)
return c;
}

/*
 * Flush handler for the CP51932 => wchar conversion filter.
 *
 * If the input ended in the middle of a multi-byte sequence (filter->status
 * is non-zero), the dangling lead byte is passed to the output function
 * tagged with MBFL_WCSGROUP_THROUGH. Afterwards the downstream flush
 * function is invoked, if one is set.
 *
 * Always returns 0.
 */
static int mbfl_filt_conv_cp51932_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status) {
		/* Input string was truncated.
		 * The kana state (status 2) is entered on byte 0x8e without storing
		 * that byte in filter->cache, so the cache may hold a stale value
		 * there; report 0x8e explicitly. Other states use the cached byte. */
		int lead = (filter->status == 2) ? 0x8e : filter->cache;

		/* The pending byte is consumed here; do not report it twice. */
		filter->status = 0;
		(*filter->output_function)(lead | MBFL_WCSGROUP_THROUGH, filter->data);
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}

/*
* wchar => cp51932
*/
Expand All @@ -210,28 +214,10 @@ mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter)
}
if (s1 >= 0x8080) s1 = -1; /* we don't support JIS X0213 */
if (s1 <= 0) {
c1 = c & ~MBFL_WCSPLANE_MASK;
if (c1 == MBFL_WCSPLANE_WINCP932) {
s1 = c & MBFL_WCSPLANE_MASK;
if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 120ku */
s1 = -1;
}
} else if (c1 == MBFL_WCSPLANE_JIS0208) {
s1 = c & MBFL_WCSPLANE_MASK;
if ((s1 >= ((85 + 0x20) << 8) && /* 85ku - 94ku */
s1 <= ((88 + 0x20) << 8)) ||/* IBM extension */
(s1 >= ((93 + 0x20) << 8) && /* 89ku - 92ku */
s1 <= ((94 + 0x20) << 8))) {
s1 = -1;
}
} else if (c == 0xa5) { /* YEN SIGN */
s1 = 0x005c; /* YEN SIGN */
} else if (c == 0x203e) { /* OVER LINE */
s1 = 0x007e; /* FULLWIDTH MACRON */
if (c == 0xa5) { /* YEN SIGN */
s1 = 0x216F; /* FULLWIDTH YEN SIGN */
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
s1 = 0x2140;
} else if (c == 0xff5e) { /* FULLWIDTH TILDE */
s1 = 0x2141;
} else if (c == 0x2225) { /* PARALLEL TO */
s1 = 0x2142;
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
Expand Down
44 changes: 20 additions & 24 deletions ext/mbstring/libmbfl/filters/mbfilter_cp932.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"

static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter);

static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
Expand Down Expand Up @@ -71,7 +73,7 @@ const struct mbfl_convert_vtbl vtbl_cp932_wchar = {
mbfl_filt_conv_common_ctor,
NULL,
mbfl_filt_conv_cp932_wchar,
mbfl_filt_conv_common_flush,
mbfl_filt_conv_cp932_wchar_flush,
NULL,
};

Expand Down Expand Up @@ -193,17 +195,11 @@ mbfl_filt_conv_cp932_wchar(int c, mbfl_convert_filter *filter)
}
}
if (w <= 0) {
w = (s1 << 8) | s2;
w &= MBFL_WCSPLANE_MASK;
w |= MBFL_WCSPLANE_WINCP932;
w = (s1 << 8) | s2 | MBFL_WCSPLANE_WINCP932;
}
CK((*filter->output_function)(w, filter->data));
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
CK((*filter->output_function)(c, filter->data));
} else {
w = (c1 << 8) | c;
w &= MBFL_WCSGROUP_MASK;
w |= MBFL_WCSGROUP_THROUGH;
w = (c1 << 8) | c | MBFL_WCSGROUP_THROUGH;
CK((*filter->output_function)(w, filter->data));
}
break;
Expand All @@ -216,6 +212,19 @@ mbfl_filt_conv_cp932_wchar(int c, mbfl_convert_filter *filter)
return c;
}

/*
 * Flush handler for the CP932 (SJIS-win) => wchar conversion filter.
 *
 * If the input ended after a lead byte but before its trail byte
 * (filter->status is non-zero), the cached lead byte is passed to the
 * output function tagged with MBFL_WCSGROUP_THROUGH. Afterwards the
 * downstream flush function is invoked, if one is set.
 *
 * Always returns 0.
 */
static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter)
{
	if (filter->status) {
		/* Input string was truncated.
		 * Emit the dangling byte directly downstream. Passing it to
		 * filter->filter_function would make this converter, which is still
		 * waiting for a trail byte, treat the tagged value as that trail byte
		 * and combine it with the cached lead byte. */
		int lead = filter->cache;

		/* The pending byte is consumed here; do not report it twice. */
		filter->status = 0;
		(*filter->output_function)(lead | MBFL_WCSGROUP_THROUGH, filter->data);
	}

	if (filter->flush_function) {
		(*filter->flush_function)(filter->data);
	}

	return 0;
}

/*
* wchar => SJIS-win
*/
Expand All @@ -242,23 +251,10 @@ mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter)
s2 = 1;
}
if (s1 <= 0) {
c1 = c & ~MBFL_WCSPLANE_MASK;
if (c1 == MBFL_WCSPLANE_WINCP932) {
s1 = c & MBFL_WCSPLANE_MASK;
s2 = 1;
} else if (c1 == MBFL_WCSPLANE_JIS0208) {
s1 = c & MBFL_WCSPLANE_MASK;
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
s1 = c & MBFL_WCSPLANE_MASK;
s1 |= 0x8080;
} else if (c == 0xa5) { /* YEN SIGN */
s1 = 0x005c; /* YEN SIGN */
} else if (c == 0x203e) { /* OVER LINE */
s1 = 0x007e; /* FULLWIDTH MACRON */
if (c == 0xa5) { /* YEN SIGN */
s1 = 0x216F; /* FULLWIDTH YEN SIGN */
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
s1 = 0x2140;
} else if (c == 0xff5e) { /* FULLWIDTH TILDE */
s1 = 0x2141;
} else if (c == 0x2225) { /* PARALLEL TO */
s1 = 0x2142;
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
Expand Down
7 changes: 4 additions & 3 deletions ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,11 @@ static int mbfl_filt_conv_eucjp_wchar_flush(mbfl_convert_filter *filter)
int
mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter)
{
int s;
int s = 0;

s = 0;
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
if (c == 0xAF) { /* U+00AF is MACRON */
s = 0xA2B4; /* Use JIS X 0212 overline */
} else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
s = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
Expand Down
20 changes: 1 addition & 19 deletions ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c
Original file line number Diff line number Diff line change
Expand Up @@ -273,25 +273,7 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter)
s1 = 0x2d62; /* NUMERO SIGN */
}
if (s1 <= 0) {
c1 = c & ~MBFL_WCSPLANE_MASK;
if (c1 == MBFL_WCSPLANE_WINCP932) {
s1 = c & MBFL_WCSPLANE_MASK;
if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 120ku */
s1 = -1;
}
} else if (c1 == MBFL_WCSPLANE_JIS0208) {
s1 = c & MBFL_WCSPLANE_MASK;
if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 94ku */
s1 = -1;
}
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
s1 = c & MBFL_WCSPLANE_MASK;
if (s1 >= ((83 + 0x20) << 8)) { /* 83ku - 94ku */
s1 = -1;
} else {
s1 |= 0x8080;
}
} else if (c == 0xa5) { /* YEN SIGN */
if (c == 0xa5) { /* YEN SIGN */
s1 = 0x216f; /* FULLWIDTH YEN SIGN */
} else if (c == 0x203e) { /* OVER LINE */
s1 = 0x2131; /* FULLWIDTH MACRON */
Expand Down
11 changes: 1 addition & 10 deletions ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c
Original file line number Diff line number Diff line change
Expand Up @@ -302,16 +302,7 @@ mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter)
s1 = (c1 << 8) | c2;
}
if (s1 <= 0) {
c1 = c & ~MBFL_WCSPLANE_MASK;
if (c1 == MBFL_WCSPLANE_WINCP932) {
s1 = c & MBFL_WCSPLANE_MASK;
s2 = 1;
} else if (c1 == MBFL_WCSPLANE_JIS0208) {
s1 = c & MBFL_WCSPLANE_MASK;
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
s1 = c & MBFL_WCSPLANE_MASK;
s1 |= 0x8080;
} else if (c == 0xa5) { /* YEN SIGN */
if (c == 0xa5) { /* YEN SIGN */
s1 = 0x216f; /* FULLWIDTH YEN SIGN */
} else if (c == 0x203e) { /* OVER LINE */
s1 = 0x2131; /* FULLWIDTH MACRON */
Expand Down
11 changes: 1 addition & 10 deletions ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c
Original file line number Diff line number Diff line change
Expand Up @@ -330,16 +330,7 @@ mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter)
s1 = (c1 << 8) | c2;
}
if (s1 <= 0) {
c1 = c & ~MBFL_WCSPLANE_MASK;
if (c1 == MBFL_WCSPLANE_WINCP932) {
s1 = c & MBFL_WCSPLANE_MASK;
s2 = 1;
} else if (c1 == MBFL_WCSPLANE_JIS0208) {
s1 = c & MBFL_WCSPLANE_MASK;
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
s1 = c & MBFL_WCSPLANE_MASK;
s1 |= 0x8080;
} else if (c == 0xa5) { /* YEN SIGN */
if (c == 0xa5) { /* YEN SIGN */
s1 = 0x216f; /* FULLWIDTH YEN SIGN */
} else if (c == 0x203e) { /* OVER LINE */
s1 = 0x2131; /* FULLWIDTH MACRON */
Expand Down
15 changes: 2 additions & 13 deletions ext/mbstring/libmbfl/filters/mbfilter_jis.c
Original file line number Diff line number Diff line change
Expand Up @@ -274,9 +274,8 @@ mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter)
int
mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter)
{
int c1, s;
int s = 0;

s = 0;
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
Expand All @@ -287,20 +286,12 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter)
s = ucs_r_jis_table[c - ucs_r_jis_table_min];
}
if (s <= 0) {
c1 = c & ~MBFL_WCSPLANE_MASK;
if (c1 == MBFL_WCSPLANE_JIS0208) {
s = c & MBFL_WCSPLANE_MASK;
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
s = c & MBFL_WCSPLANE_MASK;
s |= 0x8080;
} else if (c == 0xa5) { /* YEN SIGN */
if (c == 0xa5) { /* YEN SIGN */
s = 0x1005c;
} else if (c == 0x203e) { /* OVER LINE */
s = 0x1007e;
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
s = 0x2140;
} else if (c == 0xff5e) { /* FULLWIDTH TILDE */
s = 0x2141;
} else if (c == 0x2225) { /* PARALLEL TO */
s = 0x2142;
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
Expand Down Expand Up @@ -396,8 +387,6 @@ mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter)
s = 0x1007e;
} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
s = 0x2140;
} else if (c == 0xff5e) { /* FULLWIDTH TILDE */
s = 0x2141;
} else if (c == 0x2225) { /* PARALLEL TO */
s = 0x2142;
} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
Expand Down
Loading