@@ -79,7 +79,7 @@ const struct mbfl_convert_vtbl vtbl_utf8_wchar = {
79
79
mbfl_filt_conv_common_ctor ,
80
80
mbfl_filt_conv_common_dtor ,
81
81
mbfl_filt_conv_utf8_wchar ,
82
- mbfl_filt_conv_common_flush
82
+ mbfl_filt_conv_utf8_wchar_flush
83
83
};
84
84
85
85
const struct mbfl_convert_vtbl vtbl_wchar_utf8 = {
@@ -93,118 +93,122 @@ const struct mbfl_convert_vtbl vtbl_wchar_utf8 = {
93
93
94
94
/* Evaluate `statement`; when its result is negative, return -1 from the enclosing function. */
#define CK (statement ) do { if ((statement) < 0) return (-1); } while (0)
95
95
96
+ int mbfl_filt_put_invalid_char (int c , mbfl_convert_filter * filter )
97
+ {
98
+ int w ;
99
+ w = c & MBFL_WCSGROUP_MASK ;
100
+ w |= MBFL_WCSGROUP_THROUGH ;
101
+ filter -> status = 0 ;
102
+ filter -> cache = 0 ;
103
+ CK ((* filter -> output_function )(w , filter -> data ));
104
+ }
105
+
106
+
96
107
/*
97
108
* UTF-8 => wchar
98
109
*/
99
110
int mbfl_filt_conv_utf8_wchar (int c , mbfl_convert_filter * filter )
100
111
{
101
112
int s , c1 , w = 0 , flag = 0 ;
102
113
103
- if (c < 0x80 ) {
104
- if (filter -> status != 0 ) {
105
- w = (filter -> cache & MBFL_WCSGROUP_MASK ) | MBFL_WCSGROUP_THROUGH ;
106
- CK ((* filter -> output_function )(w , filter -> data ));
107
- filter -> status = 0 ;
108
- filter -> cache = 0 ;
109
- }
110
- if (c >= 0 ) {
114
+ retry :
115
+ switch (filter -> status & 0xff ) {
116
+ case 0x00 :
117
+ if (c < 0x80 ) {
111
118
CK ((* filter -> output_function )(c , filter -> data ));
119
+ } else if (c >= 0xc2 && c <= 0xdf ) { /* 2byte code first char: 0xc2-0xdf */
120
+ filter -> status = 0x10 ;
121
+ filter -> cache = c & 0x1f ;
122
+ } else if (c >= 0xe0 && c <= 0xef ) { /* 3byte code first char: 0xe0-0xef */
123
+ filter -> status = 0x20 ;
124
+ filter -> cache = c & 0xf ;
125
+ } else if (c >= 0xf0 && c <= 0xf4 ) { /* 3byte code first char: 0xf0-0xf4 */
126
+ filter -> status = 0x30 ;
127
+ filter -> cache = c & 0x7 ;
128
+ } else {
129
+ mbfl_filt_put_invalid_char (c , filter );
112
130
}
113
- } else if (c < 0xc0 ) {
114
- int status = filter -> status & 0xff ;
115
- switch (status ) {
116
- case 0x10 : /* 2byte code 2nd char: 0x80-0xbf */
117
- case 0x21 : /* 3byte code 3rd char: 0x80-0xbf */
118
- case 0x32 : /* 4byte code 4th char: 0x80-0xbf */
119
- filter -> status = 0 ;
120
- s = filter -> cache | (c & 0x3f );
131
+ break ;
132
+ case 0x10 : /* 2byte code 2nd char: 0x80-0xbf */
133
+ case 0x21 : /* 3byte code 3rd char: 0x80-0xbf */
134
+ case 0x32 : /* 4byte code 4th char: 0x80-0xbf */
135
+ filter -> status = 0 ;
136
+ if (c >= 0x80 && c <= 0xbf ) {
137
+ s = (filter -> cache <<6 ) | (c & 0x3f );
121
138
filter -> cache = 0 ;
122
- if ((status == 0x10 && s >= 0x80 ) ||
123
- (status == 0x21 && s >= 0x800 && (s < 0xd800 || s > 0xdfff )) ||
124
- (status == 0x32 && s >= 0x10000 && s < 0x110000 )) {
125
- CK ((* filter -> output_function )(s , filter -> data ));
126
- } else {
127
- w = s & MBFL_WCSGROUP_MASK ;
128
- flag = 1 ;
129
- }
130
- break ;
131
- case 0x20 : /* 3byte code 2nd char: 0:0xa0-0xbf,D:0x80-9F,1-C,E-F:0x80-0x9f */
132
- s = filter -> cache | ((c & 0x3f ) << 6 );
133
- c1 = (s >> 12 ) & 0xf ;
134
- if ((c1 == 0x0 && c >= 0xa0 ) ||
135
- (c1 == 0xd && c < 0xa0 ) ||
136
- (c1 > 0x0 && c1 != 0xd )) {
137
- filter -> cache = s ;
138
- filter -> status ++ ;
139
- } else {
140
- w = s & MBFL_WCSGROUP_MASK ;
141
- flag = 1 ;
142
- }
143
- break ;
144
- case 0x31 : /* 4byte code 3rd char: 0x80-0xbf */
145
- filter -> cache |= ((c & 0x3f ) << 6 );
146
- filter -> status ++ ;
147
- break ;
148
- case 0x30 : /* 4byte code 2nd char: 0:0x90-0xbf,1-3:0x80-0xbf,4:0x80-0x8f */
149
- s = filter -> cache | ((c & 0x3f ) << 12 );
150
- c1 = (s >> 18 ) & 0x7 ;
151
- if ((c1 == 0x0 && c >= 0x90 ) ||
152
- (c1 > 0x0 && c1 < 0x4 ) ||
153
- (c1 == 0x4 && c < 0x90 )) {
154
- filter -> cache = s ;
155
- filter -> status ++ ;
156
- } else {
157
- w = s & MBFL_WCSGROUP_MASK ;
158
- flag = 1 ;
159
- }
160
- break ;
161
- default :
162
- w = c & MBFL_WCSGROUP_MASK ;
163
- flag = 1 ;
164
- break ;
139
+ CK ((* filter -> output_function )(s , filter -> data ));
140
+ } else {
141
+ mbfl_filt_put_invalid_char (filter -> cache , filter );
142
+ goto retry ;
165
143
}
166
- } else if (c < 0xc2 ) { /* invalid: 0xc0,0xc1 */
167
- w = c & MBFL_WCSGROUP_MASK ;
168
- flag = 1 ;
169
- } else if (c < 0xe0 ) { /* 2byte code first char: 0xc2-0xdf */
170
- if (filter -> status == 0x0 ) {
171
- filter -> status = 0x10 ;
172
- filter -> cache = (c & 0x1f ) << 6 ;
144
+ break ;
145
+ case 0x20 : /* 3byte code 2nd char: 0:0xa0-0xbf,D:0x80-9F,1-C,E-F:0x80-0x9f */
146
+ s = (filter -> cache <<6 ) | (c & 0x3f );
147
+ c1 = filter -> cache & 0xf ;
148
+
149
+ if ((c >= 0x80 && c <= 0xbf ) &&
150
+ ((c1 == 0x0 && c >= 0xa0 ) ||
151
+ (c1 == 0xd && c < 0xa0 ) ||
152
+ (c1 > 0x0 && c1 != 0xd ))) {
153
+ filter -> cache = s ;
154
+ filter -> status ++ ;
173
155
} else {
174
- w = c & MBFL_WCSGROUP_MASK ;
175
- flag = 1 ;
156
+ mbfl_filt_put_invalid_char ( filter -> cache , filter ) ;
157
+ goto retry ;
176
158
}
177
- } else if (c < 0xf0 ) { /* 3byte code first char: 0xe0-0xef */
178
- if (filter -> status == 0x0 ) {
179
- filter -> status = 0x20 ;
180
- filter -> cache = (c & 0xf ) << 12 ;
159
+ break ;
160
+ case 0x30 : /* 4byte code 2nd char: 0:0x90-0xbf,1-3:0x80-0xbf,4:0x80-0x8f */
161
+ s = (filter -> cache <<6 ) | (c & 0x3f );
162
+ c1 = filter -> cache & 0x7 ;
163
+
164
+ if ((c >= 0x80 && c <= 0xbf ) &&
165
+ ((c1 == 0x0 && c >= 0x90 ) ||
166
+ (c1 == 0x4 && c < 0x90 ) ||
167
+ (c1 > 0x0 && c1 != 0x4 ))) {
168
+ filter -> cache = s ;
169
+ filter -> status ++ ;
181
170
} else {
182
- w = c & MBFL_WCSGROUP_MASK ;
183
- flag = 1 ;
171
+ mbfl_filt_put_invalid_char ( filter -> cache , filter ) ;
172
+ goto retry ;
184
173
}
185
- } else if (c < 0xf5 ) { /* 4byte code first char: 0xf0-0xf4 */
186
- if (filter -> status == 0x0 ) {
187
- filter -> status = 0x30 ;
188
- filter -> cache = (c & 0x7 ) << 18 ;
174
+ break ;
175
+ case 0x31 : /* 4byte code 3rd char: 0x80-0xbf */
176
+ if (c >= 0x80 && c <= 0xbf ) {
177
+ filter -> cache = (filter -> cache <<6 ) | (c & 0x3f );
178
+ filter -> status ++ ;
189
179
} else {
190
- w = c & MBFL_WCSGROUP_MASK ;
191
- flag = 1 ;
180
+ mbfl_filt_put_invalid_char ( filter -> cache , filter ) ;
181
+ goto retry ;
192
182
}
193
- } else {
194
- w = c & MBFL_WCSGROUP_MASK ;
195
- flag = 1 ;
196
- }
197
-
198
- if (flag ) {
199
- w |= MBFL_WCSGROUP_THROUGH ;
200
- CK ((* filter -> output_function )(w , filter -> data ));
183
+ break ;
184
+ default :
201
185
filter -> status = 0 ;
202
- filter -> cache = 0 ;
186
+ break ;
203
187
}
204
188
205
189
return c ;
206
190
}
207
191
192
+ int mbfl_filt_conv_utf8_wchar_flush (mbfl_convert_filter * filter )
193
+ {
194
+ int status , cache ;
195
+
196
+ status = filter -> status ;
197
+ cache = filter -> cache ;
198
+
199
+ filter -> status = 0 ;
200
+ filter -> cache = 0 ;
201
+
202
+ if (status != 0 ) {
203
+ mbfl_filt_put_invalid_char (cache , filter );
204
+ }
205
+
206
+ if (filter -> flush_function != NULL ) {
207
+ (* filter -> flush_function )(filter -> data );
208
+ }
209
+ return 0 ;
210
+ }
211
+
208
212
/*
209
213
* wchar => UTF-8
210
214
*/
0 commit comments