@@ -235,24 +235,31 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
235235static PyObject *
236236u_getitem (arrayobject * ap , Py_ssize_t i )
237237{
238- return PyUnicode_FromOrdinal (((Py_UNICODE * ) ap -> ob_item )[i ]);
238+ return PyUnicode_FromOrdinal (((wchar_t * ) ap -> ob_item )[i ]);
239239}
240240
241241static int
242242u_setitem (arrayobject * ap , Py_ssize_t i , PyObject * v )
243243{
244- Py_UNICODE * p ;
245- Py_ssize_t len ;
246-
247- if (!PyArg_Parse (v , "u#;array item must be unicode character" , & p , & len ))
244+ PyObject * u ;
245+ if (!PyArg_Parse (v , "U;array item must be unicode character" , & u )) {
248246 return -1 ;
249- if (len != 1 ) {
247+ }
248+
249+ Py_ssize_t len = PyUnicode_AsWideChar (u , NULL , 0 );
250+ if (len != 2 ) {
250251 PyErr_SetString (PyExc_TypeError ,
251252 "array item must be unicode character" );
252253 return -1 ;
253254 }
254- if (i >= 0 )
255- ((Py_UNICODE * )ap -> ob_item )[i ] = p [0 ];
255+
256+ wchar_t w ;
257+ len = PyUnicode_AsWideChar (u , & w , 1 );
258+ assert (len == 1 );
259+
260+ if (i >= 0 ) {
261+ ((wchar_t * )ap -> ob_item )[i ] = w ;
262+ }
256263 return 0 ;
257264}
258265
@@ -530,7 +537,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
530537
531538DEFINE_COMPAREITEMS (b , signed char )
532539DEFINE_COMPAREITEMS (BB , unsigned char )
533- DEFINE_COMPAREITEMS (u , Py_UNICODE )
540+ DEFINE_COMPAREITEMS (u , wchar_t )
534541DEFINE_COMPAREITEMS (h , short )
535542DEFINE_COMPAREITEMS (HH , unsigned short )
536543DEFINE_COMPAREITEMS (i , int )
@@ -548,7 +555,7 @@ DEFINE_COMPAREITEMS(QQ, unsigned long long)
548555static const struct arraydescr descriptors [] = {
549556 {'b' , 1 , b_getitem , b_setitem , b_compareitems , "b" , 1 , 1 },
550557 {'B' , 1 , BB_getitem , BB_setitem , BB_compareitems , "B" , 1 , 0 },
551- {'u' , sizeof (Py_UNICODE ), u_getitem , u_setitem , u_compareitems , "u" , 0 , 0 },
558+ {'u' , sizeof (wchar_t ), u_getitem , u_setitem , u_compareitems , "u" , 0 , 0 },
552559 {'h' , sizeof (short ), h_getitem , h_setitem , h_compareitems , "h" , 1 , 1 },
553560 {'H' , sizeof (short ), HH_getitem , HH_setitem , HH_compareitems , "H" , 1 , 0 },
554561 {'i' , sizeof (int ), i_getitem , i_setitem , i_compareitems , "i" , 1 , 1 },
@@ -1660,7 +1667,7 @@ array_array_tobytes_impl(arrayobject *self)
16601667/*[clinic input]
16611668array.array.fromunicode
16621669
1663- ustr: Py_UNICODE(zeroes=True)
1670+ ustr: unicode
16641671 /
16651672
16661673Extends this array with data from the unicode string ustr.
@@ -1671,25 +1678,28 @@ some other type.
16711678[clinic start generated code]*/
16721679
16731680static PyObject *
1674- array_array_fromunicode_impl (arrayobject * self , const Py_UNICODE * ustr ,
1675- Py_ssize_clean_t ustr_length )
1676- /*[clinic end generated code: output=cf2f662908e2befc input=150f00566ffbca6e]*/
1681+ array_array_fromunicode_impl (arrayobject * self , PyObject * ustr )
1682+ /*[clinic end generated code: output=24359f5e001a7f2b input=025db1fdade7a4ce]*/
16771683{
1678- char typecode ;
1679-
1680- typecode = self -> ob_descr -> typecode ;
1681- if (typecode != 'u' ) {
1684+ if (self -> ob_descr -> typecode != 'u' ) {
16821685 PyErr_SetString (PyExc_ValueError ,
16831686 "fromunicode() may only be called on "
16841687 "unicode type arrays" );
16851688 return NULL ;
16861689 }
1687- if (ustr_length > 0 ) {
1690+
1691+ Py_ssize_t ustr_length = PyUnicode_AsWideChar (ustr , NULL , 0 );
1692+ assert (ustr_length > 0 );
1693+ if (ustr_length > 1 ) {
1694+ ustr_length -- ; /* trim trailing NUL character */
16881695 Py_ssize_t old_size = Py_SIZE (self );
1689- if (array_resize (self , old_size + ustr_length ) == -1 )
1696+ if (array_resize (self , old_size + ustr_length ) == -1 ) {
16901697 return NULL ;
1691- memcpy (self -> ob_item + old_size * sizeof (Py_UNICODE ),
1692- ustr , ustr_length * sizeof (Py_UNICODE ));
1698+ }
1699+
1700+ // must not fail
1701+ PyUnicode_AsWideChar (
1702+ ustr , ((wchar_t * )self -> ob_item ) + old_size , ustr_length );
16931703 }
16941704
16951705 Py_RETURN_NONE ;
@@ -1709,14 +1719,12 @@ static PyObject *
17091719array_array_tounicode_impl (arrayobject * self )
17101720/*[clinic end generated code: output=08e442378336e1ef input=127242eebe70b66d]*/
17111721{
1712- char typecode ;
1713- typecode = self -> ob_descr -> typecode ;
1714- if (typecode != 'u' ) {
1722+ if (self -> ob_descr -> typecode != 'u' ) {
17151723 PyErr_SetString (PyExc_ValueError ,
17161724 "tounicode() may only be called on unicode type arrays" );
17171725 return NULL ;
17181726 }
1719- return PyUnicode_FromWideChar ((Py_UNICODE * ) self -> ob_item , Py_SIZE (self ));
1727+ return PyUnicode_FromWideChar ((wchar_t * ) self -> ob_item , Py_SIZE (self ));
17201728}
17211729
17221730/*[clinic input]
@@ -2675,30 +2683,20 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
26752683 Py_DECREF (v );
26762684 }
26772685 else if (initial != NULL && PyUnicode_Check (initial )) {
2678- Py_UNICODE * ustr ;
26792686 Py_ssize_t n ;
2680-
2681- ustr = PyUnicode_AsUnicode (initial );
2687+ wchar_t * ustr = PyUnicode_AsWideCharString (initial , & n );
26822688 if (ustr == NULL ) {
2683- PyErr_NoMemory ();
26842689 Py_DECREF (a );
26852690 return NULL ;
26862691 }
26872692
2688- n = PyUnicode_GET_DATA_SIZE (initial );
26892693 if (n > 0 ) {
26902694 arrayobject * self = (arrayobject * )a ;
2691- char * item = self -> ob_item ;
2692- item = (char * )PyMem_Realloc (item , n );
2693- if (item == NULL ) {
2694- PyErr_NoMemory ();
2695- Py_DECREF (a );
2696- return NULL ;
2697- }
2698- self -> ob_item = item ;
2699- Py_SET_SIZE (self , n / sizeof (Py_UNICODE ));
2700- memcpy (item , ustr , n );
2701- self -> allocated = Py_SIZE (self );
2695+ // self->ob_item may be NULL but it is safe.
2696+ PyMem_Free (self -> ob_item );
2697+ self -> ob_item = (char * )ustr ;
2698+ Py_SET_SIZE (self , n );
2699+ self -> allocated = n ;
27022700 }
27032701 }
27042702 else if (initial != NULL && array_Check (initial ) && len > 0 ) {
0 commit comments