@@ -94,10 +94,19 @@ def array(
9494 :class:`pandas.Period` :class:`pandas.arrays.PeriodArray`
9595 :class:`datetime.datetime` :class:`pandas.arrays.DatetimeArray`
9696 :class:`datetime.timedelta` :class:`pandas.arrays.TimedeltaArray`
97+ :class:`int` :class:`pandas.arrays.IntegerArray`
98+ :class:`str` :class:`pandas.arrays.StringArray`
99+ :class:`bool` :class:`pandas.arrays.BooleanArray`
97100 ============================== =====================================
98101
99102 For all other cases, NumPy's usual inference rules will be used.
100103
104+ .. versionchanged:: 1.0.0
105+
106+ Pandas infers nullable-integer dtype for integer data,
107+ string dtype for string data, and nullable-boolean dtype
108+ for boolean data.
109+
101110 copy : bool, default True
102111 Whether to copy the data, even if not necessary. Depending
103112 on the type of `data`, creating the new array may require
@@ -154,14 +163,6 @@ def array(
154163 ['a', 'b']
155164 Length: 2, dtype: str32
156165
157- Or use the dedicated constructor for the array you're expecting, and
158- wrap that in a PandasArray
159-
160- >>> pd.array(np.array(['a', 'b'], dtype='<U1'))
161- <PandasArray>
162- ['a', 'b']
163- Length: 2, dtype: str32
164-
165166 Finally, Pandas has arrays that mostly overlap with NumPy
166167
167168 * :class:`arrays.DatetimeArray`
@@ -184,20 +185,28 @@ def array(
184185
185186 Examples
186187 --------
187- If a dtype is not specified, `data` is passed through to
188- :meth:`numpy.array`, and a :class:`arrays.PandasArray` is returned .
188+ If a dtype is not specified, pandas will infer the best dtype from the values.
189+ See the description of `dtype` for the types pandas infers.
189190
190191 >>> pd.array([1, 2])
191- <PandasArray >
192+ <IntegerArray >
192193 [1, 2]
193- Length: 2, dtype: int64
194+ Length: 2, dtype: Int64
194195
195- Or the NumPy dtype can be specified
196+ >>> pd.array([1, 2, np.nan])
197+ <IntegerArray>
198+ [1, 2, NaN]
199+ Length: 3, dtype: Int64
196200
197- >>> pd.array([1, 2], dtype=np.dtype("int32"))
198- <PandasArray>
199- [1, 2]
200- Length: 2, dtype: int32
201+ >>> pd.array(["a", None, "c"])
202+ <StringArray>
203+ ['a', nan, 'c']
204+ Length: 3, dtype: string
205+
206+ >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
207+ <PeriodArray>
208+ ['2000-01-01', '2000-01-01']
209+ Length: 2, dtype: period[D]
201210
202211 You can use the string alias for `dtype`
203212
@@ -212,29 +221,24 @@ def array(
212221 [a, b, a]
213222 Categories (3, object): [a < b < c]
214223
215- Because omitting the `dtype` passes the data through to NumPy,
216- a mixture of valid integers and NA will return a floating-point
217- NumPy array.
224+ If pandas does not infer a dedicated extension type, a
225+ :class:`arrays.PandasArray` is returned.
218226
219- >>> pd.array([1, 2, np.nan ])
227+ >>> pd.array([1.1 , 2.2 ])
220228 <PandasArray>
221- [1.0, 2.0, nan]
222- Length: 3, dtype: float64
223-
224- To use pandas' nullable :class:`pandas.arrays.IntegerArray`, specify
225- the dtype:
229+ [1.1, 2.2]
230+ Length: 2, dtype: float64
226231
227- >>> pd.array([1, 2, np.nan], dtype='Int64')
228- <IntegerArray>
229- [1, 2, NaN]
230- Length: 3, dtype: Int64
232+ As mentioned in the "Notes" section, new extension types may be added
233+ in the future (by pandas or 3rd party libraries), causing the return
234+ value to no longer be a :class:`arrays.PandasArray`. Specify the `dtype`
235+ as a NumPy dtype if you need to ensure there's no future change in
236+ behavior.
231237
232- Pandas will infer an ExtensionArray for some types of data:
233-
234- >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
235- <PeriodArray>
236- ['2000-01-01', '2000-01-01']
237- Length: 2, dtype: period[D]
238+ >>> pd.array([1, 2], dtype=np.dtype("int32"))
239+ <PandasArray>
240+ [1, 2]
241+ Length: 2, dtype: int32
238242
239243 `data` must be 1-dimensional. A ValueError is raised when the input
240244 has the wrong dimensionality.
@@ -246,21 +250,26 @@ def array(
246250 """
247251 from pandas .core .arrays import (
248252 period_array ,
253+ BooleanArray ,
254+ IntegerArray ,
249255 IntervalArray ,
250256 PandasArray ,
251257 DatetimeArray ,
252258 TimedeltaArray ,
259+ StringArray ,
253260 )
254261
255262 if lib .is_scalar (data ):
256263 msg = "Cannot pass scalar '{}' to 'pandas.array'."
257264 raise ValueError (msg .format (data ))
258265
259- data = extract_array ( data , extract_numpy = True )
260-
261- if dtype is None and isinstance ( data , ABCExtensionArray ):
266+ if dtype is None and isinstance (
267+ data , ( ABCSeries , ABCIndexClass , ABCExtensionArray )
268+ ):
262269 dtype = data .dtype
263270
271+ data = extract_array (data , extract_numpy = True )
272+
264273 # this returns None for not-found dtypes.
265274 if isinstance (dtype , str ):
266275 dtype = registry .find (dtype ) or dtype
@@ -270,7 +279,7 @@ def array(
270279 return cls ._from_sequence (data , dtype = dtype , copy = copy )
271280
272281 if dtype is None :
273- inferred_dtype = lib .infer_dtype (data , skipna = False )
282+ inferred_dtype = lib .infer_dtype (data , skipna = True )
274283 if inferred_dtype == "period" :
275284 try :
276285 return period_array (data , copy = copy )
@@ -298,7 +307,14 @@ def array(
298307 # timedelta, timedelta64
299308 return TimedeltaArray ._from_sequence (data , copy = copy )
300309
301- # TODO(BooleanArray): handle this type
310+ elif inferred_dtype == "string" :
311+ return StringArray ._from_sequence (data , copy = copy )
312+
313+ elif inferred_dtype == "integer" :
314+ return IntegerArray ._from_sequence (data , copy = copy )
315+
316+ elif inferred_dtype == "boolean" :
317+ return BooleanArray ._from_sequence (data , copy = copy )
302318
303319 # Pandas overrides NumPy for
304320 # 1. datetime64[ns]
0 commit comments