Try to fix typecheck issues

vnlitvinov · vnlitvinov · commit 98bfab40d924 · 2022-03-31T23:30:58.000+03:00
Signed-off-by: Vasily Litvinov <vasilij.n.litvinov@intel.com>
diff --git a/pandas/core/exchange/buffer.py b/pandas/core/exchange/buffer.py
@@ -1,4 +1,7 @@
-from typing import Tuple
+from typing import (
+    Optional,
+    Tuple,
+)
 
 import numpy as np
 
@@ -52,7 +55,7 @@ def __dlpack__(self):
         """
         raise NotImplementedError("__dlpack__")
 
-    def __dlpack_device__(self) -> Tuple[DlpackDeviceType, int]:
+    def __dlpack_device__(self) -> Tuple[DlpackDeviceType, Optional[int]]:
         """
         Device type and device ID for where the data in the buffer resides.
         """
diff --git a/pandas/core/exchange/column.py b/pandas/core/exchange/column.py
@@ -1,6 +1,8 @@
 from functools import cached_property
 from typing import (
     Any,
+    List,
+    Optional,
     Tuple,
 )
 
@@ -14,6 +16,7 @@
 from pandas.core.exchange.buffer import PandasBuffer
 from pandas.core.exchange.dataframe_protocol import (
     Column,
+    ColumnBuffers,
     ColumnNullType,
     DtypeKind,
 )
@@ -223,7 +226,7 @@ def get_buffers(self):
                          if the data buffer does not have an associated offsets
                          buffer.
         """
-        buffers = {}
+        buffers: ColumnBuffers = {}
         buffers["data"] = self._get_data_buffer()
         try:
             buffers["validity"] = self._get_validity_buffer()
@@ -328,7 +331,7 @@ def _get_offsets_buffer(self) -> Tuple[PandasBuffer, Any]:
             # For each string, we need to manually determine the next offset
             values = self._col.to_numpy()
             ptr = 0
-            offsets = [ptr] + [None] * len(values)
+            offsets: List[Optional[int]] = [ptr] + [None] * len(values)
             for i, v in enumerate(values):
                 # For missing values (in this case, `np.nan` values)
                 # we don't increment the pointer
diff --git a/pandas/core/exchange/dataframe_protocol.py b/pandas/core/exchange/dataframe_protocol.py
@@ -216,7 +216,7 @@ class Column(ABC):
 
     @property
     @abstractmethod
-    def size(self) -> Optional[int]:
+    def size(self) -> int:
         """
         Size of the column, in elements.
 
diff --git a/pandas/core/exchange/from_dataframe.py b/pandas/core/exchange/from_dataframe.py
@@ -2,6 +2,8 @@
 import re
 from typing import (
     Any,
+    Dict,
+    List,
     Optional,
     Tuple,
     Union,
@@ -22,7 +24,7 @@
     Endianness,
 )
 
-_NP_DTYPES = {
+_NP_DTYPES: Dict[DtypeKind, Dict[int, Any]] = {
     DtypeKind.INT: {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64},
     DtypeKind.UINT: {8: np.uint8, 16: np.uint16, 32: np.uint32, 64: np.uint64},
     DtypeKind.FLOAT: {32: np.float32, 64: np.float64},
@@ -90,7 +92,7 @@ def protocol_df_chunk_to_pandas(df: DataFrameXchg) -> pd.DataFrame:
     """
     # We need a dict of columns here, with each column being a NumPy array (at
     # least for now, deal with non-NumPy dtypes later).
-    columns = {}
+    columns: Dict[str, Any] = {}
     buffers = []  # hold on to buffers, keeps memory alive
     for name in df.column_names():
         if not isinstance(name, str):
@@ -210,6 +212,7 @@ def string_column_to_ndarray(col: Column) -> Tuple[np.ndarray, Any]:
 
     buffers = col.get_buffers()
 
+    assert buffers["offsets"], "String buffers must contain offsets"
     # Retrieve the data buffer containing the UTF-8 code units
     data_buff, protocol_data_dtype = buffers["data"]
     # We're going to reinterpret the buffer as uint8, so make sure we can do it safely
@@ -238,13 +241,14 @@ def string_column_to_ndarray(col: Column) -> Tuple[np.ndarray, Any]:
 
     null_pos = None
     if null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK):
+        assert buffers["validity"], "Validity buffers cannot be empty for masks"
         valid_buff, valid_dtype = buffers["validity"]
         null_pos = buffer_to_ndarray(valid_buff, valid_dtype, col.offset, col.size)
         if sentinel_val == 0:
             null_pos = ~null_pos
 
     # Assemble the strings from the code units
-    str_list = [None] * col.size
+    str_list: List[Union[None, float, str]] = [None] * col.size
     for i in range(col.size):
         # Check for missing values
         if null_pos is not None and null_pos[i]:
@@ -448,7 +452,7 @@ def bitmask_to_bool_ndarray(
 def set_nulls(
     data: Union[np.ndarray, pd.Series],
     col: Column,
-    validity: Tuple[Buffer, Tuple[DtypeKind, int, str, str]],
+    validity: Optional[Tuple[Buffer, Tuple[DtypeKind, int, str, str]]],
     allow_modify_inplace: bool = True,
 ):
     """
diff --git a/pandas/tests/exchange/test_impl.py b/pandas/tests/exchange/test_impl.py
@@ -72,7 +72,6 @@ def test_dataframe(data):
 
     df2 = df.__dataframe__()
 
-    assert df2._allow_copy is True
     assert df2.num_columns() == NCOLS
     assert df2.num_rows() == NROWS
 
@@ -153,7 +152,7 @@ def test_select_columns_error():
 
     df2 = df.__dataframe__()
 
-    with pytest.raises(ValueError, match="is not a sequence"):
+    with pytest.raises(ValueError, match="is not a sequence"):  # type: ignore[arg-type]
         df2.select_columns(np.array([0, 2]))
 
 
@@ -162,7 +161,7 @@ def test_select_columns_by_name_error():
 
     df2 = df.__dataframe__()
 
-    with pytest.raises(ValueError, match="is not a sequence"):
+    with pytest.raises(ValueError, match="is not a sequence"):  # type: ignore[arg-type]
         df2.select_columns_by_name(np.array(["col33", "col35"]))