# Patch summary:
#   * bigframes/core/__init__.py: when building the unpivot label rows, replace
#     any label component for which pandas.notnull() is false with None before
#     the rows are handed to pa.Table.from_pylist().
#   * tests/system/small/test_multiindex.py: add a system test that stacks a
#     frame whose column MultiIndex has a None entry in its second level.
# NOTE(review): this patch had been flattened onto a single line; line breaks
# and leading indentation were reconstructed from the hunk line counts and
# Python syntax -- verify context lines against the target files before applying.
diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py
index 185ce7cd4f..eef0efcf83 100644
--- a/bigframes/core/__init__.py
+++ b/bigframes/core/__init__.py
@@ -429,7 +429,10 @@ def _create_unpivot_labels_array(
         for row_offset in range(len(former_column_labels)):
             row_label = former_column_labels[row_offset]
             row_label = (row_label,) if not isinstance(row_label, tuple) else row_label
-            row = {col_ids[i]: row_label[i] for i in range(len(col_ids))}
+            row = {
+                col_ids[i]: (row_label[i] if pandas.notnull(row_label[i]) else None)
+                for i in range(len(col_ids))
+            }
             rows.append(row)
 
         return ArrayValue.from_pyarrow(pa.Table.from_pylist(rows), session=self.session)
diff --git a/tests/system/small/test_multiindex.py b/tests/system/small/test_multiindex.py
index bb0af52976..613ad945c1 100644
--- a/tests/system/small/test_multiindex.py
+++ b/tests/system/small/test_multiindex.py
@@ -1191,3 +1191,22 @@ def test_explode_w_multi_index():
         check_dtype=False,
         check_index_type=False,
     )
+
+
+def test_column_multi_index_w_na_stack(scalars_df_index, scalars_pandas_df_index):
+    columns = ["int64_too", "int64_col", "rowindex_2"]
+    level1 = pandas.Index(["b", "c", "d"])
+    # Need resulting column to be pyarrow string rather than object dtype
+    level2 = pandas.Index([None, "b", "b"], dtype="string[pyarrow]")
+    multi_columns = pandas.MultiIndex.from_arrays([level1, level2])
+    bf_df = scalars_df_index[columns].copy()
+    bf_df.columns = multi_columns
+    pd_df = scalars_pandas_df_index[columns].copy()
+    pd_df.columns = multi_columns
+
+    pd_result = pd_df.stack()
+    bf_result = bf_df.stack().to_pandas()
+
+    # Pandas produces pd.NA, where bq dataframes produces NaN
+    pd_result["c"] = pd_result["c"].replace(pandas.NA, np.nan)
+    pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)