From 9043d6f0cec0795702579d78d0c91fa6aa934246 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 22 Aug 2023 10:25:45 +0100 Subject: [PATCH 1/2] add DataFrame.update_column and DataFrame.update_columns --- .../dataframe_api/dataframe_object.py | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index b0f70b43..16768883 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -201,6 +201,67 @@ def insert_column(self, loc: int, column: Column[Any]) -> DataFrame: """ ... + def update_column(self, column: Column[Any]) -> DataFrame: + """ + Update column in DataFrame. + + The column's name must already be present in the dataframe. + + Parameters + ---------- + column : Column + + Returns + ------- + DataFrame + """ + ... + + def update_columns(self, columns: Sequence[Column[Any]]) -> DataFrame: + """ + Update values in existing columns. + + Like :meth:`update_column`, but can update multiple (independent) columns. + Some implementations may be able to make use of parallelism in this + case. For example instead of: + + .. code-block:: python + + new_column = df.get_column_by_name('a') + 1 + df = df.update_column(new_column) + new_column = df.get_column_by_name('b') + 1 + df = df.update_column(new_column) + + it would be better to write + + .. code-block:: python + + new_column_0 = df.get_column_by_name('a') + 1 + new_column_1 = df.get_column_by_name('b') + 1 + df = df.update_columns( + [ + new_column_0, + new_column_1, + ] + ) + + so that updates can happen in parallel for some implementations. + + Parameters + ---------- + columns : Sequence[Column] + Sequence of columns. + Must be independent of each other. 
+ Column names must already be present in dataframe - use + :meth:`Column.rename` to rename them + beforehand if necessary. + + Returns + ------- + DataFrame + """ + ... + def drop_column(self, label: str) -> DataFrame: """ Drop the specified column. From fc949563145df691be392aadf11f8bcdb46efec1 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 28 Aug 2023 15:12:27 +0200 Subject: [PATCH 2/2] single update_columns, remove independence --- .../dataframe_api/dataframe_object.py | 56 ++++--------------- 1 file changed, 10 insertions(+), 46 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 16768883..a917043c 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -201,61 +201,25 @@ def insert_column(self, loc: int, column: Column[Any]) -> DataFrame: """ ... - def update_column(self, column: Column[Any]) -> DataFrame: + def update_columns(self, columns: Column[Any] | Sequence[Column[Any]], /) -> DataFrame: """ - Update column in DataFrame. + Update values in existing column(s) in DataFrame. - The column's name must already be present in the dataframe. - - Parameters - ---------- - column : Column - - Returns - ------- - DataFrame - """ - ... - - def update_columns(self, columns: Sequence[Column[Any]]) -> DataFrame: - """ - Update values in existing columns. + The column's name will be used to tell which column to update. + To update a column with a different name, combine with :meth:`Column.rename`, + e.g.: - Like :meth:`update_column`, but can update multiple (independent) columns. - Some implementations may be able to make use of parallelism in this - case. For example instead of: - .. 
code-block:: python new_column = df.get_column_by_name('a') + 1 - df = df.update_column(new_column) - new_column = df.get_column_by_name('b') + 1 - df = df.update_column(new_column) - - it would be better to write - - .. code-block:: python - - new_column_0 = df.get_column_by_name('a') + 1 - new_column_1 = df.get_column_by_name('b') + 1 - df = df.update_columns( - [ - new_column_0, - new_column_1, - ] - ) - - so that updates can happen in parallel for some implementations. + df = df.update_columns(new_column.rename('b')) Parameters ---------- - columns : Sequence[Column] - Sequence of columns. - Must be independent of each other. - Column names must already be present in dataframe - use - :meth:`Column.rename` to rename them - beforehand if necessary. - + columns : Column | Sequence[Column] + Column(s) to update. If updating multiple columns, they must all have + different names. + Returns ------- DataFrame