diff --git a/docs/api_reference/python-api-reference.md b/docs/api_reference/python-api-reference.md index 56b4dd6395..8e059a35d4 100644 --- a/docs/api_reference/python-api-reference.md +++ b/docs/api_reference/python-api-reference.md @@ -31,30 +31,23 @@ SPDX-License-Identifier: MPL-2.0 .. autofunction:: power_grid_model.validation.errors_to_string ``` -## data types +### errors ```{eval-rst} -.. autoclass:: power_grid_model.data_types.SparseBatchArray -.. autoclass:: power_grid_model.data_types.BatchArray -.. autoclass:: power_grid_model.data_types.SingleDataset -.. autoclass:: power_grid_model.data_types.BatchDataset -.. autoclass:: power_grid_model.data_types.Dataset -.. autoclass:: power_grid_model.data_types.BatchList -.. autoclass:: power_grid_model.data_types.NominalValue -.. autoclass:: power_grid_model.data_types.RealValue -.. autoclass:: power_grid_model.data_types.AsymValue -.. autoclass:: power_grid_model.data_types.AttributeValue -.. autoclass:: power_grid_model.data_types.Component -.. autoclass:: power_grid_model.data_types.ComponentList -.. autoclass:: power_grid_model.data_types.SinglePythonDataset -.. autoclass:: power_grid_model.data_types.BatchPythonDataset -.. autoclass:: power_grid_model.data_types.PythonDataset +.. autoclass:: power_grid_model.validation.errors.ValidationError ``` -### errors +## data types ```{eval-rst} -.. autoclass:: power_grid_model.validation.errors.ValidationError +.. autoclass:: power_grid_model.data_types.Dataset +.. autoclass:: power_grid_model.data_types.SingleDataset +.. autoclass:: power_grid_model.data_types.BatchDataset +.. autoclass:: power_grid_model.data_types.DataArray +.. autoclass:: power_grid_model.data_types.SingleArray +.. autoclass:: power_grid_model.data_types.BatchArray +.. autoclass:: power_grid_model.data_types.DenseBatchArray +.. 
autoclass:: power_grid_model.data_types.SparseBatchArray ``` ## utils diff --git a/docs/user_manual/dataset-terminology.md b/docs/user_manual/dataset-terminology.md index 3df005dbcf..4649aaf657 100644 --- a/docs/user_manual/dataset-terminology.md +++ b/docs/user_manual/dataset-terminology.md @@ -12,9 +12,12 @@ Some terms regarding the data structures are explained here, including the defin - **Dataset:** Either a single or a batch dataset. - **SingleDataset:** A data type storing input data (i.e. all elements of all components) for a single scenario. - - **BatchDataset:** A data type storing update and or output data for one or more scenarios. A batch dataset can be either a sparse or a dense one. - - **SparseBatchDataset:** Dictionaries with a one-dimensional numpy int32 array and a one-dimensional structured numpy arrays. - - **DenseBatchDataset:** A dictionary where the keys are the component types and the values are two-dimensional structured numpy arrays. + - **BatchDataset:** A data type storing update and/or output data for one or more scenarios. A batch dataset can contain sparse or dense data, depending on the component. +- **DataArray:** A data array can be a single or a batch array. It is based on structured numpy arrays. + - **SingleArray:** A one-dimensional structured numpy array containing a list of components of the same type. + - **BatchArray:** Either a dense or a sparse batch array. + - **DenseBatchArray:** A two-dimensional structured numpy array containing a list of components of the same type for each scenario. + - **SparseBatchArray:** A dictionary with a one-dimensional numpy int64 array (`indptr`) and a one-dimensional structured numpy array (`data`).
### Type of Dataset diff --git a/src/power_grid_model/data_types.py b/src/power_grid_model/data_types.py index 1236e074fe..875cfb0e3f 100644 --- a/src/power_grid_model/data_types.py +++ b/src/power_grid_model/data_types.py @@ -13,52 +13,76 @@ # When we're dropping python 3.8, we should introduce proper NumPy type hinting -SparseBatchArray = Dict[str, np.ndarray] +SingleArray = Union[np.ndarray] """ -A sparse batch array is a dictionary containing the keys `indptr` and `data`. +A single array is a one-dimensional structured numpy array containing a list of components of the same type. + +- Examples: -- indptr: a one-dimensional numpy int64 array -- data: a one-dimensional structured numpy array. The exact dtype depends on the type of component. -- Example: {"indptr": <1d-array>, "data": <1d-array>} + - structure: <1d-array> + - concrete: array([(0, 10500.0), (0, 10500.0)], dtype=power_grid_meta_data["input"]["node"].dtype) """ -BatchArray = Union[np.ndarray, SparseBatchArray] +DenseBatchArray = Union[np.ndarray] +""" +A dense batch array is a two-dimensional structured numpy array containing a list of components of +the same type for each scenario. """ -A batch is a either a dense or a sparse batch array. + +SparseBatchArray = Dict[str, Union[np.ndarray, SingleArray]] +""" +A sparse batch array is a dictionary containing the keys `indptr` and `data`. + +- data: a :class:`SingleArray`. The exact dtype depends on the type of component. +- indptr: a one-dimensional numpy int64 array containing n+1 elements where n is the number of scenarios. + + - The elements are the indices in the data that point to the first element of that scenario. + - The last element is one after the data index of the last element of the last scenario. + - Usually, the last element will therefore be the size of the data.
- Examples: -dense: <2d-array> + - structure: {"indptr": <1d-array>, "data": :class:`SingleArray`} + - concrete example: {"indptr": [0, 2, 2, 3], "data": [(0, 1, 1), (1, 1, 1), (0, 0, 0)]} + + - scenario 0 sets the statuses of the components with ids 0 and 1 to 1 (and keeps defaults for other components) + - scenario 1 keeps the default values for all components + - scenario 2 sets the status of the component with id 0 to 0 (and keeps defaults for other components) +""" + +BatchArray = Union[DenseBatchArray, SparseBatchArray] +""" +A batch array is either a :class:`DenseBatchArray` or a :class:`SparseBatchArray`. +""" -sparse: {"indptr": <1d-array>, "data": <1d-array>} +DataArray = Union[SingleArray, BatchArray] +""" +A data array can be a :class:`SingleArray` or a :class:`BatchArray`. """ -SingleDataset = Dict[str, np.ndarray] +SingleDataset = Dict[str, SingleArray] """ -A single dataset is a dictionary where the keys are the component types and the values are one-dimensional -structured numpy arrays. +A single dataset is a dictionary where the keys are the component types and the values are +:class:`SingleArray` -- Example: {"node": <1d-array>, "line": <1d-array>} +- Example: {"node": :class:`SingleArray`, "line": :class:`SingleArray`} """ BatchDataset = Dict[str, BatchArray] """ -A batch dataset is a dictionary where the keys are the component types and the values are either two-dimensional -structured numpy arrays (dense batch array) or dictionaries with an indptr and a one-dimensional structured numpy -array (sparse batch array). +A batch dataset is a dictionary where the keys are the component types and the values are :class:`BatchArray` -- Example: {"node": <2d-array>, "line": {"indptr": <1d-array>, "data": <1d-array>}} +- Example: {"node": :class:`DenseBatchArray`, "line": :class:`SparseBatchArray`} """ Dataset = Union[SingleDataset, BatchDataset] """ -A general data set can be a single or a batch dataset.
+A general dataset can be a :class:`SingleDataset` or a :class:`BatchDataset`. - Examples: -single: {"node": <1d-array>, "line": <1d-array>} - -batch: {"node": <2d-array>, "line": {"indptr": <1d-array>, "data": <1d-array>}} + - single: {"node": :class:`SingleArray`, "line": :class:`SingleArray`} + - batch: {"node": :class:`DenseBatchArray`, "line": :class:`SparseBatchArray`} """ BatchList = List[SingleDataset] @@ -66,7 +90,7 @@ A batch list is an alternative representation of a batch. It is a list of single datasets, where each single dataset is actually a batch. The batch list is intended as an intermediate data type, during conversions. -- Example: [{"node": <1d-array>, "line": <1d-array>}, {"node": <1d-array>, "line": <1d-array>}] +- Example: [:class:`SingleDataset`, {"node": :class:`SingleArray`}] """ NominalValue = int @@ -97,11 +121,9 @@ - Examples: -real: 10500.0 - -nominal: 123 - -asym: (10400.0, 10500.0, 10600.0) + - real: 10500.0 + - nominal: 123 + - asym: (10400.0, 10500.0, 10600.0) """ Component = Dict[str, Union[AttributeValue, str]] @@ -128,9 +150,10 @@ - Example: - {"node": [{"id": 1, "u_rated": 10500.0}, {"id": 2, "u_rated": 10500.0}], - - "line": [{"id": 3, "from_node": 1, "to_node": 2, ...}],} + { + "node": [{"id": 1, "u_rated": 10500.0}, {"id": 2, "u_rated": 10500.0}], + "line": [{"id": 3, "from_node": 1, "to_node": 2, ...}], + } """ BatchPythonDataset = List[SinglePythonDataset] @@ -141,9 +164,8 @@ - Example: - [{"line": [{"id": 3, "from_status": 0, "to_status": 0, ...}],}, - - {"line": [{"id": 3, "from_status": 1, "to_status": 1, ...}],}] + [{"line": [{"id": 3, "from_status": 0, "to_status": 0, ...}],}, + {"line": [{"id": 3, "from_status": 1, "to_status": 1, ...}],}] """ PythonDataset = Union[SinglePythonDataset, BatchPythonDataset] @@ -152,15 +174,15 @@ - Examples: - single: - - {"node": [{"id": 1, "u_rated": 10500.0}, {"id": 2, "u_rated": 10500.0}], - - "line": [{"id": 3, "from_node": 1, "to_node": 2, ...}],} + - single: - batch: + { +
"node": [{"id": 1, "u_rated": 10500.0}, {"id": 2, "u_rated": 10500.0}], + "line": [{"id": 3, "from_node": 1, "to_node": 2, ...}], + } - [{"line": [{"id": 3, "from_status": 0, "to_status": 0, ...}],}, + - batch: - {"line": [{"id": 3, "from_status": 1, "to_status": 1, ...}],}] + [{"line": [{"id": 3, "from_status": 0, "to_status": 0, ...}],}, + {"line": [{"id": 3, "from_status": 1, "to_status": 1, ...}],}] """