diff --git a/bigframes/core/compile/ibis_types.py b/bigframes/core/compile/ibis_types.py index 78c2259cf0..af2b7908ad 100644 --- a/bigframes/core/compile/ibis_types.py +++ b/bigframes/core/compile/ibis_types.py @@ -47,6 +47,8 @@ ibis_dtypes.JSON, ] +IBIS_GEO_TYPE = ibis_dtypes.GeoSpatial(geotype="geography", srid=4326, nullable=True) + BIDIRECTIONAL_MAPPINGS: Iterable[Tuple[IbisDtype, bigframes.dtypes.Dtype]] = ( (ibis_dtypes.boolean, pd.BooleanDtype()), @@ -70,7 +72,7 @@ pd.ArrowDtype(pa.decimal256(76, 38)), ), ( - ibis_dtypes.GeoSpatial(geotype="geography", srid=4326, nullable=True), + IBIS_GEO_TYPE, gpd.array.GeometryDtype(), ), (ibis_dtypes.json, db_dtypes.JSONDtype()), @@ -177,6 +179,14 @@ def cast_ibis_value( ibis_dtypes.timestamp, ), ibis_dtypes.binary: (ibis_dtypes.string,), + ibis_dtypes.point: (IBIS_GEO_TYPE,), + ibis_dtypes.geometry: (IBIS_GEO_TYPE,), + ibis_dtypes.geography: (IBIS_GEO_TYPE,), + ibis_dtypes.linestring: (IBIS_GEO_TYPE,), + ibis_dtypes.polygon: (IBIS_GEO_TYPE,), + ibis_dtypes.multilinestring: (IBIS_GEO_TYPE,), + ibis_dtypes.multipoint: (IBIS_GEO_TYPE,), + ibis_dtypes.multipolygon: (IBIS_GEO_TYPE,), } value = ibis_value_to_canonical_type(value) @@ -282,6 +292,9 @@ def ibis_dtype_to_bigframes_dtype( if isinstance(ibis_dtype, ibis_dtypes.JSON): return bigframes.dtypes.JSON_DTYPE + if isinstance(ibis_dtype, ibis_dtypes.GeoSpatial): + return gpd.array.GeometryDtype() + if ibis_dtype in IBIS_TO_BIGFRAMES: return IBIS_TO_BIGFRAMES[ibis_dtype] elif isinstance(ibis_dtype, ibis_dtypes.Decimal): diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 4739cc9a99..ea642c20fd 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -1003,6 +1003,13 @@ def geo_area_op_impl(x: ibis_types.Value): return typing.cast(ibis_types.GeoSpatialValue, x).area() +@scalar_op_compiler.register_binary_op(ops.geo_st_geogpoint_op, pass_op=False) +def 
geo_st_geogpoint_op_impl(x: ibis_types.Value, y: ibis_types.Value): + return typing.cast(ibis_types.NumericValue, x).point( + typing.cast(ibis_types.NumericValue, y) + ) + + # Parameterized ops @scalar_op_compiler.register_unary_op(ops.StructFieldOp, pass_op=True) def struct_field_op_impl(x: ibis_types.Value, op: ops.StructFieldOp): diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py index bc0482f60d..b757e2b971 100644 --- a/bigframes/geopandas/geoseries.py +++ b/bigframes/geopandas/geoseries.py @@ -66,3 +66,12 @@ def area(self, crs=None) -> bigframes.series.Series: # type: ignore raise NotImplementedError( f"GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. {constants.FEEDBACK_LINK}" ) + + @classmethod + def from_xy(cls, x, y, index=None, session=None, **kwargs) -> GeoSeries: + # TODO: if either x or y is local and the other is remote. Use the + # session from the remote object. + series_x = bigframes.series.Series(x, index=index, session=session, **kwargs) + series_y = bigframes.series.Series(y, index=index, session=session, **kwargs) + + return cls(series_x._apply_binary_op(series_y, ops.geo_st_geogpoint_op)) diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index ba8f3f64d7..d35fa2c5c2 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -85,7 +85,12 @@ SqlScalarOp, where_op, ) -from bigframes.operations.geo_ops import geo_area_op, geo_x_op, geo_y_op +from bigframes.operations.geo_ops import ( + geo_area_op, + geo_st_geogpoint_op, + geo_x_op, + geo_y_op, +) from bigframes.operations.json_ops import ( JSONExtract, JSONExtractArray, @@ -337,6 +342,7 @@ "geo_x_op", "geo_y_op", "geo_area_op", + "geo_st_geogpoint_op", # Numpy ops mapping "NUMPY_TO_BINOP", "NUMPY_TO_OP", diff --git a/bigframes/operations/geo_ops.py b/bigframes/operations/geo_ops.py index bc14fa611b..0ae8accd56 100644 --- a/bigframes/operations/geo_ops.py +++ 
b/bigframes/operations/geo_ops.py @@ -36,3 +36,7 @@ dtypes.is_geo_like, dtypes.FLOAT_DTYPE, description="geo-like" ), ) + +geo_st_geogpoint_op = base_ops.create_binary_op( + name="geo_st_geogpoint", type_signature=op_typing.BinaryNumericGeo() +) diff --git a/bigframes/operations/type.py b/bigframes/operations/type.py index 441134aff5..86bb56fc39 100644 --- a/bigframes/operations/type.py +++ b/bigframes/operations/type.py @@ -121,6 +121,20 @@ def output_type( return bigframes.dtypes.coerce_to_common(left_type, right_type) +@dataclasses.dataclass +class BinaryNumericGeo(BinaryTypeSignature): + """Type signature for geo functions like from_xy that map two numeric inputs to a geography value.""" + + def output_type( + self, left_type: ExpressionType, right_type: ExpressionType + ) -> ExpressionType: + if (left_type is not None) and not bigframes.dtypes.is_numeric(left_type): + raise TypeError(f"Type {left_type} is not numeric") + if (right_type is not None) and not bigframes.dtypes.is_numeric(right_type): + raise TypeError(f"Type {right_type} is not numeric") + return bigframes.dtypes.GEO_DTYPE + + @dataclasses.dataclass class BinaryRealNumeric(BinaryTypeSignature): """Type signature for real-valued functions like divide, arctan2, pow.""" diff --git a/notebooks/geo/geoseries.ipynb b/notebooks/geo/geoseries.ipynb index 7dc4c596ca..4792c4fe27 100644 --- a/notebooks/geo/geoseries.ipynb +++ b/notebooks/geo/geoseries.ipynb @@ -44,7 +44,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Load the Counties table from the Census Bureau US Boundaries dataset" + "### 1. 
Load the Counties table from the Census Bureau US Boundaries dataset" ] }, { @@ -56,7 +56,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/arwas/src1/python-bigquery-dataframes/bigframes/session/_io/bigquery/read_gbq_table.py:280: DefaultIndexWarning: Table 'bigquery-public-data.geo_us_boundaries.counties' is clustered and/or partitioned, but BigQuery DataFrames was not able to find a suitable index. To avoid this warning, set at least one of: `index_col` or `filters`.\n", + "/usr/local/google/home/arwas/src/bigframes3/bigframes/session/_io/bigquery/read_gbq_table.py:280: DefaultIndexWarning: Table 'bigquery-public-data.geo_us_boundaries.counties' is clustered and/or partitioned, but BigQuery DataFrames was not able to find a suitable index. To avoid this warning, set at least one of: `index_col` or `filters`.\n", " warnings.warn(msg, category=bfe.DefaultIndexWarning)\n" ] } @@ -69,7 +69,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Create a series from the int_point_geom column" + "### 2. 
Create a series from the int_point_geom column" ] }, { @@ -103,11 +103,11 @@ { "data": { "text/plain": [ - "37 POINT (-91.19496 39.98605)\n", - "406 POINT (-84.86717 33.92103)\n", - "926 POINT (-82.47974 35.33641)\n", - "940 POINT (-75.50298 39.09709)\n", - "996 POINT (-92.56434 39.8298)\n", + "171 POINT (-95.50742 42.39186)\n", + "219 POINT (-105.42894 37.27755)\n", + "402 POINT (-93.34905 32.10121)\n", + "526 POINT (-84.60469 43.29233)\n", + "677 POINT (-89.5681 37.04779)\n", "Name: int_point_geom, dtype: geometry" ] }, @@ -136,11 +136,11 @@ { "data": { "text/plain": [ - "0 POINT (-91.19496 39.98605)\n", - "1 POINT (-84.86717 33.92103)\n", - "2 POINT (-82.47974 35.33641)\n", - "3 POINT (-75.50298 39.09709)\n", - "4 POINT (-92.56434 39.8298)\n", + "0 POINT (-95.50742 42.39186)\n", + "1 POINT (-105.42894 37.27755)\n", + "2 POINT (-93.34905 32.10121)\n", + "3 POINT (-84.60469 43.29233)\n", + "4 POINT (-89.5681 37.04779)\n", "dtype: geometry" ] }, @@ -185,11 +185,11 @@ { "data": { "text/plain": [ - "0 -91.194961\n", - "1 -84.867169\n", - "2 -82.479741\n", - "3 -75.502982\n", - "4 -92.56434\n", + "0 -95.507421\n", + "1 -105.42894\n", + "2 -93.34905\n", + "3 -84.60469\n", + "4 -89.568097\n", "dtype: Float64" ] }, @@ -217,11 +217,11 @@ { "data": { "text/plain": [ - "0 39.986053\n", - "1 33.92103\n", - "2 35.336415\n", - "3 39.097088\n", - "4 39.829795\n", + "0 42.39186\n", + "1 37.277547\n", + "2 32.101213\n", + "3 43.292326\n", + "4 37.047793\n", "dtype: Float64" ] }, @@ -367,11 +367,11 @@ { "data": { "text/plain": [ - "10 POLYGON ((-101.7778 40.34969, -101.77812 40.34...\n", - "127 POLYGON ((-89.22333 44.50398, -89.22334 44.499...\n", - "253 POLYGON ((-76.69446 37.07288, -76.69515 37.072...\n", - "261 POLYGON ((-98.70136 44.45055, -98.70136 44.450...\n", - "303 POLYGON ((-85.99565 30.28131, -85.99566 30.280...\n", + "54 POLYGON ((-93.76575 45.06448, -93.76575 45.064...\n", + "256 POLYGON ((-89.83723 42.68318, -89.83732 42.682...\n", + "266 POLYGON ((-104.19381 
39.56523, -104.19464 39.5...\n", + "485 MULTIPOLYGON (((-91.05884 32.17233, -91.05891 ...\n", + "765 POLYGON ((-83.61848 38.1557, -83.61861 38.1554...\n", "Name: county_geom, dtype: geometry" ] }, @@ -389,7 +389,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Convert the geometry collection to `bigframes.gopandas.GeoSeries`" + "### 2. Convert the geometry collection to `bigframes.geopandas.GeoSeries`" ] }, { @@ -400,11 +400,11 @@ { "data": { "text/plain": [ - "0 POLYGON ((-101.7778 40.34969, -101.77812 40.34...\n", - "1 POLYGON ((-89.22333 44.50398, -89.22334 44.499...\n", - "2 POLYGON ((-76.69446 37.07288, -76.69515 37.072...\n", - "3 POLYGON ((-98.70136 44.45055, -98.70136 44.450...\n", - "4 POLYGON ((-85.99565 30.28131, -85.99566 30.280...\n", + "0 POLYGON ((-93.76575 45.06448, -93.76575 45.064...\n", + "1 POLYGON ((-89.83723 42.68318, -89.83732 42.682...\n", + "2 POLYGON ((-104.19381 39.56523, -104.19464 39.5...\n", + "3 MULTIPOLYGON (((-91.05884 32.17233, -91.05891 ...\n", + "4 POLYGON ((-83.61848 38.1557, -83.61861 38.1554...\n", "dtype: geometry" ] }, @@ -442,14 +442,14 @@ "outputs": [ { "ename": "NotImplementedError", - "evalue": "GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.34.0", + "evalue": "GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. 
Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.35.0", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[13], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mfive_geom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marea\u001b[49m\n", - "File \u001b[0;32m~/src1/python-bigquery-dataframes/bigframes/geopandas/geoseries.py:66\u001b[0m, in \u001b[0;36mGeoSeries.area\u001b[0;34m(self, crs)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21marea\u001b[39m(\u001b[38;5;28mself\u001b[39m, crs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m bigframes\u001b[38;5;241m.\u001b[39mseries\u001b[38;5;241m.\u001b[39mSeries: \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a Series containing the area of each geometry in the GeoSeries\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;124;03m expressed in the units of the CRS.\u001b[39;00m\n\u001b[1;32m 51\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[38;5;124;03m GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), insetead.\u001b[39;00m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 66\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 67\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstants\u001b[38;5;241m.\u001b[39mFEEDBACK_LINK\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 68\u001b[0m )\n", - "\u001b[0;31mNotImplementedError\u001b[0m: GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.34.0" + "File \u001b[0;32m~/src/bigframes3/bigframes/geopandas/geoseries.py:66\u001b[0m, in \u001b[0;36mGeoSeries.area\u001b[0;34m(self, crs)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21marea\u001b[39m(\u001b[38;5;28mself\u001b[39m, crs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m bigframes\u001b[38;5;241m.\u001b[39mseries\u001b[38;5;241m.\u001b[39mSeries: \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a Series containing the area of each geometry in the GeoSeries\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;124;03m expressed in the units of the CRS.\u001b[39;00m\n\u001b[1;32m 51\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[38;5;124;03m GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), insetead.\u001b[39;00m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 66\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 67\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstants\u001b[38;5;241m.\u001b[39mFEEDBACK_LINK\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 68\u001b[0m )\n", + "\u001b[0;31mNotImplementedError\u001b[0m: GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.35.0" ] } ], @@ -461,7 +461,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Use `bigframes.bigquery.st_area` to retirive the `area` in square meters instead. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_area" + "### 3. Use `bigframes.bigquery.st_area` to retrieve the `area` in square meters instead. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_area" ] }, { @@ -481,11 +481,11 @@ { "data": { "text/plain": [ - "0 2382382043.48891\n", - "1 1977633097.26862\n", - "2 939388839.499466\n", - "3 3269015229.381782\n", - "4 2678752241.321673\n", + "0 1567505274.453911\n", + "1 1511436852.079554\n", + "2 4789800692.948824\n", + "3 1686877416.586061\n", + "4 740944862.916908\n", "dtype: Float64" ] }, @@ -498,6 +498,45 @@ "geom_area = bbq.st_area(five_geom)\n", "geom_area" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use `bigframes.geopandas.GeoSeries.from_xy()` to create a GeoSeries of `Point` geometries. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. 
Reuse the `geo_points.x` and `geo_points.y` results by passing them to `.from_xy()` " + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 POINT (-95.50742 42.39186)\n", + "1 POINT (-105.42894 37.27755)\n", + "2 POINT (-93.34905 32.10121)\n", + "3 POINT (-84.60469 43.29233)\n", + "4 POINT (-89.5681 37.04779)\n", + "dtype: geometry" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bigframes.geopandas.GeoSeries.from_xy(geo_points.x, geo_points.y)" + ] } ], "metadata": { diff --git a/tests/system/small/geopandas/test_geoseries.py b/tests/system/small/geopandas/test_geoseries.py index 2967e4d247..5951d0b12c 100644 --- a/tests/system/small/geopandas/test_geoseries.py +++ b/tests/system/small/geopandas/test_geoseries.py @@ -87,3 +87,23 @@ def test_geo_area_not_supported(): ), ): bf_series.area + + +def test_geo_from_xy(): + x = [2.5, 5, -3.0] + y = [0.5, 1, 1.5] + bf_result = ( + bigframes.geopandas.GeoSeries.from_xy(x, y) + .astype(geopandas.array.GeometryDtype()) + .to_pandas() + ) + pd_result = geopandas.GeoSeries.from_xy(x, y, crs="EPSG:4326").astype( + geopandas.array.GeometryDtype() + ) + + pd.testing.assert_series_equal( + bf_result, + pd_result, + check_series_type=False, + check_index=False, + ) diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py index d84dec94a8..b8a7af437b 100644 --- a/third_party/bigframes_vendored/geopandas/geoseries.py +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -90,3 +90,45 @@ def y(self) -> bigframes.series.Series: Return the y location (latitude) of point geometries. 
""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @classmethod + def from_xy(cls, x, y, index=None, **kwargs) -> bigframes.geopandas.GeoSeries: + """ + Alternate constructor to create a GeoSeries of Point geometries from + lists or arrays of x, y coordinates. + + In case of geographic coordinates, it is assumed that longitude is + captured by x coordinates and latitude by y. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.geopandas + >>> bpd.options.display.progress_bar = None + + >>> x = [2.5, 5, -3.0] + >>> y = [0.5, 1, 1.5] + + >>> s = bigframes.geopandas.GeoSeries.from_xy(x, y) + >>> s + 0 POINT (2.5 0.5) + 1 POINT (5 1) + 2 POINT (-3 1.5) + dtype: geometry + + Args: + x, y (array-like): + The x coordinates are longitudes and the y coordinates are latitudes. + + index (array-like or Index, optional): + The index for the GeoSeries. If not given and all coordinate + inputs are Series with an equal index, that index is used. + + **kwargs: + Additional arguments passed to the Series constructor, e.g. `name`. + + Returns: + bigframes.geopandas.GeoSeries: + A GeoSeries of Point geometries. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)