diff --git a/bigframes/bigquery/_operations/geo.py b/bigframes/bigquery/_operations/geo.py index 262dca6d6b..0856baabea 100644 --- a/bigframes/bigquery/_operations/geo.py +++ b/bigframes/bigquery/_operations/geo.py @@ -106,7 +106,9 @@ def st_area( def st_difference( series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries], other: Union[ - bigframes.series.Series, bigframes.geopandas.GeoSeries, shapely.Geometry + bigframes.series.Series, + bigframes.geopandas.GeoSeries, + shapely.geometry.base.BaseGeometry, ], ) -> bigframes.series.Series: """ @@ -207,7 +209,9 @@ def st_difference( def st_distance( series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries], other: Union[ - bigframes.series.Series, bigframes.geopandas.GeoSeries, shapely.Geometry + bigframes.series.Series, + bigframes.geopandas.GeoSeries, + shapely.geometry.base.BaseGeometry, ], *, use_spheroid: bool = False, @@ -282,7 +286,9 @@ def st_distance( def st_intersection( series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries], other: Union[ - bigframes.series.Series, bigframes.geopandas.GeoSeries, shapely.Geometry + bigframes.series.Series, + bigframes.geopandas.GeoSeries, + shapely.geometry.base.BaseGeometry, ], ) -> bigframes.series.Series: """ diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index de6c331043..eff9d02c5c 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -607,7 +607,7 @@ def _infer_dtype_from_python_type(type_: type) -> Dtype: return DATE_DTYPE if issubclass(type_, datetime.time): return TIME_DTYPE - if issubclass(type_, shapely.Geometry): + if issubclass(type_, shapely.geometry.base.BaseGeometry): return GEO_DTYPE else: raise TypeError( diff --git a/setup.py b/setup.py index 1fe7006860..edc77e11b6 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ "pyarrow >=15.0.2", "pydata-google-auth >=1.8.2", "requests >=2.27.1", - "shapely >=2.0.0", + "shapely >=1.8.5", "sqlglot >=23.6.3", "tabulate >=0.9", "ipywidgets >=7.7.1", diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index b0537cd035..dff245d176 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -19,7 +19,7 @@ pyarrow==15.0.2 pydata-google-auth==1.8.2 requests==2.27.1 scikit-learn==1.2.2 -shapely==2.0.0 +shapely==1.8.5 sqlglot==23.6.3 tabulate==0.9 ipywidgets==7.7.1 diff --git a/tests/system/small/bigquery/test_geo.py b/tests/system/small/bigquery/test_geo.py index fa2c522109..be517fb5cc 100644 --- a/tests/system/small/bigquery/test_geo.py +++ b/tests/system/small/bigquery/test_geo.py @@ -15,6 +15,7 @@ import geopandas # type: ignore import pandas as pd import pandas.testing +import pytest from shapely.geometry import ( # type: ignore GeometryCollection, LineString, @@ -94,6 +95,12 @@ def test_geo_st_difference_with_geometry_objects(): def test_geo_st_difference_with_single_geometry_object(): + pytest.importorskip( + "shapely", + minversion="2.0.0", + reason="shapely objects must be hashable to include in our expression trees", + ) + data1 = [ Polygon([(0, 0), (10, 0), (10, 10), (0, 10), (0, 0)]), Polygon([(0, 1), (10, 1), (10, 9), (0, 9), (0, 1)]), @@ -205,6 +212,12 @@ def test_geo_st_distance_with_geometry_objects(): def test_geo_st_distance_with_single_geometry_object(): + pytest.importorskip( + "shapely", + minversion="2.0.0", + reason="shapely objects must be hashable to include in our expression trees", + ) + data1 = [ # 0.00001 is approximately 1 meter. Polygon([(0, 0), (0.00001, 0), (0.00001, 0.00001), (0, 0.00001), (0, 0)]), @@ -279,6 +292,12 @@ def test_geo_st_intersection_with_geometry_objects(): def test_geo_st_intersection_with_single_geometry_object(): + pytest.importorskip( + "shapely", + minversion="2.0.0", + reason="shapely objects must be hashable to include in our expression trees", + ) + data1 = [ Polygon([(0, 0), (10, 0), (10, 10), (0, 10), (0, 0)]), Polygon([(0, 1), (10, 1), (10, 9), (0, 9), (0, 1)]), diff --git a/tests/unit/core/test_dtypes.py b/tests/unit/core/test_dtypes.py index bbeac3602b..37658bc436 100644 --- a/tests/unit/core/test_dtypes.py +++ b/tests/unit/core/test_dtypes.py @@ -20,7 +20,7 @@ import pandas as pd import pyarrow as pa # type: ignore import pytest -import shapely # type: ignore +import shapely.geometry # type: ignore import bigframes.core.compile.ibis_types import bigframes.dtypes @@ -231,9 +231,9 @@ def test_bigframes_string_dtype_converts(ibis_dtype, bigframes_dtype_str): (bool, bigframes.dtypes.BOOL_DTYPE), (int, bigframes.dtypes.INT_DTYPE), (str, bigframes.dtypes.STRING_DTYPE), - (shapely.Point, bigframes.dtypes.GEO_DTYPE), - (shapely.Polygon, bigframes.dtypes.GEO_DTYPE), - (shapely.Geometry, bigframes.dtypes.GEO_DTYPE), + (shapely.geometry.Point, bigframes.dtypes.GEO_DTYPE), + (shapely.geometry.Polygon, bigframes.dtypes.GEO_DTYPE), + (shapely.geometry.base.BaseGeometry, bigframes.dtypes.GEO_DTYPE), ], ) def test_bigframes_type_supports_python_types(python_type, expected_dtype): diff --git a/tests/unit/core/test_sql.py b/tests/unit/core/test_sql.py index 1b5c63e061..17da3008fc 100644 --- a/tests/unit/core/test_sql.py +++ b/tests/unit/core/test_sql.py @@ -74,43 +74,45 @@ def test_simple_literal(value, expected_pattern): @pytest.mark.parametrize( - ("value", "expected"), + ("value", "expected_pattern"), ( # Try to have some list of literals for each scalar data type: # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types - ([None, None], "[NULL, NULL]"), - ([True, False], "[True, False]"), + ([None, None], re.escape("[NULL, NULL]")), + ([True, False], re.escape("[True, False]")), ( [b"\x01\x02\x03ABC", b"\x01\x02\x03ABC"], - "[b'\\x01\\x02\\x03ABC', b'\\x01\\x02\\x03ABC']", + re.escape("[b'\\x01\\x02\\x03ABC', b'\\x01\\x02\\x03ABC']"), ), ( [datetime.date(2025, 1, 1), datetime.date(2025, 1, 1)], - "[DATE('2025-01-01'), DATE('2025-01-01')]", + re.escape("[DATE('2025-01-01'), DATE('2025-01-01')]"), ), ( [datetime.datetime(2025, 1, 2, 3, 45, 6, 789123)], - "[DATETIME('2025-01-02T03:45:06.789123')]", + re.escape("[DATETIME('2025-01-02T03:45:06.789123')]"), ), ( - [shapely.Point(0, 1), shapely.Point(0, 2)], - "[ST_GEOGFROMTEXT('POINT (0 1)'), ST_GEOGFROMTEXT('POINT (0 2)')]", + [shapely.geometry.Point(0, 1), shapely.geometry.Point(0, 2)], + r"\[ST_GEOGFROMTEXT\('POINT \(0[.]?0* 1[.]?0*\)'\), ST_GEOGFROMTEXT\('POINT \(0[.]?0* 2[.]?0*\)'\)\]", ), # TODO: INTERVAL type (e.g. from dateutil.relativedelta) # TODO: JSON type (TBD what Python object that would correspond to) - ([123, 456], "[123, 456]"), + ([123, 456], re.escape("[123, 456]")), ( [decimal.Decimal("123.75"), decimal.Decimal("456.78")], - "[CAST('123.75' AS NUMERIC), CAST('456.78' AS NUMERIC)]", + re.escape("[CAST('123.75' AS NUMERIC), CAST('456.78' AS NUMERIC)]"), ), # TODO: support BIGNUMERIC by looking at precision/scale of the DECIMAL - ([123.75, 456.78], "[123.75, 456.78]"), + ([123.75, 456.78], re.escape("[123.75, 456.78]")), # TODO: support RANGE type - (["abc", "def"], "['abc', 'def']"), + (["abc", "def"], re.escape("['abc', 'def']")), # TODO: support STRUCT type (possibly another method?) ( [datetime.time(12, 34, 56, 789123), datetime.time(11, 25, 56, 789123)], - "[TIME(DATETIME('1970-01-01 12:34:56.789123')), TIME(DATETIME('1970-01-01 11:25:56.789123'))]", + re.escape( + "[TIME(DATETIME('1970-01-01 12:34:56.789123')), TIME(DATETIME('1970-01-01 11:25:56.789123'))]" + ), ), ( [ @@ -121,13 +123,15 @@ def test_simple_literal(value, expected_pattern): 2025, 2, 1, 4, 45, 6, 789123, tzinfo=datetime.timezone.utc ), ], - "[TIMESTAMP('2025-01-02T03:45:06.789123+00:00'), TIMESTAMP('2025-02-01T04:45:06.789123+00:00')]", + re.escape( + "[TIMESTAMP('2025-01-02T03:45:06.789123+00:00'), TIMESTAMP('2025-02-01T04:45:06.789123+00:00')]" + ), ), ), ) -def test_simple_literal_w_list(value: list, expected: str): +def test_simple_literal_w_list(value: list, expected_pattern: str): got = sql.simple_literal(value) - assert got == expected + assert re.match(expected_pattern, got) is not None def test_create_vector_search_sql_simple():