
Commit eeed191

[SPARK-46808][PYTHON] Refine error classes in Python with automatic sorting function
### What changes were proposed in this pull request?

This PR proposes to:
- Add an automated way of writing the `error_classes.py` file: `from pyspark.errors.exceptions import _write_self; _write_self()`
- Fix the formatting of the JSON file to be consistent
- Fix typos within the error messages
- Fix parameter names to be consistent (it fixes some, not all)

### Why are the changes needed?

- It is currently difficult to add a new error class because the file enforces alphabetical order of error classes. When you add multiple error classes, you have to fix and move them around manually, which is troublesome.
- In addition, the current JSON format isn't very consistent.
- For consistency, this PR also changes *some* of the parameter names.

### Does this PR introduce _any_ user-facing change?

Yes, it fixes a couple of typos.

### How was this patch tested?

Unit tests were fixed together with the changes.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #44848 from HyukjinKwon/SPARK-46808.

Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
1 parent e7fb0ad commit eeed191
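At the heart of the new `_write_self()` helper (added in python/pyspark/errors/exceptions/__init__.py below) is the fact that `json.dumps(..., sort_keys=True, indent=2)` already produces a canonically sorted, consistently indented rendering of the error-class map. A minimal sketch of the idea; the toy error classes here are made up for illustration:

import json

# Hypothetical error classes, inserted in whatever order a contributor added them.
error_classes = {
    "UNSUPPORTED_OPERATION": {"message": ["Operation is not supported."]},
    "INVALID_TYPE": {"message": ["Argument `<arg_name>` should not be a <arg_type>."]},
}

# sort_keys=True canonicalizes the key order, so regenerating the file always
# yields the same alphabetically sorted output regardless of insertion order.
print(json.dumps(error_classes, sort_keys=True, indent=2))
# "INVALID_TYPE" is emitted before "UNSUPPORTED_OPERATION".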

File tree

11 files changed: +429 −382 lines


python/pyspark/errors/error_classes.py

Lines changed: 376 additions & 371 deletions
Large diffs are not rendered by default.

python/pyspark/errors/exceptions/__init__.py

Lines changed: 40 additions & 0 deletions
@@ -14,3 +14,43 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+
+
+def _write_self() -> None:
+    import json
+    from pyspark.errors import error_classes
+
+    with open("python/pyspark/errors/error_classes.py", "w") as f:
+        error_class_py_file = """#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# NOTE: Automatically sort this file via
+#  - cd $SPARK_HOME
+#  - bin/pyspark
+#  - from pyspark.errors.exceptions import _write_self; _write_self()
+import json
+
+
+ERROR_CLASSES_JSON = '''
+%s
+'''
+
+ERROR_CLASSES_MAP = json.loads(ERROR_CLASSES_JSON)
+""" % json.dumps(
+            error_classes.ERROR_CLASSES_MAP, sort_keys=True, indent=2
+        )
+        f.write(error_class_py_file)
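Per the NOTE embedded in the generated header, regenerating the file is done from the repository root. A usage sketch:

# cd $SPARK_HOME
# bin/pyspark
>>> from pyspark.errors.exceptions import _write_self; _write_self()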

python/pyspark/sql/connect/dataframe.py

Lines changed: 1 addition & 1 deletion
@@ -1663,7 +1663,7 @@ def sampleBy(
                         "arg_name": "fractions",
                         "arg_type": type(fractions).__name__,
                         "allowed_types": "float, int, str",
-                        "return_type": type(k).__name__,
+                        "item_type": type(k).__name__,
                     },
                 )
             fractions[k] = float(v)
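With the rename, the formatted DISALLOWED_TYPE_FOR_CONTAINER message fills an `item_type` placeholder (the offending dictionary key's type) instead of the misleading `return_type`. A sketch of how this surfaces, assuming a running `spark` session (e.g. inside bin/pyspark), mirroring the updated test_stat_sample_by test below:

from pyspark.errors import PySparkTypeError

df = spark.createDataFrame([(1, "a"), (2, "b")], ["key", "value"])
try:
    # A None key in `fractions` is not a float, int, or str, so sampleBy rejects it.
    df.sampleBy("key", fractions={None: 0.5}, seed=0)
except PySparkTypeError as e:
    print(e)  # formatted with item_type="NoneType" after this change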

python/pyspark/sql/connect/session.py

Lines changed: 1 addition & 1 deletion
@@ -353,7 +353,7 @@ def createDataFrame(
         if isinstance(data, DataFrame):
             raise PySparkTypeError(
                 error_class="INVALID_TYPE",
-                message_parameters={"arg_name": "data", "data_type": "DataFrame"},
+                message_parameters={"arg_name": "data", "arg_type": "DataFrame"},
             )
 
         _schema: Optional[Union[AtomicType, StructType]] = None

python/pyspark/sql/dataframe.py

Lines changed: 4 additions & 2 deletions
@@ -1319,6 +1319,7 @@ def hint(
                     error_class="DISALLOWED_TYPE_FOR_CONTAINER",
                     message_parameters={
                         "arg_name": "parameters",
+                        "arg_type": type(parameters).__name__,
                         "allowed_types": allowed_types_repr,
                         "item_type": type(p).__name__,
                     },
@@ -1329,6 +1330,7 @@ def hint(
                     error_class="DISALLOWED_TYPE_FOR_CONTAINER",
                     message_parameters={
                         "arg_name": "parameters",
+                        "arg_type": type(parameters).__name__,
                         "allowed_types": allowed_types_repr,
                         "item_type": type(p).__name__ + "[" + type(p[0]).__name__ + "]",
                     },
@@ -2385,7 +2387,7 @@ def sampleBy(
                         "arg_name": "fractions",
                         "arg_type": type(fractions).__name__,
                         "allowed_types": "float, int, str",
-                        "return_type": type(k).__name__,
+                        "item_type": type(k).__name__,
                     },
                 )
             fractions[k] = float(v)
@@ -5839,7 +5841,7 @@ def approxQuantile(
                         "arg_name": "col",
                         "arg_type": type(col).__name__,
                         "allowed_types": "str",
-                        "return_type": type(c).__name__,
+                        "item_type": type(c).__name__,
                     },
                 )
             col = _to_list(self._sc, cast(List["ColumnOrName"], col))
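The approxQuantile hunk applies the same rename to items of a list passed as `col`. A sketch, again assuming a running `spark` session:

from pyspark.errors import PySparkTypeError

df = spark.range(10)
try:
    # 123 is not a str, so the per-item container check fires on it.
    df.approxQuantile(["id", 123], [0.5], 0.0)
except PySparkTypeError as e:
    print(e)  # formatted with item_type="int" instead of return_type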

python/pyspark/sql/session.py

Lines changed: 1 addition & 1 deletion
@@ -1421,7 +1421,7 @@ def createDataFrame(  # type: ignore[misc]
         if isinstance(data, DataFrame):
             raise PySparkTypeError(
                 error_class="INVALID_TYPE",
-                message_parameters={"arg_name": "data", "data_type": "DataFrame"},
+                message_parameters={"arg_name": "data", "arg_type": "DataFrame"},
             )
 
         if isinstance(schema, str):
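The same `data_type` → `arg_type` rename lands in the classic (non-Connect) session. A sketch of the user-visible error, mirroring test_invalid_argument_create_dataframe below:

from pyspark.errors import PySparkTypeError

df = spark.range(1)
try:
    spark.createDataFrame(df)  # an existing DataFrame is not valid input data
except PySparkTypeError as e:
    print(e)  # INVALID_TYPE, now formatted with arg_type="DataFrame"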

python/pyspark/sql/tests/connect/test_connect_basic.py

Lines changed: 1 addition & 1 deletion
@@ -2282,7 +2282,7 @@ def test_stat_sample_by(self):
                 "arg_name": "fractions",
                 "arg_type": "dict",
                 "allowed_types": "float, int, str",
-                "return_type": "NoneType",
+                "item_type": "NoneType",
             },
         )

python/pyspark/sql/tests/test_dataframe.py

Lines changed: 2 additions & 2 deletions
@@ -1111,7 +1111,7 @@ def test_observe(self):
         # observation requires name (if given) to be non empty string
         with self.assertRaisesRegex(TypeError, "`name` should be a str, got int"):
             Observation(123)
-        with self.assertRaisesRegex(ValueError, "`name` must be a non empty string, got ''."):
+        with self.assertRaisesRegex(ValueError, "`name` must be a non-empty string, got ''."):
             Observation("")
 
         # dataframe.observe requires at least one expr
@@ -2044,7 +2044,7 @@ def test_invalid_argument_create_dataframe(self):
         self.check_error(
             exception=pe.exception,
             error_class="INVALID_TYPE",
-            message_parameters={"arg_name": "data", "data_type": "DataFrame"},
+            message_parameters={"arg_name": "data", "arg_type": "DataFrame"},
         )
python/pyspark/sql/tests/test_functions.py

Lines changed: 1 addition & 1 deletion
@@ -190,7 +190,7 @@ def test_sampleby(self):
                 "arg_name": "fractions",
                 "arg_type": "dict",
                 "allowed_types": "float, int, str",
-                "return_type": "NoneType",
+                "item_type": "NoneType",
             },
         )

python/pyspark/sql/types.py

Lines changed: 1 addition & 1 deletion
@@ -2236,7 +2236,7 @@ def verify_udf(obj: Any) -> None:
                 error_class="NOT_INSTANCE_OF",
                 message_parameters={
                     "value": str(obj),
-                    "data_type": str(dataType),
+                    "type": str(dataType),
                 },
             )
         verifier(dataType.toInternal(obj))
