From 5caf63cc32a7546823e64d774faee9fb63a6b286 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Thu, 15 Mar 2018 16:07:27 -0700 Subject: [PATCH 1/8] raise same type of error when not falling back --- python/pyspark/sql/dataframe.py | 8 ++++---- python/pyspark/sql/session.py | 4 ++-- python/pyspark/sql/tests.py | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 3fc194d8ec1d..b32dd082dae2 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -2013,13 +2013,13 @@ def toPandas(self): warnings.warn(msg) use_arrow = False else: - msg = ( + e.message = ( "toPandas attempted Arrow optimization because " "'spark.sql.execution.arrow.enabled' is set to true; however, " "failed by the reason below:\n %s\n" "For fallback to non-optimization automatically, please set true to " "'spark.sql.execution.arrow.fallback.enabled'." % _exception_message(e)) - raise RuntimeError(msg) + raise # Try to use Arrow optimization when the schema is supported and the required version # of PyArrow is found, if 'spark.sql.execution.arrow.enabled' is enabled. @@ -2040,14 +2040,14 @@ def toPandas(self): except Exception as e: # We might have to allow fallback here as well but multiple Spark jobs can # be executed. So, simply fail in this case for now. - msg = ( + e.message = ( "toPandas attempted Arrow optimization because " "'spark.sql.execution.arrow.enabled' is set to true; however, " "failed unexpectedly:\n %s\n" "Note that 'spark.sql.execution.arrow.fallback.enabled' does " "not have an effect in such failure in the middle of " "computation." % _exception_message(e)) - raise RuntimeError(msg) + raise # Below is toPandas without Arrow optimization. pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index e82a9750a001..4fcdcefa1386 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -679,13 +679,13 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr "true." % _exception_message(e)) warnings.warn(msg) else: - msg = ( + e.message = ( "createDataFrame attempted Arrow optimization because " "'spark.sql.execution.arrow.enabled' is set to true; however, " "failed by the reason below:\n %s\n" "For fallback to non-optimization automatically, please set true to " "'spark.sql.execution.arrow.fallback.enabled'." % _exception_message(e)) - raise RuntimeError(msg) + raise data = self._convert_from_pandas(data, schema, timezone) if isinstance(schema, StructType): diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 480815d27333..49a7625513ec 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -3661,7 +3661,7 @@ def test_createDataFrame_with_incorrect_schema(self): pdf = self.create_pandas_data_frame() wrong_schema = StructType(list(reversed(self.schema))) with QuietTest(self.sc): - with self.assertRaisesRegexp(RuntimeError, ".*No cast.*string.*timestamp.*"): + with self.assertRaisesRegexp(Exception, ".*No cast.*string.*timestamp.*"): self.spark.createDataFrame(pdf, schema=wrong_schema) def test_createDataFrame_with_names(self): @@ -3686,7 +3686,7 @@ def test_createDataFrame_column_name_encoding(self): def test_createDataFrame_with_single_data_type(self): import pandas as pd with QuietTest(self.sc): - with self.assertRaisesRegexp(RuntimeError, ".*IntegerType.*not supported.*"): + with self.assertRaisesRegexp(ValueError, ".*IntegerType.*not supported.*"): self.spark.createDataFrame(pd.DataFrame({"a": [1]}), schema="int") def test_createDataFrame_does_not_modify_input(self): @@ -3761,7 +3761,7 @@ def test_createDataFrame_fallback_disabled(self): import pandas as pd with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, 'Unsupported type'): + with self.assertRaisesRegexp(TypeError, 'Unsupported type'): self.spark.createDataFrame( pd.DataFrame([[{u'a': 1}]]), "a: map") From dc5ed47ec36433850d6c58890d19b4173eda9a06 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Fri, 16 Mar 2018 10:21:10 -0700 Subject: [PATCH 2/8] create new exception with modified msg --- python/pyspark/sql/dataframe.py | 10 ++++++---- python/pyspark/sql/session.py | 5 +++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index b32dd082dae2..0f825281accf 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -2013,13 +2013,14 @@ def toPandas(self): warnings.warn(msg) use_arrow = False else: - e.message = ( + clazz = type(e) + msg = ( "toPandas attempted Arrow optimization because " "'spark.sql.execution.arrow.enabled' is set to true; however, " "failed by the reason below:\n %s\n" "For fallback to non-optimization automatically, please set true to " "'spark.sql.execution.arrow.fallback.enabled'." % _exception_message(e)) - raise + raise clazz(msg) # Try to use Arrow optimization when the schema is supported and the required version # of PyArrow is found, if 'spark.sql.execution.arrow.enabled' is enabled. @@ -2040,14 +2041,15 @@ def toPandas(self): except Exception as e: # We might have to allow fallback here as well but multiple Spark jobs can # be executed. So, simply fail in this case for now. - e.message = ( + clazz = type(e) + msg = ( "toPandas attempted Arrow optimization because " "'spark.sql.execution.arrow.enabled' is set to true; however, " "failed unexpectedly:\n %s\n" "Note that 'spark.sql.execution.arrow.fallback.enabled' does " "not have an effect in such failure in the middle of " "computation." % _exception_message(e)) - raise + raise clazz(msg) # Below is toPandas without Arrow optimization. pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 4fcdcefa1386..4fad1f414f90 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -679,13 +679,14 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr "true." % _exception_message(e)) warnings.warn(msg) else: - e.message = ( + clazz = type(e) + msg = ( "createDataFrame attempted Arrow optimization because " "'spark.sql.execution.arrow.enabled' is set to true; however, " "failed by the reason below:\n %s\n" "For fallback to non-optimization automatically, please set true to " "'spark.sql.execution.arrow.fallback.enabled'." % _exception_message(e)) - raise + raise clazz(msg) data = self._convert_from_pandas(data, schema, timezone) if isinstance(schema, StructType): From 39c34732a1fefb622cb7cbfca32282edd92aa44b Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Mon, 19 Mar 2018 20:33:15 -0700 Subject: [PATCH 3/8] try printing warning msg before raise --- python/pyspark/sql/dataframe.py | 9 +++++---- python/pyspark/sql/session.py | 4 ++-- python/pyspark/sql/utils.py | 6 ++++++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 0f825281accf..d705a07ddcc7 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -2013,14 +2013,14 @@ def toPandas(self): warnings.warn(msg) use_arrow = False else: - clazz = type(e) msg = ( "toPandas attempted Arrow optimization because " "'spark.sql.execution.arrow.enabled' is set to true; however, " "failed by the reason below:\n %s\n" "For fallback to non-optimization automatically, please set true to " "'spark.sql.execution.arrow.fallback.enabled'." % _exception_message(e)) - raise clazz(msg) + warnings.warn(msg) + raise # Try to use Arrow optimization when the schema is supported and the required version # of PyArrow is found, if 'spark.sql.execution.arrow.enabled' is enabled. @@ -2041,7 +2041,6 @@ def toPandas(self): except Exception as e: # We might have to allow fallback here as well but multiple Spark jobs can # be executed. So, simply fail in this case for now. - clazz = type(e) msg = ( "toPandas attempted Arrow optimization because " "'spark.sql.execution.arrow.enabled' is set to true; however, " @@ -2049,7 +2048,9 @@ def toPandas(self): "Note that 'spark.sql.execution.arrow.fallback.enabled' does " "not have an effect in such failure in the middle of " "computation." % _exception_message(e)) - raise clazz(msg) + # TODO: e.args = (msg,) + e.args[1:] + warnings.warn(msg) + raise # Below is toPandas without Arrow optimization. pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 4fad1f414f90..ca0b770b4d78 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -679,14 +679,14 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr "true." % _exception_message(e)) warnings.warn(msg) else: - clazz = type(e) msg = ( "createDataFrame attempted Arrow optimization because " "'spark.sql.execution.arrow.enabled' is set to true; however, " "failed by the reason below:\n %s\n" "For fallback to non-optimization automatically, please set true to " "'spark.sql.execution.arrow.fallback.enabled'." % _exception_message(e)) - raise clazz(msg) + warnings.warn(msg) + raise data = self._convert_from_pandas(data, schema, timezone) if isinstance(schema, StructType): diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py index 578298632dd4..45363f089a73 100644 --- a/python/pyspark/sql/utils.py +++ b/python/pyspark/sql/utils.py @@ -121,7 +121,10 @@ def require_minimum_pandas_version(): from distutils.version import LooseVersion try: import pandas + have_pandas = True except ImportError: + have_pandas = False + if not have_pandas: raise ImportError("Pandas >= %s must be installed; however, " "it was not found." % minimum_pandas_version) if LooseVersion(pandas.__version__) < LooseVersion(minimum_pandas_version): @@ -138,7 +141,10 @@ def require_minimum_pyarrow_version(): from distutils.version import LooseVersion try: import pyarrow + have_arrow = True except ImportError: + have_arrow = False + if not have_arrow: raise ImportError("PyArrow >= %s must be installed; however, " "it was not found." % minimum_pyarrow_version) if LooseVersion(pyarrow.__version__) < LooseVersion(minimum_pyarrow_version): From 17dd6053e466f214c852807b0d11db0961062c0a Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Wed, 21 Mar 2018 13:54:57 -0700 Subject: [PATCH 4/8] format msg --- python/pyspark/sql/dataframe.py | 22 +++++++++------------- python/pyspark/sql/session.py | 24 +++++++++++------------- 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index d705a07ddcc7..40e1a0c2bf3e 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -2000,25 +2000,22 @@ def toPandas(self): require_minimum_pyarrow_version() to_arrow_schema(self.schema) except Exception as e: + msg = ( + "toPandas attempted Arrow optimization because " + "'spark.sql.execution.arrow.enabled' is set to true; however, " + "failed by the reason below:\n %s\n" % _exception_message(e)) if self.sql_ctx.getConf("spark.sql.execution.arrow.fallback.enabled", "true") \ .lower() == "true": - msg = ( - "toPandas attempted Arrow optimization because " - "'spark.sql.execution.arrow.enabled' is set to true; however, " - "failed by the reason below:\n %s\n" - "Attempts non-optimization as " - "'spark.sql.execution.arrow.fallback.enabled' is set to " - "true." % _exception_message(e)) + msg += ( + "Attempting non-optimization as " + "'spark.sql.execution.arrow.fallback.enabled' is set to true.") warnings.warn(msg) use_arrow = False else: - msg = ( - "toPandas attempted Arrow optimization because " - "'spark.sql.execution.arrow.enabled' is set to true; however, " - "failed by the reason below:\n %s\n" + msg += ( "For fallback to non-optimization automatically, please set true to " - "'spark.sql.execution.arrow.fallback.enabled'." % _exception_message(e)) + "'spark.sql.execution.arrow.fallback.enabled'.") warnings.warn(msg) raise @@ -2048,7 +2045,6 @@ def toPandas(self): "Note that 'spark.sql.execution.arrow.fallback.enabled' does " "not have an effect in such failure in the middle of " "computation." % _exception_message(e)) - # TODO: e.args = (msg,) + e.args[1:] warnings.warn(msg) raise diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index ca0b770b4d78..e43ffef4f989 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -668,23 +668,21 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr except Exception as e: from pyspark.util import _exception_message + msg = ( + "createDataFrame attempted Arrow optimization because " + "'spark.sql.execution.arrow.enabled' is set to true; however, " + "failed by the reason below:\n %s\n" % _exception_message(e)) + if self.conf.get("spark.sql.execution.arrow.fallback.enabled", "true") \ .lower() == "true": - msg = ( - "createDataFrame attempted Arrow optimization because " - "'spark.sql.execution.arrow.enabled' is set to true; however, " - "failed by the reason below:\n %s\n" - "Attempts non-optimization as " - "'spark.sql.execution.arrow.fallback.enabled' is set to " - "true." % _exception_message(e)) + msg += ( + "Attempting non-optimization as " + "'spark.sql.execution.arrow.fallback.enabled' is set to true.") warnings.warn(msg) else: - msg = ( - "createDataFrame attempted Arrow optimization because " - "'spark.sql.execution.arrow.enabled' is set to true; however, " - "failed by the reason below:\n %s\n" - "For fallback to non-optimization automatically, please set true to " - "'spark.sql.execution.arrow.fallback.enabled'." % _exception_message(e)) + msg += ( + "For fallback to non-optimization automatically, please set " + "true to 'spark.sql.execution.arrow.fallback.enabled'.") warnings.warn(msg) raise data = self._convert_from_pandas(data, schema, timezone) From 55209b0de21fb4ee812f71026d725e244b1d680f Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Wed, 21 Mar 2018 14:38:01 -0700 Subject: [PATCH 5/8] made a small fix in warning msg --- python/pyspark/sql/tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 49a7625513ec..20426de3931a 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -3533,7 +3533,7 @@ def test_toPandas_fallback_enabled(self): warn.message for warn in warns if isinstance(warn.message, UserWarning)] self.assertTrue(len(user_warns) > 0) self.assertTrue( - "Attempts non-optimization" in _exception_message(user_warns[-1])) + "Attempting non-optimization" in _exception_message(user_warns[-1])) self.assertPandasEqual(pdf, pd.DataFrame({u'map': [{u'a': 1}]})) def test_toPandas_fallback_disabled(self): @@ -3754,7 +3754,7 @@ def test_createDataFrame_fallback_enabled(self): warn.message for warn in warns if isinstance(warn.message, UserWarning)] self.assertTrue(len(user_warns) > 0) self.assertTrue( - "Attempts non-optimization" in _exception_message(user_warns[-1])) + "Attempting non-optimization" in _exception_message(user_warns[-1])) self.assertEqual(df.collect(), [Row(a={u'a': 1})]) def test_createDataFrame_fallback_disabled(self): From e9a3e2c36c458e2930049f3269a06eb3def1e13a Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Fri, 23 Mar 2018 14:22:57 -0700 Subject: [PATCH 6/8] Revert "format msg" This reverts commit 17dd6053e466f214c852807b0d11db0961062c0a. --- python/pyspark/sql/dataframe.py | 22 +++++++++++++--------- python/pyspark/sql/session.py | 24 +++++++++++++----------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 40e1a0c2bf3e..d705a07ddcc7 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -2000,22 +2000,25 @@ def toPandas(self): require_minimum_pyarrow_version() to_arrow_schema(self.schema) except Exception as e: - msg = ( - "toPandas attempted Arrow optimization because " - "'spark.sql.execution.arrow.enabled' is set to true; however, " - "failed by the reason below:\n %s\n" % _exception_message(e)) if self.sql_ctx.getConf("spark.sql.execution.arrow.fallback.enabled", "true") \ .lower() == "true": - msg += ( - "Attempting non-optimization as " - "'spark.sql.execution.arrow.fallback.enabled' is set to true.") + msg = ( + "toPandas attempted Arrow optimization because " + "'spark.sql.execution.arrow.enabled' is set to true; however, " + "failed by the reason below:\n %s\n" + "Attempts non-optimization as " + "'spark.sql.execution.arrow.fallback.enabled' is set to " + "true." % _exception_message(e)) warnings.warn(msg) use_arrow = False else: - msg += ( + msg = ( + "toPandas attempted Arrow optimization because " + "'spark.sql.execution.arrow.enabled' is set to true; however, " + "failed by the reason below:\n %s\n" "For fallback to non-optimization automatically, please set true to " - "'spark.sql.execution.arrow.fallback.enabled'.") + "'spark.sql.execution.arrow.fallback.enabled'." % _exception_message(e)) warnings.warn(msg) raise @@ -2045,6 +2048,7 @@ def toPandas(self): "Note that 'spark.sql.execution.arrow.fallback.enabled' does " "not have an effect in such failure in the middle of " "computation." % _exception_message(e)) + # TODO: e.args = (msg,) + e.args[1:] warnings.warn(msg) raise diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index e43ffef4f989..ca0b770b4d78 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -668,21 +668,23 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr except Exception as e: from pyspark.util import _exception_message - msg = ( - "createDataFrame attempted Arrow optimization because " - "'spark.sql.execution.arrow.enabled' is set to true; however, " - "failed by the reason below:\n %s\n" % _exception_message(e)) - if self.conf.get("spark.sql.execution.arrow.fallback.enabled", "true") \ .lower() == "true": - msg += ( - "Attempting non-optimization as " - "'spark.sql.execution.arrow.fallback.enabled' is set to true.") + msg = ( + "createDataFrame attempted Arrow optimization because " + "'spark.sql.execution.arrow.enabled' is set to true; however, " + "failed by the reason below:\n %s\n" + "Attempts non-optimization as " + "'spark.sql.execution.arrow.fallback.enabled' is set to " + "true." % _exception_message(e)) warnings.warn(msg) else: - msg += ( - "For fallback to non-optimization automatically, please set " - "true to 'spark.sql.execution.arrow.fallback.enabled'.") + msg = ( + "createDataFrame attempted Arrow optimization because " + "'spark.sql.execution.arrow.enabled' is set to true; however, " + "failed by the reason below:\n %s\n" + "For fallback to non-optimization automatically, please set true to " + "'spark.sql.execution.arrow.fallback.enabled'." % _exception_message(e)) warnings.warn(msg) raise data = self._convert_from_pandas(data, schema, timezone) From 1a6be1df25a41b5bdcfc0e47378a757be384efab Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Fri, 23 Mar 2018 15:20:05 -0700 Subject: [PATCH 7/8] reword warnings --- python/pyspark/sql/dataframe.py | 18 ++++++++---------- python/pyspark/sql/session.py | 8 ++++---- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index d705a07ddcc7..5f6db0c26c98 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -2015,10 +2015,10 @@ def toPandas(self): else: msg = ( "toPandas attempted Arrow optimization because " - "'spark.sql.execution.arrow.enabled' is set to true; however, " - "failed by the reason below:\n %s\n" - "For fallback to non-optimization automatically, please set true to " - "'spark.sql.execution.arrow.fallback.enabled'." % _exception_message(e)) + "'spark.sql.execution.arrow.enabled' is set to true, but has reached " + "the error below and will not continue because automatic fallback " + "with 'spark.sql.execution.arrow.fallback.enabled' has been set to " + "false.\n %s" % _exception_message(e)) warnings.warn(msg) raise @@ -2043,12 +2043,10 @@ def toPandas(self): # be executed. So, simply fail in this case for now. msg = ( "toPandas attempted Arrow optimization because " - "'spark.sql.execution.arrow.enabled' is set to true; however, " - "failed unexpectedly:\n %s\n" - "Note that 'spark.sql.execution.arrow.fallback.enabled' does " - "not have an effect in such failure in the middle of " - "computation." % _exception_message(e)) - # TODO: e.args = (msg,) + e.args[1:] + "'spark.sql.execution.arrow.enabled' is set to true, but has reached " + "the error below and can not continue. Note that " + "'spark.sql.execution.arrow.fallback.enabled' does not have an effect " + "on failures in the middle of computation.\n %s" % _exception_message(e)) warnings.warn(msg) raise diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index ca0b770b4d78..990c2cd73149 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -681,10 +681,10 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr else: msg = ( "createDataFrame attempted Arrow optimization because " - "'spark.sql.execution.arrow.enabled' is set to true; however, " - "failed by the reason below:\n %s\n" - "For fallback to non-optimization automatically, please set true to " - "'spark.sql.execution.arrow.fallback.enabled'." % _exception_message(e)) + "'spark.sql.execution.arrow.enabled' is set to true, but has reached " + "the error below and will not continue because automatic fallback " + "with 'spark.sql.execution.arrow.fallback.enabled' has been set to " + "false.\n %s" % _exception_message(e)) warnings.warn(msg) raise data = self._convert_from_pandas(data, schema, timezone) From 5a43edf6c2ad0b6dda155a90e8831181376502e7 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Fri, 23 Mar 2018 16:11:52 -0700 Subject: [PATCH 8/8] made a small fix in warning msg (again) --- python/pyspark/sql/dataframe.py | 2 +- python/pyspark/sql/session.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 5f6db0c26c98..16f8e52dead7 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -2007,7 +2007,7 @@ def toPandas(self): "toPandas attempted Arrow optimization because " "'spark.sql.execution.arrow.enabled' is set to true; however, " "failed by the reason below:\n %s\n" - "Attempts non-optimization as " + "Attempting non-optimization as " "'spark.sql.execution.arrow.fallback.enabled' is set to " "true." % _exception_message(e)) warnings.warn(msg) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 990c2cd73149..13d6e2e53dbd 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -674,7 +674,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr "createDataFrame attempted Arrow optimization because " "'spark.sql.execution.arrow.enabled' is set to true; however, " "failed by the reason below:\n %s\n" - "Attempts non-optimization as " + "Attempting non-optimization as " "'spark.sql.execution.arrow.fallback.enabled' is set to " "true." % _exception_message(e)) warnings.warn(msg)