From 6b31ec7dda73c155fc94c5ccf53709099f8033dd Mon Sep 17 00:00:00 2001
From: Peng <peng.meng@intel.com>
Date: Mon, 22 May 2017 19:37:50 +0800
Subject: [PATCH 1/3] fix visibility of numInstances and degreesOfFreedom in LR
 and GLR - Python version

---
 python/pyspark/ml/regression.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 3c3fcc8d9b8d..2d17f95b0c44 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -323,6 +323,14 @@ def numInstances(self):
         """
         return self._call_java("numInstances")
 
+    @property
+    @since("2.2.0")
+    def degreesOfFreedom(self):
+        """
+        Degrees of freedom.
+        """
+        return self._call_java("degreesOfFreedom")
+
     @property
     @since("2.0.0")
     def devianceResiduals(self):
@@ -1565,6 +1573,14 @@ def predictionCol(self):
         """
         return self._call_java("predictionCol")
 
+    @property
+    @since("2.2.0")
+    def numInstances(self):
+        """
+        Number of instances in DataFrame predictions.
+        """
+        return self._call_java("numInstances")
+
     @property
     @since("2.0.0")
     def rank(self):

From a8b407f877269f235611e5dc5bb338c421206a57 Mon Sep 17 00:00:00 2001
From: Peng <peng.meng@intel.com>
Date: Tue, 23 May 2017 13:52:29 +0800
Subject: [PATCH 2/3] follow up of SPARK-20764

---
 python/pyspark/ml/tests.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
index 51a3e8efe8b4..78b76389ded3 100755
--- a/python/pyspark/ml/tests.py
+++ b/python/pyspark/ml/tests.py
@@ -1066,6 +1066,7 @@ def test_linear_regression_summary(self):
         self.assertAlmostEqual(s.r2, 1.0, 2)
         self.assertTrue(isinstance(s.residuals, DataFrame))
         self.assertEqual(s.numInstances, 2)
+        self.assertEqual(s.degreesOfFreedom, 1)
         devResiduals = s.devianceResiduals
         self.assertTrue(isinstance(devResiduals, list) and isinstance(devResiduals[0], float))
         coefStdErr = s.coefficientStandardErrors
@@ -1093,6 +1094,7 @@ def test_glr_summary(self):
         self.assertEqual(s.numIterations, 1)  # this should default to a single iteration of WLS
         self.assertTrue(isinstance(s.predictions, DataFrame))
         self.assertEqual(s.predictionCol, "prediction")
+        self.assertEqual(s.numInstances, 2)
         self.assertTrue(isinstance(s.residuals(), DataFrame))
         self.assertTrue(isinstance(s.residuals("pearson"), DataFrame))
         coefStdErr = s.coefficientStandardErrors

From 013adc4460c588e1e06a66d23ce66d864803554e Mon Sep 17 00:00:00 2001
From: Peng <peng.meng@intel.com>
Date: Tue, 23 May 2017 14:18:47 +0800
Subject: [PATCH 3/3] revise comment

---
 python/pyspark/ml/tests.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
index 78b76389ded3..a3393c624865 100755
--- a/python/pyspark/ml/tests.py
+++ b/python/pyspark/ml/tests.py
@@ -1076,7 +1076,8 @@ def test_linear_regression_summary(self):
         pValues = s.pValues
         self.assertTrue(isinstance(pValues, list) and isinstance(pValues[0], float))
         # test evaluation (with training dataset) produces a summary with same values
-        # one check is enough to verify a summary is returned, Scala version runs full test
+        # one check is enough to verify a summary is returned
+        # The child class LinearRegressionTrainingSummary runs full test
         sameSummary = model.evaluate(df)
         self.assertAlmostEqual(sameSummary.explainedVariance, s.explainedVariance)
 
@@ -1113,7 +1114,8 @@ def test_glr_summary(self):
         self.assertTrue(isinstance(s.nullDeviance, float))
         self.assertTrue(isinstance(s.dispersion, float))
         # test evaluation (with training dataset) produces a summary with same values
-        # one check is enough to verify a summary is returned, Scala version runs full test
+        # one check is enough to verify a summary is returned
+        # The child class GeneralizedLinearRegressionTrainingSummary runs full test
         sameSummary = model.evaluate(df)
         self.assertAlmostEqual(sameSummary.deviance, s.deviance)