From f9073438d09d1b2ab1517b8762877625e76c1c78 Mon Sep 17 00:00:00 2001
From: krishnakalyan3 <krishnakalyan3@gmail.com>
Date: Wed, 1 Feb 2017 18:33:02 +0100
Subject: [PATCH 1/5] add bisectingKmeans

---
 R/pkg/vignettes/sparkr-vignettes.Rmd | 14 ++++++++++++++
 docs/ml-clustering.md                |  7 +++++++
 2 files changed, 21 insertions(+)
diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd
index 36a78477dc26..d5e43735a6a4 100644
--- a/R/pkg/vignettes/sparkr-vignettes.Rmd
+++ b/R/pkg/vignettes/sparkr-vignettes.Rmd
@@ -494,6 +494,8 @@ SparkR supports the following machine learning models and algorithms.
 
 * Latent Dirichlet Allocation (LDA)
 
+* Bisecting $k$-means
+
 #### Collaborative Filtering
 
 * Alternating Least Squares (ALS)
@@ -819,6 +821,18 @@ perplexity <- spark.perplexity(model, corpusDF)
 perplexity
 ```
 
+#### Bisecting k-means
+
+`spark.bisectingKmeans` is a kind of [hierarchical clustering](https://en.wikipedia.org/wiki/Hierarchical_clustering) using a divisive (or "top-down") approach: all observations start in one cluster, and splits are performed recursively as one moves down the hierarchy.
+
+```{r}
+df <- createDataFrame(iris)
+model <- spark.bisectingKmeans(df, Sepal_Length ~ Sepal_Width, k = 4)
+summary(kmeansModel)
+fitted <- predict(model, df)
+head(select(fitted, "Sepal_Length", "prediction"))
+```
+
 #### Alternating Least Squares
 
 `spark.als` learns latent factors in [collaborative filtering](https://en.wikipedia.org/wiki/Recommender_system#Collaborative_filtering) via [alternating least squares](http://dl.acm.org/citation.cfm?id=1608614).
diff --git a/docs/ml-clustering.md b/docs/ml-clustering.md
index d8b6553c5b84..c526c9772055 100644
--- a/docs/ml-clustering.md
+++ b/docs/ml-clustering.md
@@ -167,6 +167,13 @@ Refer to the [Python API docs](api/python/pyspark.ml.html#pyspark.ml.clustering.
 
 {% include_example python/ml/bisecting_k_means_example.py %}
 </div>
+
+<div data-lang="r" markdown="1">
+
+Refer to the [R API docs](api/R/spark.bisectingKmeans.html) for more details. {% include_example r/ml/bisectingKmeans.R %}
+
+{% include_example r/ml/lda.R %}
+</div>
 </div>
 
 ## Gaussian Mixture Model (GMM)

From 66fd02792687b425ae338cac157ec46aca5fdd63 Mon Sep 17 00:00:00 2001
From: krishnakalyan3 <krishnakalyan3@gmail.com>
Date: Wed, 1 Feb 2017 20:20:18 +0100
Subject: [PATCH 2/5] fix lda bad commit

---
 docs/ml-clustering.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/ml-clustering.md b/docs/ml-clustering.md
index c526c9772055..1186fb73d0fa 100644
--- a/docs/ml-clustering.md
+++ b/docs/ml-clustering.md
@@ -170,9 +170,9 @@ Refer to the [Python API docs](api/python/pyspark.ml.html#pyspark.ml.clustering.
 
 <div data-lang="r" markdown="1">
 
-Refer to the [R API docs](api/R/spark.bisectingKmeans.html) for more details. {% include_example r/ml/bisectingKmeans.R %}
+Refer to the [R API docs](api/R/spark.bisectingKmeans.html) for more details.
 
-{% include_example r/ml/lda.R %}
+{% include_example r/ml/bisectingKmeans.R %}
 </div>
 </div>
 

From 73e1c12c83be932f5f1928b2e2964974e921bfb5 Mon Sep 17 00:00:00 2001
From: krishnakalyan3 <krishnakalyan3@gmail.com>
Date: Wed, 1 Feb 2017 20:23:45 +0100
Subject: [PATCH 3/5] example

---
 examples/src/main/r/ml/bisectingKmeans.R | 42 ++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 examples/src/main/r/ml/bisectingKmeans.R

diff --git a/examples/src/main/r/ml/bisectingKmeans.R b/examples/src/main/r/ml/bisectingKmeans.R
new file mode 100644
index 000000000000..8228410ee59f
--- /dev/null
+++ b/examples/src/main/r/ml/bisectingKmeans.R
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/bisectingKmeans.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-bisectingKmeans-example")
+
+# $example on$
+irisDF <- suppressWarnings(createDataFrame(iris))
+
+# Fit bisecting k-means model with four centers
+model <- spark.bisectingKmeans(df, Sepal_Length ~ Sepal_Width, k = 4)
+
+# Get fitted result from a bisecting k-means model
+fitted.model <- fitted(model, "centers")
+
+# Model summary
+showDF(fitted.model)
+
+# fitted values on training data
+fitted <- predict(model, df)
+head(select(fitted, "Sepal_Length", "prediction"))
+# $example off$

From 240c9893989914424bbd2cf1cc01cf95daa95e27 Mon Sep 17 00:00:00 2001
From: krishnakalyan3 <krishnakalyan3@gmail.com>
Date: Wed, 1 Feb 2017 20:29:10 +0100
Subject: [PATCH 4/5] fix suppress and summary

---
 examples/src/main/r/ml/bisectingKmeans.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/src/main/r/ml/bisectingKmeans.R b/examples/src/main/r/ml/bisectingKmeans.R
index 8228410ee59f..37aeb74fc761 100644
--- a/examples/src/main/r/ml/bisectingKmeans.R
+++ b/examples/src/main/r/ml/bisectingKmeans.R
@@ -25,7 +25,7 @@ library(SparkR)
 sparkR.session(appName = "SparkR-ML-bisectingKmeans-example")
 
 # $example on$
-irisDF <- suppressWarnings(createDataFrame(iris))
+df <- createDataFrame(iris)
 
 # Fit bisecting k-means model with four centers
 model <- spark.bisectingKmeans(df, Sepal_Length ~ Sepal_Width, k = 4)
@@ -34,7 +34,7 @@ model <- spark.bisectingKmeans(df, Sepal_Length ~ Sepal_Width, k = 4)
 fitted.model <- fitted(model, "centers")
 
 # Model summary
-showDF(fitted.model)
+summary(fitted.model)
 
 # fitted values on training data
 fitted <- predict(model, df)

From 0cfb8914d8082f3311a3427dbe44001a1503b168 Mon Sep 17 00:00:00 2001
From: krishnakalyan3 <krishnakalyan3@gmail.com>
Date: Thu, 2 Feb 2017 12:18:06 +0100
Subject: [PATCH 5/5] fix order

---
 R/pkg/vignettes/sparkr-vignettes.Rmd | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd
index d5e43735a6a4..a7cac2f503d1 100644
--- a/R/pkg/vignettes/sparkr-vignettes.Rmd
+++ b/R/pkg/vignettes/sparkr-vignettes.Rmd
@@ -488,14 +488,14 @@ SparkR supports the following machine learning models and algorithms.
 
 #### Clustering
 
+* Bisecting $k$-means
+
 * Gaussian Mixture Model (GMM)
 
 * $k$-means Clustering
 
 * Latent Dirichlet Allocation (LDA)
 
-* Bisecting $k$-means
-
 #### Collaborative Filtering
 
 * Alternating Least Squares (ALS)
@@ -740,6 +740,18 @@ summary(rfModel)
 predictions <- predict(rfModel, df)
 ```
 
+#### Bisecting k-Means
+
+`spark.bisectingKmeans` is a kind of [hierarchical clustering](https://en.wikipedia.org/wiki/Hierarchical_clustering) using a divisive (or "top-down") approach: all observations start in one cluster, and splits are performed recursively as one moves down the hierarchy.
+
+```{r}
+df <- createDataFrame(iris)
+model <- spark.bisectingKmeans(df, Sepal_Length ~ Sepal_Width, k = 4)
+summary(model)
+fitted <- predict(model, df)
+head(select(fitted, "Sepal_Length", "prediction"))
+```
+
 #### Gaussian Mixture Model
 
 `spark.gaussianMixture` fits multivariate [Gaussian Mixture Model](https://en.wikipedia.org/wiki/Mixture_model#Multivariate_Gaussian_mixture_model) (GMM) against a `SparkDataFrame`. [Expectation-Maximization](https://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm) (EM) is used to approximate the maximum likelihood estimator (MLE) of the model.
@@ -821,18 +833,6 @@ perplexity <- spark.perplexity(model, corpusDF)
 perplexity
 ```
 
-#### Bisecting k-means
-
-`spark.bisectingKmeans` is a kind of [hierarchical clustering](https://en.wikipedia.org/wiki/Hierarchical_clustering) using a divisive (or "top-down") approach: all observations start in one cluster, and splits are performed recursively as one moves down the hierarchy.
-
-```{r}
-df <- createDataFrame(iris)
-model <- spark.bisectingKmeans(df, Sepal_Length ~ Sepal_Width, k = 4)
-summary(kmeansModel)
-fitted <- predict(model, df)
-head(select(fitted, "Sepal_Length", "prediction"))
-```
-
 #### Alternating Least Squares
 
 `spark.als` learns latent factors in [collaborative filtering](https://en.wikipedia.org/wiki/Recommender_system#Collaborative_filtering) via [alternating least squares](http://dl.acm.org/citation.cfm?id=1608614).