Skip to content

Commit aeb7eb5

Browse files
committed
Upgrade breeze version to 0.13.1
1 parent 2eaf4f3 commit aeb7eb5

File tree

9 files changed

+37
-40
lines changed

9 files changed

+37
-40
lines changed

LICENSE

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -297,3 +297,4 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
297297
(MIT License) RowsGroup (http://datatables.net/license/mit)
298298
(MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html)
299299
(MIT License) modernizr (https://github.com/Modernizr/Modernizr/blob/master/LICENSE)
300+
(MIT License) machinist (https://github.com/typelevel/machinist)

R/pkg/inst/tests/testthat/test_mllib_classification.R

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -288,18 +288,18 @@ test_that("spark.mlp", {
288288
c(0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9))
289289
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
290290
expect_equal(head(mlpPredictions$prediction, 10),
291-
c("1.0", "1.0", "1.0", "1.0", "2.0", "1.0", "2.0", "2.0", "1.0", "0.0"))
291+
c("1.0", "1.0", "2.0", "1.0", "2.0", "1.0", "2.0", "2.0", "1.0", "0.0"))
292292

293293
model <- spark.mlp(df, label ~ features, layers = c(4, 3), maxIter = 2, initialWeights =
294294
c(0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 5.0, 5.0, 5.0, 9.0, 9.0, 9.0, 9.0, 9.0))
295295
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
296296
expect_equal(head(mlpPredictions$prediction, 10),
297-
c("1.0", "1.0", "1.0", "1.0", "2.0", "1.0", "2.0", "2.0", "1.0", "0.0"))
297+
c("1.0", "1.0", "2.0", "1.0", "2.0", "1.0", "2.0", "2.0", "1.0", "0.0"))
298298

299299
model <- spark.mlp(df, label ~ features, layers = c(4, 3), maxIter = 2)
300300
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
301301
expect_equal(head(mlpPredictions$prediction, 10),
302-
c("1.0", "1.0", "1.0", "1.0", "0.0", "1.0", "0.0", "2.0", "1.0", "0.0"))
302+
c("1.0", "1.0", "1.0", "1.0", "0.0", "1.0", "0.0", "0.0", "1.0", "0.0"))
303303

304304
# Test formula works well
305305
df <- suppressWarnings(createDataFrame(iris))
@@ -310,8 +310,8 @@ test_that("spark.mlp", {
310310
expect_equal(summary$numOfOutputs, 3)
311311
expect_equal(summary$layers, c(4, 3))
312312
expect_equal(length(summary$weights), 15)
313-
expect_equal(head(summary$weights, 5), list(-1.1957257, -5.2693685, 7.4489734, -6.3751413,
314-
-10.2376130), tolerance = 1e-6)
313+
expect_equal(head(summary$weights, 5), list(-0.5793153, -4.652961, 6.216155, -6.649478,
314+
-10.51147), tolerance = 1e-3)
315315
})
316316

317317
test_that("spark.naiveBayes", {

dev/deps/spark-deps-hadoop-2.6

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,8 @@ avro-mapred-1.7.7-hadoop2.jar
1919
base64-2.3.8.jar
2020
bcprov-jdk15on-1.51.jar
2121
bonecp-0.8.0.RELEASE.jar
22-
breeze-macros_2.11-0.12.jar
23-
breeze_2.11-0.12.jar
22+
breeze-macros_2.11-0.13.1.jar
23+
breeze_2.11-0.13.1.jar
2424
calcite-avatica-1.2.0-incubating.jar
2525
calcite-core-1.2.0-incubating.jar
2626
calcite-linq4j-1.2.0-incubating.jar
@@ -129,6 +129,8 @@ libfb303-0.9.3.jar
129129
libthrift-0.9.3.jar
130130
log4j-1.2.17.jar
131131
lz4-1.3.0.jar
132+
machinist_2.11-0.6.1.jar
133+
macro-compat_2.11-1.1.1.jar
132134
mail-1.4.7.jar
133135
mesos-1.0.0-shaded-protobuf.jar
134136
metrics-core-3.1.2.jar
@@ -162,13 +164,13 @@ scala-parser-combinators_2.11-1.0.4.jar
162164
scala-reflect-2.11.8.jar
163165
scala-xml_2.11-1.0.2.jar
164166
scalap-2.11.8.jar
165-
shapeless_2.11-2.0.0.jar
167+
shapeless_2.11-2.3.2.jar
166168
slf4j-api-1.7.16.jar
167169
slf4j-log4j12-1.7.16.jar
168170
snappy-0.2.jar
169171
snappy-java-1.1.2.6.jar
170-
spire-macros_2.11-0.7.4.jar
171-
spire_2.11-0.7.4.jar
172+
spire-macros_2.11-0.13.0.jar
173+
spire_2.11-0.13.0.jar
172174
stax-api-1.0-2.jar
173175
stax-api-1.0.1.jar
174176
stream-2.7.0.jar

dev/deps/spark-deps-hadoop-2.7

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,8 @@ avro-mapred-1.7.7-hadoop2.jar
1919
base64-2.3.8.jar
2020
bcprov-jdk15on-1.51.jar
2121
bonecp-0.8.0.RELEASE.jar
22-
breeze-macros_2.11-0.12.jar
23-
breeze_2.11-0.12.jar
22+
breeze-macros_2.11-0.13.1.jar
23+
breeze_2.11-0.13.1.jar
2424
calcite-avatica-1.2.0-incubating.jar
2525
calcite-core-1.2.0-incubating.jar
2626
calcite-linq4j-1.2.0-incubating.jar
@@ -130,6 +130,8 @@ libfb303-0.9.3.jar
130130
libthrift-0.9.3.jar
131131
log4j-1.2.17.jar
132132
lz4-1.3.0.jar
133+
machinist_2.11-0.6.1.jar
134+
macro-compat_2.11-1.1.1.jar
133135
mail-1.4.7.jar
134136
mesos-1.0.0-shaded-protobuf.jar
135137
metrics-core-3.1.2.jar
@@ -163,13 +165,13 @@ scala-parser-combinators_2.11-1.0.4.jar
163165
scala-reflect-2.11.8.jar
164166
scala-xml_2.11-1.0.2.jar
165167
scalap-2.11.8.jar
166-
shapeless_2.11-2.0.0.jar
168+
shapeless_2.11-2.3.2.jar
167169
slf4j-api-1.7.16.jar
168170
slf4j-log4j12-1.7.16.jar
169171
snappy-0.2.jar
170172
snappy-java-1.1.2.6.jar
171-
spire-macros_2.11-0.7.4.jar
172-
spire_2.11-0.7.4.jar
173+
spire-macros_2.11-0.13.0.jar
174+
spire_2.11-0.13.0.jar
173175
stax-api-1.0-2.jar
174176
stax-api-1.0.1.jar
175177
stream-2.7.0.jar

mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -894,10 +894,10 @@ object GeneralizedLinearRegression extends DefaultParamsReadable[GeneralizedLine
894894

895895
private[regression] object Probit extends Link("probit") {
896896

897-
override def link(mu: Double): Double = dist.Gaussian(0.0, 1.0).icdf(mu)
897+
override def link(mu: Double): Double = dist.Gaussian(0.0, 1.0).inverseCdf(mu)
898898

899899
override def deriv(mu: Double): Double = {
900-
1.0 / dist.Gaussian(0.0, 1.0).pdf(dist.Gaussian(0.0, 1.0).icdf(mu))
900+
1.0 / dist.Gaussian(0.0, 1.0).pdf(dist.Gaussian(0.0, 1.0).inverseCdf(mu))
901901
}
902902

903903
override def unlink(eta: Double): Double = dist.Gaussian(0.0, 1.0).cdf(eta)

mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -788,20 +788,14 @@ class DistributedLDAModel private[clustering] (
788788
@Since("1.5.0")
789789
def topTopicsPerDocument(k: Int): RDD[(Long, Array[Int], Array[Double])] = {
790790
graph.vertices.filter(LDA.isDocumentVertex).map { case (docID, topicCounts) =>
791-
// TODO: Remove work-around for the breeze bug.
792-
// https://github.com/scalanlp/breeze/issues/561
793-
val topIndices = if (k == topicCounts.length) {
794-
Seq.range(0, k)
795-
} else {
796-
argtopk(topicCounts, k)
797-
}
791+
val topIndices = argtopk(topicCounts, k)
798792
val sumCounts = sum(topicCounts)
799793
val weights = if (sumCounts != 0) {
800-
topicCounts(topIndices) / sumCounts
794+
topicCounts(topIndices).toArray.map(_ / sumCounts)
801795
} else {
802-
topicCounts(topIndices)
796+
topicCounts(topIndices).toArray
803797
}
804-
(docID.toLong, topIndices.toArray, weights.toArray)
798+
(docID.toLong, topIndices.toArray, weights)
805799
}
806800
}
807801

mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -191,8 +191,8 @@ class LBFGSSuite extends SparkFunSuite with MLlibTestSparkContext with Matchers
191191
// With smaller convergenceTol, it takes more steps.
192192
assert(lossLBFGS3.length > lossLBFGS2.length)
193193

194-
// Based on observation, lossLBFGS2 runs 5 iterations, no theoretically guaranteed.
195-
assert(lossLBFGS3.length == 6)
194+
// Based on observation, lossLBFGS3 runs 7 iterations, not theoretically guaranteed.
195+
assert(lossLBFGS3.length == 7)
196196
assert((lossLBFGS3(4) - lossLBFGS3(5)) / lossLBFGS3(4) < convergenceTol)
197197
}
198198

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -658,7 +658,7 @@
658658
<dependency>
659659
<groupId>org.scalanlp</groupId>
660660
<artifactId>breeze_${scala.binary.version}</artifactId>
661-
<version>0.12</version>
661+
<version>0.13.1</version>
662662
<exclusions>
663663
<!-- This is included as a compile-scoped dependency by jtransforms, which is
664664
a dependency of breeze. -->

python/pyspark/ml/classification.py

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -190,30 +190,28 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
190190
>>> blor = LogisticRegression(maxIter=5, regParam=0.01, weightCol="weight")
191191
>>> blorModel = blor.fit(bdf)
192192
>>> blorModel.coefficients
193-
DenseVector([5.5...])
193+
DenseVector([5.4...])
194194
>>> blorModel.intercept
195-
-2.68...
195+
-2.63...
196196
>>> mdf = sc.parallelize([
197197
... Row(label=1.0, weight=2.0, features=Vectors.dense(1.0)),
198198
... Row(label=0.0, weight=2.0, features=Vectors.sparse(1, [], [])),
199199
... Row(label=2.0, weight=2.0, features=Vectors.dense(3.0))]).toDF()
200200
>>> mlor = LogisticRegression(maxIter=5, regParam=0.01, weightCol="weight",
201201
... family="multinomial")
202202
>>> mlorModel = mlor.fit(mdf)
203-
>>> print(mlorModel.coefficientMatrix)
204-
DenseMatrix([[-2.3...],
205-
[ 0.2...],
206-
[ 2.1... ]])
203+
>>> mlorModel.coefficientMatrix
204+
DenseMatrix(3, 1, [-2.3..., 0.2..., 2.1...], 1)
207205
>>> mlorModel.interceptVector
208-
DenseVector([2.0..., 0.8..., -2.8...])
206+
DenseVector([2.1..., 0.6..., -2.8...])
209207
>>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0))]).toDF()
210208
>>> result = blorModel.transform(test0).head()
211209
>>> result.prediction
212210
0.0
213211
>>> result.probability
214212
DenseVector([0.99..., 0.00...])
215213
>>> result.rawPrediction
216-
DenseVector([8.22..., -8.22...])
214+
DenseVector([8.12..., -8.12...])
217215
>>> test1 = sc.parallelize([Row(features=Vectors.sparse(1, [0], [1.0]))]).toDF()
218216
>>> blorModel.transform(test1).head().prediction
219217
1.0
@@ -1490,9 +1488,9 @@ class OneVsRest(Estimator, OneVsRestParams, MLReadable, MLWritable):
14901488
>>> ovr = OneVsRest(classifier=lr)
14911489
>>> model = ovr.fit(df)
14921490
>>> [x.coefficients for x in model.models]
1493-
[DenseVector([3.3925, 1.8785]), DenseVector([-4.3016, -6.3163]), DenseVector([-4.5855, 6.1785])]
1491+
[DenseVector([4.9791, 2.426]), DenseVector([-4.1198, -5.9326]), DenseVector([-3.314, 5.2423])]
14941492
>>> [x.intercept for x in model.models]
1495-
[-3.64747..., 2.55078..., -1.10165...]
1493+
[-5.06544..., 2.30341..., -1.29133...]
14961494
>>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0, 0.0))]).toDF()
14971495
>>> model.transform(test0).head().prediction
14981496
1.0

0 commit comments

Comments
 (0)