Skip to content

Commit ca1fef0

Browse files
authored
Merge pull request #704 from tisonkun/compat-cpp-with-buffer-tdigest
Accept TDigest bytes with buffered values
2 parents 37edd8c + 19d5afd commit ca1fef0

File tree

3 files changed

+79
-33
lines changed

3 files changed

+79
-33
lines changed

.editorconfig

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,28 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
root = true
19+
20+
[*]
21+
end_of_line = lf
22+
indent_style = space
23+
insert_final_newline = true
24+
trim_trailing_whitespace = true
25+
26+
[*.java]
27+
indent_size = tab
28+
tab_width = 2

src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -426,7 +426,7 @@ public static TDigestDouble heapify(final MemorySegment seg, final boolean isFlo
426426
return new TDigestDouble(reverseMerge, k, value, value, new double[] {value}, new long[] {1}, 1, null);
427427
}
428428
final int numCentroids = posSeg.getInt();
429-
posSeg.getInt(); // unused
429+
final int numBuffered = posSeg.getInt();
430430
final double min;
431431
final double max;
432432
if (isFloat) {
@@ -444,7 +444,11 @@ public static TDigestDouble heapify(final MemorySegment seg, final boolean isFlo
444444
weights[i] = isFloat ? posSeg.getInt() : posSeg.getLong();
445445
totalWeight += weights[i];
446446
}
447-
return new TDigestDouble(reverseMerge, k, min, max, means, weights, totalWeight, null);
447+
final double[] buffered = new double[numBuffered];
448+
for (int i = 0; i < numBuffered; i++) {
449+
buffered[i] = isFloat ? posSeg.getFloat() : posSeg.getDouble();
450+
}
451+
return new TDigestDouble(reverseMerge, k, min, max, means, weights, totalWeight, buffered);
448452
}
449453

450454
// compatibility with the format of the reference implementation

src/test/java/org/apache/datasketches/tdigest/TDigestCrossLanguageTest.java

Lines changed: 45 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -25,54 +25,68 @@
2525
import static org.apache.datasketches.common.TestUtil.javaPath;
2626
import static org.testng.Assert.assertEquals;
2727
import static org.testng.Assert.assertTrue;
28-
29-
import java.lang.foreign.MemorySegment;
3028
import java.io.IOException;
29+
import java.lang.foreign.MemorySegment;
3130
import java.nio.file.Files;
32-
3331
import org.testng.annotations.Test;
3432

3533
public class TDigestCrossLanguageTest {
3634

3735
@Test(groups = {CHECK_CPP_FILES})
3836
public void deserializeFromCppDouble() throws IOException {
39-
final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000};
40-
for (final int n: nArr) {
41-
final byte[] bytes = Files.readAllBytes(cppPath.resolve("tdigest_double_n" + n + "_cpp.sk"));
42-
final TDigestDouble td = TDigestDouble.heapify(MemorySegment.ofArray(bytes));
43-
assertTrue(n == 0 ? td.isEmpty() : !td.isEmpty());
44-
assertEquals(td.getTotalWeight(), n);
45-
if (n > 0) {
46-
assertEquals(td.getMinValue(), 1);
47-
assertEquals(td.getMaxValue(), n);
48-
assertEquals(td.getRank(0), 0);
49-
assertEquals(td.getRank(n + 1), 1);
50-
if (n == 1) {
51-
assertEquals(td.getRank(n), 0.5);
37+
final boolean[] with_buffer = {false, true};
38+
for (final boolean buffered : with_buffer) {
39+
final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000};
40+
for (final int n : nArr) {
41+
final byte[] bytes;
42+
if (buffered) {
43+
bytes = Files.readAllBytes(cppPath.resolve("tdigest_double_buf_n" + n + "_cpp.sk"));
5244
} else {
53-
assertEquals(td.getRank(n / 2), 0.5, 0.05);
45+
bytes = Files.readAllBytes(cppPath.resolve("tdigest_double_n" + n + "_cpp.sk"));
46+
}
47+
final TDigestDouble td = TDigestDouble.heapify(MemorySegment.ofArray(bytes));
48+
assertTrue(n == 0 ? td.isEmpty() : !td.isEmpty());
49+
assertEquals(td.getTotalWeight(), n);
50+
if (n > 0) {
51+
assertEquals(td.getMinValue(), 1);
52+
assertEquals(td.getMaxValue(), n);
53+
assertEquals(td.getRank(0), 0);
54+
assertEquals(td.getRank(n + 1), 1);
55+
if (n == 1) {
56+
assertEquals(td.getRank(n), 0.5);
57+
} else {
58+
assertEquals(td.getRank(n / 2), 0.5, 0.05);
59+
}
5460
}
5561
}
5662
}
5763
}
5864

5965
@Test(groups = {CHECK_CPP_FILES})
6066
public void deserializeFromCppFloat() throws IOException {
67+
final boolean[] with_buffer = {false, true};
6168
final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000};
62-
for (final int n: nArr) {
63-
final byte[] bytes = Files.readAllBytes(cppPath.resolve("tdigest_float_n" + n + "_cpp.sk"));
64-
final TDigestDouble td = TDigestDouble.heapify(MemorySegment.ofArray(bytes), true);
65-
assertTrue(n == 0 ? td.isEmpty() : !td.isEmpty());
66-
assertEquals(td.getTotalWeight(), n);
67-
if (n > 0) {
68-
assertEquals(td.getMinValue(), 1);
69-
assertEquals(td.getMaxValue(), n);
70-
assertEquals(td.getRank(0), 0);
71-
assertEquals(td.getRank(n + 1), 1);
72-
if (n == 1) {
73-
assertEquals(td.getRank(n), 0.5);
69+
for (final boolean buffered : with_buffer) {
70+
for (final int n : nArr) {
71+
final byte[] bytes;
72+
if (buffered) {
73+
bytes = Files.readAllBytes(cppPath.resolve("tdigest_float_buf_n" + n + "_cpp.sk"));
7474
} else {
75-
assertEquals(td.getRank(n / 2), 0.5, 0.05);
75+
bytes = Files.readAllBytes(cppPath.resolve("tdigest_float_n" + n + "_cpp.sk"));
76+
}
77+
final TDigestDouble td = TDigestDouble.heapify(MemorySegment.ofArray(bytes), true);
78+
assertTrue(n == 0 ? td.isEmpty() : !td.isEmpty());
79+
assertEquals(td.getTotalWeight(), n);
80+
if (n > 0) {
81+
assertEquals(td.getMinValue(), 1);
82+
assertEquals(td.getMaxValue(), n);
83+
assertEquals(td.getRank(0), 0);
84+
assertEquals(td.getRank(n + 1), 1);
85+
if (n == 1) {
86+
assertEquals(td.getRank(n), 0.5);
87+
} else {
88+
assertEquals(td.getRank(n / 2), 0.5, 0.05);
89+
}
7690
}
7791
}
7892
}
@@ -81,7 +95,7 @@ public void deserializeFromCppFloat() throws IOException {
8195
@Test(groups = {GENERATE_JAVA_FILES})
8296
public void generateForCppDouble() throws IOException {
8397
final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000};
84-
for (final int n: nArr) {
98+
for (final int n : nArr) {
8599
final TDigestDouble td = new TDigestDouble((short) 100);
86100
for (int i = 1; i <= n; i++) {
87101
td.update(i);

0 commit comments

Comments
 (0)