diff --git a/README.md b/README.md index d3ec77054..00768a425 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ If you are using Maven without the BOM, add this to your dependencies: com.google.cloud google-cloud-firestore - 3.30.8 + 3.30.9 ``` diff --git a/google-cloud-firestore/src/main/java/com/google/cloud/firestore/Order.java b/google-cloud-firestore/src/main/java/com/google/cloud/firestore/Order.java index 4cd58c95b..7f37d8e05 100644 --- a/google-cloud-firestore/src/main/java/com/google/cloud/firestore/Order.java +++ b/google-cloud-firestore/src/main/java/com/google/cloud/firestore/Order.java @@ -136,9 +136,44 @@ public int compare(@Nonnull Value left, @Nonnull Value right) { /** Compare strings in UTF-8 encoded byte order */ public static int compareUtf8Strings(String left, String right) { - ByteString leftBytes = ByteString.copyFromUtf8(left); - ByteString rightBytes = ByteString.copyFromUtf8(right); - return compareByteStrings(leftBytes, rightBytes); + int i = 0; + while (i < left.length() && i < right.length()) { + int leftCodePoint = left.codePointAt(i); + int rightCodePoint = right.codePointAt(i); + + if (leftCodePoint != rightCodePoint) { + if (leftCodePoint < 128 && rightCodePoint < 128) { + // ASCII comparison + return Integer.compare(leftCodePoint, rightCodePoint); + } else { + // UTF-8 encode the character at index i for byte comparison. + ByteString leftBytes = ByteString.copyFromUtf8(getUtf8SafeBytes(left, i)); + ByteString rightBytes = ByteString.copyFromUtf8(getUtf8SafeBytes(right, i)); + int comp = compareByteStrings(leftBytes, rightBytes); + if (comp != 0) { + return comp; + } else { + // EXTREMELY RARE CASE: Code points differ, but their UTF-8 byte representations are + // identical. This can happen with malformed input (invalid surrogate pairs), where + // Java's encoding leads to unexpected byte sequences. Meanwhile, any invalid surrogate + // inputs get converted to "?" by protocol buffer while round tripping, so we almost + // never receive invalid strings from backend. + // Fallback to code point comparison for graceful handling. + return Integer.compare(leftCodePoint, rightCodePoint); + } + } + } + // Increment by 2 for surrogate pairs, 1 otherwise. + i += Character.charCount(leftCodePoint); + } + + // Compare lengths if all characters are equal + return Integer.compare(left.length(), right.length()); + } + + private static String getUtf8SafeBytes(String str, int index) { + int firstCodePoint = str.codePointAt(index); + return str.substring(index, index + Character.charCount(firstCodePoint)); } private int compareBlobs(Value left, Value right) { @@ -147,7 +182,7 @@ private int compareBlobs(Value left, Value right) { return compareByteStrings(leftBytes, rightBytes); } - private static int compareByteStrings(ByteString leftBytes, ByteString rightBytes) { + static int compareByteStrings(ByteString leftBytes, ByteString rightBytes) { int size = Math.min(leftBytes.size(), rightBytes.size()); for (int i = 0; i < size; i++) { // Make sure the bytes are unsigned diff --git a/google-cloud-firestore/src/test/java/com/google/cloud/firestore/OrderTest.java b/google-cloud-firestore/src/test/java/com/google/cloud/firestore/OrderTest.java index c4993b444..076cbf2db 100644 --- a/google-cloud-firestore/src/test/java/com/google/cloud/firestore/OrderTest.java +++ b/google-cloud-firestore/src/test/java/com/google/cloud/firestore/OrderTest.java @@ -16,7 +16,10 @@ package com.google.cloud.firestore; +import static com.google.cloud.firestore.Order.compareByteStrings; +import static com.google.cloud.firestore.Order.compareUtf8Strings; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; import com.google.firestore.v1.ArrayValue; import com.google.firestore.v1.MapValue; @@ -25,7 +28,9 @@ import com.google.protobuf.NullValue; import com.google.protobuf.Timestamp; import com.google.type.LatLng; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Random; import org.junit.Test; public class OrderTest { @@ -194,4 +199,184 @@ private Value objectValue(String key, Value value, Object... keysAndValues) { return Value.newBuilder().setMapValue(mapBuilder.build()).build(); } + + @Test + public void compareUtf8StringsShouldReturnCorrectValue() { + ArrayList errors = new ArrayList<>(); + int seed = new Random().nextInt(Integer.MAX_VALUE); + int passCount = 0; + StringGenerator stringGenerator = new StringGenerator(29750468); + StringPairGenerator stringPairGenerator = new StringPairGenerator(stringGenerator); + for (int i = 0; i < 1_000_000 && errors.size() < 10; i++) { + StringPairGenerator.StringPair stringPair = stringPairGenerator.next(); + final String s1 = stringPair.s1; + final String s2 = stringPair.s2; + + int actual = compareUtf8Strings(s1, s2); + + ByteString b1 = ByteString.copyFromUtf8(s1); + ByteString b2 = ByteString.copyFromUtf8(s2); + int expected = compareByteStrings(b1, b2); + + if (actual == expected) { + passCount++; + } else { + errors.add( + "compareUtf8Strings(s1=\"" + + s1 + + "\", s2=\"" + + s2 + + "\") returned " + + actual + + ", but expected " + + expected + + " (i=" + + i + + ", s1.length=" + + s1.length() + + ", s2.length=" + + s2.length() + + ")"); + } + } + + if (!errors.isEmpty()) { + StringBuilder sb = new StringBuilder(); + sb.append(errors.size()).append(" test cases failed, "); + sb.append(passCount).append(" test cases passed, "); + sb.append("seed=").append(seed).append(";"); + for (int i = 0; i < errors.size(); i++) { + sb.append("\nerrors[").append(i).append("]: ").append(errors.get(i)); + } + fail(sb.toString()); + } + } + + private static class StringPairGenerator { + + private final StringGenerator stringGenerator; + + public StringPairGenerator(StringGenerator stringGenerator) { + this.stringGenerator = stringGenerator; + } + + public StringPair next() { + String prefix = stringGenerator.next(); + String s1 = prefix + stringGenerator.next(); + String s2 = prefix + stringGenerator.next(); + return new StringPair(s1, s2); + } + + public static class StringPair { + public final String s1, s2; + + public StringPair(String s1, String s2) { + this.s1 = s1; + this.s2 = s2; + } + } + } + + private static class StringGenerator { + + private static final float DEFAULT_SURROGATE_PAIR_PROBABILITY = 0.33f; + private static final int DEFAULT_MAX_LENGTH = 20; + + private static final int MIN_HIGH_SURROGATE = 0xD800; + private static final int MAX_HIGH_SURROGATE = 0xDBFF; + private static final int MIN_LOW_SURROGATE = 0xDC00; + private static final int MAX_LOW_SURROGATE = 0xDFFF; + + private final Random rnd; + private final float surrogatePairProbability; + private final int maxLength; + + public StringGenerator(int seed) { + this(new Random(seed), DEFAULT_SURROGATE_PAIR_PROBABILITY, DEFAULT_MAX_LENGTH); + } + + public StringGenerator(Random rnd, float surrogatePairProbability, int maxLength) { + this.rnd = rnd; + this.surrogatePairProbability = validateProbability(surrogatePairProbability); + this.maxLength = validateLength(maxLength); + } + + private static float validateProbability(float probability) { + if (!Float.isFinite(probability)) { + throw new IllegalArgumentException( + "invalid surrogate pair probability: " + + probability + + " (must be between 0.0 and 1.0, inclusive)"); + } else if (probability < 0.0f) { + throw new IllegalArgumentException( + "invalid surrogate pair probability: " + + probability + + " (must be greater than or equal to zero)"); + } else if (probability > 1.0f) { + throw new IllegalArgumentException( + "invalid surrogate pair probability: " + + probability + + " (must be less than or equal to 1)"); + } + return probability; + } + + private static int validateLength(int length) { + if (length < 0) { + throw new IllegalArgumentException( + "invalid maximum string length: " + + length + + " (must be greater than or equal to zero)"); + } + return length; + } + + public String next() { + final int length = rnd.nextInt(maxLength + 1); + final StringBuilder sb = new StringBuilder(); + while (sb.length() < length) { + int codePoint = nextCodePoint(); + sb.appendCodePoint(codePoint); + } + return sb.toString(); + } + + private boolean isNextSurrogatePair() { + return nextBoolean(rnd, surrogatePairProbability); + } + + private static boolean nextBoolean(Random rnd, float probability) { + if (probability == 0.0f) { + return false; + } else if (probability == 1.0f) { + return true; + } else { + return rnd.nextFloat() < probability; + } + } + + private int nextCodePoint() { + if (isNextSurrogatePair()) { + return nextSurrogateCodePoint(); + } else { + return nextNonSurrogateCodePoint(); + } + } + + private int nextSurrogateCodePoint() { + int highSurrogate = + rnd.nextInt(MAX_HIGH_SURROGATE - MIN_HIGH_SURROGATE + 1) + MIN_HIGH_SURROGATE; + int lowSurrogate = rnd.nextInt(MAX_LOW_SURROGATE - MIN_LOW_SURROGATE + 1) + MIN_LOW_SURROGATE; + return Character.toCodePoint((char) highSurrogate, (char) lowSurrogate); + } + + private int nextNonSurrogateCodePoint() { + int codePoint; + do { + codePoint = rnd.nextInt(0x10000); // BMP range + } while (codePoint >= MIN_HIGH_SURROGATE + && codePoint <= MAX_LOW_SURROGATE); // Exclude surrogate range + return codePoint; + } + } } diff --git a/google-cloud-firestore/src/test/java/com/google/cloud/firestore/it/ITQueryTest.java b/google-cloud-firestore/src/test/java/com/google/cloud/firestore/it/ITQueryTest.java index 5ffb80d2a..d688f10a1 100644 --- a/google-cloud-firestore/src/test/java/com/google/cloud/firestore/it/ITQueryTest.java +++ b/google-cloud-firestore/src/test/java/com/google/cloud/firestore/it/ITQueryTest.java @@ -1169,6 +1169,10 @@ public void snapshotListenerSortsUnicodeStringsSameWayAsServer() throws Exceptio .set(col.document("e"), map("value", "P")) .set(col.document("f"), map("value", "︒")) .set(col.document("g"), map("value", "🐵")) + .set(col.document("h"), map("value", "你好")) + .set(col.document("i"), map("value", "你顥")) + .set(col.document("j"), map("value", "😁")) + .set(col.document("k"), map("value", "😀")) .commit() .get(); @@ -1192,7 +1196,7 @@ public void snapshotListenerSortsUnicodeStringsSameWayAsServer() throws Exceptio latch.await(); registration.remove(); - assertEquals(queryOrder, Arrays.asList("b", "a", "c", "f", "e", "d", "g")); + assertEquals(queryOrder, Arrays.asList("b", "a", "h", "i", "c", "f", "e", "d", "g", "k", "j")); assertEquals(queryOrder, listenerOrder); } @@ -1209,6 +1213,10 @@ public void snapshotListenerSortsUnicodeStringsInArraySameWayAsServer() throws E .set(col.document("e"), map("value", Arrays.asList("P"))) .set(col.document("f"), map("value", Arrays.asList("︒"))) .set(col.document("g"), map("value", Arrays.asList("🐵"))) + .set(col.document("h"), map("value", Arrays.asList("你好"))) + .set(col.document("i"), map("value", Arrays.asList("你顥"))) + .set(col.document("j"), map("value", Arrays.asList("😁"))) + .set(col.document("k"), map("value", Arrays.asList("😀"))) .commit() .get(); @@ -1232,7 +1240,7 @@ public void snapshotListenerSortsUnicodeStringsInArraySameWayAsServer() throws E latch.await(); registration.remove(); - assertEquals(queryOrder, Arrays.asList("b", "a", "c", "f", "e", "d", "g")); + assertEquals(queryOrder, Arrays.asList("b", "a", "h", "i", "c", "f", "e", "d", "g", "k", "j")); assertEquals(queryOrder, listenerOrder); } @@ -1249,6 +1257,10 @@ public void snapshotListenerSortsUnicodeStringsInMapSameWayAsServer() throws Exc .set(col.document("e"), map("value", map("foo", "P"))) .set(col.document("f"), map("value", map("foo", "︒"))) .set(col.document("g"), map("value", map("foo", "🐵"))) + .set(col.document("h"), map("value", map("foo", "你好"))) + .set(col.document("i"), map("value", map("foo", "你顥"))) + .set(col.document("j"), map("value", map("foo", "😁"))) + .set(col.document("k"), map("value", map("foo", "😀"))) .commit() .get(); @@ -1272,7 +1284,7 @@ public void snapshotListenerSortsUnicodeStringsInMapSameWayAsServer() throws Exc latch.await(); registration.remove(); - assertEquals(queryOrder, Arrays.asList("b", "a", "c", "f", "e", "d", "g")); + assertEquals(queryOrder, Arrays.asList("b", "a", "h", "i", "c", "f", "e", "d", "g", "k", "j")); assertEquals(queryOrder, listenerOrder); } @@ -1289,6 +1301,10 @@ public void snapshotListenerSortsUnicodeStringsInMapKeySameWayAsServer() throws .set(col.document("e"), map("value", map("P", "foo"))) .set(col.document("f"), map("value", map("︒", "foo"))) .set(col.document("g"), map("value", map("🐵", "foo"))) + .set(col.document("h"), map("value", map("你好", "foo"))) + .set(col.document("i"), map("value", map("你顥", "foo"))) + .set(col.document("j"), map("value", map("😁", "foo"))) + .set(col.document("k"), map("value", map("😀", "foo"))) .commit() .get(); @@ -1312,7 +1328,7 @@ public void snapshotListenerSortsUnicodeStringsInMapKeySameWayAsServer() throws latch.await(); registration.remove(); - assertEquals(queryOrder, Arrays.asList("b", "a", "c", "f", "e", "d", "g")); + assertEquals(queryOrder, Arrays.asList("b", "a", "h", "i", "c", "f", "e", "d", "g", "k", "j")); assertEquals(queryOrder, listenerOrder); } @@ -1329,6 +1345,10 @@ public void snapshotListenerSortsUnicodeStringsInDocumentKeySameWayAsServer() th .set(col.document("P"), map("value", "foo")) .set(col.document("︒"), map("value", "foo")) .set(col.document("🐵"), map("value", "foo")) + .set(col.document("你好"), map("value", "你好")) + .set(col.document("你顥"), map("value", "你顥")) + .set(col.document("😁"), map("value", "😁")) + .set(col.document("😀"), map("value", "😀")) .commit() .get(); @@ -1353,7 +1373,50 @@ public void snapshotListenerSortsUnicodeStringsInDocumentKeySameWayAsServer() th registration.remove(); assertEquals( - queryOrder, Arrays.asList("Sierpiński", "Łukasiewicz", "岩澤", "︒", "P", "🄟", "🐵")); + queryOrder, + Arrays.asList( + "Sierpiński", "Łukasiewicz", "你好", "你顥", "岩澤", "︒", "P", "🄟", "🐵", "😀", "😁")); + assertEquals(queryOrder, listenerOrder); + } + + @Test + public void snapshotListenerSortsInvalidUnicodeStringsSameWayAsServer() throws Exception { + CollectionReference col = createEmptyCollection(); + + // Note: Protocol Buffer converts any invalid surrogates to "?". + firestore + .batch() + .set(col.document("a"), map("value", "Z")) + .set(col.document("b"), map("value", "你好")) + .set(col.document("c"), map("value", "😀")) + .set(col.document("d"), map("value", "ab\uD800")) // Lone high surrogate + .set(col.document("e"), map("value", "ab\uDC00")) // Lone low surrogate + .set(col.document("f"), map("value", "ab\uD800\uD800")) // Unpaired high surrogate + .set(col.document("g"), map("value", "ab\uDC00\uDC00")) // Unpaired low surrogate + .commit() + .get(); + + Query query = col.orderBy("value", Direction.ASCENDING); + + QuerySnapshot snapshot = query.get().get(); + List queryOrder = + snapshot.getDocuments().stream().map(doc -> doc.getId()).collect(Collectors.toList()); + + CountDownLatch latch = new CountDownLatch(1); + List listenerOrder = new ArrayList<>(); + ListenerRegistration registration = + query.addSnapshotListener( + (value, error) -> { + listenerOrder.addAll( + value.getDocuments().stream() + .map(doc -> doc.getId()) + .collect(Collectors.toList())); + latch.countDown(); + }); + latch.await(); + registration.remove(); + + assertEquals(queryOrder, Arrays.asList("a", "d", "e", "f", "g", "b", "c")); assertEquals(queryOrder, listenerOrder); } }