Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
96de3e3
add test
milaGGL Jan 6, 2025
24bd892
chore: generate libraries at Mon Jan 6 16:30:40 UTC 2025
cloud-java-bot Jan 6, 2025
9e812fa
Revert "chore: generate libraries at Mon Jan 6 16:30:40 UTC 2025"
milaGGL Jan 6, 2025
149d3e1
chore: generate libraries at Mon Jan 6 16:54:57 UTC 2025
cloud-java-bot Jan 6, 2025
dec5d02
add more tests
milaGGL Jan 7, 2025
89ed44c
Merge branch 'main' into mila/string-uses-byte-comparison
milaGGL Jan 10, 2025
c876e5a
format
milaGGL Jan 10, 2025
b55992d
remove lines commented out
milaGGL Jan 10, 2025
ef2ae13
Update ITQueryTest.java
milaGGL Jan 15, 2025
31cf3ff
Merge branch 'main' into mila/string-uses-byte-comparison
milaGGL Jan 15, 2025
f17c28a
resolve comment
milaGGL Jan 15, 2025
d4f299a
use lazy encoding in utf-8 encoded string comparison
milaGGL Feb 18, 2025
511469b
Merge branch 'main' into mila/string-uses-byte-comparison
milaGGL Feb 18, 2025
f86dcfb
chore: generate libraries at Tue Feb 18 21:15:36 UTC 2025
cloud-java-bot Feb 18, 2025
602a356
Update Order.java
milaGGL Feb 18, 2025
e8b3f57
Merge branch 'mila/string-uses-byte-comparison' of https://github.com…
milaGGL Feb 18, 2025
4ce70e6
add unit test
milaGGL Feb 20, 2025
2f1cf4e
use charCount
milaGGL Feb 25, 2025
a575b82
Merge branch 'main' into mila/string-uses-byte-comparison
milaGGL Feb 25, 2025
fa9f5b7
chore: generate libraries at Tue Feb 25 16:25:43 UTC 2025
cloud-java-bot Feb 25, 2025
6c77573
encode whole string when facing invalid surrogates
milaGGL Feb 25, 2025
f0a541d
Merge branch 'mila/string-uses-byte-comparison' of https://github.com…
milaGGL Feb 25, 2025
ee95149
Update ITQueryTest.java
milaGGL Feb 25, 2025
75dc68c
fix unit test, add invalid surrogate integration test
milaGGL Feb 26, 2025
237621a
Merge branch 'main' into mila/string-uses-byte-comparison
milaGGL Feb 26, 2025
8a3a7f1
Merge branch 'main' into mila/string-uses-byte-comparison
milaGGL Feb 26, 2025
87f2db6
resolve comments
milaGGL Feb 26, 2025
03a7356
Merge branch 'main' into mila/string-uses-byte-comparison
milaGGL Feb 26, 2025
b2af75e
Merge branch 'main' into mila/string-uses-byte-comparison
milaGGL Mar 3, 2025
3f007c6
chore: generate libraries at Mon Mar 3 16:17:03 UTC 2025
cloud-java-bot Mar 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ If you are using Maven without the BOM, add this to your dependencies:
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-firestore</artifactId>
<version>3.30.8</version>
<version>3.30.9</version>
</dependency>

```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,44 @@ public int compare(@Nonnull Value left, @Nonnull Value right) {

/**
 * Compares two strings in the order of their UTF-8 encoded bytes, without encoding either string
 * in full.
 *
 * <p>Both strings are walked in lockstep, one code point per step, until the first position at
 * which their code points differ. Only the code point at that position is UTF-8 encoded and
 * compared byte-wise; everything before it is known to be identical. If no difference is found
 * before one string runs out, the shorter string orders first.
 *
 * @param left the left-hand string; must not be {@code null}
 * @param right the right-hand string; must not be {@code null}
 * @return a negative value, zero, or a positive value when {@code left} orders before, equal to,
 *     or after {@code right}
 */
public static int compareUtf8Strings(String left, String right) {
  int i = 0;
  while (i < left.length() && i < right.length()) {
    int leftCodePoint = left.codePointAt(i);
    int rightCodePoint = right.codePointAt(i);

    if (leftCodePoint != rightCodePoint) {
      if (leftCodePoint < 128 && rightCodePoint < 128) {
        // Both are ASCII: each encodes to a single byte equal to the code point, so the numeric
        // comparison is the byte comparison and no encoding is needed.
        return Integer.compare(leftCodePoint, rightCodePoint);
      }

      // UTF-8 encode only the code point at index i on each side and compare those bytes.
      ByteString leftBytes = ByteString.copyFromUtf8(getUtf8SafeBytes(left, i));
      ByteString rightBytes = ByteString.copyFromUtf8(getUtf8SafeBytes(right, i));
      int comp = compareByteStrings(leftBytes, rightBytes);
      if (comp != 0) {
        return comp;
      }

      // EXTREMELY RARE CASE: the code points differ, but their UTF-8 byte representations are
      // identical. This can happen with malformed input (invalid surrogate pairs), where Java's
      // encoding leads to unexpected byte sequences. Invalid surrogate inputs are reportedly
      // converted to "?" by protocol buffers while round tripping, so such strings should
      // almost never arrive from the backend. Fall back to code point order so the result is
      // still deterministic and non-zero.
      return Integer.compare(leftCodePoint, rightCodePoint);
    }

    // The code points match: step over them (2 chars for a surrogate pair, 1 otherwise).
    i += Character.charCount(leftCodePoint);
  }

  // One string is a prefix of the other (or they are equal): the shorter one orders first.
  return Integer.compare(left.length(), right.length());
}

/**
 * Returns the substring of {@code str} that holds the single code point starting at {@code
 * index}: two chars when a surrogate pair starts there, one char otherwise.
 *
 * <p>Despite the name, the result is a {@code String}, not a byte array; callers encode it
 * themselves.
 *
 * @param str the string to slice
 * @param index the char index at which the code point starts
 * @return the one- or two-char substring covering that code point
 */
private static String getUtf8SafeBytes(String str, int index) {
  final int codePointWidth = Character.charCount(str.codePointAt(index));
  final int endExclusive = index + codePointWidth;
  return str.substring(index, endExclusive);
}

private int compareBlobs(Value left, Value right) {
Expand All @@ -147,7 +182,7 @@ private int compareBlobs(Value left, Value right) {
return compareByteStrings(leftBytes, rightBytes);
}

private static int compareByteStrings(ByteString leftBytes, ByteString rightBytes) {
static int compareByteStrings(ByteString leftBytes, ByteString rightBytes) {
int size = Math.min(leftBytes.size(), rightBytes.size());
for (int i = 0; i < size; i++) {
// Make sure the bytes are unsigned
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@

package com.google.cloud.firestore;

import static com.google.cloud.firestore.Order.compareByteStrings;
import static com.google.cloud.firestore.Order.compareUtf8Strings;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

import com.google.firestore.v1.ArrayValue;
import com.google.firestore.v1.MapValue;
Expand All @@ -25,7 +28,9 @@
import com.google.protobuf.NullValue;
import com.google.protobuf.Timestamp;
import com.google.type.LatLng;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
import org.junit.Test;

public class OrderTest {
Expand Down Expand Up @@ -194,4 +199,184 @@ private Value objectValue(String key, Value value, Object... keysAndValues) {

return Value.newBuilder().setMapValue(mapBuilder.build()).build();
}

/**
 * Randomized check that {@code compareUtf8Strings} returns the same value as UTF-8 encoding both
 * strings in full and comparing the resulting bytes with {@code compareByteStrings}.
 *
 * <p>Up to one million generated string pairs are compared; the run stops early once ten
 * mismatches have been collected. On failure the message lists every collected mismatch along
 * with the seed that drove the generator, so the exact run can be replayed.
 */
@Test
public void compareUtf8StringsShouldReturnCorrectValue() {
  ArrayList<String> errors = new ArrayList<>();
  // This seed is the one actually handed to the generator below, so the "seed=" value printed
  // on failure reproduces the failing sequence.
  int seed = new Random().nextInt(Integer.MAX_VALUE);
  int passCount = 0;
  StringGenerator stringGenerator = new StringGenerator(seed);
  StringPairGenerator stringPairGenerator = new StringPairGenerator(stringGenerator);
  for (int i = 0; i < 1_000_000 && errors.size() < 10; i++) {
    StringPairGenerator.StringPair stringPair = stringPairGenerator.next();
    final String s1 = stringPair.s1;
    final String s2 = stringPair.s2;

    int actual = compareUtf8Strings(s1, s2);

    // Reference result: encode both strings completely and compare the bytes.
    ByteString b1 = ByteString.copyFromUtf8(s1);
    ByteString b2 = ByteString.copyFromUtf8(s2);
    int expected = compareByteStrings(b1, b2);

    if (actual == expected) {
      passCount++;
    } else {
      errors.add(
          "compareUtf8Strings(s1=\""
              + s1
              + "\", s2=\""
              + s2
              + "\") returned "
              + actual
              + ", but expected "
              + expected
              + " (i="
              + i
              + ", s1.length="
              + s1.length()
              + ", s2.length="
              + s2.length()
              + ")");
    }
  }

  if (!errors.isEmpty()) {
    StringBuilder sb = new StringBuilder();
    sb.append(errors.size()).append(" test cases failed, ");
    sb.append(passCount).append(" test cases passed, ");
    sb.append("seed=").append(seed).append(";");
    for (int i = 0; i < errors.size(); i++) {
      sb.append("\nerrors[").append(i).append("]: ").append(errors.get(i));
    }
    fail(sb.toString());
  }
}

/**
 * Produces pairs of strings that start with the same generated prefix and continue with
 * independently generated suffixes.
 */
private static class StringPairGenerator {

  /** Immutable holder for the two strings of one generated pair. */
  public static class StringPair {
    public final String s1;
    public final String s2;

    public StringPair(String s1, String s2) {
      this.s1 = s1;
      this.s2 = s2;
    }
  }

  /** Source of the prefix and of both suffixes. */
  private final StringGenerator stringGenerator;

  public StringPairGenerator(StringGenerator stringGenerator) {
    this.stringGenerator = stringGenerator;
  }

  /**
   * Draws three strings from the underlying generator, in this order: the shared prefix, the
   * suffix of the first string, the suffix of the second string.
   *
   * @return a new pair whose members both begin with the shared prefix
   */
  public StringPair next() {
    final String sharedPrefix = stringGenerator.next();
    final String firstSuffix = stringGenerator.next();
    final String secondSuffix = stringGenerator.next();
    return new StringPair(sharedPrefix + firstSuffix, sharedPrefix + secondSuffix);
  }
}

/**
 * Generates pseudo-random strings made of BMP code points outside the surrogate range and of
 * well-formed surrogate pairs.
 *
 * <p>Each code point is a surrogate pair with the configured probability. Generated strings never
 * exceed the configured maximum length, measured in {@code char}s.
 */
private static class StringGenerator {

  private static final float DEFAULT_SURROGATE_PAIR_PROBABILITY = 0.33f;
  private static final int DEFAULT_MAX_LENGTH = 20;

  private static final int MIN_HIGH_SURROGATE = 0xD800;
  private static final int MAX_HIGH_SURROGATE = 0xDBFF;
  private static final int MIN_LOW_SURROGATE = 0xDC00;
  private static final int MAX_LOW_SURROGATE = 0xDFFF;

  private final Random rnd;
  private final float surrogatePairProbability;
  private final int maxLength;

  /**
   * Creates a generator seeded with {@code seed} that uses the default surrogate pair
   * probability and the default maximum length.
   *
   * @param seed seed for the underlying {@link Random}
   */
  public StringGenerator(int seed) {
    this(new Random(seed), DEFAULT_SURROGATE_PAIR_PROBABILITY, DEFAULT_MAX_LENGTH);
  }

  /**
   * Creates a generator with explicit settings.
   *
   * @param rnd source of randomness
   * @param surrogatePairProbability probability, in {@code [0.0, 1.0]}, that a generated code
   *     point is a surrogate pair
   * @param maxLength maximum length, in chars, of a generated string; must not be negative
   * @throws IllegalArgumentException if the probability or the length is out of range
   */
  public StringGenerator(Random rnd, float surrogatePairProbability, int maxLength) {
    this.rnd = rnd;
    this.surrogatePairProbability = validateProbability(surrogatePairProbability);
    this.maxLength = validateLength(maxLength);
  }

  /** Returns {@code probability} if it is a finite value in {@code [0.0, 1.0]}, else throws. */
  private static float validateProbability(float probability) {
    if (!Float.isFinite(probability)) {
      throw new IllegalArgumentException(
          "invalid surrogate pair probability: "
              + probability
              + " (must be between 0.0 and 1.0, inclusive)");
    } else if (probability < 0.0f) {
      throw new IllegalArgumentException(
          "invalid surrogate pair probability: "
              + probability
              + " (must be greater than or equal to zero)");
    } else if (probability > 1.0f) {
      throw new IllegalArgumentException(
          "invalid surrogate pair probability: "
              + probability
              + " (must be less than or equal to 1)");
    }
    return probability;
  }

  /** Returns {@code length} if it is not negative, else throws. */
  private static int validateLength(int length) {
    if (length < 0) {
      throw new IllegalArgumentException(
          "invalid maximum string length: "
              + length
              + " (must be greater than or equal to zero)");
    }
    return length;
  }

  /**
   * Generates the next string. Its length in chars is drawn from {@code [0, maxLength]}.
   *
   * <p>A surrogate pair takes two chars, so when only one char of room is left a non-surrogate
   * code point is drawn instead. The length bound therefore takes precedence over the surrogate
   * pair probability, and the result never overshoots the drawn length.
   *
   * @return a string of exactly the drawn length that contains no unpaired surrogates
   */
  public String next() {
    final int length = rnd.nextInt(maxLength + 1);
    final StringBuilder sb = new StringBuilder(length);
    while (sb.length() < length) {
      final int remaining = length - sb.length();
      final int codePoint = remaining >= 2 ? nextCodePoint() : nextNonSurrogateCodePoint();
      sb.appendCodePoint(codePoint);
    }
    return sb.toString();
  }

  /** Decides whether the next code point should be a surrogate pair. */
  private boolean isNextSurrogatePair() {
    return nextBoolean(rnd, surrogatePairProbability);
  }

  /**
   * Returns {@code true} with the given probability. The endpoints 0 and 1 are answered without
   * consuming a random number.
   */
  private static boolean nextBoolean(Random rnd, float probability) {
    if (probability == 0.0f) {
      return false;
    } else if (probability == 1.0f) {
      return true;
    } else {
      return rnd.nextFloat() < probability;
    }
  }

  /** Returns either a supplementary code point or a non-surrogate BMP code point. */
  private int nextCodePoint() {
    if (isNextSurrogatePair()) {
      return nextSurrogateCodePoint();
    } else {
      return nextNonSurrogateCodePoint();
    }
  }

  /** Returns the code point formed by a random high surrogate and a random low surrogate. */
  private int nextSurrogateCodePoint() {
    int highSurrogate =
        rnd.nextInt(MAX_HIGH_SURROGATE - MIN_HIGH_SURROGATE + 1) + MIN_HIGH_SURROGATE;
    int lowSurrogate = rnd.nextInt(MAX_LOW_SURROGATE - MIN_LOW_SURROGATE + 1) + MIN_LOW_SURROGATE;
    return Character.toCodePoint((char) highSurrogate, (char) lowSurrogate);
  }

  /** Returns a random BMP code point outside the surrogate range, by rejection sampling. */
  private int nextNonSurrogateCodePoint() {
    int codePoint;
    do {
      codePoint = rnd.nextInt(0x10000); // BMP range
    } while (codePoint >= MIN_HIGH_SURROGATE
        && codePoint <= MAX_LOW_SURROGATE); // Exclude surrogate range
    return codePoint;
  }
}
}
Loading