From 5667e5c3c6c7deea07166a2e6cdf3ac86af09e0c Mon Sep 17 00:00:00 2001
From: Denver Coneybeare <dconeybe@google.com>
Date: Mon, 7 Jul 2025 14:22:32 -0400
Subject: [PATCH 1/3] fix: Improve performance of the UTF-8 string comparison
 logic.

The semantics of this logic were originally fixed by #2275, but that fix
caused a material performance degradation, which was then improved by #2299.
The performance was, however, still suboptimal, and this PR further improves
the speed, bringing it back close to its original level and, serendipitously,
simplifying the algorithm too.

This commit is a port of https://github.com/firebase/firebase-js-sdk/pull/9143
---
 dev/src/order.ts | 81 ++++++++++++++++++++++--------------------------
 1 file changed, 37 insertions(+), 44 deletions(-)

diff --git a/dev/src/order.ts b/dev/src/order.ts
index 04c93bbc1..eba6ddf7f 100644
--- a/dev/src/order.ts
+++ b/dev/src/order.ts
@@ -254,56 +254,49 @@ function compareVectors(left: ApiMapValue, right: ApiMapValue): number {
  * @internal
  */
 export function compareUtf8Strings(left: string, right: string): number {
-  let i = 0;
-  while (i < left.length && i < right.length) {
-    const leftCodePoint = left.codePointAt(i)!;
-    const rightCodePoint = right.codePointAt(i)!;
-
-    if (leftCodePoint !== rightCodePoint) {
-      if (leftCodePoint < 128 && rightCodePoint < 128) {
-        // ASCII comparison
-        return primitiveComparator(leftCodePoint, rightCodePoint);
-      } else {
-        // Lazy instantiate TextEncoder
-        const encoder = new TextEncoder();
-
-        // UTF-8 encode the character at index i for byte comparison.
-        const leftBytes = encoder.encode(getUtf8SafeSubstring(left, i));
-        const rightBytes = encoder.encode(getUtf8SafeSubstring(right, i));
-        const comp = compareBlobs(
-          Buffer.from(leftBytes),
-          Buffer.from(rightBytes)
-        );
-        if (comp !== 0) {
-          return comp;
-        } else {
-          // EXTREMELY RARE CASE: Code points differ, but their UTF-8 byte
-          // representations are identical. This can happen with malformed input
-          // (invalid surrogate pairs). The backend also actively prevents invalid
-          // surrogates as INVALID_ARGUMENT errors, so we almost never receive
-          // invalid strings from backend.
-          // Fallback to code point comparison for graceful handling.
-          return primitiveComparator(leftCodePoint, rightCodePoint);
-        }
-      }
+  // Find the first differing character (a.k.a. "UTF-16 code unit") in the two strings and,
+  // if found, use that character to determine the relative ordering of the two strings as a
+  // whole. Comparing UTF-16 strings in UTF-8 byte order can be done simply and efficiently by
+  // comparing the UTF-16 code units (chars). This serendipitously works because of the way UTF-8
+  // and UTF-16 happen to represent Unicode code points.
+  //
+  // After finding the first pair of differing characters, there are two cases:
+  //
+  // Case 1: Both characters are non-surrogates (code points less than or equal to 0xFFFF) or
+  // both are surrogates from a surrogate pair (that collectively represent code points greater
+  // than 0xFFFF). In this case their numeric order as UTF-16 code units is the same as the
+  // lexicographical order of their corresponding UTF-8 byte sequences. A direct comparison is
+  // sufficient.
+  //
+  // Case 2: One character is a surrogate and the other is not. In this case the surrogate-
+  // containing string is always ordered after the non-surrogate. This is because surrogates are
+  // used to represent code points greater than 0xFFFF which have 4-byte UTF-8 representations
+  // and are lexicographically greater than the 1, 2, or 3-byte representations of code points
+  // less than or equal to 0xFFFF.
+  const length = Math.min(left.length, right.length);
+  for (let i = 0; i < length; i++) {
+    const leftChar = left.charAt(i);
+    const rightChar = right.charAt(i);
+    if (leftChar !== rightChar) {
+      return isSurrogate(leftChar) === isSurrogate(rightChar)
+        ? primitiveComparator(leftChar, rightChar)
+        : isSurrogate(leftChar)
+        ? 1
+        : -1;
     }
-    // Increment by 2 for surrogate pairs, 1 otherwise
-    i += leftCodePoint > 0xffff ? 2 : 1;
   }
 
-  // Compare lengths if all characters are equal
+  // Use the lengths of the strings to determine the overall comparison result since either the
+  // strings were equal or one is a prefix of the other.
   return primitiveComparator(left.length, right.length);
 }
 
-function getUtf8SafeSubstring(str: string, index: number): string {
-  const firstCodePoint = str.codePointAt(index)!;
-  if (firstCodePoint > 0xffff) {
-    // It's a surrogate pair, return the whole pair
-    return str.substring(index, index + 2);
-  } else {
-    // It's a single code point, return it
-    return str.substring(index, index + 1);
-  }
+const MIN_SURROGATE = 0xd800;
+const MAX_SURROGATE = 0xdfff;
+
+export function isSurrogate(s: string): boolean {
+  const c = s.charCodeAt(0);
+  return c >= MIN_SURROGATE && c <= MAX_SURROGATE;
 }
 
 /*!

From 772c6257a04ab9ec33e72ff7fcb4827a4082d126 Mon Sep 17 00:00:00 2001
From: Denver Coneybeare <dconeybe@google.com>
Date: Mon, 7 Jul 2025 14:30:40 -0400
Subject: [PATCH 2/3] npm run fix

---
 dev/src/order.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dev/src/order.ts b/dev/src/order.ts
index eba6ddf7f..7397c8d62 100644
--- a/dev/src/order.ts
+++ b/dev/src/order.ts
@@ -281,8 +281,8 @@ export function compareUtf8Strings(left: string, right: string): number {
       return isSurrogate(leftChar) === isSurrogate(rightChar)
         ? primitiveComparator(leftChar, rightChar)
         : isSurrogate(leftChar)
-        ? 1
-        : -1;
+          ? 1
+          : -1;
     }
   }
 

From 7db26d7a384be47a2bec48f299d5db49e33031e5 Mon Sep 17 00:00:00 2001
From: Denver Coneybeare <dconeybe@google.com>
Date: Mon, 7 Jul 2025 14:40:13 -0400
Subject: [PATCH 3/3] order.ts: remove `export` keyword from `isSurrogate`
 function since it's not used outside of the file.

---
 dev/src/order.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/src/order.ts b/dev/src/order.ts
index 7397c8d62..9588ef675 100644
--- a/dev/src/order.ts
+++ b/dev/src/order.ts
@@ -294,7 +294,7 @@ export function compareUtf8Strings(left: string, right: string): number {
 const MIN_SURROGATE = 0xd800;
 const MAX_SURROGATE = 0xdfff;
 
-export function isSurrogate(s: string): boolean {
+function isSurrogate(s: string): boolean {
   const c = s.charCodeAt(0);
   return c >= MIN_SURROGATE && c <= MAX_SURROGATE;
 }