From cc39cf3e5d9c6eef43648b3b699dcf427812ef8d Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Fri, 31 Oct 2025 10:50:03 -0400
Subject: [PATCH 01/29] experiment with value view and simdutf

---
 src/workerd/api/encoding.c++ | 62 ++++++++++++++++++++++++++----------
 1 file changed, 45 insertions(+), 17 deletions(-)
diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 3dc4d2d9367..e8fa5e797c0 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -491,25 +491,53 @@ jsg::Ref<TextEncoder> TextEncoder::constructor(jsg::Lock& js) {
   return js.alloc<TextEncoder>();
 }
 
-namespace {
-TextEncoder::EncodeIntoResult encodeIntoImpl(
-    jsg::Lock& js, jsg::JsString input, jsg::BufferSource& buffer) {
-  auto result = input.writeInto(
-      js, buffer.asArrayPtr().asChars(), jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
-  return TextEncoder::EncodeIntoResult{
-    .read = static_cast<int>(result.read),
-    .written = static_cast<int>(result.written),
-  };
-}
-}  // namespace
-
 jsg::BufferSource TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input) {
   auto str = input.orDefault(js.str());
-  auto view = JSG_REQUIRE_NONNULL(jsg::BufferSource::tryAlloc(js, str.utf8Length(js)), RangeError,
-      "Cannot allocate space for TextEncoder.encode");
-  [[maybe_unused]] auto result = encodeIntoImpl(js, str, view);
-  KJ_DASSERT(result.written == view.size());
-  return kj::mv(view);
+
+  // Do the conversion while ValueView is alive, but to a C++ heap buffer (not V8 heap)
+  kj::Array<kj::byte> output_data;
+
+  {
+    v8::String::ValueView value_view(js.v8Isolate, str);
+    size_t length = static_cast<size_t>(value_view.length());
+
+    if (value_view.is_one_byte()) {
+      auto data = reinterpret_cast<const char*>(value_view.data8());
+      size_t utf8_length = simdutf::utf8_length_from_latin1(data, length);
+      output_data = kj::heapArray<kj::byte>(utf8_length);
+      [[maybe_unused]] auto written =
+          simdutf::convert_latin1_to_utf8(data, length, output_data.asChars().begin());
+      KJ_DASSERT(written == output_data.size());
+    } else {
+      auto data = reinterpret_cast<const char16_t*>(value_view.data16());
+
+      // Check if UTF-16LE is valid
+      auto validation_result = simdutf::validate_utf16le(data, length);
+
+      if (validation_result) {
+        // Valid UTF-16LE, convert directly
+        size_t utf8_length = simdutf::utf8_length_from_utf16le(data, length);
+        output_data = kj::heapArray<kj::byte>(utf8_length);
+        [[maybe_unused]] auto written =
+            simdutf::convert_utf16le_to_utf8(data, length, output_data.asChars().begin());
+        KJ_DASSERT(written == output_data.size());
+      } else {
+        // Invalid UTF-16LE (unpaired surrogates), fix it first
+        auto well_formed = kj::heapArray<char16_t>(length);
+        simdutf::to_well_formed_utf16le(data, length, well_formed.begin());
+
+        // Now convert the well-formed UTF-16LE to UTF-8
+        size_t utf8_length = simdutf::utf8_length_from_utf16le(well_formed.begin(), length);
+        output_data = kj::heapArray<kj::byte>(utf8_length);
+        [[maybe_unused]] auto written = simdutf::convert_utf16le_to_utf8(
+            well_formed.begin(), length, output_data.asChars().begin());
+        KJ_DASSERT(written == output_data.size());
+      }
+    }
+  }  // ValueView destroyed here, releasing the heap lock
+
+  // Now create BufferSource from the output data (this allocates V8 objects, which is now safe)
+  return jsg::BufferSource(js, jsg::BackingStore::from(js, kj::mv(output_data)));
 }
 
 TextEncoder::EncodeIntoResult TextEncoder::encodeInto(

From d93fcf7e6d434dbf7bcd39ea91a20291b2656657 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Fri, 31 Oct 2025 13:05:25 -0400
Subject: [PATCH 02/29] address pr reviews: use BackingStore::wrap to avoid copy

---
 src/workerd/api/encoding.c++   | 91 +++++++++++++++++++++-------------
 src/workerd/jsg/buffersource.h |  7 +--
 2 files changed, 60 insertions(+), 38 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index e8fa5e797c0..83e875fbd01 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -492,52 +492,73 @@ jsg::Ref<TextEncoder> TextEncoder::constructor(jsg::Lock& js) {
 }
 
 jsg::BufferSource TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input) {
-  auto str = input.orDefault(js.str());
+  jsg::JsString str = input.orDefault(js.str());
 
-  // Do the conversion while ValueView is alive, but to a C++ heap buffer (not V8 heap)
-  kj::Array<kj::byte> output_data;
+  if (str.length(js) == 0) {
+    return jsg::BufferSource(js, jsg::BackingStore::alloc<v8::Uint8Array>(js, 0));
+  }
 
-  {
+  // Allocate the output buffer and perform the conversion while ValueView is alive, but defer
+  // creating the V8 BufferSource until after ValueView is destroyed. This approach uses
+  // BackingStore::wrap with a custom disposer to avoid the copy overhead that would occur with
+  // BackingStore::from in the v8 sandbox, since from() copies data when it's not already in the
+  // sandbox. By using new/delete with wrap(), we maintain ownership semantics compatible with V8's
+  // C-style BackingStore API while avoiding the extra allocation and copy.
+  jsg::BackingStore backing = [&]() {
     v8::String::ValueView value_view(js.v8Isolate, str);
     size_t length = static_cast<size_t>(value_view.length());
 
     if (value_view.is_one_byte()) {
+      // Fast path for Latin-1 encoded strings. V8 uses Latin-1 (ISO-8859-1) encoding internally
+      // for strings that contain only code points <= U+00FF. We need to convert to UTF-8.
       auto data = reinterpret_cast<const char*>(value_view.data8());
       size_t utf8_length = simdutf::utf8_length_from_latin1(data, length);
-      output_data = kj::heapArray<kj::byte>(utf8_length);
+      auto* output = new kj::Array<kj::byte>(kj::heapArray<kj::byte>(utf8_length));
       [[maybe_unused]] auto written =
-          simdutf::convert_latin1_to_utf8(data, length, output_data.asChars().begin());
-      KJ_DASSERT(written == output_data.size());
-    } else {
-      auto data = reinterpret_cast<const char16_t*>(value_view.data16());
-
-      // Check if UTF-16LE is valid
-      auto validation_result = simdutf::validate_utf16le(data, length);
-
-      if (validation_result) {
-        // Valid UTF-16LE, convert directly
-        size_t utf8_length = simdutf::utf8_length_from_utf16le(data, length);
-        output_data = kj::heapArray<kj::byte>(utf8_length);
-        [[maybe_unused]] auto written =
-            simdutf::convert_utf16le_to_utf8(data, length, output_data.asChars().begin());
-        KJ_DASSERT(written == output_data.size());
-      } else {
-        // Invalid UTF-16LE (unpaired surrogates), fix it first
-        auto well_formed = kj::heapArray<char16_t>(length);
-        simdutf::to_well_formed_utf16le(data, length, well_formed.begin());
-
-        // Now convert the well-formed UTF-16LE to UTF-8
-        size_t utf8_length = simdutf::utf8_length_from_utf16le(well_formed.begin(), length);
-        output_data = kj::heapArray<kj::byte>(utf8_length);
-        [[maybe_unused]] auto written = simdutf::convert_utf16le_to_utf8(
-            well_formed.begin(), length, output_data.asChars().begin());
-        KJ_DASSERT(written == output_data.size());
-      }
+          simdutf::convert_latin1_to_utf8(data, length, output->asChars().begin());
+      KJ_DASSERT(written == output->size());
+      return jsg::BackingStore::wrap<v8::Uint8Array>(output->begin(), output->size(),
+          [](void*, size_t, void* ptr) { delete reinterpret_cast<kj::Array<kj::byte>*>(ptr); },
+          output);
+    }
+
+    // Two-byte string path. V8 uses UTF-16LE encoding internally for strings with code points
+    // > U+00FF. Check if the UTF-16 is valid (no unpaired surrogates) to determine the path.
+    auto data = reinterpret_cast<const char16_t*>(value_view.data16());
+    auto valid_utf16 = simdutf::validate_utf16le(data, length);
+
+    if (valid_utf16) {
+      // Common case: valid UTF-16LE, convert directly to UTF-8
+      size_t utf8_length = simdutf::utf8_length_from_utf16le(data, length);
+      auto* output = new kj::Array<kj::byte>(kj::heapArray<kj::byte>(utf8_length));
+      [[maybe_unused]] auto written =
+          simdutf::convert_utf16le_to_utf8(data, length, output->asChars().begin());
+      KJ_DASSERT(written == output->size());
+      return jsg::BackingStore::wrap<v8::Uint8Array>(output->begin(), output->size(),
+          [](void*, size_t, void* ptr) { delete reinterpret_cast<kj::Array<kj::byte>*>(ptr); },
+          output);
     }
-  }  // ValueView destroyed here, releasing the heap lock
 
-  // Now create BufferSource from the output data (this allocates V8 objects, which is now safe)
-  return jsg::BufferSource(js, jsg::BackingStore::from(js, kj::mv(output_data)));
+    // Rare case: Invalid UTF-16LE with unpaired surrogates. Per the Encoding Standard, we must
+    // replace unpaired surrogates with U+FFFD replacement characters. We do this in two passes:
+    // first fix the UTF-16, then convert to UTF-8. This extra buffer allocation only happens
+    // for malformed strings, which should be uncommon in practice.
+    auto well_formed = kj::heapArray<char16_t>(length);
+    simdutf::to_well_formed_utf16le(data, length, well_formed.begin());
+
+    size_t utf8_length = simdutf::utf8_length_from_utf16le(well_formed.begin(), length);
+    auto* output = new kj::Array<kj::byte>(kj::heapArray<kj::byte>(utf8_length));
+    [[maybe_unused]] auto written =
+        simdutf::convert_utf16le_to_utf8(well_formed.begin(), length, output->asChars().begin());
+    KJ_DASSERT(written == output->size());
+    return jsg::BackingStore::wrap<v8::Uint8Array>(output->begin(), output->size(),
+        [](void*, size_t, void* ptr) { delete reinterpret_cast<kj::Array<kj::byte>*>(ptr); },
+        output);
+  }();  // ValueView destroyed here, releasing the heap lock
+
+  // Now that ValueView is destroyed and the heap lock is released, it's safe to create V8 objects.
+  // Construct the BufferSource which will create the actual Uint8Array that gets returned to JS.
+  return jsg::BufferSource(js, kj::mv(backing));
 }
 
 TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
diff --git a/src/workerd/jsg/buffersource.h b/src/workerd/jsg/buffersource.h
index 65fc36ab960..018e4a3b8e9 100644
--- a/src/workerd/jsg/buffersource.h
+++ b/src/workerd/jsg/buffersource.h
@@ -102,9 +102,10 @@ class BackingStore {
 
   // Creates a new BackingStore of the given size.
   template <BufferSourceType T = v8::Uint8Array>
-  static BackingStore alloc(Lock& js, size_t size) {
-    return BackingStore(js.allocBackingStore(size), size, 0, getBufferSourceElementSize<T>(),
-        construct<T>, checkIsIntegerType<T>());
+  static BackingStore alloc(
+      Lock& js, size_t size, Lock::AllocOption init_mode = Lock::AllocOption::ZERO_INITIALIZED) {
+    return BackingStore(js.allocBackingStore(size, init_mode), size, 0,
+        getBufferSourceElementSize<T>(), construct<T>, checkIsIntegerType<T>());
   }
 
   using Disposer = void(void*, size_t, void*);

From 28b102d7ef79b5b90262871720056ecc26322e02 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Fri, 31 Oct 2025 14:07:10 -0400
Subject: [PATCH 03/29] address pr reviews: return JsUint8Array, add one-byte fast path

---
 src/workerd/api/encoding.c++ | 146 ++++++++++++++++++++++-------------
 src/workerd/api/encoding.h   |   6 +-
 src/workerd/jsg/jsvalue.c++  |   4 +
 src/workerd/jsg/jsvalue.h    |   7 ++
 4 files changed, 103 insertions(+), 60 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 83e875fbd01..7f51be02837 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -491,74 +491,110 @@ jsg::Ref<TextEncoder> TextEncoder::constructor(jsg::Lock& js) {
   return js.alloc<TextEncoder>();
 }
 
-jsg::BufferSource TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input) {
+jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input) {
   jsg::JsString str = input.orDefault(js.str());
 
-  if (str.length(js) == 0) {
-    return jsg::BufferSource(js, jsg::BackingStore::alloc<v8::Uint8Array>(js, 0));
-  }
-
-  // Allocate the output buffer and perform the conversion while ValueView is alive, but defer
-  // creating the V8 BufferSource until after ValueView is destroyed. This approach uses
-  // BackingStore::wrap with a custom disposer to avoid the copy overhead that would occur with
-  // BackingStore::from in the v8 sandbox, since from() copies data when it's not already in the
-  // sandbox. By using new/delete with wrap(), we maintain ownership semantics compatible with V8's
-  // C-style BackingStore API while avoiding the extra allocation and copy.
-  jsg::BackingStore backing = [&]() {
-    v8::String::ValueView value_view(js.v8Isolate, str);
-    size_t length = static_cast<size_t>(value_view.length());
-
-    if (value_view.is_one_byte()) {
-      // Fast path for Latin-1 encoded strings. V8 uses Latin-1 (ISO-8859-1) encoding internally
-      // for strings that contain only code points <= U+00FF. We need to convert to UTF-8.
-      auto data = reinterpret_cast<const char*>(value_view.data8());
-      size_t utf8_length = simdutf::utf8_length_from_latin1(data, length);
-      auto* output = new kj::Array<kj::byte>(kj::heapArray<kj::byte>(utf8_length));
-      [[maybe_unused]] auto written =
-          simdutf::convert_latin1_to_utf8(data, length, output->asChars().begin());
-      KJ_DASSERT(written == output->size());
-      return jsg::BackingStore::wrap<v8::Uint8Array>(output->begin(), output->size(),
-          [](void*, size_t, void* ptr) { delete reinterpret_cast<kj::Array<kj::byte>*>(ptr); },
-          output);
+  if (str.isOneByte(js)) {
+    auto length = str.length(js);
+    // Fast path for one-byte strings (Latin-1). writeOneByte() copies the raw bytes without
+    // flattening the string, which is more efficient than using ValueView. Note that we
+    // allocate `length * 2` bytes because Latin-1 characters 0x80-0xFF need 2 bytes in UTF-8.
+    auto backing =
+        jsg::BackingStore::alloc<v8::Uint8Array>(js, length, jsg::Lock::AllocOption::UNINITIALIZED);
+    str.writeOneByte(
+        js, backing.asArrayPtr<kj::byte>(), jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
+    auto backingData = reinterpret_cast<const char*>(backing.asArrayPtr<kj::byte>().begin());
+
+    size_t utf8_length = simdutf::utf8_length_from_latin1(backingData, length);
+
+    if (utf8_length == length) {
+      return jsg::JsUint8Array(backing.createHandle(js).As<v8::Uint8Array>());
     }
 
+    auto backing2 = jsg::BackingStore::alloc<v8::Uint8Array>(
+        js, utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
+    auto written = simdutf::convert_latin1_to_utf8(
+        backingData, length, reinterpret_cast<char*>(backing2.asArrayPtr<kj::byte>().begin()));
+    KJ_DASSERT(backing2.size() == written);
+    return jsg::JsUint8Array(backing2.createHandle(js).As<v8::Uint8Array>());
+  }
+
+  // First pass: Calculate the required UTF-8 output buffer size.
+  // We need to do this in a separate ValueView because:
+  // 1. ValueView holds the V8 heap lock, which prevents us from allocating new V8 objects
+  // 2. We must determine the exact output size before allocating the BackingStore
+  // 3. Once we know the size, we'll create a second ValueView to do the actual conversion
+  size_t utf8_length = 0;
+  bool isValidUtf16 = true;
+  // For invalid UTF-16 strings (with unpaired surrogates), we need to fix them to well-formed
+  // UTF-16 before calculating the UTF-8 length. We store the fixed version here so it can be
+  // reused in the second pass, avoiding the need to fix it twice.
+  kj::Array<char16_t> wellFormed;
+
+  {
+    v8::String::ValueView view(js.v8Isolate, str);
+    // One-byte strings are handled by the fast path above
+    KJ_DASSERT(!view.is_one_byte());
+
+    auto data = reinterpret_cast<const char16_t*>(view.data16());
     // Two-byte string path. V8 uses UTF-16LE encoding internally for strings with code points
     // > U+00FF. Check if the UTF-16 is valid (no unpaired surrogates) to determine the path.
-    auto data = reinterpret_cast<const char16_t*>(value_view.data16());
-    auto valid_utf16 = simdutf::validate_utf16le(data, length);
-
-    if (valid_utf16) {
+    isValidUtf16 = simdutf::validate_utf16le(data, view.length());
+
+    if (isValidUtf16) {
+      // Common case: valid UTF-16, calculate UTF-8 length directly
+      utf8_length = simdutf::utf8_length_from_utf16le(data, view.length());
+    } else {
+      // Rare case: Invalid UTF-16 with unpaired surrogates. Per the Encoding Standard,
+      // unpaired surrogates must be replaced with U+FFFD (replacement character).
+      // U+FFFD is 3 bytes in UTF-8, which means the UTF-8 length will differ from what
+      // we'd calculate from the invalid UTF-16. We must fix the UTF-16 first, then
+      // calculate the UTF-8 length from the well-formed version to get the correct size.
+      wellFormed = kj::heapArray<char16_t>(view.length());
+      simdutf::to_well_formed_utf16le(data, view.length(), wellFormed.begin());
+      utf8_length = simdutf::utf8_length_from_utf16le(wellFormed.begin(), view.length());
+    }
+  }  // ValueView destroyed here, releasing the heap lock
+
+  // Pre-allocate the jsg::BackingStore to avoid the copy overhead that would occur with
+  // BackingStore::from() in the v8 sandbox, since from() copies data when it's not already in the
+  // sandbox. By pre-allocating with alloc(), the memory is already in the sandbox and we can
+  // perform the conversion directly into it.
+  auto backing = jsg::BackingStore::alloc<v8::Uint8Array>(
+      js, utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
+
+  // Second pass: Perform the actual UTF-8 conversion.
+  // We create a new ValueView here to access the string data again, now that we have a
+  // pre-allocated output buffer. The closure ensures the ValueView is destroyed before we
+  // return the result, which is important for proper V8 heap management.
+  [&]() {
+    v8::String::ValueView view(js.v8Isolate, str);
+    // One-byte strings are handled by the fast path above
+    KJ_DASSERT(!view.is_one_byte());
+
+    size_t length = static_cast<size_t>(view.length());
+    auto* output = backing.asArrayPtr<char>().begin();
+    auto data = reinterpret_cast<const char16_t*>(view.data16());
+
+    if (isValidUtf16) {
       // Common case: valid UTF-16LE, convert directly to UTF-8
-      size_t utf8_length = simdutf::utf8_length_from_utf16le(data, length);
-      auto* output = new kj::Array<kj::byte>(kj::heapArray<kj::byte>(utf8_length));
-      [[maybe_unused]] auto written =
-          simdutf::convert_utf16le_to_utf8(data, length, output->asChars().begin());
-      KJ_DASSERT(written == output->size());
-      return jsg::BackingStore::wrap<v8::Uint8Array>(output->begin(), output->size(),
-          [](void*, size_t, void* ptr) { delete reinterpret_cast<kj::Array<kj::byte>*>(ptr); },
-          output);
+      [[maybe_unused]] auto written = simdutf::convert_utf16le_to_utf8(data, length, output);
+      KJ_DASSERT(written == backing.size());
+      return;
     }
 
-    // Rare case: Invalid UTF-16LE with unpaired surrogates. Per the Encoding Standard, we must
-    // replace unpaired surrogates with U+FFFD replacement characters. We do this in two passes:
-    // first fix the UTF-16, then convert to UTF-8. This extra buffer allocation only happens
-    // for malformed strings, which should be uncommon in practice.
-    auto well_formed = kj::heapArray<char16_t>(length);
-    simdutf::to_well_formed_utf16le(data, length, well_formed.begin());
-
-    size_t utf8_length = simdutf::utf8_length_from_utf16le(well_formed.begin(), length);
-    auto* output = new kj::Array<kj::byte>(kj::heapArray<kj::byte>(utf8_length));
+    // Rare case: Invalid UTF-16LE with unpaired surrogates. We already fixed the UTF-16 to
+    // well-formed in the first pass (stored in wellFormed array), so now we just convert that
+    // fixed version to UTF-8. This reuses the wellFormed array created earlier, avoiding the
+    // need to fix the UTF-16 a second time.
     [[maybe_unused]] auto written =
-        simdutf::convert_utf16le_to_utf8(well_formed.begin(), length, output->asChars().begin());
-    KJ_DASSERT(written == output->size());
-    return jsg::BackingStore::wrap<v8::Uint8Array>(output->begin(), output->size(),
-        [](void*, size_t, void* ptr) { delete reinterpret_cast<kj::Array<kj::byte>*>(ptr); },
-        output);
+        simdutf::convert_utf16le_to_utf8(wellFormed.begin(), wellFormed.size(), output);
+    KJ_DASSERT(written == backing.size());
   }();  // ValueView destroyed here, releasing the heap lock
 
   // Now that ValueView is destroyed and the heap lock is released, it's safe to create V8 objects.
-  // Construct the BufferSource which will create the actual Uint8Array that gets returned to JS.
-  return jsg::BufferSource(js, kj::mv(backing));
+  // Create the Uint8Array from the BackingStore and return it to JS.
+  return jsg::JsUint8Array(backing.createHandle(js).As<v8::Uint8Array>());
 }
 
 TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
diff --git a/src/workerd/api/encoding.h b/src/workerd/api/encoding.h
index 732ee916cba..e694ad1b355 100644
--- a/src/workerd/api/encoding.h
+++ b/src/workerd/api/encoding.h
@@ -218,7 +218,7 @@ class TextEncoder final: public jsg::Object {
 
   static jsg::Ref<TextEncoder> constructor(jsg::Lock& js);
 
-  jsg::BufferSource encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input);
+  jsg::JsUint8Array encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input);
 
   EncodeIntoResult encodeInto(jsg::Lock& js, jsg::JsString input, jsg::JsUint8Array buffer);
 
@@ -236,11 +236,7 @@ class TextEncoder final: public jsg::Object {
       JSG_READONLY_INSTANCE_PROPERTY(encoding, getEncoding);
     }
 
-    // `encode()` returns `jsg::BufferSource`, which may be an `ArrayBuffer` or `ArrayBufferView`,
-    // but the implementation uses `jsg::BufferSource::tryAlloc()` which always tries to allocate a
-    // `Uint8Array`. The spec defines that this function returns a `Uint8Array` too.
     JSG_TS_OVERRIDE({
-      encode(input?: string): Uint8Array;
       encodeInto(input: string, buffer: Uint8Array): TextEncoderEncodeIntoResult;
     });
   }
diff --git a/src/workerd/jsg/jsvalue.c++ b/src/workerd/jsg/jsvalue.c++
index 20a9614ed93..4eb3298fec7 100644
--- a/src/workerd/jsg/jsvalue.c++
+++ b/src/workerd/jsg/jsvalue.c++
@@ -377,6 +377,10 @@ JsString JsString::internalize(Lock& js) const {
   return JsString(inner->InternalizeString(js.v8Isolate));
 }
 
+void JsString::writeOneByte(Lock& js, kj::ArrayPtr<kj::byte> buffer, WriteFlags flags) {
+  inner->WriteOneByteV2(js.v8Isolate, 0, buffer.size(), buffer.begin(), flags);
+}
+
 JsString::WriteIntoStatus JsString::writeInto(
     Lock& js, kj::ArrayPtr<char> buffer, WriteFlags options) const {
   WriteIntoStatus result = {0, 0};
diff --git a/src/workerd/jsg/jsvalue.h b/src/workerd/jsg/jsvalue.h
index 61481f4521d..f487f713881 100644
--- a/src/workerd/jsg/jsvalue.h
+++ b/src/workerd/jsg/jsvalue.h
@@ -277,6 +277,7 @@ class JsString final: public JsBase<v8::String, JsString> {
   int hashCode() const;
 
   bool isFlat() const;
+  bool isOneByte(Lock& js) const KJ_WARN_UNUSED_RESULT;
   bool containsOnlyOneByte() const;
 
   bool operator==(const JsString& other) const;
@@ -311,6 +312,8 @@ class JsString final: public JsBase<v8::String, JsString> {
   WriteIntoStatus writeInto(
       Lock& js, kj::ArrayPtr<uint16_t> buffer, WriteFlags options = WriteFlags::NONE) const;
 
+  void writeOneByte(Lock& js, kj::ArrayPtr<kj::byte> buffer, WriteFlags flags = WriteFlags::NONE);
+
   using JsBase<v8::String, JsString>::JsBase;
 };
 
@@ -986,6 +989,10 @@ inline int JsString::length(jsg::Lock& js) const {
   return inner->Length();
 }
 
+inline bool JsString::isOneByte(jsg::Lock& js) const {
+  return inner->IsOneByte();
+}
+
 inline size_t JsString::utf8Length(jsg::Lock& js) const {
   return inner->Utf8LengthV2(js.v8Isolate);
 }

From d6691fe57098866d23327dde83e246b2e20691ef Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Mon, 3 Nov 2025 12:41:24 -0500
Subject: [PATCH 04/29] get rid of multiple valueviews

---
 src/workerd/api/encoding.c++ | 118 +++++++++++++----------------------
 1 file changed, 45 insertions(+), 73 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 7f51be02837..1d2fd33d27c 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -493,108 +493,80 @@ jsg::Ref<TextEncoder> TextEncoder::constructor(jsg::Lock& js) {
 
 jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input) {
   jsg::JsString str = input.orDefault(js.str());
+  std::shared_ptr<v8::BackingStore> backingStore;
+  size_t utf8_length = 0;
 
+  // Fast path: check if string is one-byte before creating ValueView
   if (str.isOneByte(js)) {
     auto length = str.length(js);
-    // Fast path for one-byte strings (Latin-1). writeOneByte() copies the raw bytes without
-    // flattening the string, which is more efficient than using ValueView. Note that we
-    // allocate `length * 2` bytes because Latin-1 characters 0x80-0xFF need 2 bytes in UTF-8.
-    auto backing =
-        jsg::BackingStore::alloc<v8::Uint8Array>(js, length, jsg::Lock::AllocOption::UNINITIALIZED);
-    str.writeOneByte(
-        js, backing.asArrayPtr<kj::byte>(), jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
-    auto backingData = reinterpret_cast<const char*>(backing.asArrayPtr<kj::byte>().begin());
+    // Allocate buffer for Latin-1. Use v8::ArrayBuffer::NewBackingStore to avoid creating
+    // JS objects during conversion.
+    backingStore = v8::ArrayBuffer::NewBackingStore(
+        js.v8Isolate, length, v8::BackingStoreInitializationMode::kUninitialized);
+    auto backingData = reinterpret_cast<kj::byte*>(backingStore->Data());
+
+    str.writeOneByte(js, kj::ArrayPtr<kj::byte>(backingData, length),
+        jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
 
-    size_t utf8_length = simdutf::utf8_length_from_latin1(backingData, length);
+    utf8_length =
+        simdutf::utf8_length_from_latin1(reinterpret_cast<const char*>(backingData), length);
 
     if (utf8_length == length) {
-      return jsg::JsUint8Array(backing.createHandle(js).As<v8::Uint8Array>());
+      // ASCII fast path: no conversion needed, Latin-1 is same as UTF-8 for ASCII
+      auto array = v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, backingStore), 0, length);
+      return jsg::JsUint8Array(array);
     }
 
-    auto backing2 = jsg::BackingStore::alloc<v8::Uint8Array>(
-        js, utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
-    auto written = simdutf::convert_latin1_to_utf8(
-        backingData, length, reinterpret_cast<char*>(backing2.asArrayPtr<kj::byte>().begin()));
-    KJ_DASSERT(backing2.size() == written);
-    return jsg::JsUint8Array(backing2.createHandle(js).As<v8::Uint8Array>());
+    // Need to convert Latin-1 to UTF-8
+    std::shared_ptr<v8::BackingStore> backingStore2 = v8::ArrayBuffer::NewBackingStore(
+        js.v8Isolate, utf8_length, v8::BackingStoreInitializationMode::kUninitialized);
+    auto written = simdutf::convert_latin1_to_utf8(reinterpret_cast<const char*>(backingData),
+        length, reinterpret_cast<char*>(backingStore2->Data()));
+    KJ_DASSERT(utf8_length == written);
+    auto array =
+        v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, backingStore2), 0, utf8_length);
+    return jsg::JsUint8Array(array);
   }
 
-  // First pass: Calculate the required UTF-8 output buffer size.
-  // We need to do this in a separate ValueView because:
-  // 1. ValueView holds the V8 heap lock, which prevents us from allocating new V8 objects
-  // 2. We must determine the exact output size before allocating the BackingStore
-  // 3. Once we know the size, we'll create a second ValueView to do the actual conversion
-  size_t utf8_length = 0;
-  bool isValidUtf16 = true;
-  // For invalid UTF-16 strings (with unpaired surrogates), we need to fix them to well-formed
-  // UTF-16 before calculating the UTF-8 length. We store the fixed version here so it can be
-  // reused in the second pass, avoiding the need to fix it twice.
-  kj::Array<char16_t> wellFormed;
-
+  // Two-byte string path
   {
+    // Note that ValueView flattens the string, if it's not already flattened
     v8::String::ValueView view(js.v8Isolate, str);
-    // One-byte strings are handled by the fast path above
-    KJ_DASSERT(!view.is_one_byte());
-
-    auto data = reinterpret_cast<const char16_t*>(view.data16());
     // Two-byte string path. V8 uses UTF-16LE encoding internally for strings with code points
     // > U+00FF. Check if the UTF-16 is valid (no unpaired surrogates) to determine the path.
-    isValidUtf16 = simdutf::validate_utf16le(data, view.length());
+    auto data = reinterpret_cast<const char16_t*>(view.data16());
+    bool isValidUtf16 = simdutf::validate_utf16le(data, view.length());
 
     if (isValidUtf16) {
-      // Common case: valid UTF-16, calculate UTF-8 length directly
+      // Common case: valid UTF-16, convert directly to UTF-8
       utf8_length = simdutf::utf8_length_from_utf16le(data, view.length());
+      backingStore = v8::ArrayBuffer::NewBackingStore(
+          js.v8Isolate, utf8_length, v8::BackingStoreInitializationMode::kUninitialized);
+      [[maybe_unused]] auto written = simdutf::convert_utf16le_to_utf8(
+          data, view.length(), reinterpret_cast<char*>(backingStore->Data()));
+      KJ_DASSERT(written == utf8_length);
     } else {
       // Rare case: Invalid UTF-16 with unpaired surrogates. Per the Encoding Standard,
       // unpaired surrogates must be replaced with U+FFFD (replacement character).
       // U+FFFD is 3 bytes in UTF-8, which means the UTF-8 length will differ from what
       // we'd calculate from the invalid UTF-16. We must fix the UTF-16 first, then
       // calculate the UTF-8 length from the well-formed version to get the correct size.
-      wellFormed = kj::heapArray<char16_t>(view.length());
+      auto wellFormed = kj::heapArray<char16_t>(view.length());
       simdutf::to_well_formed_utf16le(data, view.length(), wellFormed.begin());
       utf8_length = simdutf::utf8_length_from_utf16le(wellFormed.begin(), view.length());
+      backingStore = v8::ArrayBuffer::NewBackingStore(
+          js.v8Isolate, utf8_length, v8::BackingStoreInitializationMode::kUninitialized);
+      [[maybe_unused]] auto written = simdutf::convert_utf16le_to_utf8(
+          wellFormed.begin(), wellFormed.size(), reinterpret_cast<char*>(backingStore->Data()));
+      KJ_DASSERT(written == utf8_length);
     }
   }  // ValueView destroyed here, releasing the heap lock
 
-  // Pre-allocate the jsg::BackingStore to avoid the copy overhead that would occur with
-  // BackingStore::from() in the v8 sandbox, since from() copies data when it's not already in the
-  // sandbox. By pre-allocating with alloc(), the memory is already in the sandbox and we can
-  // perform the conversion directly into it.
-  auto backing = jsg::BackingStore::alloc<v8::Uint8Array>(
-      js, utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
-
-  // Second pass: Perform the actual UTF-8 conversion.
-  // We create a new ValueView here to access the string data again, now that we have a
-  // pre-allocated output buffer. The closure ensures the ValueView is destroyed before we
-  // return the result, which is important for proper V8 heap management.
-  [&]() {
-    v8::String::ValueView view(js.v8Isolate, str);
-    // One-byte strings are handled by the fast path above
-    KJ_DASSERT(!view.is_one_byte());
-
-    size_t length = static_cast<size_t>(view.length());
-    auto* output = backing.asArrayPtr<char>().begin();
-    auto data = reinterpret_cast<const char16_t*>(view.data16());
-
-    if (isValidUtf16) {
-      // Common case: valid UTF-16LE, convert directly to UTF-8
-      [[maybe_unused]] auto written = simdutf::convert_utf16le_to_utf8(data, length, output);
-      KJ_DASSERT(written == backing.size());
-      return;
-    }
-
-    // Rare case: Invalid UTF-16LE with unpaired surrogates. We already fixed the UTF-16 to
-    // well-formed in the first pass (stored in wellFormed array), so now we just convert that
-    // fixed version to UTF-8. This reuses the wellFormed array created earlier, avoiding the
-    // need to fix the UTF-16 a second time.
-    [[maybe_unused]] auto written =
-        simdutf::convert_utf16le_to_utf8(wellFormed.begin(), wellFormed.size(), output);
-    KJ_DASSERT(written == backing.size());
-  }();  // ValueView destroyed here, releasing the heap lock
-
   // Now that ValueView is destroyed and the heap lock is released, it's safe to create V8 objects.
-  // Create the Uint8Array from the BackingStore and return it to JS.
-  return jsg::JsUint8Array(backing.createHandle(js).As<v8::Uint8Array>());
+  // Create the Uint8Array from the raw v8::BackingStore.
+  auto array =
+      v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, backingStore), 0, utf8_length);
+  return jsg::JsUint8Array(array);
 }
 
 TextEncoder::EncodeIntoResult TextEncoder::encodeInto(

From 572ffa373da8be27ec97d56b91a4293579c4a35c Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Mon, 3 Nov 2025 16:29:10 -0500
Subject: [PATCH 05/29] apply optimization to improve invalid UTF-16 handling

---
 src/workerd/api/encoding.c++ | 136 ++++++++++++++++++++++++++++++++---
 1 file changed, 127 insertions(+), 9 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 1d2fd33d27c..f3cd2bb8fbb 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -487,6 +487,129 @@ kj::Maybe<jsg::JsString> TextDecoder::decodePtr(
 // =======================================================================================
 // TextEncoder implementation
 
+namespace {
+
+constexpr inline bool isLeadSurrogate(char16_t c) {
+  return 0xD800 <= c && c < 0xDC00;
+}
+
+constexpr inline bool isTrailSurrogate(char16_t c) {
+  return 0xDC00 <= c && c <= 0xDFFF;
+}
+
+// Calculate the number of UTF-8 bytes needed for a single UTF-16 code unit
+constexpr inline size_t utf8BytesForCodeUnit(char16_t c) {
+  if (c < 0x80) return 1;
+  if (c < 0x800) return 2;
+  return 3;
+}
+
+// Calculate UTF-8 length from UTF-16 with potentially invalid surrogates.
+// Invalid surrogates are counted as U+FFFD (3 bytes in UTF-8).
+size_t utf8LengthFromInvalidUtf16(const char16_t* input, size_t length) {
+  size_t utf8Length = 0;
+  bool pendingSurrogate = false;
+
+  for (size_t i = 0; i < length; i++) {
+    char16_t c = input[i];
+
+    if (pendingSurrogate) {
+      if (isTrailSurrogate(c)) {
+        // Valid surrogate pair = 4 bytes in UTF-8
+        utf8Length += 4;
+        pendingSurrogate = false;
+      } else {
+        // Unpaired lead surrogate = U+FFFD (3 bytes)
+        utf8Length += 3;
+        if (!isLeadSurrogate(c)) {
+          utf8Length += utf8BytesForCodeUnit(c);
+          pendingSurrogate = false;
+        }
+      }
+    } else if (isLeadSurrogate(c)) {
+      pendingSurrogate = true;
+    } else {
+      if (isTrailSurrogate(c)) {
+        // Unpaired trail surrogate = U+FFFD (3 bytes)
+        utf8Length += 3;
+      } else {
+        utf8Length += utf8BytesForCodeUnit(c);
+      }
+    }
+  }
+
+  if (pendingSurrogate) {
+    utf8Length += 3;  // Trailing unpaired lead surrogate
+  }
+
+  return utf8Length;
+}
+
+// Encode a single UTF-16 code unit to UTF-8
+inline size_t encodeUtf8CodeUnit(char16_t c, char* out) {
+  if (c < 0x80) {
+    *out = static_cast<char>(c);
+    return 1;
+  } else if (c < 0x800) {
+    out[0] = static_cast<char>(0xC0 | (c >> 6));
+    out[1] = static_cast<char>(0x80 | (c & 0x3F));
+    return 2;
+  } else {
+    out[0] = static_cast<char>(0xE0 | (c >> 12));
+    out[1] = static_cast<char>(0x80 | ((c >> 6) & 0x3F));
+    out[2] = static_cast<char>(0x80 | (c & 0x3F));
+    return 3;
+  }
+}
+
+// Encode a valid surrogate pair to UTF-8
+inline void encodeSurrogatePair(char16_t lead, char16_t trail, char* out) {
+  uint32_t codepoint = 0x10000 + (((lead & 0x3FF) << 10) | (trail & 0x3FF));
+  out[0] = static_cast<char>(0xF0 | (codepoint >> 18));
+  out[1] = static_cast<char>(0x80 | ((codepoint >> 12) & 0x3F));
+  out[2] = static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
+  out[3] = static_cast<char>(0x80 | (codepoint & 0x3F));
+}
+
+// Convert UTF-16 with potentially invalid surrogates to UTF-8.
+// Invalid surrogates are replaced with U+FFFD.
+void convertInvalidUtf16ToUtf8(const char16_t* input, size_t length, char* out) {
+  size_t position = 0;
+  bool pendingSurrogate = false;
+
+  for (size_t i = 0; i < length; i++) {
+    char16_t c = input[i];
+
+    if (pendingSurrogate) {
+      if (isTrailSurrogate(c)) {
+        encodeSurrogatePair(input[i - 1], c, out + position);
+        position += 4;
+        pendingSurrogate = false;
+      } else {
+        position += encodeUtf8CodeUnit(0xFFFD, out + position);
+        if (!isLeadSurrogate(c)) {
+          position += encodeUtf8CodeUnit(c, out + position);
+          pendingSurrogate = false;
+        }
+      }
+    } else if (isLeadSurrogate(c)) {
+      pendingSurrogate = true;
+    } else {
+      if (isTrailSurrogate(c)) {
+        position += encodeUtf8CodeUnit(0xFFFD, out + position);
+      } else {
+        position += encodeUtf8CodeUnit(c, out + position);
+      }
+    }
+  }
+
+  if (pendingSurrogate) {
+    encodeUtf8CodeUnit(0xFFFD, out + position);
+  }
+}
+
+}  // namespace
+
 jsg::Ref<TextEncoder> TextEncoder::constructor(jsg::Lock& js) {
   return js.alloc<TextEncoder>();
 }
@@ -548,17 +671,12 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
     } else {
       // Rare case: Invalid UTF-16 with unpaired surrogates. Per the Encoding Standard,
       // unpaired surrogates must be replaced with U+FFFD (replacement character).
-      // U+FFFD is 3 bytes in UTF-8, which means the UTF-8 length will differ from what
-      // we'd calculate from the invalid UTF-16. We must fix the UTF-16 first, then
-      // calculate the UTF-8 length from the well-formed version to get the correct size.
-      auto wellFormed = kj::heapArray<char16_t>(view.length());
-      simdutf::to_well_formed_utf16le(data, view.length(), wellFormed.begin());
-      utf8_length = simdutf::utf8_length_from_utf16le(wellFormed.begin(), view.length());
+      // Use custom conversion that handles invalid surrogates without creating an
+      // intermediate well-formed UTF-16 buffer.
+      utf8_length = utf8LengthFromInvalidUtf16(data, view.length());
       backingStore = v8::ArrayBuffer::NewBackingStore(
           js.v8Isolate, utf8_length, v8::BackingStoreInitializationMode::kUninitialized);
-      [[maybe_unused]] auto written = simdutf::convert_utf16le_to_utf8(
-          wellFormed.begin(), wellFormed.size(), reinterpret_cast<char*>(backingStore->Data()));
-      KJ_DASSERT(written == utf8_length);
+      convertInvalidUtf16ToUtf8(data, view.length(), reinterpret_cast<char*>(backingStore->Data()));
     }
   }  // ValueView destroyed here, releasing the heap lock
 

From 6ce652b3d17b5d981b2c0ae5ff68d908d0f19e93 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Mon, 3 Nov 2025 16:39:04 -0500
Subject: [PATCH 06/29] add missing simdutf dependency

---
 src/workerd/api/BUILD.bazel | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/workerd/api/BUILD.bazel b/src/workerd/api/BUILD.bazel
index 04df1378abe..0254d0994b9 100644
--- a/src/workerd/api/BUILD.bazel
+++ b/src/workerd/api/BUILD.bazel
@@ -436,6 +436,7 @@ wd_cc_library(
     implementation_deps = [
         "//src/workerd/io:features",
         "//src/workerd/util:strings",
+        "@simdutf",
     ],
     visibility = ["//visibility:public"],
     deps = [

From d980b423ac8e532e110f543d4e94c53488111e9f Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Tue, 4 Nov 2025 15:21:13 -0500
Subject: [PATCH 07/29] apply review recommendations

---
 src/workerd/api/encoding.c++ | 63 +++++++++++++++++++-----------------
 src/workerd/jsg/jsvalue.c++  |  4 ---
 src/workerd/jsg/jsvalue.h    |  2 --
 3 files changed, 33 insertions(+), 36 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index f3cd2bb8fbb..147edbc60b1 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -506,11 +506,11 @@ constexpr inline size_t utf8BytesForCodeUnit(char16_t c) {
 
 // Calculate UTF-8 length from UTF-16 with potentially invalid surrogates.
 // Invalid surrogates are counted as U+FFFD (3 bytes in UTF-8).
-size_t utf8LengthFromInvalidUtf16(const char16_t* input, size_t length) {
+size_t utf8LengthFromInvalidUtf16(kj::ArrayPtr<const char16_t> input) {
   size_t utf8Length = 0;
   bool pendingSurrogate = false;
 
-  for (size_t i = 0; i < length; i++) {
+  for (size_t i = 0; i < input.size(); i++) {
     char16_t c = input[i];
 
     if (pendingSurrogate) {
@@ -546,9 +546,9 @@ size_t utf8LengthFromInvalidUtf16(const char16_t* input, size_t length) {
 }
 
 // Encode a single UTF-16 code unit to UTF-8
-inline size_t encodeUtf8CodeUnit(char16_t c, char* out) {
+inline size_t encodeUtf8CodeUnit(char16_t c, kj::ArrayPtr<char> out) {
   if (c < 0x80) {
-    *out = static_cast<char>(c);
+    out[0] = static_cast<char>(c);
     return 1;
   } else if (c < 0x800) {
     out[0] = static_cast<char>(0xC0 | (c >> 6));
@@ -563,7 +563,7 @@ inline size_t encodeUtf8CodeUnit(char16_t c, char* out) {
 }
 
 // Encode a valid surrogate pair to UTF-8
-inline void encodeSurrogatePair(char16_t lead, char16_t trail, char* out) {
+inline void encodeSurrogatePair(char16_t lead, char16_t trail, kj::ArrayPtr<char> out) {
   uint32_t codepoint = 0x10000 + (((lead & 0x3FF) << 10) | (trail & 0x3FF));
   out[0] = static_cast<char>(0xF0 | (codepoint >> 18));
   out[1] = static_cast<char>(0x80 | ((codepoint >> 12) & 0x3F));
@@ -573,22 +573,22 @@ inline void encodeSurrogatePair(char16_t lead, char16_t trail, char* out) {
 
 // Convert UTF-16 with potentially invalid surrogates to UTF-8.
 // Invalid surrogates are replaced with U+FFFD.
-void convertInvalidUtf16ToUtf8(const char16_t* input, size_t length, char* out) {
+void convertInvalidUtf16ToUtf8(kj::ArrayPtr<const char16_t> input, kj::ArrayPtr<char> out) {
   size_t position = 0;
   bool pendingSurrogate = false;
 
-  for (size_t i = 0; i < length; i++) {
+  for (size_t i = 0; i < input.size(); i++) {
     char16_t c = input[i];
 
     if (pendingSurrogate) {
       if (isTrailSurrogate(c)) {
-        encodeSurrogatePair(input[i - 1], c, out + position);
+        encodeSurrogatePair(input[i - 1], c, out.slice(position, out.size()));
         position += 4;
         pendingSurrogate = false;
       } else {
-        position += encodeUtf8CodeUnit(0xFFFD, out + position);
+        position += encodeUtf8CodeUnit(0xFFFD, out.slice(position, out.size()));
         if (!isLeadSurrogate(c)) {
-          position += encodeUtf8CodeUnit(c, out + position);
+          position += encodeUtf8CodeUnit(c, out.slice(position, out.size()));
           pendingSurrogate = false;
         }
       }
@@ -596,15 +596,15 @@ void convertInvalidUtf16ToUtf8(const char16_t* input, size_t length, char* out)
       pendingSurrogate = true;
     } else {
       if (isTrailSurrogate(c)) {
-        position += encodeUtf8CodeUnit(0xFFFD, out + position);
+        position += encodeUtf8CodeUnit(0xFFFD, out.slice(position, out.size()));
       } else {
-        position += encodeUtf8CodeUnit(c, out + position);
+        position += encodeUtf8CodeUnit(c, out.slice(position, out.size()));
       }
     }
   }
 
   if (pendingSurrogate) {
-    encodeUtf8CodeUnit(0xFFFD, out + position);
+    encodeUtf8CodeUnit(0xFFFD, out.slice(position, out.size()));
   }
 }
 
@@ -624,12 +624,12 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
     auto length = str.length(js);
     // Allocate buffer for Latin-1. Use v8::ArrayBuffer::NewBackingStore to avoid creating
     // JS objects during conversion.
-    backingStore = v8::ArrayBuffer::NewBackingStore(
-        js.v8Isolate, length, v8::BackingStoreInitializationMode::kUninitialized);
+    backingStore = js.allocBackingStore(length, jsg::Lock::AllocOption::UNINITIALIZED);
     auto backingData = reinterpret_cast<kj::byte*>(backingStore->Data());
 
-    str.writeOneByte(js, kj::ArrayPtr<kj::byte>(backingData, length),
-        jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
+    [[maybe_unused]] auto writeResult = str.writeInto(js, kj::arrayPtr(backingData, length));
+    KJ_DASSERT(
+        writeResult.written == length, "writeInto must completely overwrite the backing buffer");
 
     utf8_length =
         simdutf::utf8_length_from_latin1(reinterpret_cast<const char*>(backingData), length);
@@ -640,11 +640,14 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
       return jsg::JsUint8Array(array);
     }
 
+    KJ_DASSERT(utf8_length > length);
+
     // Need to convert Latin-1 to UTF-8
-    std::shared_ptr<v8::BackingStore> backingStore2 = v8::ArrayBuffer::NewBackingStore(
-        js.v8Isolate, utf8_length, v8::BackingStoreInitializationMode::kUninitialized);
-    auto written = simdutf::convert_latin1_to_utf8(reinterpret_cast<const char*>(backingData),
-        length, reinterpret_cast<char*>(backingStore2->Data()));
+    std::shared_ptr<v8::BackingStore> backingStore2 =
+        js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
+    [[maybe_unused]] auto written =
+        simdutf::convert_latin1_to_utf8(reinterpret_cast<const char*>(backingData), length,
+            reinterpret_cast<char*>(backingStore2->Data()));
     KJ_DASSERT(utf8_length == written);
     auto array =
         v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, backingStore2), 0, utf8_length);
@@ -658,25 +661,25 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
     // Two-byte string path. V8 uses UTF-16LE encoding internally for strings with code points
     // > U+00FF. Check if the UTF-16 is valid (no unpaired surrogates) to determine the path.
     auto data = reinterpret_cast<const char16_t*>(view.data16());
-    bool isValidUtf16 = simdutf::validate_utf16le(data, view.length());
 
-    if (isValidUtf16) {
+    if (simdutf::validate_utf16le(data, view.length())) {
       // Common case: valid UTF-16, convert directly to UTF-8
       utf8_length = simdutf::utf8_length_from_utf16le(data, view.length());
-      backingStore = v8::ArrayBuffer::NewBackingStore(
-          js.v8Isolate, utf8_length, v8::BackingStoreInitializationMode::kUninitialized);
+      backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
       [[maybe_unused]] auto written = simdutf::convert_utf16le_to_utf8(
           data, view.length(), reinterpret_cast<char*>(backingStore->Data()));
       KJ_DASSERT(written == utf8_length);
     } else {
-      // Rare case: Invalid UTF-16 with unpaired surrogates. Per the Encoding Standard,
+      // Invalid UTF-16 with unpaired surrogates. Per the Encoding Standard,
       // unpaired surrogates must be replaced with U+FFFD (replacement character).
       // Use custom conversion that handles invalid surrogates without creating an
       // intermediate well-formed UTF-16 buffer.
-      utf8_length = utf8LengthFromInvalidUtf16(data, view.length());
-      backingStore = v8::ArrayBuffer::NewBackingStore(
-          js.v8Isolate, utf8_length, v8::BackingStoreInitializationMode::kUninitialized);
-      convertInvalidUtf16ToUtf8(data, view.length(), reinterpret_cast<char*>(backingStore->Data()));
+      auto inputArray = kj::ArrayPtr<const char16_t>(data, view.length());
+      utf8_length = utf8LengthFromInvalidUtf16(inputArray);
+      backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
+      auto outputArray =
+          kj::ArrayPtr<char>(reinterpret_cast<char*>(backingStore->Data()), utf8_length);
+      convertInvalidUtf16ToUtf8(inputArray, outputArray);
     }
   }  // ValueView destroyed here, releasing the heap lock
 
diff --git a/src/workerd/jsg/jsvalue.c++ b/src/workerd/jsg/jsvalue.c++
index 4eb3298fec7..20a9614ed93 100644
--- a/src/workerd/jsg/jsvalue.c++
+++ b/src/workerd/jsg/jsvalue.c++
@@ -377,10 +377,6 @@ JsString JsString::internalize(Lock& js) const {
   return JsString(inner->InternalizeString(js.v8Isolate));
 }
 
-void JsString::writeOneByte(Lock& js, kj::ArrayPtr<kj::byte> buffer, WriteFlags flags) {
-  inner->WriteOneByteV2(js.v8Isolate, 0, buffer.size(), buffer.begin(), flags);
-}
-
 JsString::WriteIntoStatus JsString::writeInto(
     Lock& js, kj::ArrayPtr<char> buffer, WriteFlags options) const {
   WriteIntoStatus result = {0, 0};
diff --git a/src/workerd/jsg/jsvalue.h b/src/workerd/jsg/jsvalue.h
index f487f713881..2c9c1f55fde 100644
--- a/src/workerd/jsg/jsvalue.h
+++ b/src/workerd/jsg/jsvalue.h
@@ -312,8 +312,6 @@ class JsString final: public JsBase<v8::String, JsString> {
   WriteIntoStatus writeInto(
       Lock& js, kj::ArrayPtr<uint16_t> buffer, WriteFlags options = WriteFlags::NONE) const;
 
-  void writeOneByte(Lock& js, kj::ArrayPtr<kj::byte> buffer, WriteFlags flags = WriteFlags::NONE);
-
   using JsBase<v8::String, JsString>::JsBase;
 };
 

From c4395650cfb30b1ebba986c3cc819280e1e5885f Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Wed, 12 Nov 2025 12:21:18 -0500
Subject: [PATCH 08/29] optimize encodeInto

---
 src/workerd/api/encoding.c++ | 139 ++++++++++++++++++++++++++++++++++-
 1 file changed, 135 insertions(+), 4 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 147edbc60b1..5dd02f67148 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -13,6 +13,9 @@
 
 #include <unicode/ucnv.h>
 #include <unicode/utf8.h>
+#include <v8.h>
+
+#include <kj/string.h>
 
 #include <algorithm>
 
@@ -690,13 +693,141 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
   return jsg::JsUint8Array(array);
 }
 
+namespace {
+
+// Binary search for the largest Latin-1 prefix whose UTF-8 encoding fits in bufferSize bytes.
+// Latin-1 bytes 0x00-0x7F encode as 1 UTF-8 byte, 0x80-0xFF encode as 2 UTF-8 bytes.
+size_t findBestFitLatin1(const char* data, size_t length, size_t bufferSize) {
+  // Invariant: the first `bestFit` characters always fit (the empty prefix trivially does) and
+  // no prefix longer than `upper` characters fits.
+  size_t bestFit = 0;
+  size_t upper = length;
+
+  while (bestFit < upper) {
+    // Round the midpoint up so that mid > bestFit. This guarantees progress on every
+    // iteration, lets a one-character prefix be probed, and keeps `mid - 1` from underflowing.
+    size_t mid = bestFit + (upper - bestFit + 1) / 2;
+
+    if (simdutf::utf8_length_from_latin1(data, mid) <= bufferSize) {
+      bestFit = mid;
+    } else {
+      upper = mid - 1;
+    }
+  }
+
+  return bestFit;
+}
+
+// Binary search for the largest UTF-16 prefix whose UTF-8 encoding fits in bufferSize bytes.
+// Ensures surrogate pairs (0xD800-0xDFFF) are never split across the boundary.
+size_t findBestFitUtf16(const char16_t* data, size_t length, size_t bufferSize) {
+  // Largest prefix length <= n that does not end between the two halves of a surrogate pair.
+  // The input is expected to be valid UTF-16, so a high surrogate is always followed by its low
+  // surrogate; a prefix ending right after a high surrogate therefore has to drop it.
+  auto alignToBoundary = [&](size_t n) -> size_t {
+    if (n > 0 && n < length && data[n - 1] >= 0xD800 && data[n - 1] < 0xDC00) {
+      return n - 1;
+    }
+    return n;
+  };
+
+  // Invariant: probing `lower` code units succeeds (the empty prefix trivially fits) and no
+  // probe above `upper` succeeds. A probe at n measures the aligned prefix, which is monotonic
+  // in n, so the search stays well-defined even when a probe lands inside a surrogate pair.
+  size_t lower = 0;
+  size_t upper = length;
+  size_t bestFit = 0;
+
+  while (lower < upper) {
+    // Round the midpoint up so that mid > lower. This guarantees progress on every iteration
+    // (the bounds are driven by `mid`, never by the aligned value) and keeps `mid - 1` safe.
+    size_t mid = lower + (upper - lower + 1) / 2;
+    size_t aligned = alignToBoundary(mid);
+
+    if (simdutf::utf8_length_from_utf16(data, aligned) <= bufferSize) {
+      bestFit = aligned;
+      lower = mid;
+    } else {
+      upper = mid - 1;
+    }
+  }
+
+  return bestFit;
+}
+
+}  // namespace
+
 TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
     jsg::Lock& js, jsg::JsString input, jsg::JsUint8Array buffer) {
-  auto result = input.writeInto(
-      js, buffer.asArrayPtr<char>(), jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
+  auto outputBuf = buffer.asArrayPtr<char>();
+  size_t bufferSize = outputBuf.size();
+
+  // ValueView provides zero-copy access to V8's internal string representation.
+  // V8 stores strings as either Latin-1 (one byte per character) or UTF-16.
+  v8::String::ValueView view(js.v8Isolate, input);
+  uint32_t length = view.length();
+
+  if (view.is_one_byte()) {
+    // Latin-1 path: characters 0x00-0x7F encode as 1 UTF-8 byte, 0x80-0xFF as 2 bytes
+    auto data = reinterpret_cast<const char*>(view.data8());
+    size_t utf8Length = simdutf::utf8_length_from_latin1(data, length);
+
+    if (utf8Length <= bufferSize) {
+      size_t written = simdutf::convert_latin1_to_utf8(data, length, outputBuf.begin());
+      return TextEncoder::EncodeIntoResult{
+        .read = static_cast<int>(length),
+        .written = static_cast<int>(written),
+      };
+    }
+
+    // Buffer too small - find how many characters fit
+    size_t bestFit = findBestFitLatin1(data, length, bufferSize);
+    size_t written = simdutf::convert_latin1_to_utf8(data, bestFit, outputBuf.begin());
+    return TextEncoder::EncodeIntoResult{
+      .read = static_cast<int>(bestFit),
+      .written = static_cast<int>(written),
+    };
+  }
+
+  // UTF-16 path: check for invalid surrogate pairs first
+  auto data = reinterpret_cast<const char16_t*>(view.data16());
+
+  if (simdutf::validate_utf16(data, length)) {
+    size_t utf8Length = simdutf::utf8_length_from_utf16(data, length);
+    if (utf8Length <= bufferSize) {
+      size_t written = simdutf::convert_utf16_to_utf8(data, length, outputBuf.begin());
+      return TextEncoder::EncodeIntoResult{
+        .read = static_cast<int>(length),
+        .written = static_cast<int>(written),
+      };
+    }
+
+    size_t bestFit = findBestFitUtf16(data, length, bufferSize);
+    size_t written = simdutf::convert_utf16_to_utf8(data, bestFit, outputBuf.begin());
+    return TextEncoder::EncodeIntoResult{
+      .read = static_cast<int>(bestFit),
+      .written = static_cast<int>(written),
+    };
+  }
+
+  // Invalid UTF-16: normalize unpaired surrogates to U+FFFD before converting
+  kj::SmallArray<char16_t, 4096> tempBuf(length);
+  simdutf::to_well_formed_utf16(data, length, tempBuf.begin());
+
+  size_t utf8Length = simdutf::utf8_length_from_utf16(tempBuf.begin(), length);
+  if (utf8Length <= bufferSize) {
+    size_t written = simdutf::convert_utf16_to_utf8(tempBuf.begin(), length, outputBuf.begin());
+    return TextEncoder::EncodeIntoResult{
+      .read = static_cast<int>(length),
+      .written = static_cast<int>(written),
+    };
+  }
+
+  size_t bestFit = findBestFitUtf16(tempBuf.begin(), length, bufferSize);
+  size_t written = simdutf::convert_utf16_to_utf8(tempBuf.begin(), bestFit, outputBuf.begin());
   return TextEncoder::EncodeIntoResult{
-    .read = static_cast<int>(result.read),
-    .written = static_cast<int>(result.written),
+    .read = static_cast<int>(bestFit),
+    .written = static_cast<int>(written),
   };
 }
 

From abef75cfd39c668a60788f96469a410de30b9c43 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Wed, 12 Nov 2025 14:04:09 -0500
Subject: [PATCH 09/29] optimize ASCII paths

---
 src/workerd/api/encoding.c++ | 87 +++++++++++++++++++++++++-----------
 1 file changed, 60 insertions(+), 27 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 5dd02f67148..c4848883722 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -576,7 +576,8 @@ inline void encodeSurrogatePair(char16_t lead, char16_t trail, kj::ArrayPtr<char
 
 // Convert UTF-16 with potentially invalid surrogates to UTF-8.
 // Invalid surrogates are replaced with U+FFFD.
-void convertInvalidUtf16ToUtf8(kj::ArrayPtr<const char16_t> input, kj::ArrayPtr<char> out) {
+// Returns the number of UTF-8 bytes written.
+size_t convertInvalidUtf16ToUtf8(kj::ArrayPtr<const char16_t> input, kj::ArrayPtr<char> out) {
   size_t position = 0;
   bool pendingSurrogate = false;
 
@@ -607,8 +608,10 @@ void convertInvalidUtf16ToUtf8(kj::ArrayPtr<const char16_t> input, kj::ArrayPtr<
   }
 
   if (pendingSurrogate) {
-    encodeUtf8CodeUnit(0xFFFD, out.slice(position, out.size()));
+    position += encodeUtf8CodeUnit(0xFFFD, out.slice(position, out.size()));
   }
+
+  return position;
 }
 
 }  // namespace
@@ -755,6 +758,43 @@ size_t findBestFitUtf16(const char16_t* data, size_t length, size_t bufferSize)
   return bestFit;
 }
 
+// Binary search for the largest prefix of UTF-16 with invalid surrogates that fits as UTF-8.
+// Surrogate pairs are never split, and unpaired surrogates are counted as U+FFFD (3 bytes).
+size_t findBestFitInvalidUtf16(const char16_t* data, size_t length, size_t bufferSize) {
+  // Largest prefix length <= n that does not end between the two halves of a surrogate pair.
+  // Only a lead surrogate that is really followed by a trail surrogate is dropped: an unpaired
+  // lead surrogate becomes U+FFFD on its own, so a prefix may legitimately end right after it.
+  auto alignToBoundary = [&](size_t n) -> size_t {
+    if (n > 0 && n < length && isLeadSurrogate(data[n - 1]) && isTrailSurrogate(data[n])) {
+      return n - 1;
+    }
+    return n;
+  };
+
+  // Invariant: probing `lower` code units succeeds (the empty prefix trivially fits) and no
+  // probe above `upper` succeeds. A probe at n measures the aligned prefix, which is monotonic
+  // in n, so the search stays well-defined even when a probe lands inside a surrogate pair.
+  size_t lower = 0;
+  size_t upper = length;
+  size_t bestFit = 0;
+
+  while (lower < upper) {
+    // Round the midpoint up so that mid > lower. This guarantees progress on every iteration
+    // (the bounds are driven by `mid`, never by the aligned value) and keeps `mid - 1` safe.
+    size_t mid = lower + (upper - lower + 1) / 2;
+    size_t aligned = alignToBoundary(mid);
+
+    if (utf8LengthFromInvalidUtf16(kj::arrayPtr(data, aligned)) <= bufferSize) {
+      bestFit = aligned;
+      lower = mid;
+    } else {
+      upper = mid - 1;
+    }
+  }
+
+  return bestFit;
+}
+
 }  // namespace
 
 TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
@@ -762,39 +802,36 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
   auto outputBuf = buffer.asArrayPtr<char>();
   size_t bufferSize = outputBuf.size();
 
-  // ValueView provides zero-copy access to V8's internal string representation.
-  // V8 stores strings as either Latin-1 (one byte per character) or UTF-16.
   v8::String::ValueView view(js.v8Isolate, input);
   uint32_t length = view.length();
 
   if (view.is_one_byte()) {
     // Latin-1 path: characters 0x00-0x7F encode as 1 UTF-8 byte, 0x80-0xFF as 2 bytes
     auto data = reinterpret_cast<const char*>(view.data8());
-    size_t utf8Length = simdutf::utf8_length_from_latin1(data, length);
 
-    if (utf8Length <= bufferSize) {
-      size_t written = simdutf::convert_latin1_to_utf8(data, length, outputBuf.begin());
-      return TextEncoder::EncodeIntoResult{
-        .read = static_cast<int>(length),
-        .written = static_cast<int>(written),
-      };
+    // Fast path: avoid length calculation when we can prove the string fits.
+    // Check worst-case (2x), ASCII (1:1), or calculate exact length as fallback.
+    size_t read = length;
+    if (!(length * 2 <= bufferSize ||
+            (length <= bufferSize && simdutf::validate_ascii(data, length)) ||
+            simdutf::utf8_length_from_latin1(data, length) <= bufferSize)) {
+      // Binary search to find how many characters fit
+      read = findBestFitLatin1(data, length, bufferSize);
     }
 
-    // Buffer too small - find how many characters fit
-    size_t bestFit = findBestFitLatin1(data, length, bufferSize);
-    size_t written = simdutf::convert_latin1_to_utf8(data, bestFit, outputBuf.begin());
+    size_t written = simdutf::convert_latin1_to_utf8(data, read, outputBuf.begin());
     return TextEncoder::EncodeIntoResult{
-      .read = static_cast<int>(bestFit),
+      .read = static_cast<int>(read),
       .written = static_cast<int>(written),
     };
   }
 
-  // UTF-16 path: check for invalid surrogate pairs first
+  // UTF-16 path: validate to ensure spec compliance (replace invalid surrogates with U+FFFD)
   auto data = reinterpret_cast<const char16_t*>(view.data16());
 
   if (simdutf::validate_utf16(data, length)) {
-    size_t utf8Length = simdutf::utf8_length_from_utf16(data, length);
-    if (utf8Length <= bufferSize) {
+    // Valid UTF-16: use fast SIMD conversion
+    if (simdutf::utf8_length_from_utf16(data, length) <= bufferSize) {
       size_t written = simdutf::convert_utf16_to_utf8(data, length, outputBuf.begin());
       return TextEncoder::EncodeIntoResult{
         .read = static_cast<int>(length),
@@ -810,21 +847,17 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
     };
   }
 
-  // Invalid UTF-16: normalize unpaired surrogates to U+FFFD before converting
-  kj::SmallArray<char16_t, 4096> tempBuf(length);
-  simdutf::to_well_formed_utf16(data, length, tempBuf.begin());
-
-  size_t utf8Length = simdutf::utf8_length_from_utf16(tempBuf.begin(), length);
-  if (utf8Length <= bufferSize) {
-    size_t written = simdutf::convert_utf16_to_utf8(tempBuf.begin(), length, outputBuf.begin());
+  // Invalid UTF-16: convert directly to UTF-8, replacing unpaired surrogates with U+FFFD
+  if (utf8LengthFromInvalidUtf16(kj::arrayPtr(data, length)) <= bufferSize) {
+    size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, length), outputBuf);
     return TextEncoder::EncodeIntoResult{
       .read = static_cast<int>(length),
       .written = static_cast<int>(written),
     };
   }
 
-  size_t bestFit = findBestFitUtf16(tempBuf.begin(), length, bufferSize);
-  size_t written = simdutf::convert_utf16_to_utf8(tempBuf.begin(), bestFit, outputBuf.begin());
+  size_t bestFit = findBestFitInvalidUtf16(data, length, bufferSize);
+  size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, bestFit), outputBuf);
   return TextEncoder::EncodeIntoResult{
     .read = static_cast<int>(bestFit),
     .written = static_cast<int>(written),

From de0de3878e99f60d7e7bb1b403ca4ca243ec493c Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Wed, 12 Nov 2025 14:55:02 -0500
Subject: [PATCH 10/29] add fast path that avoids length calculation

---
 src/workerd/api/encoding.c++ | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index c4848883722..16117342771 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -831,7 +831,8 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
 
   if (simdutf::validate_utf16(data, length)) {
     // Valid UTF-16: use fast SIMD conversion
-    if (simdutf::utf8_length_from_utf16(data, length) <= bufferSize) {
+    // Fast path: skip length calculation if worst-case UTF-8 size fits (3 bytes per code unit)
+    if (length * 3 <= bufferSize || simdutf::utf8_length_from_utf16(data, length) <= bufferSize) {
       size_t written = simdutf::convert_utf16_to_utf8(data, length, outputBuf.begin());
       return TextEncoder::EncodeIntoResult{
         .read = static_cast<int>(length),
@@ -848,7 +849,9 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
   }
 
   // Invalid UTF-16: convert directly to UTF-8, replacing unpaired surrogates with U+FFFD
-  if (utf8LengthFromInvalidUtf16(kj::arrayPtr(data, length)) <= bufferSize) {
+  // Fast path: skip length calculation if worst-case UTF-8 size fits (3 bytes per code unit)
+  if (length * 3 <= bufferSize ||
+      utf8LengthFromInvalidUtf16(kj::arrayPtr(data, length)) <= bufferSize) {
     size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, length), outputBuf);
     return TextEncoder::EncodeIntoResult{
       .read = static_cast<int>(length),

From 06b03497a0db1d24949c0ba7fbc8d55371407236 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Wed, 12 Nov 2025 15:12:53 -0500
Subject: [PATCH 11/29] make the code reviewable

---
 src/workerd/api/encoding.c++ | 152 ++++++++++++++++++-----------------
 1 file changed, 77 insertions(+), 75 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 16117342771..2e862c982e1 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -500,51 +500,50 @@ constexpr inline bool isTrailSurrogate(char16_t c) {
   return 0xDC00 <= c && c <= 0xDFFF;
 }
 
-// Calculate the number of UTF-8 bytes needed for a single UTF-16 code unit
-constexpr inline size_t utf8BytesForCodeUnit(char16_t c) {
-  if (c < 0x80) return 1;
-  if (c < 0x800) return 2;
-  return 3;
-}
-
 // Calculate UTF-8 length from UTF-16 with potentially invalid surrogates.
 // Invalid surrogates are counted as U+FFFD (3 bytes in UTF-8).
+// Uses SIMD for valid portions and falls back to scalar for invalid surrogates.
 size_t utf8LengthFromInvalidUtf16(kj::ArrayPtr<const char16_t> input) {
+  size_t inputPos = 0;
   size_t utf8Length = 0;
-  bool pendingSurrogate = false;
 
-  for (size_t i = 0; i < input.size(); i++) {
-    char16_t c = input[i];
+  while (inputPos < input.size()) {
+    // Find the next invalid surrogate using SIMD validation
+    auto result =
+        simdutf::validate_utf16_with_errors(input.begin() + inputPos, input.size() - inputPos);
+
+    if (result.error == simdutf::error_code::SUCCESS) {
+      // Remaining input is valid - calculate length with SIMD
+      utf8Length +=
+          simdutf::utf8_length_from_utf16(input.begin() + inputPos, input.size() - inputPos);
+      break;
+    }
+
+    if (result.error == simdutf::error_code::SURROGATE) {
+      // Calculate length for the valid portion before the error with SIMD
+      if (result.count > 0) {
+        utf8Length += simdutf::utf8_length_from_utf16(input.begin() + inputPos, result.count);
+        inputPos += result.count;
+      }
 
-    if (pendingSurrogate) {
-      if (isTrailSurrogate(c)) {
+      // Handle the invalid surrogate at inputPos
+      char16_t c = input[inputPos];
+      if (isLeadSurrogate(c) && inputPos + 1 < input.size() &&
+          isTrailSurrogate(input[inputPos + 1])) {
         // Valid surrogate pair = 4 bytes in UTF-8
         utf8Length += 4;
-        pendingSurrogate = false;
+        inputPos += 2;
       } else {
-        // Unpaired lead surrogate = U+FFFD (3 bytes)
+        // Invalid surrogate = U+FFFD (3 bytes)
         utf8Length += 3;
-        if (!isLeadSurrogate(c)) {
-          utf8Length += utf8BytesForCodeUnit(c);
-          pendingSurrogate = false;
-        }
+        inputPos++;
       }
-    } else if (isLeadSurrogate(c)) {
-      pendingSurrogate = true;
     } else {
-      if (isTrailSurrogate(c)) {
-        // Unpaired trail surrogate = U+FFFD (3 bytes)
-        utf8Length += 3;
-      } else {
-        utf8Length += utf8BytesForCodeUnit(c);
-      }
+      // Unexpected error - fall back to scalar calculation for safety
+      break;
     }
   }
 
-  if (pendingSurrogate) {
-    utf8Length += 3;  // Trailing unpaired lead surrogate
-  }
-
   return utf8Length;
 }
 
@@ -577,41 +576,51 @@ inline void encodeSurrogatePair(char16_t lead, char16_t trail, kj::ArrayPtr<char
 // Convert UTF-16 with potentially invalid surrogates to UTF-8.
 // Invalid surrogates are replaced with U+FFFD.
 // Returns the number of UTF-8 bytes written.
+// Uses SIMD for valid portions and falls back to scalar for invalid surrogates.
 size_t convertInvalidUtf16ToUtf8(kj::ArrayPtr<const char16_t> input, kj::ArrayPtr<char> out) {
-  size_t position = 0;
-  bool pendingSurrogate = false;
+  size_t inputPos = 0;
+  size_t outputPos = 0;
+
+  while (inputPos < input.size()) {
+    // Find the next invalid surrogate using SIMD validation
+    auto result =
+        simdutf::validate_utf16_with_errors(input.begin() + inputPos, input.size() - inputPos);
+
+    if (result.error == simdutf::error_code::SUCCESS) {
+      // Remaining input is valid - convert it all with SIMD
+      outputPos += simdutf::convert_utf16_to_utf8(
+          input.begin() + inputPos, input.size() - inputPos, out.begin() + outputPos);
+      break;
+    }
 
-  for (size_t i = 0; i < input.size(); i++) {
-    char16_t c = input[i];
+    if (result.error == simdutf::error_code::SURROGATE) {
+      // Convert the valid portion before the error with SIMD
+      if (result.count > 0) {
+        outputPos += simdutf::convert_valid_utf16_to_utf8(
+            input.begin() + inputPos, result.count, out.begin() + outputPos);
+        inputPos += result.count;
+      }
 
-    if (pendingSurrogate) {
-      if (isTrailSurrogate(c)) {
-        encodeSurrogatePair(input[i - 1], c, out.slice(position, out.size()));
-        position += 4;
-        pendingSurrogate = false;
+      // Handle the invalid surrogate at inputPos
+      char16_t c = input[inputPos];
+      if (isLeadSurrogate(c) && inputPos + 1 < input.size() &&
+          isTrailSurrogate(input[inputPos + 1])) {
+        // Valid surrogate pair - encode it (this shouldn't happen if SURROGATE error)
+        encodeSurrogatePair(c, input[inputPos + 1], out.slice(outputPos, out.size()));
+        outputPos += 4;
+        inputPos += 2;
       } else {
-        position += encodeUtf8CodeUnit(0xFFFD, out.slice(position, out.size()));
-        if (!isLeadSurrogate(c)) {
-          position += encodeUtf8CodeUnit(c, out.slice(position, out.size()));
-          pendingSurrogate = false;
-        }
+        // Invalid surrogate - replace with U+FFFD (3 bytes)
+        outputPos += encodeUtf8CodeUnit(0xFFFD, out.slice(outputPos, out.size()));
+        inputPos++;
       }
-    } else if (isLeadSurrogate(c)) {
-      pendingSurrogate = true;
     } else {
-      if (isTrailSurrogate(c)) {
-        position += encodeUtf8CodeUnit(0xFFFD, out.slice(position, out.size()));
-      } else {
-        position += encodeUtf8CodeUnit(c, out.slice(position, out.size()));
-      }
+      // Unexpected error - fall back to scalar processing for safety
+      break;
     }
   }
 
-  if (pendingSurrogate) {
-    position += encodeUtf8CodeUnit(0xFFFD, out.slice(position, out.size()));
-  }
-
-  return position;
+  return outputPos;
 }
 
 }  // namespace
@@ -832,37 +841,30 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
   if (simdutf::validate_utf16(data, length)) {
     // Valid UTF-16: use fast SIMD conversion
     // Fast path: skip length calculation if worst-case UTF-8 size fits (3 bytes per code unit)
-    if (length * 3 <= bufferSize || simdutf::utf8_length_from_utf16(data, length) <= bufferSize) {
-      size_t written = simdutf::convert_utf16_to_utf8(data, length, outputBuf.begin());
-      return TextEncoder::EncodeIntoResult{
-        .read = static_cast<int>(length),
-        .written = static_cast<int>(written),
-      };
+    size_t read = length;
+    if (!(length * 3 <= bufferSize ||
+            simdutf::utf8_length_from_utf16(data, length) <= bufferSize)) {
+      read = findBestFitUtf16(data, length, bufferSize);
     }
 
-    size_t bestFit = findBestFitUtf16(data, length, bufferSize);
-    size_t written = simdutf::convert_utf16_to_utf8(data, bestFit, outputBuf.begin());
+    size_t written = simdutf::convert_utf16_to_utf8(data, read, outputBuf.begin());
     return TextEncoder::EncodeIntoResult{
-      .read = static_cast<int>(bestFit),
+      .read = static_cast<int>(read),
       .written = static_cast<int>(written),
     };
   }
 
   // Invalid UTF-16: convert directly to UTF-8, replacing unpaired surrogates with U+FFFD
   // Fast path: skip length calculation if worst-case UTF-8 size fits (3 bytes per code unit)
-  if (length * 3 <= bufferSize ||
-      utf8LengthFromInvalidUtf16(kj::arrayPtr(data, length)) <= bufferSize) {
-    size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, length), outputBuf);
-    return TextEncoder::EncodeIntoResult{
-      .read = static_cast<int>(length),
-      .written = static_cast<int>(written),
-    };
+  size_t read = length;
+  if (!(length * 3 <= bufferSize ||
+          utf8LengthFromInvalidUtf16(kj::arrayPtr(data, length)) <= bufferSize)) {
+    read = findBestFitInvalidUtf16(data, length, bufferSize);
   }
 
-  size_t bestFit = findBestFitInvalidUtf16(data, length, bufferSize);
-  size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, bestFit), outputBuf);
+  size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, read), outputBuf);
   return TextEncoder::EncodeIntoResult{
-    .read = static_cast<int>(bestFit),
+    .read = static_cast<int>(read),
     .written = static_cast<int>(written),
   };
 }

From 9e892822c036ed8901635ef19829b3e5441bcca2 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Fri, 14 Nov 2025 10:19:40 -0500
Subject: [PATCH 12/29] address PR review: assert invariants, drop dead surrogate-pair path

---
 src/workerd/api/encoding.c++ | 91 +++++++++++++++++++-----------------
 1 file changed, 47 insertions(+), 44 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 2e862c982e1..7b1b88cc9f3 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -493,11 +493,11 @@ kj::Maybe<jsg::JsString> TextDecoder::decodePtr(
 namespace {
 
 constexpr inline bool isLeadSurrogate(char16_t c) {
-  return 0xD800 <= c && c < 0xDC00;
+  return (c & 0xFC00) == 0xD800;
 }
 
 constexpr inline bool isTrailSurrogate(char16_t c) {
-  return 0xDC00 <= c && c <= 0xDFFF;
+  return (c & 0xFC00) == 0xDC00;
 }
 
 // Calculate UTF-8 length from UTF-16 with potentially invalid surrogates.
@@ -527,20 +527,18 @@ size_t utf8LengthFromInvalidUtf16(kj::ArrayPtr<const char16_t> input) {
       }
 
       // Handle the invalid surrogate at inputPos
+      // SURROGATE error means unpaired surrogate, so valid pair should be impossible
       char16_t c = input[inputPos];
-      if (isLeadSurrogate(c) && inputPos + 1 < input.size() &&
-          isTrailSurrogate(input[inputPos + 1])) {
-        // Valid surrogate pair = 4 bytes in UTF-8
-        utf8Length += 4;
-        inputPos += 2;
-      } else {
-        // Invalid surrogate = U+FFFD (3 bytes)
-        utf8Length += 3;
-        inputPos++;
-      }
+      KJ_DASSERT(!(isLeadSurrogate(c) && inputPos + 1 < input.size() &&
+                     isTrailSurrogate(input[inputPos + 1])),
+          "Valid surrogate pair should not trigger SURROGATE error");
+
+      // Invalid surrogate = U+FFFD (3 bytes)
+      utf8Length += 3;
+      inputPos++;
     } else {
-      // Unexpected error - fall back to scalar calculation for safety
-      break;
+      KJ_FAIL_REQUIRE(
+          "Unexpected UTF-16 validation error from simdutf", static_cast<int>(result.error));
     }
   }
 
@@ -550,13 +548,16 @@ size_t utf8LengthFromInvalidUtf16(kj::ArrayPtr<const char16_t> input) {
 // Encode a single UTF-16 code unit to UTF-8
 inline size_t encodeUtf8CodeUnit(char16_t c, kj::ArrayPtr<char> out) {
   if (c < 0x80) {
+    KJ_DASSERT(out.size() >= 1);
     out[0] = static_cast<char>(c);
     return 1;
   } else if (c < 0x800) {
+    KJ_DASSERT(out.size() >= 2);
     out[0] = static_cast<char>(0xC0 | (c >> 6));
     out[1] = static_cast<char>(0x80 | (c & 0x3F));
     return 2;
   } else {
+    KJ_DASSERT(out.size() >= 3);
     out[0] = static_cast<char>(0xE0 | (c >> 12));
     out[1] = static_cast<char>(0x80 | ((c >> 6) & 0x3F));
     out[2] = static_cast<char>(0x80 | (c & 0x3F));
@@ -564,15 +565,6 @@ inline size_t encodeUtf8CodeUnit(char16_t c, kj::ArrayPtr<char> out) {
   }
 }
 
-// Encode a valid surrogate pair to UTF-8
-inline void encodeSurrogatePair(char16_t lead, char16_t trail, kj::ArrayPtr<char> out) {
-  uint32_t codepoint = 0x10000 + (((lead & 0x3FF) << 10) | (trail & 0x3FF));
-  out[0] = static_cast<char>(0xF0 | (codepoint >> 18));
-  out[1] = static_cast<char>(0x80 | ((codepoint >> 12) & 0x3F));
-  out[2] = static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
-  out[3] = static_cast<char>(0x80 | (codepoint & 0x3F));
-}
-
 // Convert UTF-16 with potentially invalid surrogates to UTF-8.
 // Invalid surrogates are replaced with U+FFFD.
 // Returns the number of UTF-8 bytes written.
@@ -590,6 +582,7 @@ size_t convertInvalidUtf16ToUtf8(kj::ArrayPtr<const char16_t> input, kj::ArrayPt
       // Remaining input is valid - convert it all with SIMD
       outputPos += simdutf::convert_utf16_to_utf8(
           input.begin() + inputPos, input.size() - inputPos, out.begin() + outputPos);
+      KJ_DASSERT(outputPos <= out.size());
       break;
     }
 
@@ -598,25 +591,24 @@ size_t convertInvalidUtf16ToUtf8(kj::ArrayPtr<const char16_t> input, kj::ArrayPt
       if (result.count > 0) {
         outputPos += simdutf::convert_valid_utf16_to_utf8(
             input.begin() + inputPos, result.count, out.begin() + outputPos);
+        KJ_DASSERT(outputPos <= out.size());
         inputPos += result.count;
       }
 
       // Handle the invalid surrogate at inputPos
+      // SURROGATE error means unpaired surrogate, so valid pair should be impossible
       char16_t c = input[inputPos];
-      if (isLeadSurrogate(c) && inputPos + 1 < input.size() &&
-          isTrailSurrogate(input[inputPos + 1])) {
-        // Valid surrogate pair - encode it (this shouldn't happen if SURROGATE error)
-        encodeSurrogatePair(c, input[inputPos + 1], out.slice(outputPos, out.size()));
-        outputPos += 4;
-        inputPos += 2;
-      } else {
-        // Invalid surrogate - replace with U+FFFD (3 bytes)
-        outputPos += encodeUtf8CodeUnit(0xFFFD, out.slice(outputPos, out.size()));
-        inputPos++;
-      }
+      KJ_DASSERT(!(isLeadSurrogate(c) && inputPos + 1 < input.size() &&
+                     isTrailSurrogate(input[inputPos + 1])),
+          "Valid surrogate pair should not trigger SURROGATE error");
+
+      // Invalid surrogate - replace with U+FFFD (3 bytes)
+      outputPos += encodeUtf8CodeUnit(0xFFFD, out.slice(outputPos, out.size()));
+      KJ_DASSERT(outputPos <= out.size());
+      inputPos++;
     } else {
-      // Unexpected error - fall back to scalar processing for safety
-      break;
+      KJ_FAIL_REQUIRE(
+          "Unexpected UTF-16 validation error from simdutf", static_cast<int>(result.error));
     }
   }
 
@@ -745,7 +737,7 @@ size_t findBestFitUtf16(const char16_t* data, size_t length, size_t bufferSize)
     size_t adjustedMid = mid;
     if (adjustedMid > 0 && adjustedMid < length) {
       char16_t prev = data[adjustedMid - 1];
-      if (prev >= 0xD800 && prev < 0xDC00) {
+      if (isLeadSurrogate(prev)) {
         adjustedMid--;
       }
     }
@@ -782,7 +774,7 @@ size_t findBestFitInvalidUtf16(const char16_t* data, size_t length, size_t buffe
     size_t adjustedMid = mid;
     if (adjustedMid > 0 && adjustedMid < length) {
       char16_t prev = data[adjustedMid - 1];
-      if (prev >= 0xD800 && prev < 0xDC00) {
+      if (isLeadSurrogate(prev)) {
         adjustedMid--;
       }
     }
@@ -818,16 +810,27 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
     // Latin-1 path: characters 0x00-0x7F encode as 1 UTF-8 byte, 0x80-0xFF as 2 bytes
     auto data = reinterpret_cast<const char*>(view.data8());
 
-    // Fast path: avoid length calculation when we can prove the string fits.
-    // Check worst-case (2x), ASCII (1:1), or calculate exact length as fallback.
+    // Determine if we need binary search using short-circuit evaluation to minimize checks
     size_t read = length;
-    if (!(length * 2 <= bufferSize ||
-            (length <= bufferSize && simdutf::validate_ascii(data, length)) ||
-            simdutf::utf8_length_from_latin1(data, length) <= bufferSize)) {
-      // Binary search to find how many characters fit
+    size_t utf8Length = 0;
+    bool needsBinarySearch = !(length * 2 <= bufferSize ||  // Fast: worst-case (2x) fits
+        (length <= bufferSize && simdutf::validate_ascii(data, length)) ||           // ASCII check
+        (utf8Length = simdutf::utf8_length_from_latin1(data, length)) <= bufferSize  // Exact length
+    );
+
+    if (needsBinarySearch) {
       read = findBestFitLatin1(data, length, bufferSize);
     }
 
+    // ASCII fast path: use memcpy instead of conversion
+    if (utf8Length == length || (utf8Length == 0 && simdutf::validate_ascii(data, read))) {
+      memcpy(outputBuf.begin(), data, read);
+      return TextEncoder::EncodeIntoResult{
+        .read = static_cast<int>(read),
+        .written = static_cast<int>(read),
+      };
+    }
+
     size_t written = simdutf::convert_latin1_to_utf8(data, read, outputBuf.begin());
     return TextEncoder::EncodeIntoResult{
       .read = static_cast<int>(read),

From 2bfb85a6dffa1739623146c4702fc07e4d9ed7a1 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Fri, 14 Nov 2025 10:49:28 -0500
Subject: [PATCH 13/29] encodeInto: chunked forward scan; encode: off-heap Latin-1 staging

---
 src/workerd/api/encoding.c++ | 370 +++++++++++++++++++++++++----------
 1 file changed, 267 insertions(+), 103 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 7b1b88cc9f3..74fe800b49c 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -496,7 +496,7 @@ constexpr inline bool isLeadSurrogate(char16_t c) {
   return (c & 0xFC00) == 0xD800;
 }
 
-constexpr inline bool isTrailSurrogate(char16_t c) {
+[[maybe_unused]] constexpr inline bool isTrailSurrogate(char16_t c) {
   return (c & 0xFC00) == 0xDC00;
 }
 
@@ -629,20 +629,22 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
   // Fast path: check if string is one-byte before creating ValueView
   if (str.isOneByte(js)) {
     auto length = str.length(js);
-    // Allocate buffer for Latin-1. Use v8::ArrayBuffer::NewBackingStore to avoid creating
-    // JS objects during conversion.
-    backingStore = js.allocBackingStore(length, jsg::Lock::AllocOption::UNINITIALIZED);
-    auto backingData = reinterpret_cast<kj::byte*>(backingStore->Data());
+    // Use off-heap allocation for intermediate Latin-1 buffer to avoid wasting V8 heap space
+    // and potentially triggering GC. Stack allocation for small strings, heap for large.
+    kj::SmallArray<kj::byte, 4096> latin1Buffer(length);
 
-    [[maybe_unused]] auto writeResult = str.writeInto(js, kj::arrayPtr(backingData, length));
+    [[maybe_unused]] auto writeResult = str.writeInto(js, latin1Buffer.asPtr());
     KJ_DASSERT(
         writeResult.written == length, "writeInto must completely overwrite the backing buffer");
 
-    utf8_length =
-        simdutf::utf8_length_from_latin1(reinterpret_cast<const char*>(backingData), length);
+    utf8_length = simdutf::utf8_length_from_latin1(
+        reinterpret_cast<const char*>(latin1Buffer.begin()), length);
 
     if (utf8_length == length) {
       // ASCII fast path: no conversion needed, Latin-1 is same as UTF-8 for ASCII
+      // Allocate final on-heap buffer and copy
+      backingStore = js.allocBackingStore(length, jsg::Lock::AllocOption::UNINITIALIZED);
+      memcpy(backingStore->Data(), latin1Buffer.begin(), length);
       auto array = v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, backingStore), 0, length);
       return jsg::JsUint8Array(array);
     }
@@ -650,14 +652,13 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
     KJ_DASSERT(utf8_length > length);
 
     // Need to convert Latin-1 to UTF-8
-    std::shared_ptr<v8::BackingStore> backingStore2 =
-        js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
+    backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
     [[maybe_unused]] auto written =
-        simdutf::convert_latin1_to_utf8(reinterpret_cast<const char*>(backingData), length,
-            reinterpret_cast<char*>(backingStore2->Data()));
+        simdutf::convert_latin1_to_utf8(reinterpret_cast<const char*>(latin1Buffer.begin()), length,
+            reinterpret_cast<char*>(backingStore->Data()));
     KJ_DASSERT(utf8_length == written);
     auto array =
-        v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, backingStore2), 0, utf8_length);
+        v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, backingStore), 0, utf8_length);
     return jsg::JsUint8Array(array);
   }
 
@@ -699,101 +700,192 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
 
 namespace {
 
-// Binary search to find how many Latin-1 characters fit when converted to UTF-8.
+// Forward scan to find how many Latin-1 characters fit when converted to UTF-8.
+// Uses SIMD for fast processing while maintaining O(result) complexity.
 // Latin-1 bytes 0x00-0x7F encode as 1 UTF-8 byte, 0x80-0xFF encode as 2 UTF-8 bytes.
 size_t findBestFitLatin1(const char* data, size_t length, size_t bufferSize) {
-  size_t left = 0;
-  size_t right = length;
-  size_t bestFit = 0;
-
-  while (left <= right) {
-    size_t mid = left + (right - left) / 2;
-    if (mid == 0) break;
-
-    size_t midUtf8Length = simdutf::utf8_length_from_latin1(data, mid);
-    if (midUtf8Length <= bufferSize) {
-      bestFit = mid;
-      left = mid + 1;
-    } else {
-      right = mid - 1;
+  size_t pos = 0;
+  size_t utf8Accumulated = 0;
+
+  // Process in chunks using SIMD for speed
+  constexpr size_t CHUNK = 256;
+
+  while (pos < length) {
+    size_t remaining = length - pos;
+    size_t chunkSize = remaining < CHUNK ? remaining : CHUNK;
+    size_t chunkUtf8Len = simdutf::utf8_length_from_latin1(data + pos, chunkSize);
+
+    if (utf8Accumulated + chunkUtf8Len > bufferSize) {
+      // This chunk would overflow - binary search within this chunk
+      size_t left = 0;
+      size_t right = chunkSize;
+      size_t bestFit = 0;
+
+      while (left <= right) {
+        size_t mid = left + (right - left) / 2;
+        if (mid == 0) break;
+
+        size_t midUtf8Length = simdutf::utf8_length_from_latin1(data + pos, mid);
+        if (utf8Accumulated + midUtf8Length <= bufferSize) {
+          bestFit = mid;
+          left = mid + 1;
+        } else {
+          right = mid - 1;
+        }
+      }
+
+      return pos + bestFit;
     }
+
+    utf8Accumulated += chunkUtf8Len;
+    pos += chunkSize;
   }
 
-  return bestFit;
+  return pos;
 }
 
-// Binary search to find how many UTF-16 code units fit when converted to UTF-8.
+// Forward scan to find how many UTF-16 code units fit when converted to UTF-8.
+// Uses SIMD for fast processing while maintaining O(result) complexity.
 // Ensures surrogate pairs (0xD800-0xDFFF) are never split across the boundary.
 size_t findBestFitUtf16(const char16_t* data, size_t length, size_t bufferSize) {
-  size_t left = 0;
-  size_t right = length;
-  size_t bestFit = 0;
-
-  while (left <= right) {
-    size_t mid = left + (right - left) / 2;
-    if (mid == 0) break;
-
-    // Don't split surrogate pairs - adjust backwards if mid lands after a high surrogate
-    size_t adjustedMid = mid;
-    if (adjustedMid > 0 && adjustedMid < length) {
-      char16_t prev = data[adjustedMid - 1];
-      if (isLeadSurrogate(prev)) {
-        adjustedMid--;
+  size_t pos = 0;
+  size_t utf8Accumulated = 0;
+
+  // Process in chunks using SIMD for speed
+  constexpr size_t CHUNK = 256;
+
+  while (pos < length) {
+    size_t remaining = length - pos;
+    size_t chunkSize = remaining < CHUNK ? remaining : CHUNK;
+
+    // Adjust chunk to not split surrogate pairs
+    if (pos + chunkSize < length && chunkSize > 0) {
+      char16_t last = data[pos + chunkSize - 1];
+      if (isLeadSurrogate(last)) {
+        chunkSize--;
       }
     }
 
-    if (adjustedMid == 0) {
-      right = 0;
-      break;
+    if (chunkSize == 0) {
+      // Edge case: chunk would be empty, process at least 2 code units (surrogate pair)
+      chunkSize = (remaining >= 2) ? 2 : remaining;
     }
 
-    size_t midUtf8Length = simdutf::utf8_length_from_utf16(data, adjustedMid);
-    if (midUtf8Length <= bufferSize) {
-      bestFit = adjustedMid;
-      left = adjustedMid + 1;
-    } else {
-      right = adjustedMid - 1;
+    size_t chunkUtf8Len = simdutf::utf8_length_from_utf16(data + pos, chunkSize);
+
+    if (utf8Accumulated + chunkUtf8Len > bufferSize) {
+      // This chunk would overflow - binary search within this chunk
+      size_t left = 0;
+      size_t right = chunkSize;
+      size_t bestFit = 0;
+
+      while (left <= right) {
+        size_t mid = left + (right - left) / 2;
+        if (mid == 0) break;
+
+        // Don't split surrogate pairs
+        size_t adjustedMid = mid;
+        if (adjustedMid > 0 && pos + adjustedMid < length) {
+          char16_t prev = data[pos + adjustedMid - 1];
+          if (isLeadSurrogate(prev)) {
+            adjustedMid--;
+          }
+        }
+
+        if (adjustedMid == 0) {
+          right = 0;
+          break;
+        }
+
+        size_t midUtf8Length = simdutf::utf8_length_from_utf16(data + pos, adjustedMid);
+        if (utf8Accumulated + midUtf8Length <= bufferSize) {
+          bestFit = adjustedMid;
+          left = adjustedMid + 1;
+        } else {
+          right = adjustedMid - 1;
+        }
+      }
+
+      return pos + bestFit;
     }
+
+    utf8Accumulated += chunkUtf8Len;
+    pos += chunkSize;
   }
 
-  return bestFit;
+  return pos;
 }
 
-// Binary search to find how many UTF-16 code units with invalid surrogates fit when converted to UTF-8.
+// Forward scan to find how many UTF-16 code units with invalid surrogates fit when converted to UTF-8.
+// Uses SIMD for fast processing while maintaining O(result) complexity.
 // Ensures surrogate pairs are never split, and unpaired surrogates are replaced with U+FFFD.
 size_t findBestFitInvalidUtf16(const char16_t* data, size_t length, size_t bufferSize) {
-  size_t left = 0;
-  size_t right = length;
-  size_t bestFit = 0;
-
-  while (left <= right) {
-    size_t mid = left + (right - left) / 2;
-    if (mid == 0) break;
-
-    // Don't split surrogate pairs - adjust backwards if mid lands after a high surrogate
-    size_t adjustedMid = mid;
-    if (adjustedMid > 0 && adjustedMid < length) {
-      char16_t prev = data[adjustedMid - 1];
-      if (isLeadSurrogate(prev)) {
-        adjustedMid--;
+  size_t pos = 0;
+  size_t utf8Accumulated = 0;
+
+  // Process in chunks using SIMD for speed
+  constexpr size_t CHUNK = 256;
+
+  while (pos < length) {
+    size_t remaining = length - pos;
+    size_t chunkSize = remaining < CHUNK ? remaining : CHUNK;
+
+    // Adjust chunk to not split surrogate pairs
+    if (pos + chunkSize < length && chunkSize > 0) {
+      char16_t last = data[pos + chunkSize - 1];
+      if (isLeadSurrogate(last)) {
+        chunkSize--;
       }
     }
 
-    if (adjustedMid == 0) {
-      right = 0;
-      break;
+    if (chunkSize == 0) {
+      // Edge case: chunk would be empty, process at least 2 code units (surrogate pair)
+      chunkSize = (remaining >= 2) ? 2 : remaining;
     }
 
-    size_t midUtf8Length = utf8LengthFromInvalidUtf16(kj::arrayPtr(data, adjustedMid));
-    if (midUtf8Length <= bufferSize) {
-      bestFit = adjustedMid;
-      left = adjustedMid + 1;
-    } else {
-      right = adjustedMid - 1;
+    size_t chunkUtf8Len = utf8LengthFromInvalidUtf16(kj::arrayPtr(data + pos, chunkSize));
+
+    if (utf8Accumulated + chunkUtf8Len > bufferSize) {
+      // This chunk would overflow - binary search within this chunk
+      size_t left = 0;
+      size_t right = chunkSize;
+      size_t bestFit = 0;
+
+      while (left <= right) {
+        size_t mid = left + (right - left) / 2;
+        if (mid == 0) break;
+
+        // Don't split surrogate pairs
+        size_t adjustedMid = mid;
+        if (adjustedMid > 0 && pos + adjustedMid < length) {
+          char16_t prev = data[pos + adjustedMid - 1];
+          if (isLeadSurrogate(prev)) {
+            adjustedMid--;
+          }
+        }
+
+        if (adjustedMid == 0) {
+          right = 0;
+          break;
+        }
+
+        size_t midUtf8Length = utf8LengthFromInvalidUtf16(kj::arrayPtr(data + pos, adjustedMid));
+        if (utf8Accumulated + midUtf8Length <= bufferSize) {
+          bestFit = adjustedMid;
+          left = adjustedMid + 1;
+        } else {
+          right = adjustedMid - 1;
+        }
+      }
+
+      return pos + bestFit;
     }
+
+    utf8Accumulated += chunkUtf8Len;
+    pos += chunkSize;
   }
 
-  return bestFit;
+  return pos;
 }
 
 }  // namespace
@@ -810,27 +902,49 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
     // Latin-1 path: characters 0x00-0x7F encode as 1 UTF-8 byte, 0x80-0xFF as 2 bytes
     auto data = reinterpret_cast<const char*>(view.data8());
 
-    // Determine if we need binary search using short-circuit evaluation to minimize checks
-    size_t read = length;
-    size_t utf8Length = 0;
-    bool needsBinarySearch = !(length * 2 <= bufferSize ||  // Fast: worst-case (2x) fits
-        (length <= bufferSize && simdutf::validate_ascii(data, length)) ||           // ASCII check
-        (utf8Length = simdutf::utf8_length_from_latin1(data, length)) <= bufferSize  // Exact length
-    );
+    // Optimize for incremental encoding: if buffer is much smaller than input,
+    // skip all "whole string fits" checks and go straight to forward scan
+    if (length > bufferSize * 2) {
+      // Incremental mode: forward scan to find what fits, then convert
+      size_t read = findBestFitLatin1(data, length, bufferSize);
+      size_t written = simdutf::convert_latin1_to_utf8(data, read, outputBuf.begin());
+      return TextEncoder::EncodeIntoResult{
+        .read = static_cast<int>(read),
+        .written = static_cast<int>(written),
+      };
+    }
 
-    if (needsBinarySearch) {
-      read = findBestFitLatin1(data, length, bufferSize);
+    // Buffer might fit most/all of string: try optimized fast paths
+    // Fast path 1: Worst-case (2x) definitely fits
+    if (length * 2 <= bufferSize) {
+      size_t written = simdutf::convert_latin1_to_utf8(data, length, outputBuf.begin());
+      return TextEncoder::EncodeIntoResult{
+        .read = static_cast<int>(length),
+        .written = static_cast<int>(written),
+      };
     }
 
-    // ASCII fast path: use memcpy instead of conversion
-    if (utf8Length == length || (utf8Length == 0 && simdutf::validate_ascii(data, read))) {
-      memcpy(outputBuf.begin(), data, read);
+    // Fast path 2: Check if ASCII (which is 1:1 Latin-1 to UTF-8)
+    if (length <= bufferSize && simdutf::validate_ascii(data, length)) {
+      memcpy(outputBuf.begin(), data, length);
       return TextEncoder::EncodeIntoResult{
-        .read = static_cast<int>(read),
-        .written = static_cast<int>(read),
+        .read = static_cast<int>(length),
+        .written = static_cast<int>(length),
       };
     }
 
+    // Slow path: Calculate exact UTF-8 length to determine if it fits
+    size_t utf8Length = simdutf::utf8_length_from_latin1(data, length);
+    if (utf8Length <= bufferSize) {
+      size_t written = simdutf::convert_latin1_to_utf8(data, length, outputBuf.begin());
+      return TextEncoder::EncodeIntoResult{
+        .read = static_cast<int>(length),
+        .written = static_cast<int>(written),
+      };
+    }
+
+    // Doesn't fit: forward scan to find what does
+    size_t read = findBestFitLatin1(data, length, bufferSize);
     size_t written = simdutf::convert_latin1_to_utf8(data, read, outputBuf.begin());
     return TextEncoder::EncodeIntoResult{
       .read = static_cast<int>(read),
@@ -843,13 +957,38 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
 
   if (simdutf::validate_utf16(data, length)) {
     // Valid UTF-16: use fast SIMD conversion
-    // Fast path: skip length calculation if worst-case UTF-8 size fits (3 bytes per code unit)
-    size_t read = length;
-    if (!(length * 3 <= bufferSize ||
-            simdutf::utf8_length_from_utf16(data, length) <= bufferSize)) {
-      read = findBestFitUtf16(data, length, bufferSize);
+
+    // Incremental mode: buffer much smaller than input, skip "whole string fits" checks
+    if (length > bufferSize) {
+      size_t read = findBestFitUtf16(data, length, bufferSize);
+      size_t written = simdutf::convert_utf16_to_utf8(data, read, outputBuf.begin());
+      return TextEncoder::EncodeIntoResult{
+        .read = static_cast<int>(read),
+        .written = static_cast<int>(written),
+      };
+    }
+
+    // Fast path: worst-case (3 bytes per UTF-16 code unit) fits
+    if (length * 3 <= bufferSize) {
+      size_t written = simdutf::convert_utf16_to_utf8(data, length, outputBuf.begin());
+      return TextEncoder::EncodeIntoResult{
+        .read = static_cast<int>(length),
+        .written = static_cast<int>(written),
+      };
+    }
+
+    // Slow path: calculate exact UTF-8 length
+    size_t utf8Length = simdutf::utf8_length_from_utf16(data, length);
+    if (utf8Length <= bufferSize) {
+      size_t written = simdutf::convert_utf16_to_utf8(data, length, outputBuf.begin());
+      return TextEncoder::EncodeIntoResult{
+        .read = static_cast<int>(length),
+        .written = static_cast<int>(written),
+      };
     }
 
+    // Doesn't fit: forward scan to find what does
+    size_t read = findBestFitUtf16(data, length, bufferSize);
     size_t written = simdutf::convert_utf16_to_utf8(data, read, outputBuf.begin());
     return TextEncoder::EncodeIntoResult{
       .read = static_cast<int>(read),
@@ -858,13 +997,38 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
   }
 
   // Invalid UTF-16: convert directly to UTF-8, replacing unpaired surrogates with U+FFFD
-  // Fast path: skip length calculation if worst-case UTF-8 size fits (3 bytes per code unit)
-  size_t read = length;
-  if (!(length * 3 <= bufferSize ||
-          utf8LengthFromInvalidUtf16(kj::arrayPtr(data, length)) <= bufferSize)) {
-    read = findBestFitInvalidUtf16(data, length, bufferSize);
+
+  // Incremental mode: buffer much smaller than input, skip "whole string fits" checks
+  if (length > bufferSize) {
+    size_t read = findBestFitInvalidUtf16(data, length, bufferSize);
+    size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, read), outputBuf);
+    return TextEncoder::EncodeIntoResult{
+      .read = static_cast<int>(read),
+      .written = static_cast<int>(written),
+    };
+  }
+
+  // Fast path: worst-case (3 bytes per UTF-16 code unit) fits
+  if (length * 3 <= bufferSize) {
+    size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, length), outputBuf);
+    return TextEncoder::EncodeIntoResult{
+      .read = static_cast<int>(length),
+      .written = static_cast<int>(written),
+    };
+  }
+
+  // Slow path: calculate exact UTF-8 length
+  size_t utf8Length = utf8LengthFromInvalidUtf16(kj::arrayPtr(data, length));
+  if (utf8Length <= bufferSize) {
+    size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, length), outputBuf);
+    return TextEncoder::EncodeIntoResult{
+      .read = static_cast<int>(length),
+      .written = static_cast<int>(written),
+    };
   }
 
+  // Doesn't fit: forward scan to find what does
+  size_t read = findBestFitInvalidUtf16(data, length, bufferSize);
   size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, read), outputBuf);
   return TextEncoder::EncodeIntoResult{
     .read = static_cast<int>(read),

From 022e1a2cc8168ceb751b5da99afe1d11cd953211 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Fri, 14 Nov 2025 11:06:19 -0500
Subject: [PATCH 14/29] make the code reviewable

---
 src/workerd/api/encoding.c++ | 53 +++++++++++++++---------------------
 1 file changed, 22 insertions(+), 31 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 74fe800b49c..353a44c776d 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -700,14 +700,11 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
 
 namespace {
 
-// Forward scan to find how many Latin-1 characters fit when converted to UTF-8.
-// Uses SIMD for fast processing while maintaining O(result) complexity.
-// Latin-1 bytes 0x00-0x7F encode as 1 UTF-8 byte, 0x80-0xFF encode as 2 UTF-8 bytes.
+// Find how many Latin-1 characters fit when converted to UTF-8
+// Uses chunked forward scan with SIMD, O(result) complexity
 size_t findBestFitLatin1(const char* data, size_t length, size_t bufferSize) {
   size_t pos = 0;
   size_t utf8Accumulated = 0;
-
-  // Process in chunks using SIMD for speed
   constexpr size_t CHUNK = 256;
 
   while (pos < length) {
@@ -716,7 +713,7 @@ size_t findBestFitLatin1(const char* data, size_t length, size_t bufferSize) {
     size_t chunkUtf8Len = simdutf::utf8_length_from_latin1(data + pos, chunkSize);
 
     if (utf8Accumulated + chunkUtf8Len > bufferSize) {
-      // This chunk would overflow - binary search within this chunk
+      // Chunk would overflow - binary search within chunk
       size_t left = 0;
       size_t right = chunkSize;
       size_t bestFit = 0;
@@ -744,21 +741,19 @@ size_t findBestFitLatin1(const char* data, size_t length, size_t bufferSize) {
   return pos;
 }
 
-// Forward scan to find how many UTF-16 code units fit when converted to UTF-8.
-// Uses SIMD for fast processing while maintaining O(result) complexity.
-// Ensures surrogate pairs (0xD800-0xDFFF) are never split across the boundary.
+// Find how many UTF-16 code units fit when converted to UTF-8
+// Uses chunked forward scan with SIMD, O(result) complexity. Never splits surrogate pairs.
 size_t findBestFitUtf16(const char16_t* data, size_t length, size_t bufferSize) {
   size_t pos = 0;
   size_t utf8Accumulated = 0;
 
-  // Process in chunks using SIMD for speed
   constexpr size_t CHUNK = 256;
 
   while (pos < length) {
     size_t remaining = length - pos;
     size_t chunkSize = remaining < CHUNK ? remaining : CHUNK;
 
-    // Adjust chunk to not split surrogate pairs
+    // Don't split surrogate pairs at chunk boundary
     if (pos + chunkSize < length && chunkSize > 0) {
       char16_t last = data[pos + chunkSize - 1];
       if (isLeadSurrogate(last)) {
@@ -767,14 +762,13 @@ size_t findBestFitUtf16(const char16_t* data, size_t length, size_t bufferSize)
     }
 
     if (chunkSize == 0) {
-      // Edge case: chunk would be empty, process at least 2 code units (surrogate pair)
       chunkSize = (remaining >= 2) ? 2 : remaining;
     }
 
     size_t chunkUtf8Len = simdutf::utf8_length_from_utf16(data + pos, chunkSize);
 
     if (utf8Accumulated + chunkUtf8Len > bufferSize) {
-      // This chunk would overflow - binary search within this chunk
+      // Chunk would overflow - binary search within chunk
       size_t left = 0;
       size_t right = chunkSize;
       size_t bestFit = 0;
@@ -816,21 +810,19 @@ size_t findBestFitUtf16(const char16_t* data, size_t length, size_t bufferSize)
   return pos;
 }
 
-// Forward scan to find how many UTF-16 code units with invalid surrogates fit when converted to UTF-8.
-// Uses SIMD for fast processing while maintaining O(result) complexity.
-// Ensures surrogate pairs are never split, and unpaired surrogates are replaced with U+FFFD.
+// Find how many UTF-16 code units with invalid surrogates fit when converted to UTF-8
+// Uses chunked forward scan with SIMD, O(result) complexity. Never splits surrogate pairs.
+// Unpaired surrogates replaced with U+FFFD.
 size_t findBestFitInvalidUtf16(const char16_t* data, size_t length, size_t bufferSize) {
   size_t pos = 0;
   size_t utf8Accumulated = 0;
-
-  // Process in chunks using SIMD for speed
   constexpr size_t CHUNK = 256;
 
   while (pos < length) {
     size_t remaining = length - pos;
     size_t chunkSize = remaining < CHUNK ? remaining : CHUNK;
 
-    // Adjust chunk to not split surrogate pairs
+    // Don't split surrogate pairs at chunk boundary
     if (pos + chunkSize < length && chunkSize > 0) {
       char16_t last = data[pos + chunkSize - 1];
       if (isLeadSurrogate(last)) {
@@ -839,14 +831,13 @@ size_t findBestFitInvalidUtf16(const char16_t* data, size_t length, size_t buffe
     }
 
     if (chunkSize == 0) {
-      // Edge case: chunk would be empty, process at least 2 code units (surrogate pair)
       chunkSize = (remaining >= 2) ? 2 : remaining;
     }
 
     size_t chunkUtf8Len = utf8LengthFromInvalidUtf16(kj::arrayPtr(data + pos, chunkSize));
 
     if (utf8Accumulated + chunkUtf8Len > bufferSize) {
-      // This chunk would overflow - binary search within this chunk
+      // Chunk would overflow - binary search within chunk
       size_t left = 0;
       size_t right = chunkSize;
       size_t bestFit = 0;
@@ -924,18 +915,18 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
       };
     }
 
-    // Fast path 2: Check if ASCII (which is 1:1 Latin-1 to UTF-8)
-    if (length <= bufferSize && simdutf::validate_ascii(data, length)) {
-      memcpy(outputBuf.begin(), data, length);
-      return TextEncoder::EncodeIntoResult{
-        .read = static_cast<int>(length),
-        .written = static_cast<int>(length),
-      };
-    }
-
-    // Slow path: Calculate exact UTF-8 length to determine if it fits
+    // Calculate exact UTF-8 length to determine if it fits
     size_t utf8Length = simdutf::utf8_length_from_latin1(data, length);
     if (utf8Length <= bufferSize) {
+      // Fast path 2: ASCII (utf8Length == length means no conversion needed)
+      if (utf8Length == length) {
+        memcpy(outputBuf.begin(), data, length);
+        return TextEncoder::EncodeIntoResult{
+          .read = static_cast<int>(length),
+          .written = static_cast<int>(length),
+        };
+      }
+      // Fits: convert with SIMD
       size_t written = simdutf::convert_latin1_to_utf8(data, length, outputBuf.begin());
       return TextEncoder::EncodeIntoResult{
         .read = static_cast<int>(length),

From 69829217df7c46c0096d2ee771910e71b67db388 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Fri, 14 Nov 2025 11:15:04 -0500
Subject: [PATCH 15/29] use simdutf trim_partial_utf16

---
 src/workerd/api/encoding.c++ | 44 ++++++------------------------------
 1 file changed, 7 insertions(+), 37 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 353a44c776d..d6c84fb001a 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -492,7 +492,7 @@ kj::Maybe<jsg::JsString> TextDecoder::decodePtr(
 
 namespace {
 
-constexpr inline bool isLeadSurrogate(char16_t c) {
+[[maybe_unused]] constexpr inline bool isLeadSurrogate(char16_t c) {
   return (c & 0xFC00) == 0xD800;
 }
 
@@ -528,7 +528,7 @@ size_t utf8LengthFromInvalidUtf16(kj::ArrayPtr<const char16_t> input) {
 
       // Handle the invalid surrogate at inputPos
       // SURROGATE error means unpaired surrogate, so valid pair should be impossible
-      char16_t c = input[inputPos];
+      [[maybe_unused]] char16_t c = input[inputPos];
       KJ_DASSERT(!(isLeadSurrogate(c) && inputPos + 1 < input.size() &&
                      isTrailSurrogate(input[inputPos + 1])),
           "Valid surrogate pair should not trigger SURROGATE error");
@@ -597,7 +597,7 @@ size_t convertInvalidUtf16ToUtf8(kj::ArrayPtr<const char16_t> input, kj::ArrayPt
 
       // Handle the invalid surrogate at inputPos
       // SURROGATE error means unpaired surrogate, so valid pair should be impossible
-      char16_t c = input[inputPos];
+      [[maybe_unused]] char16_t c = input[inputPos];
       KJ_DASSERT(!(isLeadSurrogate(c) && inputPos + 1 < input.size() &&
                      isTrailSurrogate(input[inputPos + 1])),
           "Valid surrogate pair should not trigger SURROGATE error");
@@ -751,15 +751,7 @@ size_t findBestFitUtf16(const char16_t* data, size_t length, size_t bufferSize)
 
   while (pos < length) {
     size_t remaining = length - pos;
-    size_t chunkSize = remaining < CHUNK ? remaining : CHUNK;
-
-    // Don't split surrogate pairs at chunk boundary
-    if (pos + chunkSize < length && chunkSize > 0) {
-      char16_t last = data[pos + chunkSize - 1];
-      if (isLeadSurrogate(last)) {
-        chunkSize--;
-      }
-    }
+    size_t chunkSize = simdutf::trim_partial_utf16(data + pos, kj::min(remaining, CHUNK));
 
     if (chunkSize == 0) {
       chunkSize = (remaining >= 2) ? 2 : remaining;
@@ -777,14 +769,7 @@ size_t findBestFitUtf16(const char16_t* data, size_t length, size_t bufferSize)
         size_t mid = left + (right - left) / 2;
         if (mid == 0) break;
 
-        // Don't split surrogate pairs
-        size_t adjustedMid = mid;
-        if (adjustedMid > 0 && pos + adjustedMid < length) {
-          char16_t prev = data[pos + adjustedMid - 1];
-          if (isLeadSurrogate(prev)) {
-            adjustedMid--;
-          }
-        }
+        size_t adjustedMid = simdutf::trim_partial_utf16(data + pos, mid);
 
         if (adjustedMid == 0) {
           right = 0;
@@ -820,15 +805,7 @@ size_t findBestFitInvalidUtf16(const char16_t* data, size_t length, size_t buffe
 
   while (pos < length) {
     size_t remaining = length - pos;
-    size_t chunkSize = remaining < CHUNK ? remaining : CHUNK;
-
-    // Don't split surrogate pairs at chunk boundary
-    if (pos + chunkSize < length && chunkSize > 0) {
-      char16_t last = data[pos + chunkSize - 1];
-      if (isLeadSurrogate(last)) {
-        chunkSize--;
-      }
-    }
+    size_t chunkSize = simdutf::trim_partial_utf16(data + pos, kj::min(remaining, CHUNK));
 
     if (chunkSize == 0) {
       chunkSize = (remaining >= 2) ? 2 : remaining;
@@ -846,14 +823,7 @@ size_t findBestFitInvalidUtf16(const char16_t* data, size_t length, size_t buffe
         size_t mid = left + (right - left) / 2;
         if (mid == 0) break;
 
-        // Don't split surrogate pairs
-        size_t adjustedMid = mid;
-        if (adjustedMid > 0 && pos + adjustedMid < length) {
-          char16_t prev = data[pos + adjustedMid - 1];
-          if (isLeadSurrogate(prev)) {
-            adjustedMid--;
-          }
-        }
+        size_t adjustedMid = simdutf::trim_partial_utf16(data + pos, mid);
 
         if (adjustedMid == 0) {
           right = 0;

From 525cbacc858774128b6631c3cea714fd18af9180 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Fri, 14 Nov 2025 11:46:58 -0500
Subject: [PATCH 16/29] avoid repetitive simdutf_length calls

---
 src/workerd/api/encoding.c++ | 70 +++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 21 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index d6c84fb001a..a5e6053bd19 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -702,7 +702,10 @@ namespace {
 
 // Find how many Latin-1 characters fit when converted to UTF-8
 // Uses chunked forward scan with SIMD, O(result) complexity
-size_t findBestFitLatin1(const char* data, size_t length, size_t bufferSize) {
+// Template parameter ReturnLength controls whether to return just position or (position, utf8_length)
+template <bool ReturnLength = false>
+std::conditional_t<ReturnLength, std::pair<size_t, size_t>, size_t> findBestFitLatin1(
+    const char* data, size_t length, size_t bufferSize) {
   size_t pos = 0;
   size_t utf8Accumulated = 0;
   constexpr size_t CHUNK = 256;
@@ -731,22 +734,35 @@ size_t findBestFitLatin1(const char* data, size_t length, size_t bufferSize) {
         }
       }
 
-      return pos + bestFit;
+      if constexpr (ReturnLength) {
+        size_t finalPos = pos + bestFit;
+        size_t finalUtf8Len =
+            utf8Accumulated + simdutf::utf8_length_from_latin1(data + pos, bestFit);
+        return {finalPos, finalUtf8Len};
+      } else {
+        return pos + bestFit;
+      }
     }
 
     utf8Accumulated += chunkUtf8Len;
     pos += chunkSize;
   }
 
-  return pos;
+  if constexpr (ReturnLength) {
+    return {pos, utf8Accumulated};
+  } else {
+    return pos;
+  }
 }
 
 // Find how many UTF-16 code units fit when converted to UTF-8
 // Uses chunked forward scan with SIMD, O(result) complexity. Never splits surrogate pairs.
-size_t findBestFitUtf16(const char16_t* data, size_t length, size_t bufferSize) {
+// Template parameter ReturnLength controls whether to return just position or (position, utf8_length)
+template <bool ReturnLength = false>
+std::conditional_t<ReturnLength, std::pair<size_t, size_t>, size_t> findBestFitUtf16(
+    const char16_t* data, size_t length, size_t bufferSize) {
   size_t pos = 0;
   size_t utf8Accumulated = 0;
-
   constexpr size_t CHUNK = 256;
 
   while (pos < length) {
@@ -785,14 +801,25 @@ size_t findBestFitUtf16(const char16_t* data, size_t length, size_t bufferSize)
         }
       }
 
-      return pos + bestFit;
+      if constexpr (ReturnLength) {
+        size_t finalPos = pos + bestFit;
+        size_t finalUtf8Len =
+            utf8Accumulated + simdutf::utf8_length_from_utf16(data + pos, bestFit);
+        return {finalPos, finalUtf8Len};
+      } else {
+        return pos + bestFit;
+      }
     }
 
     utf8Accumulated += chunkUtf8Len;
     pos += chunkSize;
   }
 
-  return pos;
+  if constexpr (ReturnLength) {
+    return {pos, utf8Accumulated};
+  } else {
+    return pos;
+  }
 }
 
 // Find how many UTF-16 code units with invalid surrogates fit when converted to UTF-8
@@ -875,8 +902,7 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
       };
     }
 
-    // Buffer might fit most/all of string: try optimized fast paths
-    // Fast path 1: Worst-case (2x) definitely fits
+    // Fast path: Worst-case (2x) definitely fits
     if (length * 2 <= bufferSize) {
       size_t written = simdutf::convert_latin1_to_utf8(data, length, outputBuf.begin());
       return TextEncoder::EncodeIntoResult{
@@ -885,10 +911,12 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
       };
     }
 
-    // Calculate exact UTF-8 length to determine if it fits
-    size_t utf8Length = simdutf::utf8_length_from_latin1(data, length);
-    if (utf8Length <= bufferSize) {
-      // Fast path 2: ASCII (utf8Length == length means no conversion needed)
+    // Use forward scan that also returns UTF-8 length (avoids redundant full-string scan)
+    auto [read, utf8Length] = findBestFitLatin1<true>(data, length, bufferSize);
+
+    // Check if everything fit
+    if (read == length) {
+      // ASCII fast path: utf8Length == length means no conversion needed
       if (utf8Length == length) {
         memcpy(outputBuf.begin(), data, length);
         return TextEncoder::EncodeIntoResult{
@@ -896,7 +924,7 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
           .written = static_cast<int>(length),
         };
       }
-      // Fits: convert with SIMD
+      // All fit: convert with SIMD
       size_t written = simdutf::convert_latin1_to_utf8(data, length, outputBuf.begin());
       return TextEncoder::EncodeIntoResult{
         .read = static_cast<int>(length),
@@ -904,8 +932,7 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
       };
     }
 
-    // Doesn't fit: forward scan to find what does
-    size_t read = findBestFitLatin1(data, length, bufferSize);
+    // Partial fit: convert only what fits
     size_t written = simdutf::convert_latin1_to_utf8(data, read, outputBuf.begin());
     return TextEncoder::EncodeIntoResult{
       .read = static_cast<int>(read),
@@ -938,9 +965,11 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
       };
     }
 
-    // Slow path: calculate exact UTF-8 length
-    size_t utf8Length = simdutf::utf8_length_from_utf16(data, length);
-    if (utf8Length <= bufferSize) {
+    // Use forward scan that also returns UTF-8 length (avoids redundant full-string scan)
+    auto [read, utf8Length] = findBestFitUtf16<true>(data, length, bufferSize);
+
+    if (read == length) {
+      // Everything fit: convert all
       size_t written = simdutf::convert_utf16_to_utf8(data, length, outputBuf.begin());
       return TextEncoder::EncodeIntoResult{
         .read = static_cast<int>(length),
@@ -948,8 +977,7 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
       };
     }
 
-    // Doesn't fit: forward scan to find what does
-    size_t read = findBestFitUtf16(data, length, bufferSize);
+    // Partial fit: convert only what fits
     size_t written = simdutf::convert_utf16_to_utf8(data, read, outputBuf.begin());
     return TextEncoder::EncodeIntoResult{
       .read = static_cast<int>(read),

From 538ed749a0849d43dd183bf7ec160b6032f346d1 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Fri, 14 Nov 2025 15:33:04 -0500
Subject: [PATCH 17/29] get rid of string flattening

---
 src/workerd/api/encoding.c++ | 70 ++++++++++++++++++------------------
 1 file changed, 34 insertions(+), 36 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index a5e6053bd19..de0fecb144b 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -625,10 +625,10 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
   jsg::JsString str = input.orDefault(js.str());
   std::shared_ptr<v8::BackingStore> backingStore;
   size_t utf8_length = 0;
+  auto length = str.length(js);
 
   // Fast path: check if string is one-byte before creating ValueView
   if (str.isOneByte(js)) {
-    auto length = str.length(js);
     // Use off-heap allocation for intermediate Latin-1 buffer to avoid wasting V8 heap space
     // and potentially triggering GC. Stack allocation for small strings, heap for large.
     kj::SmallArray<kj::byte, 4096> latin1Buffer(length);
@@ -663,36 +663,28 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
   }
 
   // Two-byte string path
-  {
-    // Note that ValueView flattens the string, if it's not already flattened
-    v8::String::ValueView view(js.v8Isolate, str);
-    // Two-byte string path. V8 uses UTF-16LE encoding internally for strings with code points
-    // > U+00FF. Check if the UTF-16 is valid (no unpaired surrogates) to determine the path.
-    auto data = reinterpret_cast<const char16_t*>(view.data16());
-
-    if (simdutf::validate_utf16le(data, view.length())) {
-      // Common case: valid UTF-16, convert directly to UTF-8
-      utf8_length = simdutf::utf8_length_from_utf16le(data, view.length());
-      backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
-      [[maybe_unused]] auto written = simdutf::convert_utf16le_to_utf8(
-          data, view.length(), reinterpret_cast<char*>(backingStore->Data()));
-      KJ_DASSERT(written == utf8_length);
-    } else {
-      // Invalid UTF-16 with unpaired surrogates. Per the Encoding Standard,
-      // unpaired surrogates must be replaced with U+FFFD (replacement character).
-      // Use custom conversion that handles invalid surrogates without creating an
-      // intermediate well-formed UTF-16 buffer.
-      auto inputArray = kj::ArrayPtr<const char16_t>(data, view.length());
-      utf8_length = utf8LengthFromInvalidUtf16(inputArray);
-      backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
-      auto outputArray =
-          kj::ArrayPtr<char>(reinterpret_cast<char*>(backingStore->Data()), utf8_length);
-      convertInvalidUtf16ToUtf8(inputArray, outputArray);
-    }
-  }  // ValueView destroyed here, releasing the heap lock
+  // Use off-heap allocation for intermediate UTF-16 buffer to avoid triggering GC.
+  // Stack allocation for small strings, heap for large.
+  kj::SmallArray<uint16_t, 4096> utf16Buffer(length);
+
+  // Note: writeInto() doesn't flatten the string - it calls writeTo() which chains through
+  // Write2 -> WriteV2 -> WriteHelperV2 -> String::WriteToFlat (written by Erik in 2008).
+  // This means we may read from multiple string segments, but that's fine for our use case.
+  [[maybe_unused]] auto writeResult = str.writeInto(js, utf16Buffer.asPtr());
+  KJ_DASSERT(
+      writeResult.written == length, "writeInto must completely overwrite the backing buffer");
+
+  auto data = reinterpret_cast<char16_t*>(utf16Buffer.begin());
+  utf8_length = utf8LengthFromInvalidUtf16(kj::arrayPtr(data, length));
+
+  if (!simdutf::validate_utf16(data, length)) {
+    simdutf::to_well_formed_utf16(data, length, data);
+  }
+
+  backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
+  [[maybe_unused]] auto written = simdutf::convert_valid_utf16_to_utf8(
+      data, length, reinterpret_cast<char*>(backingStore->Data()));
 
-  // Now that ValueView is destroyed and the heap lock is released, it's safe to create V8 objects.
-  // Create the Uint8Array from the raw v8::BackingStore.
   auto array =
       v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, backingStore), 0, utf8_length);
   return jsg::JsUint8Array(array);
@@ -911,12 +903,15 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
       };
     }
 
-    // Use forward scan that also returns UTF-8 length (avoids redundant full-string scan)
+    // "Maybe fits" zone: bufferSize < length*2, but might still fit entirely.
+    // Use forward scan with ReturnLength=true to get both position and UTF-8 length.
+    // This avoids redundant work: if we called utf8_length_from_latin1() to check if it fits,
+    // then called findBestFitLatin1() when it doesn't, we'd scan the string twice.
     auto [read, utf8Length] = findBestFitLatin1<true>(data, length, bufferSize);
 
     // Check if everything fit
     if (read == length) {
-      // ASCII fast path: utf8Length == length means no conversion needed
+      // ASCII fast path: utf8Length == length means all chars are ASCII, no conversion needed
       if (utf8Length == length) {
         memcpy(outputBuf.begin(), data, length);
         return TextEncoder::EncodeIntoResult{
@@ -924,8 +919,8 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
           .written = static_cast<int>(length),
         };
       }
-      // All fit: convert with SIMD
-      size_t written = simdutf::convert_latin1_to_utf8(data, length, outputBuf.begin());
+
+      auto written = simdutf::convert_latin1_to_utf8(data, length, outputBuf.begin());
       return TextEncoder::EncodeIntoResult{
         .read = static_cast<int>(length),
         .written = static_cast<int>(written),
@@ -965,11 +960,14 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
       };
     }
 
-    // Use forward scan that also returns UTF-8 length (avoids redundant full-string scan)
+    // "Maybe fits" zone: bufferSize < length*3, but might still fit entirely.
+    // Use forward scan with ReturnLength=true to get both position and UTF-8 length.
+    // This avoids redundant work: if we called utf8_length_from_utf16() to check if it fits,
+    // then called findBestFitUtf16() when it doesn't, we'd scan the string twice.
     auto [read, utf8Length] = findBestFitUtf16<true>(data, length, bufferSize);
 
     if (read == length) {
-      // Everything fit: convert all
+      // Everything fit: convert entire string with SIMD
       size_t written = simdutf::convert_utf16_to_utf8(data, length, outputBuf.begin());
       return TextEncoder::EncodeIntoResult{
         .read = static_cast<int>(length),

From 575373e8a74e77c7f6f0468877fbb86a4850c053 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Fri, 14 Nov 2025 15:46:33 -0500
Subject: [PATCH 18/29] add more comments

---
 src/workerd/api/encoding.c++ | 76 +++++++++++++++++++++++++++++-------
 src/workerd/jsg/jsg.h        |  8 ++++
 src/workerd/jsg/jsvalue.h    |  6 +++
 3 files changed, 76 insertions(+), 14 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index de0fecb144b..4ab9eb1a121 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -882,10 +882,37 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
     // Latin-1 path: characters 0x00-0x7F encode as 1 UTF-8 byte, 0x80-0xFF as 2 bytes
     auto data = reinterpret_cast<const char*>(view.data8());
 
-    // Optimize for incremental encoding: if buffer is much smaller than input,
-    // skip all "whole string fits" checks and go straight to forward scan
+    // Latin-1 encoding strategy: three zones based on input size vs buffer capacity
+    //
+    // For Latin-1: ASCII chars (0x00-0x7F) → 1 byte, extended chars (0x80-0xFF) → 2 bytes
+    // Worst-case expansion: 2x, Best-case: 1x (pure ASCII), Typical mixed: ~1.2-1.5x
+    //
+    // Zone 1: "Definitely doesn't fit" (length > bufferSize * 2)
+    //   Even if all ASCII (best case 1:1), string won't fit. Go straight to incremental mode.
+    //   Uses forward scan without length calculation for maximum efficiency.
+    //   Example: 1M chars, 400k buffer → can't possibly fit, scan to find cutoff point
+    //
+    // Zone 2: "Definitely fits" (length * 2 <= bufferSize)
+    //   Even if all extended Latin-1 (worst case 1:2), string will fit. Convert directly.
+    //   Example: 100k chars, 250k buffer → worst case 200k bytes, guaranteed to fit
+    //
+    // Zone 3: "Maybe fits" (bufferSize < length * 2 AND length <= bufferSize * 2)
+    //   Might fit depending on ASCII/extended ratio. Use forward scan with length calculation.
+    //   Avoids redundant work: scanning once gets us both position and UTF-8 length.
+    //   Example: 600k chars, 700k buffer → fits if mostly ASCII, doesn't if mixed
+    //
+    // Threshold selection (bufferSize * 2):
+    //   - Chosen based on worst-case Latin-1 expansion of 2x
+    //   - Optimized for common case: small buffer relative to input (SSR, streaming)
+    //   - Trade-off: Zone 3 still does forward scan, but with length calculation overhead
+    //   - Performance cliff exists for borderline cases (e.g., 1M chars, 500k buffer falls
+    //     into Zone 3), but forward scan with length is still reasonably efficient
+    //
+    // Future optimization: Could use sampling to estimate ASCII ratio and choose zone
+    // dynamically, but adds complexity for marginal benefit in typical workloads.
+
     if (length > bufferSize * 2) {
-      // Incremental mode: forward scan to find what fits, then convert
+      // Zone 1: Incremental mode - forward scan to find what fits, then convert
       size_t read = findBestFitLatin1(data, length, bufferSize);
       size_t written = simdutf::convert_latin1_to_utf8(data, read, outputBuf.begin());
       return TextEncoder::EncodeIntoResult{
@@ -894,8 +921,8 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
       };
     }
 
-    // Fast path: Worst-case (2x) definitely fits
     if (length * 2 <= bufferSize) {
+      // Zone 2: Fast path - worst-case (2x) definitely fits, convert directly
       size_t written = simdutf::convert_latin1_to_utf8(data, length, outputBuf.begin());
       return TextEncoder::EncodeIntoResult{
         .read = static_cast<int>(length),
@@ -903,10 +930,7 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
       };
     }
 
-    // "Maybe fits" zone: bufferSize < length*2, but might still fit entirely.
-    // Use forward scan with ReturnLength=true to get both position and UTF-8 length.
-    // This avoids redundant work: if we called utf8_length_from_latin1() to check if it fits,
-    // then called findBestFitLatin1() when it doesn't, we'd scan the string twice.
+    // Zone 3: "Maybe fits" - use forward scan with length calculation to avoid double-scan
     auto [read, utf8Length] = findBestFitLatin1<true>(data, length, bufferSize);
 
     // Check if everything fit
@@ -940,9 +964,36 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
 
   if (simdutf::validate_utf16(data, length)) {
     // Valid UTF-16: use fast SIMD conversion
+    //
+    // UTF-16 to UTF-8 encoding: variable expansion based on code point ranges
+    //   U+0000-U+007F (ASCII):           1 byte   (rare in two-byte strings)
+    //   U+0080-U+07FF:                    2 bytes  (most common)
+    //   U+0800-U+FFFF (BMP):             3 bytes  (common: CJK, etc.)
+    //   U+10000-U+10FFFF (surrogate pairs): 4 bytes (less common: emoji, etc.)
+    // Worst-case: 3 bytes per code unit (BMP chars), Typical: ~2-3 bytes per code unit
+    //
+    // Zone 1: "Definitely doesn't fit" (length > bufferSize)
+    //   Conservative threshold: even if all ASCII (impossible for two-byte strings), won't fit.
+    //   This differs from Latin-1 (bufferSize * 2) due to different typical expansion patterns.
+    //   Example: 1M code units, 900k buffer → can't fit, use incremental mode
+    //
+    // Zone 2: "Definitely fits" (length * 3 <= bufferSize)
+    //   Even if all BMP characters (worst case 1:3), string will fit. Convert directly.
+    //   Example: 200k code units, 700k buffer → worst case 600k bytes, guaranteed to fit
+    //
+    // Zone 3: "Maybe fits" (bufferSize < length * 3 AND length <= bufferSize)
+    //   Might fit depending on character distribution. Use forward scan with length calculation.
+    //   Example: 300k code units, 800k buffer → fits if mostly 2-byte chars, doesn't if BMP
+    //
+    // Threshold selection (bufferSize vs bufferSize * 3):
+    //   - Zone 1 threshold (length > bufferSize) is conservative: even 1:1 ratio won't fit
+    //   - More aggressive than Latin-1 because UTF-16 typical expansion is higher (~2-3x)
+    //   - Zone 3 (maybe fits) is large: from bufferSize to bufferSize * 3
+    //   - Optimized for common case where UTF-16 strings are mostly 2-3 byte encodings
+    //   - Performance cliff: Zone 3 still uses forward scan with length calculation overhead
 
-    // Incremental mode: buffer much smaller than input, skip "whole string fits" checks
     if (length > bufferSize) {
+      // Zone 1: Incremental mode - forward scan to find what fits, then convert
       size_t read = findBestFitUtf16(data, length, bufferSize);
       size_t written = simdutf::convert_utf16_to_utf8(data, read, outputBuf.begin());
       return TextEncoder::EncodeIntoResult{
@@ -951,8 +1002,8 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
       };
     }
 
-    // Fast path: worst-case (3 bytes per UTF-16 code unit) fits
     if (length * 3 <= bufferSize) {
+      // Zone 2: Fast path - worst-case (3x) definitely fits, convert directly
       size_t written = simdutf::convert_utf16_to_utf8(data, length, outputBuf.begin());
       return TextEncoder::EncodeIntoResult{
         .read = static_cast<int>(length),
@@ -960,10 +1011,7 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
       };
     }
 
-    // "Maybe fits" zone: bufferSize < length*3, but might still fit entirely.
-    // Use forward scan with ReturnLength=true to get both position and UTF-8 length.
-    // This avoids redundant work: if we called utf8_length_from_utf16() to check if it fits,
-    // then called findBestFitUtf16() when it doesn't, we'd scan the string twice.
+    // Zone 3: "Maybe fits" - use forward scan with length calculation to avoid double-scan
     auto [read, utf8Length] = findBestFitUtf16<true>(data, length, bufferSize);
 
     if (read == length) {
diff --git a/src/workerd/jsg/jsg.h b/src/workerd/jsg/jsg.h
index cab79368322..0f41ea9ea63 100644
--- a/src/workerd/jsg/jsg.h
+++ b/src/workerd/jsg/jsg.h
@@ -2757,6 +2757,14 @@ class Lock {
 
   // Utility method to safely allocate a v8::BackingStore with allocation failure handling.
   // Throws a javascript error if allocation fails.
+  //
+  // IMPORTANT: This method can trigger garbage collection, which may move or invalidate V8
+  // objects. Do NOT call this method while:
+  // - A v8::String::ValueView is alive (it holds internal V8 heap locks)
+  // - You have raw pointers to V8 heap data (e.g., from view.data8(), view.data16())
+  //
+  // Safe pattern: Copy V8 string data to off-heap memory FIRST (e.g., via JsString::writeInto()
+  // into kj::SmallArray), THEN call allocBackingStore(). See TextEncoder::encode() for example.
   std::unique_ptr<v8::BackingStore> allocBackingStore(
       size_t size, AllocOption init_mode = AllocOption::ZERO_INITIALIZED) KJ_WARN_UNUSED_RESULT;
 
diff --git a/src/workerd/jsg/jsvalue.h b/src/workerd/jsg/jsvalue.h
index 2c9c1f55fde..8a52a5d5bd7 100644
--- a/src/workerd/jsg/jsvalue.h
+++ b/src/workerd/jsg/jsvalue.h
@@ -305,6 +305,12 @@ class JsString final: public JsBase<v8::String, JsString> {
     // The number of elements (e.g. char, byte, uint16_t) written to the buffer.
     size_t written;
   };
+
+  // Copy string contents into a provided buffer (off-heap memory).
+  //
+  // IMPORTANT: This method does NOT flatten the V8 string or hold V8 heap locks. It safely
+  // copies data out of V8's heap into your buffer. This makes it safe to use before calling
+  // GC-triggering operations like Lock::allocBackingStore().
   WriteIntoStatus writeInto(
       Lock& js, kj::ArrayPtr<char> buffer, WriteFlags options = WriteFlags::NONE) const;
   WriteIntoStatus writeInto(

From 621e3cee2390e2e8e2b5102088dc63ab01fdd0fd Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Fri, 14 Nov 2025 15:51:08 -0500
Subject: [PATCH 19/29] simplify things

---
 src/workerd/api/encoding.c++ | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 4ab9eb1a121..28439ca98af 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -704,7 +704,7 @@ std::conditional_t<ReturnLength, std::pair<size_t, size_t>, size_t> findBestFitL
 
   while (pos < length) {
     size_t remaining = length - pos;
-    size_t chunkSize = remaining < CHUNK ? remaining : CHUNK;
+    size_t chunkSize = kj::min(remaining, CHUNK);
     size_t chunkUtf8Len = simdutf::utf8_length_from_latin1(data + pos, chunkSize);
 
     if (utf8Accumulated + chunkUtf8Len > bufferSize) {

From 4558093508a78d37e23d4f1b0589da69af73b4d3 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Mon, 17 Nov 2025 11:43:44 -0500
Subject: [PATCH 20/29] address pr reviews

---
 src/workerd/api/encoding.c++ | 62 +++++++++++++++---------------------
 1 file changed, 26 insertions(+), 36 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 28439ca98af..a4f9ea0c49b 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -492,13 +492,15 @@ kj::Maybe<jsg::JsString> TextDecoder::decodePtr(
 
 namespace {
 
-[[maybe_unused]] constexpr inline bool isLeadSurrogate(char16_t c) {
+#ifdef KJ_DEBUG
+constexpr inline bool isLeadSurrogate(char16_t c) {
   return (c & 0xFC00) == 0xD800;
 }
 
-[[maybe_unused]] constexpr inline bool isTrailSurrogate(char16_t c) {
+constexpr inline bool isTrailSurrogate(char16_t c) {
   return (c & 0xFC00) == 0xDC00;
 }
+#endif  // KJ_DEBUG
 
 // Calculate UTF-8 length from UTF-16 with potentially invalid surrogates.
 // Invalid surrogates are counted as U+FFFD (3 bytes in UTF-8).
@@ -545,26 +547,6 @@ size_t utf8LengthFromInvalidUtf16(kj::ArrayPtr<const char16_t> input) {
   return utf8Length;
 }
 
-// Encode a single UTF-16 code unit to UTF-8
-inline size_t encodeUtf8CodeUnit(char16_t c, kj::ArrayPtr<char> out) {
-  if (c < 0x80) {
-    KJ_DASSERT(out.size() >= 1);
-    out[0] = static_cast<char>(c);
-    return 1;
-  } else if (c < 0x800) {
-    KJ_DASSERT(out.size() >= 2);
-    out[0] = static_cast<char>(0xC0 | (c >> 6));
-    out[1] = static_cast<char>(0x80 | (c & 0x3F));
-    return 2;
-  } else {
-    KJ_DASSERT(out.size() >= 3);
-    out[0] = static_cast<char>(0xE0 | (c >> 12));
-    out[1] = static_cast<char>(0x80 | ((c >> 6) & 0x3F));
-    out[2] = static_cast<char>(0x80 | (c & 0x3F));
-    return 3;
-  }
-}
-
 // Convert UTF-16 with potentially invalid surrogates to UTF-8.
 // Invalid surrogates are replaced with U+FFFD.
 // Returns the number of UTF-8 bytes written.
@@ -597,14 +579,15 @@ size_t convertInvalidUtf16ToUtf8(kj::ArrayPtr<const char16_t> input, kj::ArrayPt
 
       // Handle the invalid surrogate at inputPos
       // SURROGATE error means unpaired surrogate, so valid pair should be impossible
-      [[maybe_unused]] char16_t c = input[inputPos];
-      KJ_DASSERT(!(isLeadSurrogate(c) && inputPos + 1 < input.size() &&
+      KJ_DASSERT(!(isLeadSurrogate(input[inputPos]) && inputPos + 1 < input.size() &&
                      isTrailSurrogate(input[inputPos + 1])),
           "Valid surrogate pair should not trigger SURROGATE error");
 
-      // Invalid surrogate - replace with U+FFFD (3 bytes)
-      outputPos += encodeUtf8CodeUnit(0xFFFD, out.slice(outputPos, out.size()));
-      KJ_DASSERT(outputPos <= out.size());
+      // Invalid surrogate - replace with U+FFFD (3 bytes: 0xEF 0xBF 0xBD)
+      KJ_DASSERT(outputPos + 3 <= out.size());
+      out[outputPos++] = static_cast<char>(0xEF);
+      out[outputPos++] = static_cast<char>(0xBF);
+      out[outputPos++] = static_cast<char>(0xBD);
       inputPos++;
     } else {
       KJ_FAIL_REQUIRE(
@@ -623,7 +606,12 @@ jsg::Ref<TextEncoder> TextEncoder::constructor(jsg::Lock& js) {
 
 jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input) {
   jsg::JsString str = input.orDefault(js.str());
-  std::shared_ptr<v8::BackingStore> backingStore;
+
+#ifdef KJ_DEBUG
+  bool wasAlreadyFlat = str.isFlat();
+  KJ_DEFER({ KJ_ASSERT(wasAlreadyFlat || !str.isFlat()); });
+#endif
+
   size_t utf8_length = 0;
   auto length = str.length(js);
 
@@ -643,22 +631,23 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
     if (utf8_length == length) {
       // ASCII fast path: no conversion needed, Latin-1 is same as UTF-8 for ASCII
       // Allocate final on-heap buffer and copy
-      backingStore = js.allocBackingStore(length, jsg::Lock::AllocOption::UNINITIALIZED);
+      auto backingStore = js.allocBackingStore(length, jsg::Lock::AllocOption::UNINITIALIZED);
       memcpy(backingStore->Data(), latin1Buffer.begin(), length);
-      auto array = v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, backingStore), 0, length);
+      auto array =
+          v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backingStore)), 0, length);
       return jsg::JsUint8Array(array);
     }
 
     KJ_DASSERT(utf8_length > length);
 
     // Need to convert Latin-1 to UTF-8
-    backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
+    auto backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
     [[maybe_unused]] auto written =
         simdutf::convert_latin1_to_utf8(reinterpret_cast<const char*>(latin1Buffer.begin()), length,
             reinterpret_cast<char*>(backingStore->Data()));
     KJ_DASSERT(utf8_length == written);
-    auto array =
-        v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, backingStore), 0, utf8_length);
+    auto array = v8::Uint8Array::New(
+        v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backingStore)), 0, utf8_length);
     return jsg::JsUint8Array(array);
   }
 
@@ -681,12 +670,12 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
     simdutf::to_well_formed_utf16(data, length, data);
   }
 
-  backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
+  auto backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
   [[maybe_unused]] auto written = simdutf::convert_valid_utf16_to_utf8(
       data, length, reinterpret_cast<char*>(backingStore->Data()));
 
   auto array =
-      v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, backingStore), 0, utf8_length);
+      v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backingStore)), 0, utf8_length);
   return jsg::JsUint8Array(array);
 }
 
@@ -937,7 +926,8 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
     if (read == length) {
       // ASCII fast path: utf8Length == length means all chars are ASCII, no conversion needed
       if (utf8Length == length) {
-        memcpy(outputBuf.begin(), data, length);
+        KJ_DASSERT(length <= bufferSize);
+        outputBuf.slice(0, length).copyFrom(kj::arrayPtr(data, length));
         return TextEncoder::EncodeIntoResult{
           .read = static_cast<int>(length),
           .written = static_cast<int>(length),

From 95fe6424da0b892a5d024cf1d5fda72f27e0a196 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Tue, 18 Nov 2025 13:57:56 -0500
Subject: [PATCH 21/29] simplify implementation

---
 src/workerd/api/encoding.c++ | 56 ++++--------------------------------
 1 file changed, 6 insertions(+), 50 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index a4f9ea0c49b..01297bf4f41 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -502,51 +502,6 @@ constexpr inline bool isTrailSurrogate(char16_t c) {
 }
 #endif  // KJ_DEBUG
 
-// Calculate UTF-8 length from UTF-16 with potentially invalid surrogates.
-// Invalid surrogates are counted as U+FFFD (3 bytes in UTF-8).
-// Uses SIMD for valid portions and falls back to scalar for invalid surrogates.
-size_t utf8LengthFromInvalidUtf16(kj::ArrayPtr<const char16_t> input) {
-  size_t inputPos = 0;
-  size_t utf8Length = 0;
-
-  while (inputPos < input.size()) {
-    // Find the next invalid surrogate using SIMD validation
-    auto result =
-        simdutf::validate_utf16_with_errors(input.begin() + inputPos, input.size() - inputPos);
-
-    if (result.error == simdutf::error_code::SUCCESS) {
-      // Remaining input is valid - calculate length with SIMD
-      utf8Length +=
-          simdutf::utf8_length_from_utf16(input.begin() + inputPos, input.size() - inputPos);
-      break;
-    }
-
-    if (result.error == simdutf::error_code::SURROGATE) {
-      // Calculate length for the valid portion before the error with SIMD
-      if (result.count > 0) {
-        utf8Length += simdutf::utf8_length_from_utf16(input.begin() + inputPos, result.count);
-        inputPos += result.count;
-      }
-
-      // Handle the invalid surrogate at inputPos
-      // SURROGATE error means unpaired surrogate, so valid pair should be impossible
-      [[maybe_unused]] char16_t c = input[inputPos];
-      KJ_DASSERT(!(isLeadSurrogate(c) && inputPos + 1 < input.size() &&
-                     isTrailSurrogate(input[inputPos + 1])),
-          "Valid surrogate pair should not trigger SURROGATE error");
-
-      // Invalid surrogate = U+FFFD (3 bytes)
-      utf8Length += 3;
-      inputPos++;
-    } else {
-      KJ_FAIL_REQUIRE(
-          "Unexpected UTF-16 validation error from simdutf", static_cast<int>(result.error));
-    }
-  }
-
-  return utf8Length;
-}
-
 // Convert UTF-16 with potentially invalid surrogates to UTF-8.
 // Invalid surrogates are replaced with U+FFFD.
 // Returns the number of UTF-8 bytes written.
@@ -657,14 +612,14 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
   kj::SmallArray<uint16_t, 4096> utf16Buffer(length);
 
   // Note: writeInto() doesn't flatten the string - it calls writeTo() which chains through
-  // Write2 -> WriteV2 -> WriteHelperV2 -> String::WriteToFlat (written by Erik in 2008).
+  // Write2 -> WriteV2 -> WriteHelperV2 -> String::WriteToFlat.
   // This means we may read from multiple string segments, but that's fine for our use case.
   [[maybe_unused]] auto writeResult = str.writeInto(js, utf16Buffer.asPtr());
   KJ_DASSERT(
       writeResult.written == length, "writeInto must completely overwrite the backing buffer");
 
   auto data = reinterpret_cast<char16_t*>(utf16Buffer.begin());
-  utf8_length = utf8LengthFromInvalidUtf16(kj::arrayPtr(data, length));
+  utf8_length = simdutf::utf8_length_from_utf16_with_replacement(data, length);
 
   if (!simdutf::validate_utf16(data, length)) {
     simdutf::to_well_formed_utf16(data, length, data);
@@ -819,7 +774,7 @@ size_t findBestFitInvalidUtf16(const char16_t* data, size_t length, size_t buffe
       chunkSize = (remaining >= 2) ? 2 : remaining;
     }
 
-    size_t chunkUtf8Len = utf8LengthFromInvalidUtf16(kj::arrayPtr(data + pos, chunkSize));
+    size_t chunkUtf8Len = simdutf::utf8_length_from_utf16_with_replacement(data + pos, chunkSize);
 
     if (utf8Accumulated + chunkUtf8Len > bufferSize) {
       // Chunk would overflow - binary search within chunk
@@ -838,7 +793,8 @@ size_t findBestFitInvalidUtf16(const char16_t* data, size_t length, size_t buffe
           break;
         }
 
-        size_t midUtf8Length = utf8LengthFromInvalidUtf16(kj::arrayPtr(data + pos, adjustedMid));
+        size_t midUtf8Length =
+            simdutf::utf8_length_from_utf16_with_replacement(data + pos, adjustedMid);
         if (utf8Accumulated + midUtf8Length <= bufferSize) {
           bestFit = adjustedMid;
           left = adjustedMid + 1;
@@ -1043,7 +999,7 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
   }
 
   // Slow path: calculate exact UTF-8 length
-  size_t utf8Length = utf8LengthFromInvalidUtf16(kj::arrayPtr(data, length));
+  size_t utf8Length = simdutf::utf8_length_from_utf16_with_replacement(data, length);
   if (utf8Length <= bufferSize) {
     size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, length), outputBuf);
     return TextEncoder::EncodeIntoResult{

From 411a0559c763a5f97867397f1f97ce893f6a1c21 Mon Sep 17 00:00:00 2001
From: Erik Corry <ecorry@cloudflare.com>
Date: Fri, 21 Nov 2025 20:08:00 +0100
Subject: [PATCH 22/29] An attempt to simplify the encodeInto change. (#5565)

* Simplify

* Tune slightly

* Fix assert

* Fix perf regression and OOM read

* Handle very tiny output buffers

* feedback
---
 src/workerd/api/encoding.c++ | 572 ++++++++---------------------------
 1 file changed, 128 insertions(+), 444 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 01297bf4f41..17af01cbff7 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -490,71 +490,6 @@ kj::Maybe<jsg::JsString> TextDecoder::decodePtr(
 // =======================================================================================
 // TextEncoder implementation
 
-namespace {
-
-#ifdef KJ_DEBUG
-constexpr inline bool isLeadSurrogate(char16_t c) {
-  return (c & 0xFC00) == 0xD800;
-}
-
-constexpr inline bool isTrailSurrogate(char16_t c) {
-  return (c & 0xFC00) == 0xDC00;
-}
-#endif  // KJ_DEBUG
-
-// Convert UTF-16 with potentially invalid surrogates to UTF-8.
-// Invalid surrogates are replaced with U+FFFD.
-// Returns the number of UTF-8 bytes written.
-// Uses SIMD for valid portions and falls back to scalar for invalid surrogates.
-size_t convertInvalidUtf16ToUtf8(kj::ArrayPtr<const char16_t> input, kj::ArrayPtr<char> out) {
-  size_t inputPos = 0;
-  size_t outputPos = 0;
-
-  while (inputPos < input.size()) {
-    // Find the next invalid surrogate using SIMD validation
-    auto result =
-        simdutf::validate_utf16_with_errors(input.begin() + inputPos, input.size() - inputPos);
-
-    if (result.error == simdutf::error_code::SUCCESS) {
-      // Remaining input is valid - convert it all with SIMD
-      outputPos += simdutf::convert_utf16_to_utf8(
-          input.begin() + inputPos, input.size() - inputPos, out.begin() + outputPos);
-      KJ_DASSERT(outputPos <= out.size());
-      break;
-    }
-
-    if (result.error == simdutf::error_code::SURROGATE) {
-      // Convert the valid portion before the error with SIMD
-      if (result.count > 0) {
-        outputPos += simdutf::convert_valid_utf16_to_utf8(
-            input.begin() + inputPos, result.count, out.begin() + outputPos);
-        KJ_DASSERT(outputPos <= out.size());
-        inputPos += result.count;
-      }
-
-      // Handle the invalid surrogate at inputPos
-      // SURROGATE error means unpaired surrogate, so valid pair should be impossible
-      KJ_DASSERT(!(isLeadSurrogate(input[inputPos]) && inputPos + 1 < input.size() &&
-                     isTrailSurrogate(input[inputPos + 1])),
-          "Valid surrogate pair should not trigger SURROGATE error");
-
-      // Invalid surrogate - replace with U+FFFD (3 bytes: 0xEF 0xBF 0xBD)
-      KJ_DASSERT(outputPos + 3 <= out.size());
-      out[outputPos++] = static_cast<char>(0xEF);
-      out[outputPos++] = static_cast<char>(0xBF);
-      out[outputPos++] = static_cast<char>(0xBD);
-      inputPos++;
-    } else {
-      KJ_FAIL_REQUIRE(
-          "Unexpected UTF-16 validation error from simdutf", static_cast<int>(result.error));
-    }
-  }
-
-  return outputPos;
-}
-
-}  // namespace
-
 jsg::Ref<TextEncoder> TextEncoder::constructor(jsg::Lock& js) {
   return js.alloc<TextEncoder>();
 }
@@ -562,15 +497,13 @@ jsg::Ref<TextEncoder> TextEncoder::constructor(jsg::Lock& js) {
 jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input) {
   jsg::JsString str = input.orDefault(js.str());
 
-#ifdef KJ_DEBUG
-  bool wasAlreadyFlat = str.isFlat();
-  KJ_DEFER({ KJ_ASSERT(wasAlreadyFlat || !str.isFlat()); });
-#endif
-
   size_t utf8_length = 0;
   auto length = str.length(js);
 
-  // Fast path: check if string is one-byte before creating ValueView
+  // Note: writeInto() doesn't flatten the string - it calls writeTo() which chains through
+  // Write2 -> WriteV2 -> WriteHelperV2 -> String::WriteToFlat.
+  // This means we may read from multiple string segments, but that's fine for our use case.
+
   if (str.isOneByte(js)) {
     // Use off-heap allocation for intermediate Latin-1 buffer to avoid wasting V8 heap space
     // and potentially triggering GC. Stack allocation for small strings, heap for large.
@@ -583,37 +516,26 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
     utf8_length = simdutf::utf8_length_from_latin1(
         reinterpret_cast<const char*>(latin1Buffer.begin()), length);
 
+    auto backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
     if (utf8_length == length) {
       // ASCII fast path: no conversion needed, Latin-1 is same as UTF-8 for ASCII
-      // Allocate final on-heap buffer and copy
-      auto backingStore = js.allocBackingStore(length, jsg::Lock::AllocOption::UNINITIALIZED);
       memcpy(backingStore->Data(), latin1Buffer.begin(), length);
-      auto array =
-          v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backingStore)), 0, length);
-      return jsg::JsUint8Array(array);
+    } else {
+      [[maybe_unused]] auto written =
+          simdutf::convert_latin1_to_utf8(reinterpret_cast<const char*>(latin1Buffer.begin()),
+              length, reinterpret_cast<char*>(backingStore->Data()));
+      KJ_DASSERT(utf8_length == written);
     }
-
-    KJ_DASSERT(utf8_length > length);
-
-    // Need to convert Latin-1 to UTF-8
-    auto backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
-    [[maybe_unused]] auto written =
-        simdutf::convert_latin1_to_utf8(reinterpret_cast<const char*>(latin1Buffer.begin()), length,
-            reinterpret_cast<char*>(backingStore->Data()));
-    KJ_DASSERT(utf8_length == written);
     auto array = v8::Uint8Array::New(
         v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backingStore)), 0, utf8_length);
     return jsg::JsUint8Array(array);
   }
 
-  // Two-byte string path
-  // Use off-heap allocation for intermediate UTF-16 buffer to avoid triggering GC.
+  // Use off-heap allocation for intermediate UTF-16 buffer to avoid wasting V8 heap space
+  // and potentially triggering GC.
   // Stack allocation for small strings, heap for large.
   kj::SmallArray<uint16_t, 4096> utf16Buffer(length);
 
-  // Note: writeInto() doesn't flatten the string - it calls writeTo() which chains through
-  // Write2 -> WriteV2 -> WriteHelperV2 -> String::WriteToFlat.
-  // This means we may read from multiple string segments, but that's fine for our use case.
   [[maybe_unused]] auto writeResult = str.writeInto(js, utf16Buffer.asPtr());
   KJ_DASSERT(
       writeResult.written == length, "writeInto must completely overwrite the backing buffer");
@@ -621,14 +543,20 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
   auto data = reinterpret_cast<char16_t*>(utf16Buffer.begin());
   utf8_length = simdutf::utf8_length_from_utf16_with_replacement(data, length);
 
-  if (!simdutf::validate_utf16(data, length)) {
-    simdutf::to_well_formed_utf16(data, length, data);
-  }
-
   auto backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
-  [[maybe_unused]] auto written = simdutf::convert_valid_utf16_to_utf8(
+  auto result = simdutf::convert_utf16_to_utf8_with_errors(
       data, length, reinterpret_cast<char*>(backingStore->Data()));
 
+  if (result.error != simdutf::SUCCESS) {
+    // Oh, no, there are unpaired surrogates.  This is hopefully rare.
+    simdutf::to_well_formed_utf16(data, length, data);
+    [[maybe_unused]] auto written =
+        simdutf::convert_utf16_to_utf8(data, length, reinterpret_cast<char*>(backingStore->Data()));
+    KJ_DASSERT(written == utf8_length, "Conversion yielded wrong number of UTF-8 bytes");
+  } else {
+    KJ_DASSERT(result.count == utf8_length, "Conversion yielded wrong number of UTF-8 bytes");
+  }
+
   auto array =
       v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backingStore)), 0, utf8_length);
   return jsg::JsUint8Array(array);
@@ -636,180 +564,93 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
 
 namespace {
 
-// Find how many Latin-1 characters fit when converted to UTF-8
-// Uses chunked forward scan with SIMD, O(result) complexity
-// Template parameter ReturnLength controls whether to return just position or (position, utf8_length)
-template <bool ReturnLength = false>
-std::conditional_t<ReturnLength, std::pair<size_t, size_t>, size_t> findBestFitLatin1(
-    const char* data, size_t length, size_t bufferSize) {
-  size_t pos = 0;
-  size_t utf8Accumulated = 0;
-  constexpr size_t CHUNK = 256;
-
-  while (pos < length) {
-    size_t remaining = length - pos;
-    size_t chunkSize = kj::min(remaining, CHUNK);
-    size_t chunkUtf8Len = simdutf::utf8_length_from_latin1(data + pos, chunkSize);
-
-    if (utf8Accumulated + chunkUtf8Len > bufferSize) {
-      // Chunk would overflow - binary search within chunk
-      size_t left = 0;
-      size_t right = chunkSize;
-      size_t bestFit = 0;
-
-      while (left <= right) {
-        size_t mid = left + (right - left) / 2;
-        if (mid == 0) break;
-
-        size_t midUtf8Length = simdutf::utf8_length_from_latin1(data + pos, mid);
-        if (utf8Accumulated + midUtf8Length <= bufferSize) {
-          bestFit = mid;
-          left = mid + 1;
-        } else {
-          right = mid - 1;
-        }
-      }
-
-      if constexpr (ReturnLength) {
-        size_t finalPos = pos + bestFit;
-        size_t finalUtf8Len =
-            utf8Accumulated + simdutf::utf8_length_from_latin1(data + pos, bestFit);
-        return {finalPos, finalUtf8Len};
-      } else {
-        return pos + bestFit;
-      }
-    }
-
-    utf8Accumulated += chunkUtf8Len;
-    pos += chunkSize;
-  }
+constexpr bool isSurrogatePair(uint16_t lead, uint16_t trail) {
+  // We would like to use simdutf::trim_partial_utf16, but it's not guaranteed
+  // to work right on invalid UTF-16.
+  return (lead & 0xfc00) == 0xd800 && (trail & 0xfc00) == 0xdc00;
+}
 
-  if constexpr (ReturnLength) {
-    return {pos, utf8Accumulated};
-  } else {
-    return pos;
-  }
+// UTF-8 byte count for one code unit; surrogates are conservatively counted as 3 bytes.
+constexpr size_t simpleUtfEncodingLength(uint16_t c) {
+  if (c < 0x80) return 1;   // U+0000..U+007F: one byte.
+  if (c < 0x800) return 2;  // U+0080..U+07FF: two bytes.
+  return 3;                 // Rest of the BMP, including lone surrogates (U+FFFD).
 }
 
-// Find how many UTF-16 code units fit when converted to UTF-8
-// Uses chunked forward scan with SIMD, O(result) complexity. Never splits surrogate pairs.
-// Template parameter ReturnLength controls whether to return just position or (position, utf8_length)
-template <bool ReturnLength = false>
-std::conditional_t<ReturnLength, std::pair<size_t, size_t>, size_t> findBestFitUtf16(
-    const char16_t* data, size_t length, size_t bufferSize) {
+// Find how many UTF-16 or Latin1 code units fit when converted to UTF-8.
+// May conservatively underestimate the largest number of code units we can fit
+// because of undetected surrogate pairs on boundaries.
+// Works even on malformed UTF-16.
+template <typename Char>
+size_t findBestFit(const Char* data, size_t length, size_t bufferSize) {
   size_t pos = 0;
   size_t utf8Accumulated = 0;
-  constexpr size_t CHUNK = 256;
-
-  while (pos < length) {
-    size_t remaining = length - pos;
-    size_t chunkSize = simdutf::trim_partial_utf16(data + pos, kj::min(remaining, CHUNK));
-
-    if (chunkSize == 0) {
-      chunkSize = (remaining >= 2) ? 2 : remaining;
+  // The SIMD is more efficient with a size that's a little over a multiple of 16.
+  constexpr size_t CHUNK = 257;
+  // The max number of UTF-8 output bytes per input code unit.
+  constexpr bool UTF16 = sizeof(Char) == 2;
+  constexpr size_t MAX_FACTOR = UTF16 ? 3 : 2;
+
+  // Our initial guess at how much the number of elements expands in the
+  // conversion to UTF-8.
+  double expansion = 1.15;
+
+  while (pos < length && utf8Accumulated < bufferSize) {
+    size_t remainingInput = length - pos;
+    size_t spaceRemaining = bufferSize - utf8Accumulated;
+    KJ_DASSERT(expansion >= 1.15);
+
+    // We estimate how many characters are likely to fit in the buffer, but
+    // only try for CHUNK characters at a time to minimize the worst case
+    // waste of time if we guessed too high.
+    size_t guaranteedToFit = spaceRemaining / MAX_FACTOR;
+    if (guaranteedToFit >= remainingInput) {
+      // Don't even bother checking any more, it's all going to fit.  Hitting
+      // this halfway through is also a good reason to limit the CHUNK size.
+      return length;
+    }
+    size_t likelyToFit = kj::min(static_cast<size_t>(spaceRemaining / expansion), CHUNK);
+    size_t fitEstimate = kj::max(1, kj::max(guaranteedToFit, likelyToFit));
+    size_t chunkSize = kj::min(remainingInput, fitEstimate);
+    if (chunkSize == 1) break;  // Not worth running this complicated stuff one char at a time.
+    // No div-by-zero because remainingInput and fitEstimate are at least 1.
+    KJ_DASSERT(chunkSize >= 1);
+
+    size_t chunkUtf8Len;
+    if constexpr (UTF16) {
+      chunkUtf8Len = simdutf::utf8_length_from_utf16_with_replacement(data + pos, chunkSize);
+    } else {
+      chunkUtf8Len = simdutf::utf8_length_from_latin1(data + pos, chunkSize);
     }
-
-    size_t chunkUtf8Len = simdutf::utf8_length_from_utf16(data + pos, chunkSize);
 
     if (utf8Accumulated + chunkUtf8Len > bufferSize) {
-      // Chunk would overflow - binary search within chunk
-      size_t left = 0;
-      size_t right = chunkSize;
-      size_t bestFit = 0;
-
-      while (left <= right) {
-        size_t mid = left + (right - left) / 2;
-        if (mid == 0) break;
-
-        size_t adjustedMid = simdutf::trim_partial_utf16(data + pos, mid);
-
-        if (adjustedMid == 0) {
-          right = 0;
-          break;
-        }
-
-        size_t midUtf8Length = simdutf::utf8_length_from_utf16(data + pos, adjustedMid);
-        if (utf8Accumulated + midUtf8Length <= bufferSize) {
-          bestFit = adjustedMid;
-          left = adjustedMid + 1;
-        } else {
-          right = adjustedMid - 1;
-        }
-      }
-
-      if constexpr (ReturnLength) {
-        size_t finalPos = pos + bestFit;
-        size_t finalUtf8Len =
-            utf8Accumulated + simdutf::utf8_length_from_utf16(data + pos, bestFit);
-        return {finalPos, finalUtf8Len};
-      } else {
-        return pos + bestFit;
-      }
+      // Our chosen chunk didn't fit in the rest of the output buffer.
+      KJ_DASSERT(chunkSize > guaranteedToFit);
+      // Since it didn't fit we adjust our expansion guess upwards.
+      expansion = kj::max(expansion * 1.1, (chunkUtf8Len * 1.1) / chunkSize);
+    } else {
+      // Use successful length calculation to adjust our expansion estimate.
+      expansion = kj::max(1.15, (chunkUtf8Len * 1.1) / chunkSize);
+      pos += chunkSize;
+      utf8Accumulated += chunkUtf8Len;
     }
-
-    utf8Accumulated += chunkUtf8Len;
-    pos += chunkSize;
   }
-
-  if constexpr (ReturnLength) {
-    return {pos, utf8Accumulated};
-  } else {
-    return pos;
+  // Do the last few code units in a simpler way.
+  while (pos < length && utf8Accumulated < bufferSize) {
+    size_t extra = simpleUtfEncodingLength(data[pos]);
+    if (utf8Accumulated + extra > bufferSize) break;
+    pos++;
+    utf8Accumulated += extra;
   }
-}
-
-// Find how many UTF-16 code units with invalid surrogates fit when converted to UTF-8
-// Uses chunked forward scan with SIMD, O(result) complexity. Never splits surrogate pairs.
-// Unpaired surrogates replaced with U+FFFD.
-size_t findBestFitInvalidUtf16(const char16_t* data, size_t length, size_t bufferSize) {
-  size_t pos = 0;
-  size_t utf8Accumulated = 0;
-  constexpr size_t CHUNK = 256;
-
-  while (pos < length) {
-    size_t remaining = length - pos;
-    size_t chunkSize = simdutf::trim_partial_utf16(data + pos, kj::min(remaining, CHUNK));
-
-    if (chunkSize == 0) {
-      chunkSize = (remaining >= 2) ? 2 : remaining;
-    }
-
-    size_t chunkUtf8Len = simdutf::utf8_length_from_utf16_with_replacement(data + pos, chunkSize);
-
-    if (utf8Accumulated + chunkUtf8Len > bufferSize) {
-      // Chunk would overflow - binary search within chunk
-      size_t left = 0;
-      size_t right = chunkSize;
-      size_t bestFit = 0;
-
-      while (left <= right) {
-        size_t mid = left + (right - left) / 2;
-        if (mid == 0) break;
-
-        size_t adjustedMid = simdutf::trim_partial_utf16(data + pos, mid);
-
-        if (adjustedMid == 0) {
-          right = 0;
-          break;
-        }
-
-        size_t midUtf8Length =
-            simdutf::utf8_length_from_utf16_with_replacement(data + pos, adjustedMid);
-        if (utf8Accumulated + midUtf8Length <= bufferSize) {
-          bestFit = adjustedMid;
-          left = adjustedMid + 1;
-        } else {
-          right = adjustedMid - 1;
-        }
-      }
-
-      return pos + bestFit;
+  if (UTF16 && pos != 0 && pos != length && isSurrogatePair(data[pos - 1], data[pos])) {
+    // We ended on a leading surrogate which has a matching trailing surrogate in the next
+    // position.  In order to make progress when the bufferSize is tiny we try to include it.
+    if (utf8Accumulated < bufferSize) {
+      pos++;  // We had one more byte, so we can include the pair, UTF-8 encoding 3->4.
+    } else {
+      pos--;  // Don't chop the pair in half.
     }
-
-    utf8Accumulated += chunkUtf8Len;
-    pos += chunkSize;
   }
-
   return pos;
 }
 
@@ -820,197 +661,40 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
   auto outputBuf = buffer.asArrayPtr<char>();
   size_t bufferSize = outputBuf.size();
 
-  v8::String::ValueView view(js.v8Isolate, input);
-  uint32_t length = view.length();
-
-  if (view.is_one_byte()) {
-    // Latin-1 path: characters 0x00-0x7F encode as 1 UTF-8 byte, 0x80-0xFF as 2 bytes
-    auto data = reinterpret_cast<const char*>(view.data8());
-
-    // Latin-1 encoding strategy: three zones based on input size vs buffer capacity
-    //
-    // For Latin-1: ASCII chars (0x00-0x7F) → 1 byte, extended chars (0x80-0xFF) → 2 bytes
-    // Worst-case expansion: 2x, Best-case: 1x (pure ASCII), Typical mixed: ~1.2-1.5x
-    //
-    // Zone 1: "Definitely doesn't fit" (length > bufferSize * 2)
-    //   Even if all ASCII (best case 1:1), string won't fit. Go straight to incremental mode.
-    //   Uses forward scan without length calculation for maximum efficiency.
-    //   Example: 1M chars, 400k buffer → can't possibly fit, scan to find cutoff point
-    //
-    // Zone 2: "Definitely fits" (length * 2 <= bufferSize)
-    //   Even if all extended Latin-1 (worst case 1:2), string will fit. Convert directly.
-    //   Example: 100k chars, 250k buffer → worst case 200k bytes, guaranteed to fit
-    //
-    // Zone 3: "Maybe fits" (bufferSize < length * 2 AND length <= bufferSize * 2)
-    //   Might fit depending on ASCII/extended ratio. Use forward scan with length calculation.
-    //   Avoids redundant work: scanning once gets us both position and UTF-8 length.
-    //   Example: 600k chars, 700k buffer → fits if mostly ASCII, doesn't if mixed
-    //
-    // Threshold selection (bufferSize * 2):
-    //   - Chosen based on worst-case Latin-1 expansion of 2x
-    //   - Optimized for common case: small buffer relative to input (SSR, streaming)
-    //   - Trade-off: Zone 3 still does forward scan, but with length calculation overhead
-    //   - Performance cliff exists for borderline cases (e.g., 1M chars, 500k buffer falls
-    //     into Zone 3), but forward scan with length is still reasonably efficient
-    //
-    // Future optimization: Could use sampling to estimate ASCII ratio and choose zone
-    // dynamically, but adds complexity for marginal benefit in typical workloads.
-
-    if (length > bufferSize * 2) {
-      // Zone 1: Incremental mode - forward scan to find what fits, then convert
-      size_t read = findBestFitLatin1(data, length, bufferSize);
-      size_t written = simdutf::convert_latin1_to_utf8(data, read, outputBuf.begin());
-      return TextEncoder::EncodeIntoResult{
-        .read = static_cast<int>(read),
-        .written = static_cast<int>(written),
-      };
-    }
-
-    if (length * 2 <= bufferSize) {
-      // Zone 2: Fast path - worst-case (2x) definitely fits, convert directly
-      size_t written = simdutf::convert_latin1_to_utf8(data, length, outputBuf.begin());
-      return TextEncoder::EncodeIntoResult{
-        .read = static_cast<int>(length),
-        .written = static_cast<int>(written),
-      };
-    }
-
-    // Zone 3: "Maybe fits" - use forward scan with length calculation to avoid double-scan
-    auto [read, utf8Length] = findBestFitLatin1<true>(data, length, bufferSize);
-
-    // Check if everything fit
-    if (read == length) {
-      // ASCII fast path: utf8Length == length means all chars are ASCII, no conversion needed
-      if (utf8Length == length) {
-        KJ_DASSERT(length <= bufferSize);
-        outputBuf.slice(0, length).copyFrom(kj::arrayPtr(data, length));
-        return TextEncoder::EncodeIntoResult{
-          .read = static_cast<int>(length),
-          .written = static_cast<int>(length),
-        };
+  size_t read = 0;
+  size_t written = 0;
+  {
+    // Scope for the view - we can't do anything that might cause a V8 GC!
+    v8::String::ValueView view(js.v8Isolate, input);
+    uint32_t length = view.length();
+
+    if (view.is_one_byte()) {
+      auto data = reinterpret_cast<const char*>(view.data8());
+      read = findBestFit(data, length, bufferSize);
+      if (read != 0) {
+        KJ_DASSERT(simdutf::utf8_length_from_latin1(data, read) <= bufferSize);
+        written = simdutf::convert_latin1_to_utf8(data, read, outputBuf.begin());
+      }
+    } else {
+      auto data = reinterpret_cast<const char16_t*>(view.data16());
+      read = findBestFit(data, length, bufferSize);
+      if (read != 0) {
+        KJ_DASSERT(simdutf::utf8_length_from_utf16_with_replacement(data, read) <= bufferSize);
+        simdutf::result result =
+            simdutf::convert_utf16_to_utf8_with_errors(data, read, outputBuf.begin());
+        if (result.error == simdutf::SUCCESS) {
+          written = result.count;
+        } else {
+          // Oh, no, there are unpaired surrogates.  This is hopefully rare.
+          kj::SmallArray<char16_t, 4096> conversionBuffer(read);
+          simdutf::to_well_formed_utf16(data, read, conversionBuffer.begin());
+          written =
+              simdutf::convert_utf16_to_utf8(conversionBuffer.begin(), read, outputBuf.begin());
+        }
       }
-
-      auto written = simdutf::convert_latin1_to_utf8(data, length, outputBuf.begin());
-      return TextEncoder::EncodeIntoResult{
-        .read = static_cast<int>(length),
-        .written = static_cast<int>(written),
-      };
-    }
-
-    // Partial fit: convert only what fits
-    size_t written = simdutf::convert_latin1_to_utf8(data, read, outputBuf.begin());
-    return TextEncoder::EncodeIntoResult{
-      .read = static_cast<int>(read),
-      .written = static_cast<int>(written),
-    };
-  }
-
-  // UTF-16 path: validate to ensure spec compliance (replace invalid surrogates with U+FFFD)
-  auto data = reinterpret_cast<const char16_t*>(view.data16());
-
-  if (simdutf::validate_utf16(data, length)) {
-    // Valid UTF-16: use fast SIMD conversion
-    //
-    // UTF-16 to UTF-8 encoding: variable expansion based on code point ranges
-    //   U+0000-U+007F (ASCII):           1 byte   (rare in two-byte strings)
-    //   U+0080-U+07FF:                    2 bytes  (most common)
-    //   U+0800-U+FFFF (BMP):             3 bytes  (common: CJK, etc.)
-    //   U+10000-U+10FFFF (surrogate pairs): 4 bytes (less common: emoji, etc.)
-    // Worst-case: 3 bytes per code unit (BMP chars), Typical: ~2-3 bytes per code unit
-    //
-    // Zone 1: "Definitely doesn't fit" (length > bufferSize)
-    //   Conservative threshold: even if all ASCII (impossible for two-byte strings), won't fit.
-    //   This differs from Latin-1 (bufferSize * 2) due to different typical expansion patterns.
-    //   Example: 1M code units, 900k buffer → can't fit, use incremental mode
-    //
-    // Zone 2: "Definitely fits" (length * 3 <= bufferSize)
-    //   Even if all BMP characters (worst case 1:3), string will fit. Convert directly.
-    //   Example: 200k code units, 700k buffer → worst case 600k bytes, guaranteed to fit
-    //
-    // Zone 3: "Maybe fits" (bufferSize < length * 3 AND length <= bufferSize)
-    //   Might fit depending on character distribution. Use forward scan with length calculation.
-    //   Example: 300k code units, 800k buffer → fits if mostly 2-byte chars, doesn't if BMP
-    //
-    // Threshold selection (bufferSize vs bufferSize * 3):
-    //   - Zone 1 threshold (length > bufferSize) is conservative: even 1:1 ratio won't fit
-    //   - More aggressive than Latin-1 because UTF-16 typical expansion is higher (~2-3x)
-    //   - Zone 3 (maybe fits) is large: from bufferSize to bufferSize * 3
-    //   - Optimized for common case where UTF-16 strings are mostly 2-3 byte encodings
-    //   - Performance cliff: Zone 3 still uses forward scan with length calculation overhead
-
-    if (length > bufferSize) {
-      // Zone 1: Incremental mode - forward scan to find what fits, then convert
-      size_t read = findBestFitUtf16(data, length, bufferSize);
-      size_t written = simdutf::convert_utf16_to_utf8(data, read, outputBuf.begin());
-      return TextEncoder::EncodeIntoResult{
-        .read = static_cast<int>(read),
-        .written = static_cast<int>(written),
-      };
-    }
-
-    if (length * 3 <= bufferSize) {
-      // Zone 2: Fast path - worst-case (3x) definitely fits, convert directly
-      size_t written = simdutf::convert_utf16_to_utf8(data, length, outputBuf.begin());
-      return TextEncoder::EncodeIntoResult{
-        .read = static_cast<int>(length),
-        .written = static_cast<int>(written),
-      };
-    }
-
-    // Zone 3: "Maybe fits" - use forward scan with length calculation to avoid double-scan
-    auto [read, utf8Length] = findBestFitUtf16<true>(data, length, bufferSize);
-
-    if (read == length) {
-      // Everything fit: convert entire string with SIMD
-      size_t written = simdutf::convert_utf16_to_utf8(data, length, outputBuf.begin());
-      return TextEncoder::EncodeIntoResult{
-        .read = static_cast<int>(length),
-        .written = static_cast<int>(written),
-      };
     }
-
-    // Partial fit: convert only what fits
-    size_t written = simdutf::convert_utf16_to_utf8(data, read, outputBuf.begin());
-    return TextEncoder::EncodeIntoResult{
-      .read = static_cast<int>(read),
-      .written = static_cast<int>(written),
-    };
   }
-
-  // Invalid UTF-16: convert directly to UTF-8, replacing unpaired surrogates with U+FFFD
-
-  // Incremental mode: buffer much smaller than input, skip "whole string fits" checks
-  if (length > bufferSize) {
-    size_t read = findBestFitInvalidUtf16(data, length, bufferSize);
-    size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, read), outputBuf);
-    return TextEncoder::EncodeIntoResult{
-      .read = static_cast<int>(read),
-      .written = static_cast<int>(written),
-    };
-  }
-
-  // Fast path: worst-case (3 bytes per UTF-16 code unit) fits
-  if (length * 3 <= bufferSize) {
-    size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, length), outputBuf);
-    return TextEncoder::EncodeIntoResult{
-      .read = static_cast<int>(length),
-      .written = static_cast<int>(written),
-    };
-  }
-
-  // Slow path: calculate exact UTF-8 length
-  size_t utf8Length = simdutf::utf8_length_from_utf16_with_replacement(data, length);
-  if (utf8Length <= bufferSize) {
-    size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, length), outputBuf);
-    return TextEncoder::EncodeIntoResult{
-      .read = static_cast<int>(length),
-      .written = static_cast<int>(written),
-    };
-  }
-
-  // Doesn't fit: forward scan to find what does
-  size_t read = findBestFitInvalidUtf16(data, length, bufferSize);
-  size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, read), outputBuf);
+  KJ_DASSERT(written <= bufferSize);
   return TextEncoder::EncodeIntoResult{
     .read = static_cast<int>(read),
     .written = static_cast<int>(written),

From 62fb056042cb7b51d92f8e585ebfd149ae395686 Mon Sep 17 00:00:00 2001
From: Erik Corry <ecorry@cloudflare.com>
Date: Mon, 24 Nov 2025 14:44:18 +0100
Subject: [PATCH 23/29] Add some tests of encodeinto for short output buffers.
 (#5570)

---
 src/workerd/api/BUILD.bazel       |  7 +++
 src/workerd/api/encoding-test.c++ | 77 +++++++++++++++++++++++++++++++
 src/workerd/api/encoding.c++      | 23 +++++++--
 src/workerd/api/encoding.h        |  7 +++
 4 files changed, 111 insertions(+), 3 deletions(-)
 create mode 100644 src/workerd/api/encoding-test.c++

diff --git a/src/workerd/api/BUILD.bazel b/src/workerd/api/BUILD.bazel
index 0254d0994b9..4a1086ac3c1 100644
--- a/src/workerd/api/BUILD.bazel
+++ b/src/workerd/api/BUILD.bazel
@@ -588,6 +588,13 @@ kj_test(
     ],
 )
 
+kj_test(
+    src = "encoding-test.c++",
+    deps = [
+        ":encoding",
+    ],
+)
+
 kj_test(
     src = "base64-test.c++",
     deps = ["//src/workerd/tests:test-fixture"],
diff --git a/src/workerd/api/encoding-test.c++ b/src/workerd/api/encoding-test.c++
new file mode 100644
index 00000000000..a45d78aa563
--- /dev/null
+++ b/src/workerd/api/encoding-test.c++
@@ -0,0 +1,77 @@
+// Copyright (c) 2025 Cloudflare, Inc.
+// Licensed under the Apache 2.0 license found in the LICENSE file or at:
+//     https://opensource.org/licenses/Apache-2.0
+
+#include "encoding.h"
+
+#include <kj/test.h>
+
+namespace workerd::api {
+namespace test {
+
+KJ_TEST("BestFitASCII") {
+  // If there's zero input or output space, the answer is zero.
+  KJ_ASSERT(bestFit("", 0) == 0);
+  KJ_ASSERT(bestFit("a", 0) == 0);
+  KJ_ASSERT(bestFit("aa", 0) == 0);
+  KJ_ASSERT(bestFit("aaa", 0) == 0);
+  KJ_ASSERT(bestFit("aaaa", 0) == 0);
+  KJ_ASSERT(bestFit("aaaaa", 0) == 0);
+  KJ_ASSERT(bestFit("", 0) == 0);
+  KJ_ASSERT(bestFit("", 1) == 0);
+  KJ_ASSERT(bestFit("", 2) == 0);
+  KJ_ASSERT(bestFit("", 3) == 0);
+  KJ_ASSERT(bestFit("", 4) == 0);
+  KJ_ASSERT(bestFit("", 5) == 0);
+  // Zero cases with two-byte strings.
+  KJ_ASSERT(bestFit(u"", 0) == 0);
+  KJ_ASSERT(bestFit(u"€", 0) == 0);
+  KJ_ASSERT(bestFit(u"€€", 0) == 0);
+  KJ_ASSERT(bestFit(u"€€€", 0) == 0);
+  KJ_ASSERT(bestFit(u"€€€€", 0) == 0);
+  KJ_ASSERT(bestFit(u"€€€€€", 0) == 0);
+  KJ_ASSERT(bestFit(u"", 0) == 0);
+  KJ_ASSERT(bestFit(u"", 1) == 0);
+  KJ_ASSERT(bestFit(u"", 2) == 0);
+  KJ_ASSERT(bestFit(u"", 3) == 0);
+  KJ_ASSERT(bestFit(u"", 4) == 0);
+  KJ_ASSERT(bestFit(u"", 5) == 0);
+  // Small buffers that only just fit.
+  KJ_ASSERT(bestFit(u"a", 1) == 1);
+  KJ_ASSERT(bestFit(u"å", 2) == 1);
+  KJ_ASSERT(bestFit(u"€", 3) == 1);
+  KJ_ASSERT(bestFit(u"😹", 4) == 2);
+  // Small buffers that don't fit.
+  KJ_ASSERT(bestFit(u"å", 1) == 0);
+  KJ_ASSERT(bestFit(u"€", 2) == 0);
+  KJ_ASSERT(bestFit(u"😹", 3) == 0);
+  // Don't chop a surrogate pair.
+  KJ_ASSERT(bestFit(u"1😹", 4) == 1);
+  KJ_ASSERT(bestFit(u"12😹", 5) == 2);
+  KJ_ASSERT(bestFit(u"123😹", 6) == 3);
+  KJ_ASSERT(bestFit(u"1234😹", 7) == 4);
+  KJ_ASSERT(bestFit(u"12345😹", 8) == 5);
+  // Some bigger ones just for fun.
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 0) == 0);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 1) == 0);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 2) == 0);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 3) == 0);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 4) == 2);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 5) == 2);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 6) == 2);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 7) == 2);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 8) == 4);
+  KJ_ASSERT(bestFit(u"😹😹😹😹😹😹", 9) == 4);
+  KJ_ASSERT(bestFit(u"0😹😹😹😹😹😹", 9) == 5);          // 0😹😹 is 5 and takes 9.
+  KJ_ASSERT(bestFit(u"01😹😹😹😹😹😹", 9) == 4);         // 01😹 is 4 and takes 6.
+  KJ_ASSERT(bestFit(u"012😹😹😹😹😹😹", 9) == 5);        // 012😹 is 5 and takes 7.
+  KJ_ASSERT(bestFit(u"0123😹😹😹😹😹😹", 9) == 6);       // 0123😹 is 6 and takes 8.
+  KJ_ASSERT(bestFit(u"01234😹😹😹😹😹😹", 9) == 7);      // 01234😹 is 7 and takes 9.
+  KJ_ASSERT(bestFit(u"012345😹😹😹😹😹😹", 9) == 6);     // 012345 is 6 and takes 6.
+  KJ_ASSERT(bestFit(u"0123456😹😹😹😹😹😹", 9) == 7);    // 0123456 is 7 and takes 7.
+  KJ_ASSERT(bestFit(u"01234567😹😹😹😹😹😹", 9) == 8);   // 01234567 is 8 and takes 8.
+  KJ_ASSERT(bestFit(u"012345678😹😹😹😹😹😹", 9) == 9);  // 012345678 is 9 and takes 9.
+}
+
+}  // namespace test
+}  // namespace workerd::api
diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 17af01cbff7..8b0748ea63b 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -277,6 +277,9 @@ Encoding getEncodingForLabel(kj::StringPtr label) {
 #undef V
   return Encoding::INVALID;
 }
+
+constexpr int MAX_SIZE_FOR_STACK_ALLOC = 4096;
+
 }  // namespace
 
 const kj::Array<const kj::byte> TextDecoder::EMPTY =
@@ -507,7 +510,7 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
   if (str.isOneByte(js)) {
     // Use off-heap allocation for intermediate Latin-1 buffer to avoid wasting V8 heap space
     // and potentially triggering GC. Stack allocation for small strings, heap for large.
-    kj::SmallArray<kj::byte, 4096> latin1Buffer(length);
+    kj::SmallArray<kj::byte, MAX_SIZE_FOR_STACK_ALLOC> latin1Buffer(length);
 
     [[maybe_unused]] auto writeResult = str.writeInto(js, latin1Buffer.asPtr());
     KJ_DASSERT(
@@ -534,7 +537,7 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
   // Use off-heap allocation for intermediate UTF-16 buffer to avoid wasting V8 heap space
   // and potentially triggering GC. Stack allocation for small strings, heap for large.
   // Stack allocation for small strings, heap for large.
-  kj::SmallArray<uint16_t, 4096> utf16Buffer(length);
+  kj::SmallArray<uint16_t, MAX_SIZE_FOR_STACK_ALLOC> utf16Buffer(length);
 
   [[maybe_unused]] auto writeResult = str.writeInto(js, utf16Buffer.asPtr());
   KJ_DASSERT(
@@ -656,6 +659,20 @@ size_t findBestFit(const Char* data, size_t length, size_t bufferSize) {
 
 }  // namespace
 
+namespace test {
+
+size_t bestFit(const char* str, size_t bufferSize) {
+  return findBestFit(str, strlen(str), bufferSize);
+}
+
+size_t bestFit(const char16_t* str, size_t bufferSize) {
+  size_t length = 0;
+  while (str[length] != 0) length++;
+  return findBestFit(str, length, bufferSize);
+}
+
+}  // namespace test
+
 TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
     jsg::Lock& js, jsg::JsString input, jsg::JsUint8Array buffer) {
   auto outputBuf = buffer.asArrayPtr<char>();
@@ -686,7 +703,7 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
           written = result.count;
         } else {
           // Oh, no, there are unpaired surrogates.  This is hopefully rare.
-          kj::SmallArray<char16_t, 4096> conversionBuffer(read);
+          kj::SmallArray<char16_t, MAX_SIZE_FOR_STACK_ALLOC> conversionBuffer(read);
           simdutf::to_well_formed_utf16(data, read, conversionBuffer.begin());
           written =
               simdutf::convert_utf16_to_utf8(conversionBuffer.begin(), read, outputBuf.begin());
diff --git a/src/workerd/api/encoding.h b/src/workerd/api/encoding.h
index e694ad1b355..1325be8f245 100644
--- a/src/workerd/api/encoding.h
+++ b/src/workerd/api/encoding.h
@@ -245,4 +245,11 @@ class TextEncoder final: public jsg::Object {
 #define EW_ENCODING_ISOLATE_TYPES                                                                  \
   api::TextDecoder, api::TextEncoder, api::TextDecoder::ConstructorOptions,                        \
       api::TextDecoder::DecodeOptions, api::TextEncoder::EncodeIntoResult
+
+namespace test {
+
+size_t bestFit(const char* str, size_t bufferSize);
+size_t bestFit(const char16_t* str, size_t bufferSize);
+
+}  // namespace test
 }  // namespace workerd::api

From 6956cb57f8453812417a0675120af0d9895dd4fc Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Mon, 24 Nov 2025 12:12:12 -0500
Subject: [PATCH 24/29] put changes behind an autogate

---
 src/workerd/api/encoding.c++  | 20 ++++++++++++++++++++
 src/workerd/util/autogate.c++ |  2 ++
 src/workerd/util/autogate.h   |  2 ++
 3 files changed, 24 insertions(+)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 8b0748ea63b..d2e601d4929 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -9,6 +9,7 @@
 
 #include <workerd/io/features.h>
 #include <workerd/jsg/jsg.h>
+#include <workerd/util/autogate.h>
 #include <workerd/util/strings.h>
 
 #include <unicode/ucnv.h>
@@ -498,6 +499,16 @@ jsg::Ref<TextEncoder> TextEncoder::constructor(jsg::Lock& js) {
 }
 
 jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input) {
+  if (!workerd::util::Autogate::isEnabled(workerd::util::AutogateKey::ENABLE_FAST_TEXTENCODER)) {
+    auto str = input.orDefault(js.str());
+    auto view = JSG_REQUIRE_NONNULL(jsg::BufferSource::tryAlloc(js, str.utf8Length(js)), RangeError,
+        "Cannot allocate space for TextEncoder.encode");
+    auto result = str.writeInto(
+        js, view.asArrayPtr().asChars(), jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
+    KJ_DASSERT(result.written == view.size());
+    return jsg::JsUint8Array(view.getHandle(js).As<v8::Uint8Array>());
+  }
+
   jsg::JsString str = input.orDefault(js.str());
 
   size_t utf8_length = 0;
@@ -675,6 +686,15 @@ size_t bestFit(const char16_t* str, size_t bufferSize) {
 
 TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
     jsg::Lock& js, jsg::JsString input, jsg::JsUint8Array buffer) {
+  if (!workerd::util::Autogate::isEnabled(workerd::util::AutogateKey::ENABLE_FAST_TEXTENCODER)) {
+    auto result = input.writeInto(
+        js, buffer.asArrayPtr<char>(), jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
+    return TextEncoder::EncodeIntoResult{
+      .read = static_cast<int>(result.read),
+      .written = static_cast<int>(result.written),
+    };
+  }
+
   auto outputBuf = buffer.asArrayPtr<char>();
   size_t bufferSize = outputBuf.size();
 
diff --git a/src/workerd/util/autogate.c++ b/src/workerd/util/autogate.c++
index 5a8b5a8d9dd..49c2869cdc8 100644
--- a/src/workerd/util/autogate.c++
+++ b/src/workerd/util/autogate.c++
@@ -35,6 +35,8 @@ kj::StringPtr KJ_STRINGIFY(AutogateKey key) {
       return "compression-stream-use-state-machine"_kj;
     case AutogateKey::IDENTITY_TRANSFORM_STREAM_USE_STATE_MACHINE:
       return "identity-transform-stream-use-state-machine"_kj;
+    case AutogateKey::ENABLE_FAST_TEXTENCODER:
+      return "enable-fast-textencoder"_kj;
     case AutogateKey::NumOfKeys:
       KJ_FAIL_ASSERT("NumOfKeys should not be used in getName");
   }
diff --git a/src/workerd/util/autogate.h b/src/workerd/util/autogate.h
index 82a4af828e8..f41e074bd5d 100644
--- a/src/workerd/util/autogate.h
+++ b/src/workerd/util/autogate.h
@@ -30,6 +30,8 @@ enum class AutogateKey {
   COMPRESSION_STREAM_USE_STATE_MACHINE,
   // Switch the IdentityTransformStream to use the new state machine-based impl
   IDENTITY_TRANSFORM_STREAM_USE_STATE_MACHINE,
+  // Enable fast TextEncoder implementation using simdutf
+  ENABLE_FAST_TEXTENCODER,
   NumOfKeys  // Reserved for iteration.
 };
 

From 166e9fd511c6822826f007e5012e9575d123d00d Mon Sep 17 00:00:00 2001
From: Erik Corry <ecorry@cloudflare.com>
Date: Mon, 24 Nov 2025 21:55:19 +0100
Subject: [PATCH 25/29] Attempt to eliminate last regression (#5579)

---
 src/workerd/api/encoding.c++ | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index d2e601d4929..ef151cc1b95 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -703,14 +703,26 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
   {
     // Scope for the view - we can't do anything that might cause a V8 GC!
     v8::String::ValueView view(js.v8Isolate, input);
-    uint32_t length = view.length();
+    size_t length = view.length();
 
     if (view.is_one_byte()) {
       auto data = reinterpret_cast<const char*>(view.data8());
-      read = findBestFit(data, length, bufferSize);
-      if (read != 0) {
-        KJ_DASSERT(simdutf::utf8_length_from_latin1(data, read) <= bufferSize);
-        written = simdutf::convert_latin1_to_utf8(data, read, outputBuf.begin());
+      simdutf::result result =
+          simdutf::validate_ascii_with_errors(data, kj::min(length, bufferSize));
+      written = read = result.count;
+      auto outAddr = outputBuf.begin();
+      memcpy(outAddr, data, read);
+      outAddr += read;
+      data += read;
+      length -= read;
+      bufferSize -= read;
+      if (length != 0 && bufferSize != 0) {
+        size_t rest = findBestFit(data, length, bufferSize);
+        if (rest != 0) {
+          KJ_DASSERT(simdutf::utf8_length_from_latin1(data, rest) <= bufferSize);
+          written += simdutf::convert_latin1_to_utf8(data, rest, outAddr);
+          read += rest;
+        }
       }
     } else {
       auto data = reinterpret_cast<const char16_t*>(view.data16());
@@ -732,9 +744,14 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
     }
   }
   KJ_DASSERT(written <= bufferSize);
+  // V8's String::kMaxLength is a lot less than a maximal int so this is fine.
+  using RInt = decltype(TextEncoder::EncodeIntoResult::read);
+  using WInt = decltype(TextEncoder::EncodeIntoResult::written);
+  KJ_DASSERT(0 <= read && read <= std::numeric_limits<RInt>::max());
+  KJ_DASSERT(0 <= written && written <= std::numeric_limits<WInt>::max());
   return TextEncoder::EncodeIntoResult{
-    .read = static_cast<int>(read),
-    .written = static_cast<int>(written),
+    .read = static_cast<RInt>(read),
+    .written = static_cast<WInt>(written),
   };
 }
 

From d462ca1042beee6c83ae43d8ceeced658bc10621 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Mon, 29 Dec 2025 18:31:36 -0500
Subject: [PATCH 26/29] make changes due to simdutf

---
 src/workerd/api/BUILD.bazel  | 3 ++-
 src/workerd/api/encoding.c++ | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/workerd/api/BUILD.bazel b/src/workerd/api/BUILD.bazel
index 4a1086ac3c1..58838c42bff 100644
--- a/src/workerd/api/BUILD.bazel
+++ b/src/workerd/api/BUILD.bazel
@@ -434,7 +434,6 @@ wd_cc_library(
     srcs = ["encoding.c++"],
     hdrs = ["encoding.h"],
     implementation_deps = [
-        "//src/workerd/io:features",
         "//src/workerd/util:strings",
         "@simdutf",
     ],
@@ -442,6 +441,7 @@ wd_cc_library(
     deps = [
         ":util",
         "//src/workerd/io:compatibility-date_capnp",
+        "//src/workerd/io:features",
         "//src/workerd/jsg",
         "@capnp-cpp//src/kj",
         "@simdutf",
@@ -592,6 +592,7 @@ kj_test(
     src = "encoding-test.c++",
     deps = [
         ":encoding",
+        "//src/workerd/io",
     ],
 )
 
diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index ef151cc1b95..be487069e04 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -555,7 +555,7 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
       writeResult.written == length, "writeInto must completely overwrite the backing buffer");
 
   auto data = reinterpret_cast<char16_t*>(utf16Buffer.begin());
-  utf8_length = simdutf::utf8_length_from_utf16_with_replacement(data, length);
+  utf8_length = simdutf::utf8_length_from_utf16_with_replacement(data, length).count;
 
   auto backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
   auto result = simdutf::convert_utf16_to_utf8_with_errors(
@@ -632,7 +632,7 @@ size_t findBestFit(const Char* data, size_t length, size_t bufferSize) {
 
     size_t chunkUtf8Len;
     if constexpr (UTF16) {
-      chunkUtf8Len = simdutf::utf8_length_from_utf16_with_replacement(data + pos, chunkSize);
+      chunkUtf8Len = simdutf::utf8_length_from_utf16_with_replacement(data + pos, chunkSize).count;
     } else {
       chunkUtf8Len = simdutf::utf8_length_from_latin1(data + pos, chunkSize);
     }
@@ -728,7 +728,8 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
       auto data = reinterpret_cast<const char16_t*>(view.data16());
       read = findBestFit(data, length, bufferSize);
       if (read != 0) {
-        KJ_DASSERT(simdutf::utf8_length_from_utf16_with_replacement(data, read) <= bufferSize);
+        KJ_DASSERT(
+            simdutf::utf8_length_from_utf16_with_replacement(data, read).count <= bufferSize);
         simdutf::result result =
             simdutf::convert_utf16_to_utf8_with_errors(data, read, outputBuf.begin());
         if (result.error == simdutf::SUCCESS) {

From e4e393da386f1f649440fac8b25dc5d19112c1cd Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Tue, 30 Dec 2025 10:59:07 -0500
Subject: [PATCH 27/29] leverage simdutf more (#5797)

---
 src/workerd/api/encoding.c++ | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index be487069e04..d4e73048b6f 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -555,22 +555,21 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
       writeResult.written == length, "writeInto must completely overwrite the backing buffer");
 
   auto data = reinterpret_cast<char16_t*>(utf16Buffer.begin());
-  utf8_length = simdutf::utf8_length_from_utf16_with_replacement(data, length).count;
+  auto lengthResult = simdutf::utf8_length_from_utf16_with_replacement(data, length);
+  utf8_length = lengthResult.count;
 
-  auto backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
-  auto result = simdutf::convert_utf16_to_utf8_with_errors(
-      data, length, reinterpret_cast<char*>(backingStore->Data()));
-
-  if (result.error != simdutf::SUCCESS) {
-    // Oh, no, there are unpaired surrogates.  This is hopefully rare.
+  if (lengthResult.error == simdutf::SURROGATE) {
+    // If there are surrogates there may be unpaired surrogates. Fix them.
     simdutf::to_well_formed_utf16(data, length, data);
-    [[maybe_unused]] auto written =
-        simdutf::convert_utf16_to_utf8(data, length, reinterpret_cast<char*>(backingStore->Data()));
-    KJ_DASSERT(written == utf8_length, "Conversion yielded wrong number of UTF-8 bytes");
   } else {
-    KJ_DASSERT(result.count == utf8_length, "Conversion yielded wrong number of UTF-8 bytes");
+    KJ_DASSERT(lengthResult.error == simdutf::SUCCESS);
   }
 
+  auto backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
+  [[maybe_unused]] auto written =
+      simdutf::convert_utf16_to_utf8(data, length, reinterpret_cast<char*>(backingStore->Data()));
+  KJ_DASSERT(written == utf8_length, "Conversion yielded wrong number of UTF-8 bytes");
+
   auto array =
       v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backingStore)), 0, utf8_length);
   return jsg::JsUint8Array(array);

From a03390fdb4a2655285f83c7affe9eee6253f4323 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Tue, 30 Dec 2025 11:15:10 -0500
Subject: [PATCH 28/29] fix build warning

---
 src/workerd/api/encoding.c++ | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index d4e73048b6f..12ae03bcb1b 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -503,7 +503,7 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
     auto str = input.orDefault(js.str());
     auto view = JSG_REQUIRE_NONNULL(jsg::BufferSource::tryAlloc(js, str.utf8Length(js)), RangeError,
         "Cannot allocate space for TextEncoder.encode");
-    auto result = str.writeInto(
+    [[maybe_unused]] auto result = str.writeInto(
         js, view.asArrayPtr().asChars(), jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
     KJ_DASSERT(result.written == view.size());
     return jsg::JsUint8Array(view.getHandle(js).As<v8::Uint8Array>());

From 1a2eab7467c5443cd8973e863d0662d714751a0a Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Tue, 30 Dec 2025 12:20:45 -0500
Subject: [PATCH 29/29] address pr reviews

---
 src/workerd/api/encoding-test.c++    | 13 +++++++++++++
 src/workerd/api/encoding.c++         | 25 +++++++++++++------------
 src/workerd/api/streams/encoding.c++ |  4 +---
 src/workerd/jsg/jsvalue.h            |  6 ++++++
 4 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/src/workerd/api/encoding-test.c++ b/src/workerd/api/encoding-test.c++
index a45d78aa563..d74374092b4 100644
--- a/src/workerd/api/encoding-test.c++
+++ b/src/workerd/api/encoding-test.c++
@@ -9,6 +9,19 @@
 namespace workerd::api {
 namespace test {
 
+// These tests verify findBestFit(), used by TextEncoder.encodeInto(), via the bestFit() helper.
+//
+// bestFit(input, bufferSize) returns the number of input code units that can be
+// fully converted to UTF-8 and fit within the given output buffer size in bytes.
+//
+// The key insight is that different characters expand to different UTF-8 byte lengths:
+//   - ASCII (U+0000-U+007F): 1 byte per code unit
+//   - Latin-1 extended (U+0080-U+00FF): 2 bytes per code unit
+//   - BMP characters (U+0100-U+FFFF): 2 bytes up to U+07FF, 3 bytes from U+0800
+//   - Supplementary characters (U+10000+): 4 bytes per surrogate pair (two UTF-16 code units)
+//
+// The function must never split a surrogate pair, so if there's only room for part of
+// a multi-byte character, it stops before that character.
 KJ_TEST("BestFitASCII") {
   // If there's zero input or output space, the answer is zero.
   KJ_ASSERT(bestFit("", 0) == 0);
diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 12ae03bcb1b..d2cb4a8a949 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -514,6 +514,11 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
   size_t utf8_length = 0;
   auto length = str.length(js);
 
+#ifdef KJ_DEBUG
+  bool wasAlreadyFlat = str.isFlat();
+  KJ_DEFER({ KJ_ASSERT(wasAlreadyFlat || !str.isFlat()); });
+#endif
+
   // Note: writeInto() doesn't flatten the string - it calls writeTo() which chains through
   // Write2 -> WriteV2 -> WriteHelperV2 -> String::WriteToFlat.
   // This means we may read from multiple string segments, but that's fine for our use case.
@@ -533,16 +538,14 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
     auto backingStore = js.allocBackingStore(utf8_length, jsg::Lock::AllocOption::UNINITIALIZED);
     if (utf8_length == length) {
       // ASCII fast path: no conversion needed, Latin-1 is same as UTF-8 for ASCII
-      memcpy(backingStore->Data(), latin1Buffer.begin(), length);
+      kj::arrayPtr(static_cast<kj::byte*>(backingStore->Data()), length).copyFrom(latin1Buffer);
     } else {
       [[maybe_unused]] auto written =
           simdutf::convert_latin1_to_utf8(reinterpret_cast<const char*>(latin1Buffer.begin()),
               length, reinterpret_cast<char*>(backingStore->Data()));
       KJ_DASSERT(utf8_length == written);
     }
-    auto array = v8::Uint8Array::New(
-        v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backingStore)), 0, utf8_length);
-    return jsg::JsUint8Array(array);
+    return jsg::JsUint8Array::create(js, kj::mv(backingStore), 0, utf8_length);
   }
 
   // Use off-heap allocation for intermediate UTF-16 buffer to avoid wasting V8 heap space
@@ -570,24 +573,21 @@ jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString
       simdutf::convert_utf16_to_utf8(data, length, reinterpret_cast<char*>(backingStore->Data()));
   KJ_DASSERT(written == utf8_length, "Conversion yielded wrong number of UTF-8 bytes");
 
-  auto array =
-      v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backingStore)), 0, utf8_length);
-  return jsg::JsUint8Array(array);
+  return jsg::JsUint8Array::create(js, kj::mv(backingStore), 0, utf8_length);
 }
 
 namespace {
 
 constexpr bool isSurrogatePair(uint16_t lead, uint16_t trail) {
   // We would like to use simdutf::trim_partial_utf16, but it's not guaranteed
-  // to work right on invalid UTF-16.
+  // to work right on invalid UTF-16. Hence, we need this function to check for
+  // surrogate pairs and correctly trim UTF-16 chunks.
   return (lead & 0xfc00) == 0xd800 && (trail & 0xfc00) == 0xdc00;
 }
 
 // Ignores surrogates conservatively.
 constexpr size_t simpleUtfEncodingLength(uint16_t c) {
-  if (c < 0x80) return 1;
-  if (c < 0x400) return 2;
-  return 3;
+  return 1 + (c >= 0x80) + (c >= 0x400);
 }
 
 // Find how many UTF-16 or Latin1 code units fit when converted to UTF-8.
@@ -669,6 +669,7 @@ size_t findBestFit(const Char* data, size_t length, size_t bufferSize) {
 
 }  // namespace
 
+// Test helpers used by encoding-test.c++ to verify findBestFit behavior.
 namespace test {
 
 size_t bestFit(const char* str, size_t bufferSize) {
@@ -710,7 +711,7 @@ TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
           simdutf::validate_ascii_with_errors(data, kj::min(length, bufferSize));
       written = read = result.count;
       auto outAddr = outputBuf.begin();
-      memcpy(outAddr, data, read);
+      kj::arrayPtr(outAddr, read).copyFrom(kj::arrayPtr(data, read));
       outAddr += read;
       data += read;
       length -= read;
diff --git a/src/workerd/api/streams/encoding.c++ b/src/workerd/api/streams/encoding.c++
index 7fe67ce5e68..ede16250b83 100644
--- a/src/workerd/api/streams/encoding.c++
+++ b/src/workerd/api/streams/encoding.c++
@@ -93,9 +93,7 @@ jsg::Ref<TextEncoderStream> TextEncoderStream::constructor(jsg::Lock& js) {
     if (holder->pending != kj::none) {
       auto backingStore = js.allocBackingStore(3, jsg::Lock::AllocOption::UNINITIALIZED);
       memcpy(backingStore->Data(), REPLACEMENT_UTF8, 3);
-      auto array =
-          v8::Uint8Array::New(v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backingStore)), 0, 3);
-      controller->enqueue(js, jsg::JsUint8Array(array));
+      controller->enqueue(js, jsg::JsUint8Array::create(js, kj::mv(backingStore), 0, 3));
     }
     return js.resolvedPromise();
   };
diff --git a/src/workerd/jsg/jsvalue.h b/src/workerd/jsg/jsvalue.h
index 8a52a5d5bd7..25ee99fe228 100644
--- a/src/workerd/jsg/jsvalue.h
+++ b/src/workerd/jsg/jsvalue.h
@@ -254,6 +254,12 @@ class JsArrayBufferView final: public JsBase<v8::ArrayBufferView, JsArrayBufferV
 
 class JsUint8Array final: public JsBase<v8::Uint8Array, JsUint8Array> {
  public:
+  static JsUint8Array create(
+      Lock& js, std::unique_ptr<v8::BackingStore> backingStore, size_t byteOffset, size_t length) {
+    return JsUint8Array(v8::Uint8Array::New(
+        v8::ArrayBuffer::New(js.v8Isolate, kj::mv(backingStore)), byteOffset, length));
+  }
+
   template <typename T = kj::byte>
   kj::ArrayPtr<T> asArrayPtr() {
     v8::Local<v8::Uint8Array> inner = *this;