diff --git a/src/workerd/api/BUILD.bazel b/src/workerd/api/BUILD.bazel
index 5d884dab8d7..eba268d382a 100644
--- a/src/workerd/api/BUILD.bazel
+++ b/src/workerd/api/BUILD.bazel
@@ -432,6 +432,7 @@ wd_cc_library(
     hdrs = ["encoding.h"],
     implementation_deps = [
         "//src/workerd/util:strings",
+        "@simdutf",
     ],
     visibility = ["//visibility:public"],
     deps = [
diff --git a/src/workerd/api/encoding.c++ b/src/workerd/api/encoding.c++
index 329bcb7d20d..029d5514549 100644
--- a/src/workerd/api/encoding.c++
+++ b/src/workerd/api/encoding.c++
@@ -4,6 +4,7 @@
 
 #include "encoding.h"
 
+#include "simdutf.h"
 #include "util.h"
 
 #include <workerd/jsg/jsg.h>
@@ -11,6 +12,9 @@
 
 #include <unicode/ucnv.h>
 #include <unicode/utf8.h>
+#include <v8.h>
+
+#include <kj/string.h>
 
 #include <algorithm>
 
@@ -460,38 +464,584 @@ kj::Maybe<jsg::JsString> TextDecoder::decodePtr(
 // =======================================================================================
 // TextEncoder implementation
 
+namespace {
+
+// True when `c` is a UTF-16 lead (high) surrogate, i.e. lies in [0xD800, 0xDBFF].
+[[maybe_unused]] constexpr inline bool isLeadSurrogate(char16_t c) {
+  // Every lead surrogate shares the same top six bits (110110).
+  return (c >> 10) == (0xD800 >> 10);
+}
+
+// True when `c` is a UTF-16 trail (low) surrogate, i.e. lies in [0xDC00, 0xDFFF].
+[[maybe_unused]] constexpr inline bool isTrailSurrogate(char16_t c) {
+  // Every trail surrogate shares the same top six bits (110111).
+  return (c >> 10) == (0xDC00 >> 10);
+}
+
+// Returns the number of UTF-8 bytes needed to encode `input`, which may contain unpaired
+// surrogates. Each unpaired surrogate is counted as U+FFFD (3 bytes in UTF-8).
+// Well-formed runs are measured by simdutf; only the offending code units are handled one by one.
+size_t utf8LengthFromInvalidUtf16(kj::ArrayPtr<const char16_t> input) {
+  const char16_t* cursor = input.begin();
+  const char16_t* const end = input.end();
+  size_t total = 0;
+
+  while (cursor < end) {
+    const size_t remaining = static_cast<size_t>(end - cursor);
+
+    // Locate the first unpaired surrogate (if any) in what is left of the input.
+    const auto validation = simdutf::validate_utf16_with_errors(cursor, remaining);
+
+    if (validation.error == simdutf::error_code::SUCCESS) {
+      // Nothing left to repair: measure the whole tail at once and stop.
+      total += simdutf::utf8_length_from_utf16(cursor, remaining);
+      break;
+    }
+
+    if (validation.error != simdutf::error_code::SURROGATE) {
+      KJ_FAIL_REQUIRE(
+          "Unexpected UTF-16 validation error from simdutf", static_cast<int>(validation.error));
+    }
+
+    // The code units in front of the reported error position are well-formed.
+    if (validation.count > 0) {
+      total += simdutf::utf8_length_from_utf16(cursor, validation.count);
+      cursor += validation.count;
+    }
+
+    // `cursor` now sits on the offending code unit. A SURROGATE error means it is unpaired, so
+    // it can never be the first half of a complete pair.
+    KJ_DASSERT(!(isLeadSurrogate(*cursor) && cursor + 1 < end && isTrailSurrogate(cursor[1])),
+        "Valid surrogate pair should not trigger SURROGATE error");
+
+    // The unpaired surrogate will be emitted as U+FFFD, which takes 3 bytes.
+    total += 3;
+    ++cursor;
+  }
+
+  return total;
+}
+
+// Writes the UTF-8 encoding of the single UTF-16 code unit `c` to the start of `out` and returns
+// the number of bytes produced (1, 2 or 3). Surrogates are not special-cased here. The capacity
+// of `out` is only verified by debug assertions.
+inline size_t encodeUtf8CodeUnit(char16_t c, kj::ArrayPtr<char> out) {
+  const uint32_t unit = c;
+
+  // Builds a continuation byte (10xxxxxx) from the six bits of `unit` starting at `shift`.
+  const auto continuation = [unit](unsigned shift) {
+    return static_cast<char>(0x80 | ((unit >> shift) & 0x3F));
+  };
+
+  if (unit < 0x80) {
+    // 0xxxxxxx
+    KJ_DASSERT(out.size() >= 1);
+    out[0] = static_cast<char>(unit);
+    return 1;
+  }
+
+  if (unit < 0x800) {
+    // 110xxxxx 10xxxxxx
+    KJ_DASSERT(out.size() >= 2);
+    out[0] = static_cast<char>(0xC0 | (unit >> 6));
+    out[1] = continuation(0);
+    return 2;
+  }
+
+  // 1110xxxx 10xxxxxx 10xxxxxx
+  KJ_DASSERT(out.size() >= 3);
+  out[0] = static_cast<char>(0xE0 | (unit >> 12));
+  out[1] = continuation(6);
+  out[2] = continuation(0);
+  return 3;
+}
+
+// Transcodes `input` (UTF-16 that may contain unpaired surrogates) to UTF-8 at the start of
+// `out`, writing U+FFFD in place of every unpaired surrogate.
+// Returns the number of UTF-8 bytes written.
+//
+// `out` must be large enough for the whole result (see utf8LengthFromInvalidUtf16()); this is
+// only verified by debug assertions.
+// Well-formed runs are converted by simdutf; only the offending code units are handled here.
+size_t convertInvalidUtf16ToUtf8(kj::ArrayPtr<const char16_t> input, kj::ArrayPtr<char> out) {
+  size_t inputPos = 0;
+  size_t outputPos = 0;
+
+  while (inputPos < input.size()) {
+    const char16_t* const src = input.begin() + inputPos;
+    const size_t remaining = input.size() - inputPos;
+
+    // Locate the first unpaired surrogate (if any) in what is left of the input.
+    const auto result = simdutf::validate_utf16_with_errors(src, remaining);
+
+    if (result.error == simdutf::error_code::SUCCESS) {
+      // The tail was just validated, so use the non-validating converter instead of paying
+      // for a second validation pass.
+      outputPos += simdutf::convert_valid_utf16_to_utf8(src, remaining, out.begin() + outputPos);
+      KJ_DASSERT(outputPos <= out.size());
+      break;
+    }
+
+    if (result.error != simdutf::error_code::SURROGATE) {
+      KJ_FAIL_REQUIRE(
+          "Unexpected UTF-16 validation error from simdutf", static_cast<int>(result.error));
+    }
+
+    // The code units in front of the reported error position are well-formed.
+    if (result.count > 0) {
+      outputPos += simdutf::convert_valid_utf16_to_utf8(src, result.count, out.begin() + outputPos);
+      KJ_DASSERT(outputPos <= out.size());
+      inputPos += result.count;
+    }
+
+    // `inputPos` now indexes the offending code unit. A SURROGATE error means it is unpaired,
+    // so it can never be the first half of a complete pair.
+    [[maybe_unused]] char16_t c = input[inputPos];
+    KJ_DASSERT(!(isLeadSurrogate(c) && inputPos + 1 < input.size() &&
+                   isTrailSurrogate(input[inputPos + 1])),
+        "Valid surrogate pair should not trigger SURROGATE error");
+
+    // Replace the unpaired surrogate with U+FFFD (3 bytes).
+    outputPos += encodeUtf8CodeUnit(0xFFFD, out.slice(outputPos, out.size()));
+    KJ_DASSERT(outputPos <= out.size());
+    inputPos++;
+  }
+
+  return outputPos;
+}
+
+}  // namespace
+
 jsg::Ref<TextEncoder> TextEncoder::constructor(jsg::Lock& js) {
   return js.alloc<TextEncoder>();
 }
 
+// Implements TextEncoder.prototype.encode(): returns a new Uint8Array holding the UTF-8 encoding
+// of `input` (the empty string when omitted). Unpaired surrogates are encoded as U+FFFD.
+//
+// The string contents are first copied into an off-heap scratch buffer (inline storage for
+// small strings, heap for large ones) and only then is the result's backing store allocated,
+// so no pointer into V8 string data is held across allocBackingStore().
+jsg::JsUint8Array TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input) {
+  jsg::JsString str = input.orDefault(js.str());
+  const size_t length = str.length(js);
+
+  // Wraps a fully written backing store in a Uint8Array covering all `size` bytes.
+  const auto toUint8Array = [&js](std::shared_ptr<v8::BackingStore> store, size_t size) {
+    auto arrayBuffer = v8::ArrayBuffer::New(js.v8Isolate, kj::mv(store));
+    return jsg::JsUint8Array(v8::Uint8Array::New(arrayBuffer, 0, size));
+  };
+
+  if (str.isOneByte(js)) {
+    // One-byte (Latin-1) string: 0x00-0x7F stay one byte, 0x80-0xFF become two bytes.
+    kj::SmallArray<kj::byte, 4096> latin1Buffer(length);
+
+    [[maybe_unused]] auto writeResult = str.writeInto(js, latin1Buffer.asPtr());
+    KJ_DASSERT(
+        writeResult.written == length, "writeInto must completely overwrite the backing buffer");
+
+    const auto* latin1 = reinterpret_cast<const char*>(latin1Buffer.begin());
+    const size_t utf8Length = simdutf::utf8_length_from_latin1(latin1, length);
+
+    std::shared_ptr<v8::BackingStore> backingStore(
+        js.allocBackingStore(utf8Length, jsg::Lock::AllocOption::UNINITIALIZED));
+
+    if (utf8Length == length) {
+      // Pure ASCII: the Latin-1 bytes already are the UTF-8 encoding.
+      // Skip memcpy for the empty string, where either pointer may be null.
+      if (length > 0) {
+        memcpy(backingStore->Data(), latin1, length);
+      }
+    } else {
+      KJ_DASSERT(utf8Length > length);
+      [[maybe_unused]] auto written = simdutf::convert_latin1_to_utf8(
+          latin1, length, reinterpret_cast<char*>(backingStore->Data()));
+      KJ_DASSERT(utf8Length == written);
+    }
+
+    return toUint8Array(kj::mv(backingStore), utf8Length);
+  }
+
+  // Two-byte (UTF-16) string.
+  kj::SmallArray<uint16_t, 4096> utf16Buffer(length);
+
+  [[maybe_unused]] auto writeResult = str.writeInto(js, utf16Buffer.asPtr());
+  KJ_DASSERT(
+      writeResult.written == length, "writeInto must completely overwrite the backing buffer");
+
+  auto data = reinterpret_cast<char16_t*>(utf16Buffer.begin());
+
+  // Repair first, measure second: once unpaired surrogates have been replaced by U+FFFD in the
+  // scratch copy, the buffer is well-formed and both the length computation and the conversion
+  // can use the plain SIMD routines, with no separate pass to measure the malformed input.
+  if (!simdutf::validate_utf16(data, length)) {
+    simdutf::to_well_formed_utf16(data, length, data);
+  }
+  const size_t utf8Length = simdutf::utf8_length_from_utf16(data, length);
+
+  std::shared_ptr<v8::BackingStore> backingStore(
+      js.allocBackingStore(utf8Length, jsg::Lock::AllocOption::UNINITIALIZED));
+  [[maybe_unused]] auto written = simdutf::convert_valid_utf16_to_utf8(
+      data, length, reinterpret_cast<char*>(backingStore->Data()));
+  KJ_DASSERT(utf8Length == written);
+
+  return toUint8Array(kj::mv(backingStore), utf8Length);
+}
+
 namespace {
-TextEncoder::EncodeIntoResult encodeIntoImpl(
-    jsg::Lock& js, jsg::JsString input, jsg::BufferSource& buffer) {
-  auto result = input.writeInto(
-      js, buffer.asArrayPtr().asChars(), jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
-  return TextEncoder::EncodeIntoResult{
-    .read = static_cast<int>(result.read),
-    .written = static_cast<int>(result.written),
-  };
+
+// Returns how many leading Latin-1 characters of `data` can be encoded as UTF-8 without
+// exceeding `bufferSize` bytes.
+// When `ReturnLength` is true the result is the pair (characters, UTF-8 bytes they occupy).
+//
+// The input is scanned forward in fixed-size chunks; only the chunk that overflows the budget is
+// binary-searched, so the cost is proportional to the size of the result.
+template <bool ReturnLength = false>
+std::conditional_t<ReturnLength, std::pair<size_t, size_t>, size_t> findBestFitLatin1(
+    const char* data, size_t length, size_t bufferSize) {
+  constexpr size_t CHUNK = 256;
+  size_t pos = 0;
+  size_t utf8Accumulated = 0;
+
+  while (pos < length) {
+    const size_t remaining = length - pos;
+    const size_t chunkSize = kj::min(remaining, CHUNK);
+    const size_t chunkUtf8Len = simdutf::utf8_length_from_latin1(data + pos, chunkSize);
+
+    if (utf8Accumulated + chunkUtf8Len > bufferSize) {
+      // This chunk overflows: binary-search the longest prefix of it that still fits.
+      // utf8Accumulated never exceeds bufferSize, so the subtraction cannot wrap.
+      const size_t budget = bufferSize - utf8Accumulated;
+      size_t bestFit = 0;
+      size_t bestFitUtf8Len = 0;
+
+      // The search covers prefix lengths [1, chunkSize]; 0 is the default answer. Starting at 1
+      // guarantees that every length, including 1, is examined and that `mid - 1` cannot wrap.
+      size_t left = 1;
+      size_t right = chunkSize;
+      while (left <= right) {
+        const size_t mid = left + (right - left) / 2;
+        const size_t midUtf8Len = simdutf::utf8_length_from_latin1(data + pos, mid);
+        if (midUtf8Len <= budget) {
+          bestFit = mid;
+          bestFitUtf8Len = midUtf8Len;
+          left = mid + 1;
+        } else {
+          right = mid - 1;
+        }
+      }
+
+      pos += bestFit;
+      utf8Accumulated += bestFitUtf8Len;
+      break;
+    }
+
+    utf8Accumulated += chunkUtf8Len;
+    pos += chunkSize;
+  }
+
+  if constexpr (ReturnLength) {
+    return {pos, utf8Accumulated};
+  } else {
+    return pos;
+  }
 }
-}  // namespace
 
-jsg::BufferSource TextEncoder::encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input) {
-  auto str = input.orDefault(js.str());
-  auto view = JSG_REQUIRE_NONNULL(jsg::BufferSource::tryAlloc(js, str.utf8Length(js)), RangeError,
-      "Cannot allocate space for TextEncoder.encode");
-  [[maybe_unused]] auto result = encodeIntoImpl(js, str, view);
-  KJ_DASSERT(result.written == view.size());
-  return kj::mv(view);
+// Returns how many leading UTF-16 code units of `data` can be encoded as UTF-8 without exceeding
+// `bufferSize` bytes. `data` must be well-formed UTF-16; the returned prefix never ends between
+// the two halves of a surrogate pair.
+// When `ReturnLength` is true the result is the pair (code units, UTF-8 bytes they occupy).
+//
+// The input is scanned forward in fixed-size chunks; only the chunk that overflows the budget is
+// binary-searched, so the cost is proportional to the size of the result.
+template <bool ReturnLength = false>
+std::conditional_t<ReturnLength, std::pair<size_t, size_t>, size_t> findBestFitUtf16(
+    const char16_t* data, size_t length, size_t bufferSize) {
+  constexpr size_t CHUNK = 256;
+
+  // In well-formed input a lead surrogate is always followed by its trail, so a prefix ending
+  // on a lead would split a pair: back off by one code unit in that case. This is done by hand
+  // so that 1-unit prefixes are handled too, without depending on how
+  // simdutf::trim_partial_utf16() treats inputs of length 1.
+  const auto snapToBoundary = [data](size_t end) {
+    return (end > 0 && isLeadSurrogate(data[end - 1])) ? end - 1 : end;
+  };
+
+  size_t pos = 0;
+  size_t utf8Accumulated = 0;
+
+  while (pos < length) {
+    const size_t remaining = length - pos;
+    size_t chunkEnd = snapToBoundary(pos + kj::min(remaining, CHUNK));
+
+    if (chunkEnd <= pos) {
+      // Only possible when the input ends with a dangling lead surrogate (i.e. it was not
+      // well-formed after all). Still make progress so the loop terminates.
+      chunkEnd = pos + ((remaining >= 2) ? 2 : remaining);
+    }
+
+    const size_t chunkSize = chunkEnd - pos;
+    const size_t chunkUtf8Len = simdutf::utf8_length_from_utf16(data + pos, chunkSize);
+
+    if (utf8Accumulated + chunkUtf8Len > bufferSize) {
+      // This chunk overflows: binary-search the longest prefix of it that still fits.
+      // utf8Accumulated never exceeds bufferSize, so the subtraction cannot wrap.
+      const size_t budget = bufferSize - utf8Accumulated;
+      size_t bestFit = 0;
+      size_t bestFitUtf8Len = 0;
+
+      // The search runs over raw prefix lengths `mid` in [1, chunkSize]. Each one is snapped to
+      // a pair boundary before being measured, but the bounds always move relative to `mid`
+      // itself: moving them relative to the snapped value can leave `left` unchanged and spin
+      // forever. The snapped length is non-decreasing in `mid`, so the last fitting candidate
+      // is also the longest one.
+      size_t left = 1;
+      size_t right = chunkSize;
+      while (left <= right) {
+        const size_t mid = left + (right - left) / 2;
+        const size_t candidate = snapToBoundary(pos + mid) - pos;
+        const size_t candidateUtf8Len =
+            candidate == 0 ? 0 : simdutf::utf8_length_from_utf16(data + pos, candidate);
+
+        if (candidateUtf8Len <= budget) {
+          bestFit = candidate;
+          bestFitUtf8Len = candidateUtf8Len;
+          left = mid + 1;
+        } else {
+          right = mid - 1;
+        }
+      }
+
+      pos += bestFit;
+      utf8Accumulated += bestFitUtf8Len;
+      break;
+    }
+
+    utf8Accumulated += chunkUtf8Len;
+    pos += chunkSize;
+  }
+
+  if constexpr (ReturnLength) {
+    return {pos, utf8Accumulated};
+  } else {
+    return pos;
+  }
+}
+
+// Returns how many leading UTF-16 code units of `data` can be encoded as UTF-8 without exceeding
+// `bufferSize` bytes, for input that may contain unpaired surrogates. Each unpaired surrogate
+// counts as U+FFFD (3 bytes). The returned prefix never ends between the two halves of a valid
+// surrogate pair, but it may end on an unpaired lead surrogate, since that code unit is encoded
+// on its own.
+//
+// The input is scanned forward in fixed-size chunks; only the chunk that overflows the budget is
+// binary-searched.
+size_t findBestFitInvalidUtf16(const char16_t* data, size_t length, size_t bufferSize) {
+  constexpr size_t CHUNK = 256;
+
+  // True when cutting the input at index `end` would separate a lead surrogate from the trail
+  // that completes it. A lead that is not followed by a trail is unpaired and may be cut after.
+  const auto splitsPair = [data, length](size_t end) {
+    return end > 0 && end < length && isLeadSurrogate(data[end - 1]) &&
+        isTrailSurrogate(data[end]);
+  };
+
+  size_t pos = 0;
+  size_t utf8Accumulated = 0;
+
+  while (pos < length) {
+    const size_t remaining = length - pos;
+    size_t chunkEnd = pos + kj::min(remaining, CHUNK);
+    if (splitsPair(chunkEnd)) {
+      --chunkEnd;
+    }
+
+    if (chunkEnd <= pos) {
+      // Defensive: cannot happen while CHUNK >= 2, but guarantees forward progress by taking
+      // the whole pair.
+      chunkEnd = pos + ((remaining >= 2) ? 2 : remaining);
+    }
+
+    const size_t chunkSize = chunkEnd - pos;
+    const size_t chunkUtf8Len = utf8LengthFromInvalidUtf16(kj::arrayPtr(data + pos, chunkSize));
+
+    if (utf8Accumulated + chunkUtf8Len > bufferSize) {
+      // This chunk overflows: binary-search the longest prefix of it that still fits.
+      // utf8Accumulated never exceeds bufferSize, so the subtraction cannot wrap.
+      const size_t budget = bufferSize - utf8Accumulated;
+      size_t bestFit = 0;
+
+      // The search runs over raw prefix lengths `mid` in [1, chunkSize]. Each one is snapped to
+      // a pair boundary before being measured, but the bounds always move relative to `mid`
+      // itself: moving them relative to the snapped value can leave `left` unchanged and spin
+      // forever. The snapped length is non-decreasing in `mid`, so the last fitting candidate
+      // is also the longest one.
+      size_t left = 1;
+      size_t right = chunkSize;
+      while (left <= right) {
+        const size_t mid = left + (right - left) / 2;
+        const size_t candidate = splitsPair(pos + mid) ? mid - 1 : mid;
+        const size_t candidateUtf8Len =
+            candidate == 0 ? 0 : utf8LengthFromInvalidUtf16(kj::arrayPtr(data + pos, candidate));
+
+        if (candidateUtf8Len <= budget) {
+          bestFit = candidate;
+          left = mid + 1;
+        } else {
+          right = mid - 1;
+        }
+      }
+
+      return pos + bestFit;
+    }
+
+    utf8Accumulated += chunkUtf8Len;
+    pos += chunkSize;
+  }
+
+  return pos;
 }
 
+}  // namespace
+
 TextEncoder::EncodeIntoResult TextEncoder::encodeInto(
     jsg::Lock& js, jsg::JsString input, jsg::JsUint8Array buffer) {
-  auto result = input.writeInto(
-      js, buffer.asArrayPtr<char>(), jsg::JsString::WriteFlags::REPLACE_INVALID_UTF8);
+  auto outputBuf = buffer.asArrayPtr<char>();
+  size_t bufferSize = outputBuf.size();
+
+  v8::String::ValueView view(js.v8Isolate, input);
+  uint32_t length = view.length();
+
+  if (view.is_one_byte()) {
+    // Latin-1 path: characters 0x00-0x7F encode as 1 UTF-8 byte, 0x80-0xFF as 2 bytes
+    auto data = reinterpret_cast<const char*>(view.data8());
+
+    // Latin-1 encoding strategy: three zones based on input size vs buffer capacity
+    //
+    // For Latin-1: ASCII chars (0x00-0x7F) → 1 byte, extended chars (0x80-0xFF) → 2 bytes
+    // Worst-case expansion: 2x, Best-case: 1x (pure ASCII), Typical mixed: ~1.2-1.5x
+    //
+    // Zone 1: "Definitely doesn't fit" (length > bufferSize * 2)
+    //   Even if all ASCII (best case 1:1), string won't fit. Go straight to incremental mode.
+    //   Uses forward scan without length calculation for maximum efficiency.
+    //   Example: 1M chars, 400k buffer → can't possibly fit, scan to find cutoff point
+    //
+    // Zone 2: "Definitely fits" (length * 2 <= bufferSize)
+    //   Even if all extended Latin-1 (worst case 1:2), string will fit. Convert directly.
+    //   Example: 100k chars, 250k buffer → worst case 200k bytes, guaranteed to fit
+    //
+    // Zone 3: "Maybe fits" (bufferSize < length * 2 AND length <= bufferSize * 2)
+    //   Might fit depending on ASCII/extended ratio. Use forward scan with length calculation.
+    //   Avoids redundant work: scanning once gets us both position and UTF-8 length.
+    //   Example: 600k chars, 700k buffer → fits if mostly ASCII, doesn't if mixed
+    //
+    // Threshold selection (bufferSize * 2):
+    //   - Chosen based on worst-case Latin-1 expansion of 2x
+    //   - Optimized for common case: small buffer relative to input (SSR, streaming)
+    //   - Trade-off: Zone 3 still does forward scan, but with length calculation overhead
+    //   - Performance cliff exists for borderline cases (e.g., 1M chars, 500k buffer falls
+    //     into Zone 3), but forward scan with length is still reasonably efficient
+    //
+    // Future optimization: Could use sampling to estimate ASCII ratio and choose zone
+    // dynamically, but adds complexity for marginal benefit in typical workloads.
+
+    if (length > bufferSize * 2) {
+      // Zone 1: Incremental mode - forward scan to find what fits, then convert
+      size_t read = findBestFitLatin1(data, length, bufferSize);
+      size_t written = simdutf::convert_latin1_to_utf8(data, read, outputBuf.begin());
+      return TextEncoder::EncodeIntoResult{
+        .read = static_cast<int>(read),
+        .written = static_cast<int>(written),
+      };
+    }
+
+    if (length * 2 <= bufferSize) {
+      // Zone 2: Fast path - worst-case (2x) definitely fits, convert directly
+      size_t written = simdutf::convert_latin1_to_utf8(data, length, outputBuf.begin());
+      return TextEncoder::EncodeIntoResult{
+        .read = static_cast<int>(length),
+        .written = static_cast<int>(written),
+      };
+    }
+
+    // Zone 3: "Maybe fits" - use forward scan with length calculation to avoid double-scan
+    auto [read, utf8Length] = findBestFitLatin1<true>(data, length, bufferSize);
+
+    // Check if everything fit
+    if (read == length) {
+      // ASCII fast path: utf8Length == length means all chars are ASCII, no conversion needed
+      if (utf8Length == length) {
+        memcpy(outputBuf.begin(), data, length);
+        return TextEncoder::EncodeIntoResult{
+          .read = static_cast<int>(length),
+          .written = static_cast<int>(length),
+        };
+      }
+
+      auto written = simdutf::convert_latin1_to_utf8(data, length, outputBuf.begin());
+      return TextEncoder::EncodeIntoResult{
+        .read = static_cast<int>(length),
+        .written = static_cast<int>(written),
+      };
+    }
+
+    // Partial fit: convert only what fits
+    size_t written = simdutf::convert_latin1_to_utf8(data, read, outputBuf.begin());
+    return TextEncoder::EncodeIntoResult{
+      .read = static_cast<int>(read),
+      .written = static_cast<int>(written),
+    };
+  }
+
+  // UTF-16 path: validate to ensure spec compliance (replace invalid surrogates with U+FFFD)
+  auto data = reinterpret_cast<const char16_t*>(view.data16());
+
+  if (simdutf::validate_utf16(data, length)) {
+    // Valid UTF-16: use fast SIMD conversion
+    //
+    // UTF-16 to UTF-8 encoding: variable expansion based on code point ranges
+    //   U+0000-U+007F (ASCII):           1 byte   (rare in two-byte strings)
+    //   U+0080-U+07FF:                    2 bytes  (most common)
+    //   U+0800-U+FFFF (BMP):             3 bytes  (common: CJK, etc.)
+    //   U+10000-U+10FFFF (surrogate pairs): 4 bytes (less common: emoji, etc.)
+    // Worst-case: 3 bytes per code unit (BMP chars), Typical: ~2-3 bytes per code unit
+    //
+    // Zone 1: "Definitely doesn't fit" (length > bufferSize)
+    //   Conservative threshold: even if all ASCII (impossible for two-byte strings), won't fit.
+    //   This differs from Latin-1 (bufferSize * 2) due to different typical expansion patterns.
+    //   Example: 1M code units, 900k buffer → can't fit, use incremental mode
+    //
+    // Zone 2: "Definitely fits" (length * 3 <= bufferSize)
+    //   Even if all BMP characters (worst case 1:3), string will fit. Convert directly.
+    //   Example: 200k code units, 700k buffer → worst case 600k bytes, guaranteed to fit
+    //
+    // Zone 3: "Maybe fits" (bufferSize < length * 3 AND length <= bufferSize)
+    //   Might fit depending on character distribution. Use forward scan with length calculation.
+    //   Example: 300k code units, 800k buffer → fits if mostly 2-byte chars, doesn't if BMP
+    //
+    // Threshold selection (bufferSize vs bufferSize * 3):
+    //   - Zone 1 threshold (length > bufferSize) is conservative: even 1:1 ratio won't fit
+    //   - More aggressive than Latin-1 because UTF-16 typical expansion is higher (~2-3x)
+    //   - Zone 3 (maybe fits) is large: from bufferSize to bufferSize * 3
+    //   - Optimized for common case where UTF-16 strings are mostly 2-3 byte encodings
+    //   - Performance cliff: Zone 3 still uses forward scan with length calculation overhead
+
+    if (length > bufferSize) {
+      // Zone 1: Incremental mode - forward scan to find what fits, then convert
+      size_t read = findBestFitUtf16(data, length, bufferSize);
+      size_t written = simdutf::convert_utf16_to_utf8(data, read, outputBuf.begin());
+      return TextEncoder::EncodeIntoResult{
+        .read = static_cast<int>(read),
+        .written = static_cast<int>(written),
+      };
+    }
+
+    if (length * 3 <= bufferSize) {
+      // Zone 2: Fast path - worst-case (3x) definitely fits, convert directly
+      size_t written = simdutf::convert_utf16_to_utf8(data, length, outputBuf.begin());
+      return TextEncoder::EncodeIntoResult{
+        .read = static_cast<int>(length),
+        .written = static_cast<int>(written),
+      };
+    }
+
+    // Zone 3: "Maybe fits" - use forward scan with length calculation to avoid double-scan
+    auto [read, utf8Length] = findBestFitUtf16<true>(data, length, bufferSize);
+
+    if (read == length) {
+      // Everything fit: convert entire string with SIMD
+      size_t written = simdutf::convert_utf16_to_utf8(data, length, outputBuf.begin());
+      return TextEncoder::EncodeIntoResult{
+        .read = static_cast<int>(length),
+        .written = static_cast<int>(written),
+      };
+    }
+
+    // Partial fit: convert only what fits
+    size_t written = simdutf::convert_utf16_to_utf8(data, read, outputBuf.begin());
+    return TextEncoder::EncodeIntoResult{
+      .read = static_cast<int>(read),
+      .written = static_cast<int>(written),
+    };
+  }
+
+  // Invalid UTF-16: convert directly to UTF-8, replacing unpaired surrogates with U+FFFD
+
+  // Incremental mode: buffer much smaller than input, skip "whole string fits" checks
+  if (length > bufferSize) {
+    size_t read = findBestFitInvalidUtf16(data, length, bufferSize);
+    size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, read), outputBuf);
+    return TextEncoder::EncodeIntoResult{
+      .read = static_cast<int>(read),
+      .written = static_cast<int>(written),
+    };
+  }
+
+  // Fast path: worst-case (3 bytes per UTF-16 code unit) fits
+  if (length * 3 <= bufferSize) {
+    size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, length), outputBuf);
+    return TextEncoder::EncodeIntoResult{
+      .read = static_cast<int>(length),
+      .written = static_cast<int>(written),
+    };
+  }
+
+  // Slow path: calculate exact UTF-8 length
+  size_t utf8Length = utf8LengthFromInvalidUtf16(kj::arrayPtr(data, length));
+  if (utf8Length <= bufferSize) {
+    size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, length), outputBuf);
+    return TextEncoder::EncodeIntoResult{
+      .read = static_cast<int>(length),
+      .written = static_cast<int>(written),
+    };
+  }
+
+  // Doesn't fit: forward scan to find what does
+  size_t read = findBestFitInvalidUtf16(data, length, bufferSize);
+  size_t written = convertInvalidUtf16ToUtf8(kj::arrayPtr(data, read), outputBuf);
   return TextEncoder::EncodeIntoResult{
-    .read = static_cast<int>(result.read),
-    .written = static_cast<int>(result.written),
+    .read = static_cast<int>(read),
+    .written = static_cast<int>(written),
   };
 }
 
diff --git a/src/workerd/api/encoding.h b/src/workerd/api/encoding.h
index 81d6899e5ce..c2425c4c153 100644
--- a/src/workerd/api/encoding.h
+++ b/src/workerd/api/encoding.h
@@ -216,7 +216,7 @@ class TextEncoder final: public jsg::Object {
 
   static jsg::Ref<TextEncoder> constructor(jsg::Lock& js);
 
-  jsg::BufferSource encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input);
+  jsg::JsUint8Array encode(jsg::Lock& js, jsg::Optional<jsg::JsString> input);
 
   EncodeIntoResult encodeInto(jsg::Lock& js, jsg::JsString input, jsg::JsUint8Array buffer);
 
@@ -234,11 +234,7 @@ class TextEncoder final: public jsg::Object {
       JSG_READONLY_INSTANCE_PROPERTY(encoding, getEncoding);
     }
 
-    // `encode()` returns `jsg::BufferSource`, which may be an `ArrayBuffer` or `ArrayBufferView`,
-    // but the implementation uses `jsg::BufferSource::tryAlloc()` which always tries to allocate a
-    // `Uint8Array`. The spec defines that this function returns a `Uint8Array` too.
     JSG_TS_OVERRIDE({
-      encode(input?: string): Uint8Array;
       encodeInto(input: string, buffer: Uint8Array): TextEncoderEncodeIntoResult;
     });
   }
diff --git a/src/workerd/jsg/buffersource.h b/src/workerd/jsg/buffersource.h
index f2e8351de97..b15ebef25f3 100644
--- a/src/workerd/jsg/buffersource.h
+++ b/src/workerd/jsg/buffersource.h
@@ -102,9 +102,10 @@ class BackingStore {
 
   // Creates a new BackingStore of the given size.
   template <BufferSourceType T = v8::Uint8Array>
-  static BackingStore alloc(Lock& js, size_t size) {
-    return BackingStore(js.allocBackingStore(size), size, 0, getBufferSourceElementSize<T>(),
-        construct<T>, checkIsIntegerType<T>());
+  static BackingStore alloc(
+      Lock& js, size_t size, Lock::AllocOption init_mode = Lock::AllocOption::ZERO_INITIALIZED) {
+    // `init_mode` is forwarded to Lock::allocBackingStore(); the default keeps the
+    // zero-initializing behavior for callers that do not pass it.
+    auto store = js.allocBackingStore(size, init_mode);
+    return BackingStore(kj::mv(store), size, 0, getBufferSourceElementSize<T>(), construct<T>,
+        checkIsIntegerType<T>());
   }
 
   using Disposer = void(void*, size_t, void*);
diff --git a/src/workerd/jsg/jsg.h b/src/workerd/jsg/jsg.h
index 4af60a50871..e6e23da08f2 100644
--- a/src/workerd/jsg/jsg.h
+++ b/src/workerd/jsg/jsg.h
@@ -2766,6 +2766,14 @@ class Lock {
 
   // Utility method to safely allocate a v8::BackingStore with allocation failure handling.
   // Throws a javascript error if allocation fails.
+  //
+  // IMPORTANT: This method can trigger garbage collection, which may move or invalidate V8
+  // objects. Do NOT call this method while:
+  // - A v8::String::ValueView is alive (it holds internal V8 heap locks)
+  // - You have raw pointers to V8 heap data (e.g., from view.data8(), view.data16())
+  //
+  // Safe pattern: Copy V8 string data to off-heap memory FIRST (e.g., via JsString::writeInto()
+  // into kj::SmallArray), THEN call allocBackingStore(). See TextEncoder::encode() for example.
   std::unique_ptr<v8::BackingStore> allocBackingStore(
       size_t size, AllocOption init_mode = AllocOption::ZERO_INITIALIZED) KJ_WARN_UNUSED_RESULT;
 
diff --git a/src/workerd/jsg/jsvalue.h b/src/workerd/jsg/jsvalue.h
index 23f94a7ac36..36c7a311f4c 100644
--- a/src/workerd/jsg/jsvalue.h
+++ b/src/workerd/jsg/jsvalue.h
@@ -280,6 +280,7 @@ class JsString final: public JsBase<v8::String, JsString> {
   int hashCode() const;
 
   bool isFlat() const;
+  bool isOneByte(Lock& js) const KJ_WARN_UNUSED_RESULT;
   bool containsOnlyOneByte() const;
 
   bool operator==(const JsString& other) const;
@@ -307,6 +308,12 @@ class JsString final: public JsBase<v8::String, JsString> {
     // The number of elements (e.g. char, byte, uint16_t) written to the buffer.
     size_t written;
   };
+
+  // Copy string contents into a provided buffer (off-heap memory).
+  //
+  // IMPORTANT: This method does NOT flatten the V8 string or hold V8 heap locks. It safely
+  // copies data out of V8's heap into your buffer. This makes it safe to use before calling
+  // GC-triggering operations like Lock::allocBackingStore().
   WriteIntoStatus writeInto(
       Lock& js, kj::ArrayPtr<char> buffer, WriteFlags options = WriteFlags::NONE) const;
   WriteIntoStatus writeInto(
@@ -988,6 +995,10 @@ inline int JsString::length(jsg::Lock& js) const {
   return inner->Length();
 }
 
+// Forwards to v8::String::IsOneByte() on the wrapped string. `js` is not used by the call; it
+// only mirrors the signature of the neighbouring accessors.
+inline bool JsString::isOneByte(jsg::Lock& js) const {
+  const bool oneByte = inner->IsOneByte();
+  return oneByte;
+}
+
 inline size_t JsString::utf8Length(jsg::Lock& js) const {
   return inner->Utf8LengthV2(js.v8Isolate);
 }