Fix diffSentences for old Safari versions (#616)

ExplodingCabbage · web-flow · commit 2717fd85b49d · 2025-05-22T14:36:23.000+01:00
* Fix diffSentences for old Safari versions. Resolves #615.
* Add release notes.
diff --git a/release-notes.md b/release-notes.md
@@ -1,5 +1,10 @@
 # Release Notes
 
+## 8.0.2
+
+- [#616](https://github.com/kpdecker/jsdiff/pull/616) **Restored compatibility of `diffSentences` with old Safari versions.** This was broken in 8.0.0 by the introduction of a regex with a [lookbehind assertion](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Lookbehind_assertion); these weren't supported in Safari prior to version 16.4.
+- [#612](https://github.com/kpdecker/jsdiff/pull/612) **Improved tree shakeability** by marking the built CJS and ESM packages with `sideEffects: false`.
+
 ## 8.0.1
 
 - [#610](https://github.com/kpdecker/jsdiff/pull/610) **Fixes types for `diffJson` which were broken by 8.0.0**. The new bundled types in 8.0.0 only allowed `diffJson` to be passed string arguments, but it should've been possible to pass either strings or objects (and now is). Thanks to Josh Kelley for the fix.
diff --git a/src/diff/sentence.ts b/src/diff/sentence.ts
@@ -1,9 +1,53 @@
 import Diff from './base.js';
-import type { ChangeObject, CallbackOptionAbortable, CallbackOptionNonabortable, DiffCallbackNonabortable, DiffSentencesOptionsAbortable, DiffSentencesOptionsNonabortable} from '../types.js';
+import type {
+  ChangeObject,
+  CallbackOptionAbortable,
+  CallbackOptionNonabortable,
+  DiffCallbackNonabortable,
+  DiffSentencesOptionsAbortable,
+  DiffSentencesOptionsNonabortable
+} from '../types.js';
+
+function isSentenceEndPunct(char: string) { // Reports whether `char` is exactly one of the sentence-ending marks: full stop, exclamation mark or question mark.
+  return char == '.' || char == '!' || char == '?';
+}
 
 class SentenceDiff extends Diff<string, string> {
   tokenize(value: string) {
-    return value.split(/(?<=[.!?])(\s+|$)/);
+    // Splits `value` into sentence tokens and the whitespace runs between them. If in future we drop
+    // support for environments that don't support lookbehinds, we can replace this entire function with:
+    //     return value.split(/(?<=[.!?])(\s+|$)/);
+    // but until then, for similar reasons to the trailingWs function in string.ts, we are forced
+    // to do this verbosely "by hand" instead of using a regex.
+    const result = [];
+    let tokenStartI = 0; // index of the first character of the token currently being scanned
+    for (let i = 0; i < value.length; i++) {
+      if (i == value.length - 1) { // last character reached: whatever remains from tokenStartI is the final token
+        result.push(value.slice(tokenStartI));
+        break;
+      }
+
+      if (isSentenceEndPunct(value[i]) && value[i + 1].match(/\s/)) {
+        // We've hit a sentence break - i.e. a punctuation mark followed by whitespace.
+        // We now want to push TWO tokens to the result:
+        // 1. the sentence (from tokenStartI up to and including the punctuation mark at i)
+        result.push(value.slice(tokenStartI, i + 1));
+
+        // 2. the whitespace (the whole run; i ends on its last character, and `?.` keeps the lookahead safe at the end of the string)
+        i = tokenStartI = i + 1;
+        while (value[i + 1]?.match(/\s/)) {
+          i++;
+        }
+        result.push(value.slice(tokenStartI, i + 1));
+
+        // Then the next token (a sentence) starts on the character after the whitespace.
+        // (It's okay if this is off the end of the string - then the outer loop will terminate
+        // here anyway.)
+        tokenStartI = i + 1;
+      }
+    }
+
+    return result; // an empty `value` never enters the loop, so the result is then an empty array
   }
 }