jquast
diff --git a/‎bin/update-tables.py‎
Lines changed: 281 additions & 102 deletions b/‎bin/update-tables.py‎
Lines changed: 281 additions & 102 deletions
diff --git a/‎docs/intro.rst‎
Lines changed: 4 additions & 0 deletions b/‎docs/intro.rst‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/specs.rst‎
Lines changed: 3 additions & 0 deletions b/‎docs/specs.rst‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎docs/unicode_version.rst‎
Lines changed: 6 additions & 0 deletions b/‎docs/unicode_version.rst‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎setup.py‎
Lines changed: 1 addition & 1 deletion b/‎setup.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/emoji-variation-sequences.txt‎
Lines changed: 757 additions & 0 deletions b/‎tests/emoji-variation-sequences.txt‎
Lines changed: 757 additions & 0 deletions
diff --git a/‎tests/emoji-zwj-sequences.txt‎
Lines changed: 1529 additions & 0 deletions b/‎tests/emoji-zwj-sequences.txt‎
Lines changed: 1529 additions & 0 deletions
diff --git a/‎tests/test_core.py‎
Lines changed: 0 additions & 100 deletions b/‎tests/test_core.py‎
Lines changed: 0 additions & 100 deletions
diff --git a/‎tests/test_emoji_zwj.py‎
Lines changed: 193 additions & 0 deletions b/‎tests/test_emoji_zwj.py‎
Lines changed: 193 additions & 0 deletions
diff --git a/‎tox.ini‎
Lines changed: 1 addition & 1 deletion b/‎tox.ini‎
Lines changed: 1 addition & 1 deletion
@@ -216,6 +216,10 @@ Other Languages
 =======
 History
 =======
+0.2.10 *2023-11-08*
+  * **Bugfix** ``wcswidth()`` now measures emoji variation sequences ending
+    in U+FE0F Variation Selector 16 as Wide, 2 cells (`PR #XX`).
+
 0.2.9 *2023-10-30*
   * **Bugfix** zero-width characters used in Emoji ZWJ sequences, Balinese,
     Jamo, Devanagari, Tamil, Kannada and others (`PR #91`_).
 
@@ -52,3 +52,6 @@ Category codes of Nonspacing Mark (``Mn``) and Spacing Mark (``Mc``).
 
 Any characters of Modifier Symbol category, ``'Sk'`` where ``'FULLWIDTH'`` is
 present in comment of unicode data file, aprox. 3 characters.
+
+Any character followed by U+FE0F (Variation Selector 16) in a sequence that
+``emoji-variation-sequences.txt`` defines as ``emoji style``.
@@ -121,3 +121,9 @@ release files:
 ``EastAsianWidth-15.1.0.txt``
   *Date: 2023-07-28, 23:34:08 GMT*
 
+``emoji-variation-sequences-12.0.0.txt``
+  *Date: 2019-01-15, 12:10:05 GMT*
+
+``emoji-variation-sequences-15.1.0.txt``
+  *Date: 2023-02-01, 02:22:54 GMT*
+
@@ -44,7 +44,7 @@ def main():
     setuptools.setup(
         name='wcwidth',
         # NOTE: manually manage __version__ in wcwidth/__init__.py !
-        version='0.2.9',
+        version='0.2.10',
         description=(
             "Measures the displayed width of unicode strings in a terminal"),
         long_description=codecs.open(
 
@@ -10,9 +10,6 @@
 # local
 import wcwidth
 
-# 3rd party
-import pytest
-
 # some tests cannot be done on some builds of python, where the internal
 # unicode structure is limited to 0x10000 for memory conservation,
 # "ValueError: unichr() arg not in range(0x10000) (narrow Python build)"
@@ -22,12 +19,6 @@
 except NameError:
     # python 3
     unichr = chr
-try:
-    unichr(0x2fffe)
-    NARROW_ONLY = False
-except ValueError:
-    NARROW_ONLY = True
-
 
 def test_package_version():
     """wcwidth.__version__ is expected value."""
@@ -254,97 +245,6 @@ def test_kr_jamo_filler():
     assert length_phrase == expect_length_phrase
 
 
-@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
-def emoji_zwj_sequence():
-    u"""
-    Emoji zwj sequence of four codepoints is just 2 cells.
-    """
-    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
-              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
-              u"\u200d"       # Joiner, Category Cf, East Asian Width property 'N'  -- ZERO WIDTH JOINER
-              u"\U0001f4bb")  # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER
-    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
-    expect_length_each = (2, 0, 0, 2)
-    expect_length_phrase = 2
-
-    # exercise,
-    length_each = tuple(map(wcwidth.wcwidth, phrase))
-    length_phrase = wcwidth.wcswidth(phrase)
-
-    # verify.
-    assert length_each == expect_length_each
-    assert length_phrase == expect_length_phrase
-
-
-@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
-def test_unfinished_zwj_sequence():
-    u"""
-    Ensure index-out-of-bounds does not occur for zero-width joiner without any following character
-    """
-    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
-              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
-              u"\u200d")      # Joiner, Category Cf, East Asian Width property 'N'  -- ZERO WIDTH JOINER
-    expect_length_each = (2, 0, 0)
-    expect_length_phrase = 2
-
-    # exercise,
-    length_each = tuple(map(wcwidth.wcwidth, phrase))
-    length_phrase = wcwidth.wcswidth(phrase)
-
-    # verify.
-    assert length_each == expect_length_each
-    assert length_phrase == expect_length_phrase
-
-
-@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
-def test_non_recommended_zwj_sequence():
-    """
-    Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify
-    """
-    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
-              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
-              u"\u200d")      # Joiner, Category Cf, East Asian Width property 'N'  -- ZERO WIDTH JOINER
-    expect_length_each = (2, 0, 0)
-    expect_length_phrase = 2
-
-    # exercise,
-    length_each = tuple(map(wcwidth.wcwidth, phrase))
-    length_phrase = wcwidth.wcswidth(phrase)
-
-    # verify.
-    assert length_each == expect_length_each
-    assert length_phrase == expect_length_phrase
-
-
-@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
-def test_longer_emoji_zwj_sequence():
-    """
-    A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells!
-    """
-    # 'Category Code', 'East Asian Width property' -- 'description'
-    phrase = (u"\U0001F9D1"   # 'So', 'W' -- ADULT
-              u"\U0001F3FB"   # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
-              u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
-              u"\u2764"       # 'So', 'N' -- HEAVY BLACK HEART
-              u"\uFE0F"       # 'Mn', 'A' -- VARIATION SELECTOR-16
-              u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
-              u"\U0001F48B"   # 'So', 'W' -- KISS MARK
-              u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
-              u"\U0001F9D1"   # 'So', 'W' -- ADULT
-              u"\U0001F3FD")  # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4
-
-    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
-    expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0)
-    expect_length_phrase = 2
-
-    # exercise,
-    length_each = tuple(map(wcwidth.wcwidth, phrase))
-    length_phrase = wcwidth.wcswidth(phrase)
-
-    # verify.
-    assert length_each == expect_length_each
-    assert length_phrase == expect_length_phrase
-
 
 def test_devanagari_script():
     """
 
@@ -0,0 +1,193 @@
+import codecs
+import os
+
+import pytest
+
+# Python 2/3 compatibility: make ``unichr`` available under either interpreter.
+try:
+    # python 2 provides the builtin, so this lookup succeeds
+    _ = unichr
+except NameError:
+    # python 3 has no ``unichr``, fall back to chr()
+    unichr = chr
+
+# Probe for 'narrow' python builds, where unichr() raises ValueError for
+# codepoints of 0x10000 and above; tests needing them are skipped there.
+try:
+    unichr(0x2fffe)
+except ValueError:
+    NARROW_ONLY = True
+else:
+    NARROW_ONLY = False
+
+import wcwidth
+
+def make_sequence_from_line(line):
+    """
+    Build the string of codepoints described by one sequence data line.
+
+    The field before the first ';' holds whitespace-separated hexadecimal
+    codepoints, so that '002A FE0F  ; ..' becomes the two characters
+    U+002A and U+FE0F.
+
+    :param str line: a data line, such as from emoji-zwj-sequences.txt.
+    :returns: the codepoints of the first field joined into a single string.
+    """
+    codepoints_field = line.split(';', 1)[0]
+    # use this module's ``unichr`` shim rather than chr(), which on python 2
+    # is limited to range(256) and cannot build these codepoints
+    return u''.join(unichr(int(cp, 16)) for cp in codepoints_field.split())
+
+# Named with the ``test_`` prefix so that pytest's default discovery collects
+# it; as plain ``emoji_zwj_sequence`` this function was never run.
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_emoji_zwj_sequence():
+    u"""
+    Emoji zwj sequence of four codepoints is just 2 cells.
+    """
+    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
+              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
+              u"\u200d"       # Joiner, Category Cf, East Asian Width property 'N'  -- ZERO WIDTH JOINER
+              u"\U0001f4bb")  # Fused, Category So, East Asian Width property 'W' -- PERSONAL COMPUTER
+    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
+    expect_length_each = (2, 0, 0, 2)
+    expect_length_phrase = 2
+
+    # exercise: measure each codepoint on its own, then the phrase as a whole,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_unfinished_zwj_sequence():
+    u"""
+    A trailing zero-width joiner, with no character after it, is measured without index error.
+    """
+    # given: each codepoint paired with the width expected of it alone,
+    table = (
+        (u"\U0001f469", 2),  # Base, Category So, East Asian Width property 'W' -- WOMAN
+        (u"\U0001f3fb", 0),  # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
+        (u"\u200d", 0),      # Joiner, Category Cf, East Asian Width property 'N'  -- ZERO WIDTH JOINER
+    )
+    codepoints, expect_length_each = zip(*table)
+    phrase = u"".join(codepoints)
+    expect_length_phrase = 2
+
+    # exercise,
+    length_each = tuple(wcwidth.wcwidth(char) for char in phrase)
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_non_recommended_zwj_sequence():
+    """
+    Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify
+    """
+    # NOTE(review): this phrase is identical to the one measured by
+    # test_unfinished_zwj_sequence, nothing follows the joiner here either.
+    # Presumably a non-joinable character was meant to be appended -- confirm.
+    woman = u"\U0001f469"     # Base, Category So, East Asian Width property 'W' -- WOMAN
+    modifier = u"\U0001f3fb"  # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
+    joiner = u"\u200d"        # Joiner, Category Cf, East Asian Width property 'N'  -- ZERO WIDTH JOINER
+    phrase = woman + modifier + joiner
+
+    # exercise,
+    length_each = tuple(wcwidth.wcwidth(char) for char in phrase)
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == (2, 0, 0)
+    assert length_phrase == 2
+
+
+# Skipped on 'narrow' python builds like the other tests of this module, as
+# the phrase contains U+1F3FB, a codepoint above 0xFFFF.
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_another_emoji_zwj_sequence():
+    u"""
+    Emoji zwj sequence of five codepoints, ending in VS-16, is 2 cells.
+    """
+    phrase = (
+        u"\u26F9"        # PERSON WITH BALL
+        u"\U0001F3FB"    # EMOJI MODIFIER FITZPATRICK TYPE-1-2
+        u"\u200D"        # ZERO WIDTH JOINER
+        u"\u2640"        # FEMALE SIGN
+        u"\uFE0F")       # VARIATION SELECTOR-16
+    # measured alone, neither PERSON WITH BALL nor FEMALE SIGN is Wide,
+    expect_length_each = (1, 0, 0, 1, 0)
+    # yet the phrase as a whole is expected to occupy 2 cells.
+    expect_length_phrase = 2
+
+    # exercise,
+    length_each = tuple(map(wcwidth.wcwidth, phrase))
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_longer_emoji_zwj_sequence():
+    """
+    Ten codepoints joined as one emoji ZWJ sequence occupy only 2 cells.
+    """
+    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
+    #
+    # given: (codepoint, width expected of it alone), each annotated with its
+    # 'Category Code', 'East Asian Width property' -- 'description'
+    table = (
+        (u"\U0001F9D1", 2),  # 'So', 'W' -- ADULT
+        (u"\U0001F3FB", 0),  # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
+        (u"\u200d", 0),      # 'Cf', 'N' -- ZERO WIDTH JOINER
+        (u"\u2764", 1),      # 'So', 'N' -- HEAVY BLACK HEART
+        (u"\uFE0F", 0),      # 'Mn', 'A' -- VARIATION SELECTOR-16
+        (u"\u200d", 0),      # 'Cf', 'N' -- ZERO WIDTH JOINER
+        (u"\U0001F48B", 2),  # 'So', 'W' -- KISS MARK
+        (u"\u200d", 0),      # 'Cf', 'N' -- ZERO WIDTH JOINER
+        (u"\U0001F9D1", 2),  # 'So', 'W' -- ADULT
+        (u"\U0001F3FD", 0),  # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4
+    )
+    codepoints, expect_length_each = zip(*table)
+    phrase = u"".join(codepoints)
+    expect_length_phrase = 2
+
+    # exercise,
+    length_each = tuple(wcwidth.wcwidth(char) for char in phrase)
+    length_phrase = wcwidth.wcswidth(phrase)
+
+    # verify.
+    assert length_each == expect_length_each
+    assert length_phrase == expect_length_phrase
+
+
+def read_sequences_from_file(filename):
+    """
+    Read the data lines of a sequences file found beside this test module.
+
+    Blank lines and lines beginning with '#' are discarded.
+
+    :param str filename: name of a file in the folder of this module.
+    :returns: tuple of ``(lines, sequences)``, two lists of equal length, the
+        stripped data lines and the string that each of them describes.
+    """
+    path = os.path.join(os.path.dirname(__file__), filename)
+    # decode explicitly as utf-8 rather than by the locale's default encoding,
+    # codecs.open() is used as it accepts 'encoding' on python 2 and 3 alike
+    with codecs.open(path, 'r', encoding='utf-8') as f:
+        lines = [line.strip()
+                 for line in f
+                 if not line.startswith('#') and line.strip()]
+    sequences = [make_sequence_from_line(line) for line in lines]
+    return lines, sequences
+
+
+def test_recommended_emoji_zwj_sequences():
+    """
+    Every sequence of the unicode.org-published emoji-zwj-sequences.txt measures 2 cells by wcswidth
+    """
+    # given,
+    lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt')
+
+    # exercise: each failure keeps the line of the text file it was read
+    # from, as a debugging aide
+    failures = []
+    count = 0
+    for count, (line, sequence) in enumerate(zip(lines, sequences), start=1):
+        width = wcwidth.wcswidth(sequence)
+        if width == 2:
+            continue
+        failures.append(dict(
+            expected_width=2,
+            line=line,
+            measured_width=width,
+            sequence=sequence,
+        ))
+
+    # verify
+    assert failures == []
+    assert count > 1000, count
+
+
+def test_recommended_variation_16_sequences():
+    """
+    Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt
+
+    Only the sequences containing U+FE0F (VS-16) are measured, each of them
+    is expected to be 2 cells wide.
+    """
+    # given,
+    lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt')
+
+    errors = []
+    num = 0
+    for sequence, line in zip(sequences, lines):
+        num += 1
+        # filter for only \uFE0F (VS-16), the u'' prefix is needed for the
+        # escape to mean a codepoint on python 2, without it nothing matches
+        # and every sequence is skipped
+        if u'\ufe0f' not in sequence:
+            continue
+        measured_width = wcwidth.wcswidth(sequence)
+        if measured_width != 2:
+            errors.append({
+                'expected_width': 2,
+                'line': line,
+                'measured_width': measured_width,
+                'sequence': sequence,
+            })
+
+    # verify
+    assert errors == []
+    # Guard against a truncated data file. The file is only 757 lines long,
+    # comments included, so the previous bound of 1000 could never be met.
+    # NOTE(review): tighten to the exact number of data lines of the file.
+    assert num >= 700, num
@@ -123,7 +123,7 @@ deps = -r requirements-tests37.in
 basepython = python3.12
 usedevelop = true
 deps = -r requirements-update.txt
-commands = python {toxinidir}/bin/update-tables.py {posargs}
+commands = python {toxinidir}/bin/update-tables.py {posargs:--no-check-last-modified}
 
 [testenv:autopep8]
 basepython = python3.11