|
| 1 | +import os |
| 2 | + |
| 3 | +import pytest |
| 4 | + |
# Python 2/3 compatibility: guarantee that the name ``unichr`` is usable.
try:
    # python 2 -- ``unichr`` is a builtin, referencing it is the whole probe
    _ = unichr
except NameError:
    # python 3 -- no ``unichr`` builtin, alias it to ``chr``
    unichr = chr

# Probe with a codepoint beyond the Basic Multilingual Plane: a ValueError
# is taken to mean a 'narrow' build, which the skipif markers in this module
# use to skip tests that cannot be verified there.
try:
    unichr(0x2fffe)
except ValueError:
    NARROW_ONLY = True
else:
    NARROW_ONLY = False
| 16 | + |
| 17 | +import wcwidth |
| 18 | + |
def make_sequence_from_line(line):
    """
    Build the string spelled out by one data-file line of hex codepoints.

    Only the text before the first ';' is used.  It is split on whitespace
    and each field is parsed as a hexadecimal codepoint, for example::

        '002A FE0F ; ..' -> (0x2a, 0xfe0f) -> unichr(0x2a) + unichr(0xfe0f)

    :param line: one line in 'codepoints ; remainder' form.
    :returns: the concatenated characters; empty when no codepoints precede ';'.
    :raises ValueError: if a field is not valid hexadecimal.
    """
    # Use the module-level ``unichr`` alias rather than ``chr``: on Python 2
    # ``chr`` only accepts values below 256, while on Python 3 the alias *is*
    # ``chr``, so behavior there is unchanged.
    codepoints = line.split(';', 1)[0].split()
    return u''.join(unichr(int(cp, 16)) for cp in codepoints)
| 22 | + |
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_emoji_zwj_sequence():
    u"""
    Emoji zwj sequence of four codepoints is just 2 cells.
    """
    # The ``test_`` prefix is required: without it pytest's default discovery
    # rules never collect this function and the assertions below never run.
    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
              u"\u200d"       # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
              u"\U0001f4bb")  # Fused, Category So, East Asian Width property 'W' -- PERSONAL COMPUTER
    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
    expect_length_each = (2, 0, 0, 2)
    expect_length_phrase = 2

    # exercise,
    length_each = tuple(map(wcwidth.wcwidth, phrase))
    length_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert length_each == expect_length_each
    assert length_phrase == expect_length_phrase
| 43 | + |
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_unfinished_zwj_sequence():
    u"""
    A zero-width joiner with no character after it must not cause an index-out-of-bounds error.
    """
    # (character, expected wcwidth)  # Category, East Asian Width property -- name
    cases = (
        (u"\U0001f469", 2),  # So, 'W' -- WOMAN
        (u"\U0001f3fb", 0),  # Sk, 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
        (u"\u200d", 0),      # Cf, 'N' -- ZERO WIDTH JOINER (nothing follows it)
    )
    phrase = u"".join(char for char, _ in cases)
    wanted_each = tuple(width for _, width in cases)
    wanted_total = 2

    # exercise,
    measured_each = tuple(wcwidth.wcwidth(char) for char in phrase)
    measured_total = wcwidth.wcswidth(phrase)

    # verify.
    assert measured_each == wanted_each
    assert measured_total == wanted_total
| 62 | + |
| 63 | + |
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_non_recommended_zwj_sequence():
    """
    ZWJ is measured as though the join succeeded; wcwidth does not verify joinability.
    """
    # NOTE(review): these codepoints are the same three used by
    # test_unfinished_zwj_sequence, so no character that "cannot be joined"
    # actually follows the ZWJ here -- confirm whether one was intended.
    codepoints = [
        u"\U0001f469",  # Base, Category So, East Asian Width property 'W' -- WOMAN
        u"\U0001f3fb",  # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
        u"\u200d",      # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
    ]
    phrase = u"".join(codepoints)

    # exercise,
    per_character = tuple(wcwidth.wcwidth(char) for char in phrase)
    whole_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert per_character == (2, 0, 0)
    assert whole_phrase == 2
| 82 | + |
| 83 | + |
def test_another_emoji_zwj_sequence():
    u"""
    A ZWJ sequence of 1-cell symbols ending in VARIATION SELECTOR-16 is expected to measure 2 cells.
    """
    # (character, expected wcwidth)
    pairs = [
        (u"\u26F9", 1),      # PERSON WITH BALL
        (u"\U0001F3FB", 0),  # EMOJI MODIFIER FITZPATRICK TYPE-1-2
        (u"\u200D", 0),      # ZERO WIDTH JOINER
        (u"\u2640", 1),      # FEMALE SIGN
        (u"\uFE0F", 0),      # VARIATION SELECTOR-16
    ]
    characters, wanted_each = zip(*pairs)
    phrase = u"".join(characters)
    wanted_total = 2

    # exercise,
    measured_each = tuple(wcwidth.wcwidth(char) for char in phrase)
    measured_total = wcwidth.wcswidth(phrase)

    # verify.
    assert measured_each == wanted_each
    assert measured_total == wanted_total
| 101 | + |
| 102 | + |
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_longer_emoji_zwj_sequence():
    """
    Ten codepoints joined into one long emoji ZWJ sequence still occupy just 2 cells.
    """
    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
    # (character, expected wcwidth)  # 'Category Code', 'East Asian Width property' -- 'description'
    table = (
        (u"\U0001F9D1", 2),  # 'So', 'W' -- ADULT
        (u"\U0001F3FB", 0),  # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
        (u"\u200d", 0),      # 'Cf', 'N' -- ZERO WIDTH JOINER
        (u"\u2764", 1),      # 'So', 'N' -- HEAVY BLACK HEART
        (u"\uFE0F", 0),      # 'Mn', 'A' -- VARIATION SELECTOR-16
        (u"\u200d", 0),      # 'Cf', 'N' -- ZERO WIDTH JOINER
        (u"\U0001F48B", 2),  # 'So', 'W' -- KISS MARK
        (u"\u200d", 0),      # 'Cf', 'N' -- ZERO WIDTH JOINER
        (u"\U0001F9D1", 2),  # 'So', 'W' -- ADULT
        (u"\U0001F3FD", 0),  # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4
    )
    phrase = u"".join(char for char, _ in table)
    wanted_each = tuple(width for _, width in table)
    wanted_total = 2

    # exercise,
    measured_each = tuple(wcwidth.wcwidth(char) for char in phrase)
    measured_total = wcwidth.wcswidth(phrase)

    # verify.
    assert measured_each == wanted_each
    assert measured_total == wanted_total
| 131 | + |
| 132 | + |
def read_sequences_from_file(filename):
    """
    Load the data lines of a sequence file located beside this module.

    Blank lines and '#' comment lines are dropped; every remaining line is
    converted to a string with make_sequence_from_line().

    :param filename: file name, resolved relative to this module's directory.
    :returns: ``(lines, sequences)`` -- two lists of equal length holding the
        stripped text lines and the string built from each of them.
    """
    # Function-scope import keeps this helper self-contained; ``io.open``
    # accepts ``encoding`` on Python 2 as well as Python 3.
    import io

    path = os.path.join(os.path.dirname(__file__), filename)
    # Decode explicitly as UTF-8 so that reading does not depend on the
    # platform's default locale encoding.
    with io.open(path, encoding='utf-8') as f:
        # Strip before testing for '#', so an indented comment line is
        # skipped instead of being handed to the hex parser.
        stripped = (line.strip() for line in f)
        lines = [line for line in stripped if line and not line.startswith('#')]
    sequences = [make_sequence_from_line(line) for line in lines]
    return lines, sequences
| 140 | + |
| 141 | + |
def test_recommended_emoji_zwj_sequences():
    """
    Every sequence listed in the unicode.org-published emoji-zwj-sequences.txt measures 2 cells.
    """
    # given,
    lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt')

    # exercise: each failure keeps its original text file line, a debugging aide
    failures = []
    checked = 0
    for checked, (line, sequence) in enumerate(zip(lines, sequences), 1):
        width = wcwidth.wcswidth(sequence)
        if width == 2:
            continue
        failures.append(dict(
            expected_width=2,
            line=line,
            measured_width=width,
            sequence=sequence,
        ))

    # verify
    assert failures == []
    assert checked > 1000, checked
| 166 | + |
| 167 | + |
def test_recommended_variation_16_sequences():
    """
    Test wcswidth of the VS-16 sequences in the unicode.org-published emoji-variation-sequences.txt
    """
    # given,
    lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt')

    errors = []
    num = 0
    for sequence, line in zip(sequences, lines):
        num += 1
        # Filter for only U+FE0F (VS-16).  The u-prefix matters on Python 2:
        # an unprefixed literal is a byte string in which the escape is not
        # interpreted, so it would never match and every sequence would be
        # skipped, leaving the test vacuous.
        if u'\ufe0f' not in sequence:
            continue
        measured_width = wcwidth.wcswidth(sequence)
        if measured_width != 2:
            errors.append({
                'expected_width': 2,
                'line': line,
                # reuse the value that was actually compared above
                'measured_width': measured_width,
                'sequence': sequence,
            })

    # verify
    assert errors == []
    # NOTE(review): ``num`` counts every data line read, including the ones
    # skipped by the VS-16 filter, and the threshold is the same one used by
    # the ZWJ test -- confirm that emoji-variation-sequences.txt really holds
    # more than 1000 data lines.
    assert num > 1000, num
0 commit comments