Skip to content

Commit c577a41

Browse files
committed
Bugfix accounting for Variation Selector 16
- Add new table, VS16_NARROW_TO_WIDE. It has only one version, "9.0.0". This defines a set of characters that are otherwise Narrow, like '0', that become wide when combined with U+FE0F, "VARIATION SELECTOR 16". - wcswidth() function now recalls "last-most measured character", and, on U+FE0F, checks that character in table VS16_NARROW_TO_WIDE, and, if matching, adds 1 to the measured width. - The latest list of 'emoji-zwj-sequences.txt' and 'emoji-variation-sequences.txt' are fetched by update-tables.py and placed in 'tests/' folder, and now used by automatic tests in test_emoji_zwj.py, this is helpful to ensure 100% compatibility with all latest known emoji sequences A single "9.0.0" version is used because of ambiguity in legacy releases of unicode data files. So ambiguous that very few terminals get it right, I will share results from 'ucs-detect' project based on this branch next. All in all, U+FE0F appears to be something of a "fixup" that is only for these legacy emojis, many that probably should have always been defined as WIDE. I don't expect any new FE0F sequences to be published. Minor ----- refactor update-tables.py, add 'UnicodeDataFile' class for fetching/storing --- c/bin/update-tables.py +++ i/bin/update-tables.py @@ -2,16 +2,10 @@ """ Update the Unicode code tables for wcwidth. This is code generation using jinja2. 
-This should be executed through tox, +This is typically executed through tox, $ tox -e update -If data files were previously downloaded, but will refresh by last-modified -check using HEAD request from unicode.org URLs, unless --no-check-last-modified -is used: - - $ tox -e update -- --check-last-modified - https://github.com/jquast/wcwidth """ from __future__ import annotations @@ -21,7 +15,6 @@ import os import re import sys import string -import logging import datetime import functools import unicodedata @@ -29,7 +22,10 @@ from pathlib import Path from dataclasses import field, fields, dataclass from typing import Any, Mapping, Iterable, Iterator, Sequence, Container, Collection -from typing_extensions import Self +try: + from typing import Self +except ImportError: + from typing_extensions import Self # 3rd party import jinja2 @@ -37,13 +33,11 @@ import requests import urllib3.util import dateutil.parser -URL_UNICODE_DERIVED_AGE = 'https://www.unicode.org/Public/UCD/latest/ucd/DerivedAge.txt' -URL_EASTASIAN_WIDTH = 'https://www.unicode.org/Public/{version}/ucd/EastAsianWidth.txt' -URL_DERIVED_CATEGORY = 'https://www.unicode.org/Public/{version}/ucd/extracted/DerivedGeneralCategory.txt' EXCLUDE_VERSIONS = ['2.0.0', '2.1.2', '3.0.0', '3.1.0', '3.2.0', '4.0.0'] PATH_UP = os.path.relpath(os.path.join(os.path.dirname(__file__), os.path.pardir)) PATH_DATA = os.path.join(PATH_UP, 'data') +PATH_TESTS = os.path.join(PATH_UP, 'tests') # "wcwidth/bin/update-tables.py", even on Windows # not really a path, if the git repo isn't named "wcwidth" THIS_FILEPATH = ('wcwidth/' + @@ -59,7 +53,26 @@ FETCH_BLOCKSIZE = int(os.environ.get('FETCH_BLOCKSIZE', '4096')) MAX_RETRIES = int(os.environ.get('MAX_RETRIES', '6')) BACKOFF_FACTOR = float(os.environ.get('BACKOFF_FACTOR', '0.1')) -logger = logging.getLogger(__name__) +def _bisearch(ucs, table): + """ + A copy of wcwwidth._bisearch, to prevent having issues when depending on + code that imports our generated code + """ + lbound = 0 
+ ubound = len(table) - 1 + + if ucs < table[0][0] or ucs > table[ubound][1]: + return 0 + while ubound >= lbound: + mid = (lbound + ubound) // 2 + if ucs > table[mid][1]: + lbound = mid + 1 + elif ucs < table[mid][0]: + ubound = mid - 1 + else: + return 1 + + return 0 @dataclass(order=True, frozen=True) @@ -67,7 +80,7 @@ class UnicodeVersion: """A class for camparable unicode version.""" major: int minor: int - micro: int + micro: int | None @classmethod def parse(cls, version_str: str) -> UnicodeVersion: @@ -77,14 +90,19 @@ class UnicodeVersion: >>> UnicodeVersion.parse("14.0.0") UnicodeVersion(major=14, minor=0, micro=0) """ - return cls(*map(int, version_str.split(".")[:3])) + ver_ints = tuple(map(int, version_str.split(".")[:3])) + return cls(major=ver_ints[0], minor=ver_ints[1], + micro=ver_ints[2] if len(ver_ints) > 2 else None) def __str__(self) -> str: """ >>> str(UnicodeVersion(12, 1, 0)) '12.1.0' """ - return f'{self.major}.{self.minor}.{self.micro}' + maybe_micro = '' + if self.micro is not None: + maybe_micro = f'.{self.micro}' + return f'{self.major}.{self.minor}{maybe_micro}' @dataclass(frozen=True) @@ -282,11 +300,9 @@ class UnicodeTableRenderDef(RenderDefinition): @functools.cache def fetch_unicode_versions() -> list[UnicodeVersion]: """Fetch, determine, and return Unicode Versions for processing.""" - fname = os.path.join(PATH_DATA, URL_UNICODE_DERIVED_AGE.rsplit('/', 1)[-1]) - do_retrieve(url=URL_UNICODE_DERIVED_AGE, fname=fname) pattern = re.compile(r'#.*assigned in Unicode ([0-9.]+)') versions: list[UnicodeVersion] = [] - with open(fname, encoding='utf-8') as f: + with open(UnicodeDataFile.DerivedAge(), encoding='utf-8') as f: for line in f: if match := re.match(pattern, line): version = match.group(1) @@ -297,75 +313,138 @@ def fetch_unicode_versions() -> list[UnicodeVersion]: def fetch_source_headers() -> UnicodeVersionRstRenderCtx: - # find all filenames with a version number in it, - # sort filenames by name, then dotted number, ascending - 
pattern = re.compile( - r'^(DerivedGeneralCategory|EastAsianWidth)-(\d+)\.(\d+)\.(\d+)\.txt$') - filename_matches = [] - for fname in os.listdir(PATH_DATA): - if match := re.search(pattern, fname): - filename_matches.append(match) - - filename_matches.sort(key=lambda m: ( - m.group(1), - int(m.group(2)), - int(m.group(3)), - int(m.group(4)), - )) - filenames = [os.path.join(PATH_DATA, match.string) - for match in filename_matches] - headers: list[tuple[str, str]] = [] - for filename in filenames: + for filename in UnicodeDataFile.filenames(): header_description = cite_source_description(filename) headers.append(header_description) return UnicodeVersionRstRenderCtx(headers) def fetch_table_wide_data() -> UnicodeTableRenderCtx: - """Fetch and update east-asian tables.""" + """Fetch east-asian tables.""" table: dict[UnicodeVersion, TableDef] = {} for version in fetch_unicode_versions(): # parse typical 'wide' characters by categories 'W' and 'F', - fname_eaw = os.path.join(PATH_DATA, f'EastAsianWidth-{version}.txt') - do_retrieve(url=URL_EASTASIAN_WIDTH.format(version=version), fname=fname_eaw) - table[version] = parse_category(fname=fname_eaw, category_codes=('W', 'F'), wide=2) - - # subtract(!) wide characters that are defined as 'W' category in EAW, but - # as a zero-width category 'Mn' or 'Mc' in DGC, which is preferred. - fname_dgc = os.path.join(PATH_DATA, f'DerivedGeneralCategory-{version}.txt') - do_retrieve(url=URL_UNICODE_DERIVED_AGE.format(version=version), fname=fname_dgc) - table[version].values.discard(parse_category(fname=fname_dgc, category_codes=('Mn', 'Mc'), wide=0).values) - - # join with some atypical 'wide' characters defined only by category - # 'Sk' in DGC - table[version].values.update(parse_category(fname=fname_dgc, category_codes=('Sk',), wide=2).values) + table[version] = parse_category(fname=UnicodeDataFile.EastAsianWidth(version), + category_codes=('W', 'F'), + wide=2) + + # subtract(!) 
wide characters that were defined above as 'W' category in EastAsianWidth, + # but also zero-width category 'Mn' or 'Mc' in DerivedGeneralCategory! + table[version].values.discard(parse_category(fname=UnicodeDataFile.DerivedGeneralCategory(version), + category_codes=('Mn', 'Mc'), + wide=0).values) + + # finally, join with atypical 'wide' characters defined by category 'Sk', + table[version].values.update(parse_category(fname=UnicodeDataFile.DerivedGeneralCategory(version), + category_codes=('Sk',), + wide=2).values) return UnicodeTableRenderCtx('WIDE_EASTASIAN', table) def fetch_table_zero_data() -> UnicodeTableRenderCtx: """ - Fetch and update zero width tables. + Fetch zero width tables. See also: https://unicode.org/L2/L2002/02368-default-ignorable.html """ table: dict[UnicodeVersion, TableDef] = {} for version in fetch_unicode_versions(): # Determine values of zero-width character lookup table by the following category codes - fname_dgc = os.path.join(PATH_DATA, f'DerivedGeneralCategory-{version}.txt') - do_retrieve(url=URL_DERIVED_CATEGORY.format(version=version), fname=fname_dgc) - table[version] = parse_category(fname=fname_dgc, category_codes=('Me', 'Mn', 'Mc', 'Cf', 'Zl', 'Zp', 'Sk'), wide=0) + table[version] = parse_category(fname=UnicodeDataFile.DerivedGeneralCategory(version), + category_codes=('Me', 'Mn', 'Mc', 'Cf', 'Zl', 'Zp', 'Sk'), + wide=0) # And, include NULL table[version].values.add(0) return UnicodeTableRenderCtx('ZERO_WIDTH', table) +def fetch_table_vs16_data() -> UnicodeTableRenderCtx: + """ + Fetch and create a "narrow to wide variation-16" lookup table. + + Characters in this table are all narrow, but when combined with a variation + selector-16 (\uFE0F), they become wide, for the given versions of unicode. + + UNICODE_VERSION=9.0.0 or greater is required to enable detection of the effect + of *any* 'variation selector-16' narrow emoji becoming wide. 
Just two total + files are parsed to create ONE unicode version table supporting all + Unicode versions 9.0.0 and later. + + Because of the ambiguity of versioning of these early emoji data files, which + match unicode releases 8, 9, and 10, these specifications were mostly + implemented only in Terminals supporting Unicode 9.0 or later. + + For that reason, and that these values are not expected to change, + only this single shared table is exported. + + ---- + + One example of versioning descripenancy, where v3.2 became v1.1 ("-" 12.0, "+" 15.1):: + + -2620 FE0F ; Basic_Emoji ; skull and crossbones # 3.2 [1] (☠️) + +2620 FE0F ; emoji style; # (1.1) SKULL AND CROSSBONES + + Or another discrepancy, published in unicode 12.0 as emoji version 5.2, but + missing entirely in the emoji-variation-sequences.txt published with unicode + version 15.1:: + + 26F3 FE0E ; text style; # (5.2) FLAG IN HOLE + + while some terminals display \\u0036\\uFE0F as a wide number one (kitty), + others display as ascii 1 with a no-effect zero-width (iTerm2) and others + have a strange narrow font corruption, I think it is fair to call these + ambiguous, no doubt in part because of these issues, see related + 'ucs-detect' project. + + Note that version 3.2 became 1.1, which would change unicode release of 9.0 + to version 8.0. 
+ """ + table: dict[UnicodeVersion, TableDef] = {} + unicode_latest = fetch_unicode_versions()[-1] + + wide_tables = fetch_table_wide_data().table + unicode_version = UnicodeVersion.parse('9.0.0') + + # parse table formatted by the latest emoji release (developed with + # 15.1.0) and parse a single file for all individual releases + table[unicode_version] = parse_vs16_data(fname=UnicodeDataFile.EmojiVariationSequences(unicode_latest), + ubound_unicode_version=unicode_version) + + # parse and join the final emoji release 12.0 of the earlier "type" + table[unicode_version].values.update( + parse_vs16_data(fname=UnicodeDataFile.LegacyEmojiVariationSequences(), + ubound_unicode_version=unicode_version).values) + + # perform culling on any values that are already understood as 'wide' + # without the variation-16 selector + wide_table = wide_tables[unicode_version].as_value_ranges() + table[unicode_version].values = { + ucs for ucs in table[unicode_version].values + if not _bisearch(ucs, wide_table) + } + + return UnicodeTableRenderCtx('VS16_NARROW_TO_WIDE', table) + +def parse_vs16_data(fname: str, ubound_unicode_version: UnicodeVersion): + with open(fname, encoding='utf-8') as fin: + table_iter = parse_vs16_table(fin) + # pull "date string" + date = next(table_iter).comment.split(':', 1)[1].strip() + # pull values only matching this unicode version and lower + values = {entry.code_range[0] for entry in table_iter} + return TableDef(ubound_unicode_version, date, values) + + def cite_source_description(filename: str) -> tuple[str, str]: """Return unicode.org source data file's own description as citation.""" with open(filename, encoding='utf-8') as f: entry_iter = parse_unicode_table(f) fname = next(entry_iter).comment.strip() + # use local name w/version in place of 'emoji-variation-sequences.txt' + if fname == 'emoji-variation-sequences.txt': + fname = os.path.basename(filename) date = next(entry_iter).comment.strip() return fname, date @@ -401,10 +480,31 @@ def 
parse_unicode_table(file: Iterable[str]) -> Iterator[TableEntry]: yield TableEntry(code_range, tuple(properties), comment) +def parse_vs16_table(fp: Iterable[str]) -> Iterator[TableEntry]: + """ + Parse emoji-variation-sequences.txt for codepoints that preceed 0xFE0F + """ + hex_str_vs16 = 'FE0F' + for line in fp: + data, _, comment = line.partition('#') + data_fields: Iterator[str] = (field.strip() for field in data.split(';')) + code_points_str, *properties = data_fields + + if not code_points_str: + if 'Date' in comment: + # yield 'Data' + yield TableEntry(None, tuple(properties), comment) + continue + code_points = code_points_str.split() + if len(code_points) == 2 and code_points[1] == hex_str_vs16: + # yeild a single "code range" entry for a single value that preceeds FE0F + yield TableEntry((int(code_points[0], 16), int(code_points[0], 16)), tuple(properties), comment) + +@functools.cache def parse_category(fname: str, category_codes: Container[str], wide: int) -> TableDef: """Parse value ranges of unicode data files, by given categories into string tables.""" - print(f'parsing {fname}: ', end='', flush=True) + print(f'parsing {fname} category_codes={",".join(category_codes)}: ', end='', flush=True) with open(fname, encoding='utf-8') as f: table_iter = parse_unicode_table(f) @@ -417,55 +517,131 @@ def parse_category(fname: str, category_codes: Container[str], wide: int) -> Tab print('ok') return TableDef(version, date, values) +class UnicodeDataFile: + """ + Helper class for fetching Unicode Data Files + + Methods like 'DerivedAge' return a local filename, but have the side-effect of + fetching those files from unicode.org first, if not existing or out-of-date. 
-@functools.cache -def get_http_session() -> requests.Session: - session = requests.Session() - retries = urllib3.util.Retry(total=MAX_RETRIES, - backoff_factor=BACKOFF_FACTOR, - status_forcelist=[500, 502, 503, 504]) - session.mount('https://', requests.adapters.HTTPAdapter(max_retries=retries)) - return session - - -def is_url_newer(url: str, fname: str) -> bool: - if not os.path.exists(fname): - return True - if '--no-check-last-modified' not in sys.argv[1:]: - session = get_http_session() - resp = session.head(url, timeout=CONNECT_TIMEOUT) + Because file modification times are used, for local files of TestEmojiZWJSequences + and TestEmojiVariationSequences, these files should be forcefully re-fetched + CLI argument '--no-check-last-modified'. + """ + URL_DERIVED_AGE = 'https://www.unicode.org/Public/UCD/latest/ucd/DerivedAge.txt' + URL_EASTASIAN_WIDTH = 'https://www.unicode.org/Public/{version}/ucd/EastAsianWidth.txt' + URL_DERIVED_CATEGORY = 'https://www.unicode.org/Public/{version}/ucd/extracted/DerivedGeneralCategory.txt' + URL_EMOJI_VARIATION = 'https://unicode.org/Public/{version}/ucd/emoji/emoji-variation-sequences.txt' + URL_LEGACY_VARIATION = 'https://unicode.org/Public/emoji/{version}/emoji-variation-sequences.txt' + URL_EMOJI_ZWJ = 'https://unicode.org/Public/emoji/{version}/emoji-zwj-sequences.txt' + + @classmethod + def DerivedAge(cls) -> str: + fname = os.path.join(PATH_DATA, 'DerivedAge.txt') + cls.do_retrieve(url=cls.URL_DERIVED_AGE, fname=fname) + return fname + + @classmethod + def EastAsianWidth(cls, version: str) -> str: + fname = os.path.join(PATH_DATA, f'EastAsianWidth-{version}.txt') + cls.do_retrieve(url=cls.URL_EASTASIAN_WIDTH.format(version=version), fname=fname) + return fname + + @classmethod + def DerivedGeneralCategory(cls, version: str) -> str: + fname = os.path.join(PATH_DATA, f'DerivedGeneralCategory-{version}.txt') + cls.do_retrieve(url=cls.URL_DERIVED_CATEGORY.format(version=version), fname=fname) + return fname + + 
@classmethod + def EmojiVariationSequences(cls, version: str) -> str: + fname = os.path.join(PATH_DATA, f'emoji-variation-sequences-{version}.txt') + cls.do_retrieve(url=cls.URL_EMOJI_VARIATION.format(version=version), fname=fname) + return fname + + @classmethod + def LegacyEmojiVariationSequences(cls) -> str: + version = "12.0" + fname = os.path.join(PATH_DATA, f'emoji-variation-sequences-{version}.0.txt') + cls.do_retrieve(url=cls.URL_LEGACY_VARIATION.format(version=version), fname=fname) + return fname + + @classmethod + def TestEmojiVariationSequences(cls) -> str: + version = fetch_unicode_versions()[-1] + fname = os.path.join(PATH_TESTS, f'emoji-variation-sequences.txt') + cls.do_retrieve(url=cls.URL_EMOJI_VARIATION.format(version=version), fname=fname) + return fname + + @classmethod + def TestEmojiZWJSequences(cls) -> str: + version = fetch_unicode_versions()[-1] + fname = os.path.join(PATH_TESTS, f'emoji-zwj-sequences.txt') + cls.do_retrieve(url=cls.URL_EMOJI_ZWJ.format(version=f"{version.major}.{version.minor}"), fname=fname) + return fname + + @staticmethod + def do_retrieve(url: str, fname: str) -> None: + """Retrieve given url to target filepath fname.""" + folder = os.path.dirname(fname) + if folder and not os.path.exists(folder): + os.makedirs(folder) + if not UnicodeDataFile.is_url_newer(url, fname): + return + session = UnicodeDataFile.get_http_session() + resp = session.get(url, timeout=CONNECT_TIMEOUT) resp.raise_for_status() - remote_url_dt = dateutil.parser.parse(resp.headers['Last-Modified']).astimezone() - local_file_dt = datetime.datetime.fromtimestamp(os.path.getmtime(fname)).astimezone() - return remote_url_dt > local_file_dt - return False - - -def do_retrieve(url: str, fname: str) -> None: - """Retrieve given url to target filepath fname.""" - folder = os.path.dirname(fname) - if folder and not os.path.exists(folder): - os.makedirs(folder) - if not is_url_newer(url, fname): - return - session = get_http_session() - resp = 
session.get(url, timeout=CONNECT_TIMEOUT) - resp.raise_for_status() - print(f"saving {fname}: ", end='', flush=True) - with open(fname, 'wb') as fout: - for chunk in resp.iter_content(FETCH_BLOCKSIZE): - fout.write(chunk) - print('ok') + print(f"saving {fname}: ", end='', flush=True) + with open(fname, 'wb') as fout: + for chunk in resp.iter_content(FETCH_BLOCKSIZE): + fout.write(chunk) + print('ok') + + @staticmethod + def is_url_newer(url: str, fname: str) -> bool: + if not os.path.exists(fname): + return True + if '--no-check-last-modified' not in sys.argv[1:]: + session = UnicodeDataFile.get_http_session() + resp = session.head(url, timeout=CONNECT_TIMEOUT) + resp.raise_for_status() + remote_url_dt = dateutil.parser.parse(resp.headers['Last-Modified']).astimezone() + local_file_dt = datetime.datetime.fromtimestamp(os.path.getmtime(fname)).astimezone() + return remote_url_dt > local_file_dt + return False + + @functools.cache + def get_http_session() -> requests.Session: + session = requests.Session() + retries = urllib3.util.Retry(total=MAX_RETRIES, + backoff_factor=BACKOFF_FACTOR, + status_forcelist=[500, 502, 503, 504]) + session.mount('https://', requests.adapters.HTTPAdapter(max_retries=retries)) + return session + + @staticmethod + def filenames() -> list[str]: + """ + Return list of UnicodeData files stored in PATH_DATA, sorted by version number. 
+ """ + pattern = re.compile( + r'^(emoji-variation-sequences|DerivedGeneralCategory|EastAsianWidth)-(\d+)\.(\d+)\.(\d+).txt$') + filename_matches = [] + for fname in os.listdir(PATH_DATA): + if match := re.search(pattern, fname): + filename_matches.append(match) + filename_matches.sort(key=lambda m: ( + m.group(1), + int(m.group(2)), + int(m.group(3)), + int(m.group(4)), + )) + return [os.path.join(PATH_DATA, match.string) for match in filename_matches] + def main() -> None: """Update east-asian, combining and zero width tables.""" - if "--debug" in sys.argv[1:]: - loglevel = logging.DEBUG - else: - loglevel = logging.WARNING - logging.basicConfig(stream=sys.stderr, level=loglevel) - # This defines which jinja source templates map to which output filenames, # and what function defines the source data. We hope to add more source # language options using jinja2 templates, with minimal modification of the @@ -474,6 +650,7 @@ def main() -> None: yield UnicodeVersionPyRenderDef.new( UnicodeVersionPyRenderCtx(fetch_unicode_versions()) ) + yield UnicodeTableRenderDef.new('table_vs16.py', fetch_table_vs16_data()) yield UnicodeTableRenderDef.new('table_wide.py', fetch_table_wide_data()) yield UnicodeTableRenderDef.new('table_zero.py', fetch_table_zero_data()) yield UnicodeVersionRstRenderDef.new(fetch_source_headers()) @@ -485,7 +662,9 @@ def main() -> None: fout.write(data) print('ok') + # fetch latest test data files + UnicodeDataFile.TestEmojiVariationSequences() + UnicodeDataFile.TestEmojiZWJSequences() if __name__ == '__main__': - main() - + main() \ No newline at end of file diff --git c/docs/intro.rst i/docs/intro.rst index b73d670..b5d790d 100644 --- c/docs/intro.rst +++ i/docs/intro.rst @@ -216,6 +216,10 @@ Other Languages ======= History ======= +0.2.10 *2023-11-08* + * **Bugfix** account for Wide characters in wcswidth + when combined with U+FE0F Variation Selector 16 (`PR #XX`) + 0.2.9 *2023-10-30* * **Bugfix** zero-width characters used in Emoji ZWJ sequences, 
Balinese, Jamo, Devanagari, Tamil, Kannada and others (`PR #91`_). diff --git c/docs/specs.rst i/docs/specs.rst index a76101d..5d61ddb 100644 --- c/docs/specs.rst +++ i/docs/specs.rst @@ -52,3 +52,6 @@ Category codes of Nonspacing Mark (``Mn``) and Spacing Mark (``Mc``). Any characters of Modifier Symbol category, ``'Sk'`` where ``'FULLWIDTH'`` is present in comment of unicode data file, approx. 3 characters. + +Any character in sequence with U+FE0F (Variation Selector 16) defined by +Emoji Variation Sequences txt as ``emoji style``. \ No newline at end of file diff --git c/docs/unicode_version.rst i/docs/unicode_version.rst index 549d9a9..76737f9 100644 --- c/docs/unicode_version.rst +++ i/docs/unicode_version.rst @@ -121,3 +121,9 @@ release files: ``EastAsianWidth-15.1.0.txt`` *Date: 2023-07-28, 23:34:08 GMT* +``emoji-variation-sequences-12.0.0.txt`` + *Date: 2019-01-15, 12:10:05 GMT* + +``emoji-variation-sequences-15.1.0.txt`` + *Date: 2023-02-01, 02:22:54 GMT* + diff --git c/setup.py i/setup.py index b533784..ad4d72c 100755 --- c/setup.py +++ i/setup.py @@ -44,7 +44,7 @@ def main(): setuptools.setup( name='wcwidth', # NOTE: manually manage __version__ in wcwidth/__init__.py ! - version='0.2.9', + version='0.2.10', description=( "Measures the displayed width of unicode strings in a terminal"), long_description=codecs.open( diff --git c/tests/emoji-variation-sequences.txt i/tests/emoji-variation-sequences.txt new file mode 100644 index 0000000..d8a3c9f --- /dev/null +++ i/tests/emoji-variation-sequences.txt @@ -0,0 +1,757 @@ +# emoji-variation-sequences.txt +# Date: 2023-02-01, 02:22:54 GMT +# © 2023 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see https://www.unicode.org/terms_of_use.html +# +# Emoji Variation Sequences for UTS #51 +# Used with Emoji Version 15.1 and subsequent minor revisions (if any) +# +# For documentation and usage, see https://www.unicode.org/reports/tr51 +# +0023 FE0E ; text style; # (1.1) NUMBER SIGN +0023 FE0F ; emoji style; # (1.1) NUMBER SIGN +002A FE0E ; text style; # (1.1) ASTERISK +002A FE0F ; emoji style; # (1.1) ASTERISK +0030 FE0E ; text style; # (1.1) DIGIT ZERO +0030 FE0F ; emoji style; # (1.1) DIGIT ZERO +0031 FE0E ; text style; # (1.1) DIGIT ONE +0031 FE0F ; emoji style; # (1.1) DIGIT ONE +0032 FE0E ; text style; # (1.1) DIGIT TWO +0032 FE0F ; emoji style; # (1.1) DIGIT TWO +0033 FE0E ; text style; # (1.1) DIGIT THREE +0033 FE0F ; emoji style; # (1.1) DIGIT THREE +0034 FE0E ; text style; # (1.1) DIGIT FOUR +0034 FE0F ; emoji style; # (1.1) DIGIT FOUR +0035 FE0E ; text style; # (1.1) DIGIT FIVE +0035 FE0F ; emoji style; # (1.1) DIGIT FIVE +0036 FE0E ; text style; # (1.1) DIGIT SIX +0036 FE0F ; emoji style; # (1.1) DIGIT SIX +0037 FE0E ; text style; # (1.1) DIGIT SEVEN +0037 FE0F ; emoji style; # (1.1) DIGIT SEVEN +0038 FE0E ; text style; # (1.1) DIGIT EIGHT +0038 FE0F ; emoji style; # (1.1) DIGIT EIGHT +0039 FE0E ; text style; # (1.1) DIGIT NINE +0039 FE0F ; emoji style; # (1.1) DIGIT NINE +00A9 FE0E ; text style; # (1.1) COPYRIGHT SIGN +00A9 FE0F ; emoji style; # (1.1) COPYRIGHT SIGN +00AE FE0E ; text style; # (1.1) REGISTERED SIGN +00AE FE0F ; emoji style; # (1.1) REGISTERED SIGN +203C FE0E ; text style; # (1.1) DOUBLE EXCLAMATION MARK +203C FE0F ; emoji style; # (1.1) DOUBLE EXCLAMATION MARK +2049 FE0E ; text style; # (3.0) EXCLAMATION QUESTION MARK +2049 FE0F ; emoji style; # (3.0) EXCLAMATION QUESTION MARK +2122 FE0E ; text style; # (1.1) TRADE MARK SIGN +2122 FE0F ; emoji style; # (1.1) TRADE MARK SIGN +2139 FE0E ; text style; # (3.0) INFORMATION SOURCE +2139 FE0F ; emoji style; # (3.0) INFORMATION SOURCE +2194 FE0E ; text style; # (1.1) LEFT 
RIGHT ARROW +2194 FE0F ; emoji style; # (1.1) LEFT RIGHT ARROW +2195 FE0E ; text style; # (1.1) UP DOWN ARROW +2195 FE0F ; emoji style; # (1.1) UP DOWN ARROW +2196 FE0E ; text style; # (1.1) NORTH WEST ARROW +2196 FE0F ; emoji style; # (1.1) NORTH WEST ARROW +2197 FE0E ; text style; # (1.1) NORTH EAST ARROW +2197 FE0F ; emoji style; # (1.1) NORTH EAST ARROW +2198 FE0E ; text style; # (1.1) SOUTH EAST ARROW +2198 FE0F ; emoji style; # (1.1) SOUTH EAST ARROW +2199 FE0E ; text style; # (1.1) SOUTH WEST ARROW +2199 FE0F ; emoji style; # (1.1) SOUTH WEST ARROW +21A9 FE0E ; text style; # (1.1) LEFTWARDS ARROW WITH HOOK +21A9 FE0F ; emoji style; # (1.1) LEFTWARDS ARROW WITH HOOK +21AA FE0E ; text style; # (1.1) RIGHTWARDS ARROW WITH HOOK +21AA FE0F ; emoji style; # (1.1) RIGHTWARDS ARROW WITH HOOK +231A FE0E ; text style; # (1.1) WATCH +231A FE0F ; emoji style; # (1.1) WATCH +231B FE0E ; text style; # (1.1) HOURGLASS +231B FE0F ; emoji style; # (1.1) HOURGLASS +2328 FE0E ; text style; # (1.1) KEYBOARD +2328 FE0F ; emoji style; # (1.1) KEYBOARD +23CF FE0E ; text style; # (4.0) EJECT SYMBOL +23CF FE0F ; emoji style; # (4.0) EJECT SYMBOL +23E9 FE0E ; text style; # (6.0) BLACK RIGHT-POINTING DOUBLE TRIANGLE +23E9 FE0F ; emoji style; # (6.0) BLACK RIGHT-POINTING DOUBLE TRIANGLE +23EA FE0E ; text style; # (6.0) BLACK LEFT-POINTING DOUBLE TRIANGLE +23EA FE0F ; emoji style; # (6.0) BLACK LEFT-POINTING DOUBLE TRIANGLE +23EB FE0E ; text style; # (6.0) BLACK UP-POINTING DOUBLE TRIANGLE +23EB FE0F ; emoji style; # (6.0) BLACK UP-POINTING DOUBLE TRIANGLE +23EC FE0E ; text style; # (6.0) BLACK DOWN-POINTING DOUBLE TRIANGLE +23EC FE0F ; emoji style; # (6.0) BLACK DOWN-POINTING DOUBLE TRIANGLE +23ED FE0E ; text style; # (6.0) BLACK RIGHT-POINTING DOUBLE TRIANGLE WITH VERTICAL BAR +23ED FE0F ; emoji style; # (6.0) BLACK RIGHT-POINTING DOUBLE TRIANGLE WITH VERTICAL BAR +23EE FE0E ; text style; # (6.0) BLACK LEFT-POINTING DOUBLE TRIANGLE WITH VERTICAL BAR +23EE FE0F ; emoji style; # (6.0) 
BLACK LEFT-POINTING DOUBLE TRIANGLE WITH VERTICAL BAR +23EF FE0E ; text style; # (6.0) BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR +23EF FE0F ; emoji style; # (6.0) BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR +23F0 FE0E ; text style; # (6.0) ALARM CLOCK +23F0 FE0F ; emoji style; # (6.0) ALARM CLOCK +23F1 FE0E ; text style; # (6.0) STOPWATCH +23F1 FE0F ; emoji style; # (6.0) STOPWATCH +23F2 FE0E ; text style; # (6.0) TIMER CLOCK +23F2 FE0F ; emoji style; # (6.0) TIMER CLOCK +23F3 FE0E ; text style; # (6.0) HOURGLASS WITH FLOWING SAND +23F3 FE0F ; emoji style; # (6.0) HOURGLASS WITH FLOWING SAND +23F8 FE0E ; text style; # (7.0) DOUBLE VERTICAL BAR +23F8 FE0F ; emoji style; # (7.0) DOUBLE VERTICAL BAR +23F9 FE0E ; text style; # (7.0) BLACK SQUARE FOR STOP +23F9 FE0F ; emoji style; # (7.0) BLACK SQUARE FOR STOP +23FA FE0E ; text style; # (7.0) BLACK CIRCLE FOR RECORD +23FA FE0F ; emoji style; # (7.0) BLACK CIRCLE FOR RECORD +24C2 FE0E ; text style; # (1.1) CIRCLED LATIN CAPITAL LETTER M +24C2 FE0F ; emoji style; # (1.1) CIRCLED LATIN CAPITAL LETTER M +25AA FE0E ; text style; # (1.1) BLACK SMALL SQUARE +25AA FE0F ; emoji style; # (1.1) BLACK SMALL SQUARE +25AB FE0E ; text style; # (1.1) WHITE SMALL SQUARE +25AB FE0F ; emoji style; # (1.1) WHITE SMALL SQUARE +25B6 FE0E ; text style; # (1.1) BLACK RIGHT-POINTING TRIANGLE +25B6 FE0F ; emoji style; # (1.1) BLACK RIGHT-POINTING TRIANGLE +25C0 FE0E ; text style; # (1.1) BLACK LEFT-POINTING TRIANGLE +25C0 FE0F ; emoji style; # (1.1) BLACK LEFT-POINTING TRIANGLE +25FB FE0E ; text style; # (3.2) WHITE MEDIUM SQUARE +25FB FE0F ; emoji style; # (3.2) WHITE MEDIUM SQUARE +25FC FE0E ; text style; # (3.2) BLACK MEDIUM SQUARE +25FC FE0F ; emoji style; # (3.2) BLACK MEDIUM SQUARE +25FD FE0E ; text style; # (3.2) WHITE MEDIUM SMALL SQUARE +25FD FE0F ; emoji style; # (3.2) WHITE MEDIUM SMALL SQUARE +25FE FE0E ; text style; # (3.2) BLACK MEDIUM SMALL SQUARE +25FE FE0F ; emoji style; # (3.2) BLACK MEDIUM SMALL SQUARE 
+2600 FE0E ; text style; # (1.1) BLACK SUN WITH RAYS +2600 FE0F ; emoji style; # (1.1) BLACK SUN WITH RAYS +2601 FE0E ; text style; # (1.1) CLOUD +2601 FE0F ; emoji style; # (1.1) CLOUD +2602 FE0E ; text style; # (1.1) UMBRELLA +2602 FE0F ; emoji style; # (1.1) UMBRELLA +2603 FE0E ; text style; # (1.1) SNOWMAN +2603 FE0F ; emoji style; # (1.1) SNOWMAN +2604 FE0E ; text style; # (1.1) COMET +2604 FE0F ; emoji style; # (1.1) COMET +260E FE0E ; text style; # (1.1) BLACK TELEPHONE +260E FE0F ; emoji style; # (1.1) BLACK TELEPHONE +2611 FE0E ; text style; # (1.1) BALLOT BOX WITH CHECK +2611 FE0F ; emoji style; # (1.1) BALLOT BOX WITH CHECK +2614 FE0E ; text style; # (4.0) UMBRELLA WITH RAIN DROPS +2614 FE0F ; emoji style; # (4.0) UMBRELLA WITH RAIN DROPS +2615 FE0E ; text style; # (4.0) HOT BEVERAGE +2615 FE0F ; emoji style; # (4.0) HOT BEVERAGE +2618 FE0E ; text style; # (4.1) SHAMROCK +2618 FE0F ; emoji style; # (4.1) SHAMROCK +261D FE0E ; text style; # (1.1) WHITE UP POINTING INDEX +261D FE0F ; emoji style; # (1.1) WHITE UP POINTING INDEX +2620 FE0E ; text style; # (1.1) SKULL AND CROSSBONES +2620 FE0F ; emoji style; # (1.1) SKULL AND CROSSBONES +2622 FE0E ; text style; # (1.1) RADIOACTIVE SIGN +2622 FE0F ; emoji style; # (1.1) RADIOACTIVE SIGN +2623 FE0E ; text style; # (1.1) BIOHAZARD SIGN +2623 FE0F ; emoji style; # (1.1) BIOHAZARD SIGN +2626 FE0E ; text style; # (1.1) ORTHODOX CROSS +2626 FE0F ; emoji style; # (1.1) ORTHODOX CROSS +262A FE0E ; text style; # (1.1) STAR AND CRESCENT +262A FE0F ; emoji style; # (1.1) STAR AND CRESCENT +262E FE0E ; text style; # (1.1) PEACE SYMBOL +262E FE0F ; emoji style; # (1.1) PEACE SYMBOL +262F FE0E ; text style; # (1.1) YIN YANG +262F FE0F ; emoji style; # (1.1) YIN YANG +2638 FE0E ; text style; # (1.1) WHEEL OF DHARMA +2638 FE0F ; emoji style; # (1.1) WHEEL OF DHARMA +2639 FE0E ; text style; # (1.1) WHITE FROWNING FACE +2639 FE0F ; emoji style; # (1.1) WHITE FROWNING FACE +263A FE0E ; text style; # (1.1) WHITE SMILING FACE 
+263A FE0F ; emoji style; # (1.1) WHITE SMILING FACE +2640 FE0E ; text style; # (1.1) FEMALE SIGN +2640 FE0F ; emoji style; # (1.1) FEMALE SIGN +2642 FE0E ; text style; # (1.1) MALE SIGN +2642 FE0F ; emoji style; # (1.1) MALE SIGN +2648 FE0E ; text style; # (1.1) ARIES +2648 FE0F ; emoji style; # (1.1) ARIES +2649 FE0E ; text style; # (1.1) TAURUS +2649 FE0F ; emoji style; # (1.1) TAURUS +264A FE0E ; text style; # (1.1) GEMINI +264A FE0F ; emoji style; # (1.1) GEMINI +264B FE0E ; text style; # (1.1) CANCER +264B FE0F ; emoji style; # (1.1) CANCER +264C FE0E ; text style; # (1.1) LEO +264C FE0F ; emoji style; # (1.1) LEO +264D FE0E ; text style; # (1.1) VIRGO +264D FE0F ; emoji style; # (1.1) VIRGO +264E FE0E ; text style; # (1.1) LIBRA +264E FE0F ; emoji style; # (1.1) LIBRA +264F FE0E ; text style; # (1.1) SCORPIUS +264F FE0F ; emoji style; # (1.1) SCORPIUS +2650 FE0E ; text style; # (1.1) SAGITTARIUS +2650 FE0F ; emoji style; # (1.1) SAGITTARIUS +2651 FE0E ; text style; # (1.1) CAPRICORN +2651 FE0F ; emoji style; # (1.1) CAPRICORN +2652 FE0E ; text style; # (1.1) AQUARIUS +2652 FE0F ; emoji style; # (1.1) AQUARIUS +2653 FE0E ; text style; # (1.1) PISCES +2653 FE0F ; emoji style; # (1.1) PISCES +265F FE0E ; text style; # (1.1) BLACK CHESS PAWN +265F FE0F ; emoji style; # (1.1) BLACK CHESS PAWN +2660 FE0E ; text style; # (1.1) BLACK SPADE SUIT +2660 FE0F ; emoji style; # (1.1) BLACK SPADE SUIT +2663 FE0E ; text style; # (1.1) BLACK CLUB SUIT +2663 FE0F ; emoji style; # (1.1) BLACK CLUB SUIT +2665 FE0E ; text style; # (1.1) BLACK HEART SUIT +2665 FE0F ; emoji style; # (1.1) BLACK HEART SUIT +2666 FE0E ; text style; # (1.1) BLACK DIAMOND SUIT +2666 FE0F ; emoji style; # (1.1) BLACK DIAMOND SUIT +2668 FE0E ; text style; # (1.1) HOT SPRINGS +2668 FE0F ; emoji style; # (1.1) HOT SPRINGS +267B FE0E ; text style; # (3.2) BLACK UNIVERSAL RECYCLING SYMBOL +267B FE0F ; emoji style; # (3.2) BLACK UNIVERSAL RECYCLING SYMBOL +267E FE0E ; text style; # (4.1) PERMANENT PAPER SIGN 
+267E FE0F ; emoji style; # (4.1) PERMANENT PAPER SIGN +267F FE0E ; text style; # (4.1) WHEELCHAIR SYMBOL +267F FE0F ; emoji style; # (4.1) WHEELCHAIR SYMBOL +2692 FE0E ; text style; # (4.1) HAMMER AND PICK +2692 FE0F ; emoji style; # (4.1) HAMMER AND PICK +2693 FE0E ; text style; # (4.1) ANCHOR +2693 FE0F ; emoji style; # (4.1) ANCHOR +2694 FE0E ; text style; # (4.1) CROSSED SWORDS +2694 FE0F ; emoji style; # (4.1) CROSSED SWORDS +2695 FE0E ; text style; # (4.1) STAFF OF AESCULAPIUS +2695 FE0F ; emoji style; # (4.1) STAFF OF AESCULAPIUS +2696 FE0E ; text style; # (4.1) SCALES +2696 FE0F ; emoji style; # (4.1) SCALES +2697 FE0E ; text style; # (4.1) ALEMBIC +2697 FE0F ; emoji style; # (4.1) ALEMBIC +2699 FE0E ; text style; # (4.1) GEAR +2699 FE0F ; emoji style; # (4.1) GEAR +269B FE0E ; text style; # (4.1) ATOM SYMBOL +269B FE0F ; emoji style; # (4.1) ATOM SYMBOL +269C FE0E ; text style; # (4.1) FLEUR-DE-LIS +269C FE0F ; emoji style; # (4.1) FLEUR-DE-LIS +26A0 FE0E ; text style; # (4.0) WARNING SIGN +26A0 FE0F ; emoji style; # (4.0) WARNING SIGN +26A1 FE0E ; text style; # (4.0) HIGH VOLTAGE SIGN +26A1 FE0F ; emoji style; # (4.0) HIGH VOLTAGE SIGN +26A7 FE0E ; text style; # (4.1) MALE WITH STROKE AND MALE AND FEMALE SIGN +26A7 FE0F ; emoji style; # (4.1) MALE WITH STROKE AND MALE AND FEMALE SIGN +26AA FE0E ; text style; # (4.1) MEDIUM WHITE CIRCLE +26AA FE0F ; emoji style; # (4.1) MEDIUM WHITE CIRCLE +26AB FE0E ; text style; # (4.1) MEDIUM BLACK CIRCLE +26AB FE0F ; emoji style; # (4.1) MEDIUM BLACK CIRCLE +26B0 FE0E ; text style; # (4.1) COFFIN +26B0 FE0F ; emoji style; # (4.1) COFFIN +26B1 FE0E ; text style; # (4.1) FUNERAL URN +26B1 FE0F ; emoji style; # (4.1) FUNERAL URN +26BD FE0E ; text style; # (5.2) SOCCER BALL +26BD FE0F ; emoji style; # (5.2) SOCCER BALL +26BE FE0E ; text style; # (5.2) BASEBALL +26BE FE0F ; emoji style; # (5.2) BASEBALL +26C4 FE0E ; text style; # (5.2) SNOWMAN WITHOUT SNOW +26C4 FE0F ; emoji style; # (5.2) SNOWMAN WITHOUT SNOW +26C5 FE0E ; 
text style; # (5.2) SUN BEHIND CLOUD +26C5 FE0F ; emoji style; # (5.2) SUN BEHIND CLOUD +26C8 FE0E ; text style; # (5.2) THUNDER CLOUD AND RAIN +26C8 FE0F ; emoji style; # (5.2) THUNDER CLOUD AND RAIN +26CE FE0E ; text style; # (6.0) OPHIUCHUS +26CE FE0F ; emoji style; # (6.0) OPHIUCHUS +26CF FE0E ; text style; # (5.2) PICK +26CF FE0F ; emoji style; # (5.2) PICK +26D1 FE0E ; text style; # (5.2) HELMET WITH WHITE CROSS +26D1 FE0F ; emoji style; # (5.2) HELMET WITH WHITE CROSS +26D3 FE0E ; text style; # (5.2) CHAINS +26D3 FE0F ; emoji style; # (5.2) CHAINS +26D4 FE0E ; text style; # (5.2) NO ENTRY +26D4 FE0F ; emoji style; # (5.2) NO ENTRY +26E9 FE0E ; text style; # (5.2) SHINTO SHRINE +26E9 FE0F ; emoji style; # (5.2) SHINTO SHRINE +26EA FE0E ; text style; # (5.2) CHURCH +26EA FE0F ; emoji style; # (5.2) CHURCH +26F0 FE0E ; text style; # (5.2) MOUNTAIN +26F0 FE0F ; emoji style; # (5.2) MOUNTAIN +26F1 FE0E ; text style; # (5.2) UMBRELLA ON GROUND +26F1 FE0F ; emoji style; # (5.2) UMBRELLA ON GROUND +26F2 FE0E ; text style; # (5.2) FOUNTAIN +26F2 FE0F ; emoji style; # (5.2) FOUNTAIN +26F3 FE0E ; text style; # (5.2) FLAG IN HOLE +26F3 FE0F ; emoji style; # (5.2) FLAG IN HOLE +26F4 FE0E ; text style; # (5.2) FERRY +26F4 FE0F ; emoji style; # (5.2) FERRY +26F5 FE0E ; text style; # (5.2) SAILBOAT +26F5 FE0F ; emoji style; # (5.2) SAILBOAT +26F7 FE0E ; text style; # (5.2) SKIER +26F7 FE0F ; emoji style; # (5.2) SKIER +26F8 FE0E ; text style; # (5.2) ICE SKATE +26F8 FE0F ; emoji style; # (5.2) ICE SKATE +26F9 FE0E ; text style; # (5.2) PERSON WITH BALL +26F9 FE0F ; emoji style; # (5.2) PERSON WITH BALL +26FA FE0E ; text style; # (5.2) TENT +26FA FE0F ; emoji style; # (5.2) TENT +26FD FE0E ; text style; # (5.2) FUEL PUMP +26FD FE0F ; emoji style; # (5.2) FUEL PUMP +2702 FE0E ; text style; # (1.1) BLACK SCISSORS +2702 FE0F ; emoji style; # (1.1) BLACK SCISSORS +2705 FE0E ; text style; # (6.0) WHITE HEAVY CHECK MARK +2705 FE0F ; emoji style; # (6.0) WHITE HEAVY CHECK MARK 
+2708 FE0E ; text style; # (1.1) AIRPLANE +2708 FE0F ; emoji style; # (1.1) AIRPLANE +2709 FE0E ; text style; # (1.1) ENVELOPE +2709 FE0F ; emoji style; # (1.1) ENVELOPE +270A FE0E ; text style; # (6.0) RAISED FIST +270A FE0F ; emoji style; # (6.0) RAISED FIST +270B FE0E ; text style; # (6.0) RAISED HAND +270B FE0F ; emoji style; # (6.0) RAISED HAND +270C FE0E ; text style; # (1.1) VICTORY HAND +270C FE0F ; emoji style; # (1.1) VICTORY HAND +270D FE0E ; text style; # (1.1) WRITING HAND +270D FE0F ; emoji style; # (1.1) WRITING HAND +270F FE0E ; text style; # (1.1) PENCIL +270F FE0F ; emoji style; # (1.1) PENCIL +2712 FE0E ; text style; # (1.1) BLACK NIB +2712 FE0F ; emoji style; # (1.1) BLACK NIB +2714 FE0E ; text style; # (1.1) HEAVY CHECK MARK +2714 FE0F ; emoji style; # (1.1) HEAVY CHECK MARK +2716 FE0E ; text style; # (1.1) HEAVY MULTIPLICATION X +2716 FE0F ; emoji style; # (1.1) HEAVY MULTIPLICATION X +271D FE0E ; text style; # (1.1) LATIN CROSS +271D FE0F ; emoji style; # (1.1) LATIN CROSS +2721 FE0E ; text style; # (1.1) STAR OF DAVID +2721 FE0F ; emoji style; # (1.1) STAR OF DAVID +2728 FE0E ; text style; # (6.0) SPARKLES +2728 FE0F ; emoji style; # (6.0) SPARKLES +2733 FE0E ; text style; # (1.1) EIGHT SPOKED ASTERISK +2733 FE0F ; emoji style; # (1.1) EIGHT SPOKED ASTERISK +2734 FE0E ; text style; # (1.1) EIGHT POINTED BLACK STAR +2734 FE0F ; emoji style; # (1.1) EIGHT POINTED BLACK STAR +2744 FE0E ; text style; # (1.1) SNOWFLAKE +2744 FE0F ; emoji style; # (1.1) SNOWFLAKE +2747 FE0E ; text style; # (1.1) SPARKLE +2747 FE0F ; emoji style; # (1.1) SPARKLE +274C FE0E ; text style; # (6.0) CROSS MARK +274C FE0F ; emoji style; # (6.0) CROSS MARK +274E FE0E ; text style; # (6.0) NEGATIVE SQUARED CROSS MARK +274E FE0F ; emoji style; # (6.0) NEGATIVE SQUARED CROSS MARK +2753 FE0E ; text style; # (6.0) BLACK QUESTION MARK ORNAMENT +2753 FE0F ; emoji style; # (6.0) BLACK QUESTION MARK ORNAMENT +2754 FE0E ; text style; # (6.0) WHITE QUESTION MARK ORNAMENT +2754 FE0F 
; emoji style; # (6.0) WHITE QUESTION MARK ORNAMENT +2755 FE0E ; text style; # (6.0) WHITE EXCLAMATION MARK ORNAMENT +2755 FE0F ; emoji style; # (6.0) WHITE EXCLAMATION MARK ORNAMENT +2757 FE0E ; text style; # (5.2) HEAVY EXCLAMATION MARK SYMBOL +2757 FE0F ; emoji style; # (5.2) HEAVY EXCLAMATION MARK SYMBOL +2763 FE0E ; text style; # (1.1) HEAVY HEART EXCLAMATION MARK ORNAMENT +2763 FE0F ; emoji style; # (1.1) HEAVY HEART EXCLAMATION MARK ORNAMENT +2764 FE0E ; text style; # (1.1) HEAVY BLACK HEART +2764 FE0F ; emoji style; # (1.1) HEAVY BLACK HEART +2795 FE0E ; text style; # (6.0) HEAVY PLUS SIGN +2795 FE0F ; emoji style; # (6.0) HEAVY PLUS SIGN +2796 FE0E ; text style; # (6.0) HEAVY MINUS SIGN +2796 FE0F ; emoji style; # (6.0) HEAVY MINUS SIGN +2797 FE0E ; text style; # (6.0) HEAVY DIVISION SIGN +2797 FE0F ; emoji style; # (6.0) HEAVY DIVISION SIGN +27A1 FE0E ; text style; # (1.1) BLACK RIGHTWARDS ARROW +27A1 FE0F ; emoji style; # (1.1) BLACK RIGHTWARDS ARROW +27B0 FE0E ; text style; # (6.0) CURLY LOOP +27B0 FE0F ; emoji style; # (6.0) CURLY LOOP +27BF FE0E ; text style; # (6.0) DOUBLE CURLY LOOP +27BF FE0F ; emoji style; # (6.0) DOUBLE CURLY LOOP +2934 FE0E ; text style; # (3.2) ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS +2934 FE0F ; emoji style; # (3.2) ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS +2935 FE0E ; text style; # (3.2) ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS +2935 FE0F ; emoji style; # (3.2) ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS +2B05 FE0E ; text style; # (4.0) LEFTWARDS BLACK ARROW +2B05 FE0F ; emoji style; # (4.0) LEFTWARDS BLACK ARROW +2B06 FE0E ; text style; # (4.0) UPWARDS BLACK ARROW +2B06 FE0F ; emoji style; # (4.0) UPWARDS BLACK ARROW +2B07 FE0E ; text style; # (4.0) DOWNWARDS BLACK ARROW +2B07 FE0F ; emoji style; # (4.0) DOWNWARDS BLACK ARROW +2B1B FE0E ; text style; # (5.1) BLACK LARGE SQUARE +2B1B FE0F ; emoji style; # (5.1) BLACK LARGE SQUARE +2B1C FE0E ; text style; # (5.1) WHITE LARGE SQUARE +2B1C FE0F ; emoji 
style; # (5.1) WHITE LARGE SQUARE +2B50 FE0E ; text style; # (5.1) WHITE MEDIUM STAR +2B50 FE0F ; emoji style; # (5.1) WHITE MEDIUM STAR +2B55 FE0E ; text style; # (5.2) HEAVY LARGE CIRCLE +2B55 FE0F ; emoji style; # (5.2) HEAVY LARGE CIRCLE +3030 FE0E ; text style; # (1.1) WAVY DASH +3030 FE0F ; emoji style; # (1.1) WAVY DASH +303D FE0E ; text style; # (3.2) PART ALTERNATION MARK +303D FE0F ; emoji style; # (3.2) PART ALTERNATION MARK +3297 FE0E ; text style; # (1.1) CIRCLED IDEOGRAPH CONGRATULATION +3297 FE0F ; emoji style; # (1.1) CIRCLED IDEOGRAPH CONGRATULATION +3299 FE0E ; text style; # (1.1) CIRCLED IDEOGRAPH SECRET +3299 FE0F ; emoji style; # (1.1) CIRCLED IDEOGRAPH SECRET +1F004 FE0E ; text style; # (5.1) MAHJONG TILE RED DRAGON +1F004 FE0F ; emoji style; # (5.1) MAHJONG TILE RED DRAGON +1F170 FE0E ; text style; # (6.0) NEGATIVE SQUARED LATIN CAPITAL LETTER A +1F170 FE0F ; emoji style; # (6.0) NEGATIVE SQUARED LATIN CAPITAL LETTER A +1F171 FE0E ; text style; # (6.0) NEGATIVE SQUARED LATIN CAPITAL LETTER B +1F171 FE0F ; emoji style; # (6.0) NEGATIVE SQUARED LATIN CAPITAL LETTER B +1F17E FE0E ; text style; # (6.0) NEGATIVE SQUARED LATIN CAPITAL LETTER O +1F17E FE0F ; emoji style; # (6.0) NEGATIVE SQUARED LATIN CAPITAL LETTER O +1F17F FE0E ; text style; # (5.2) NEGATIVE SQUARED LATIN CAPITAL LETTER P +1F17F FE0F ; emoji style; # (5.2) NEGATIVE SQUARED LATIN CAPITAL LETTER P +1F202 FE0E ; text style; # (6.0) SQUARED KATAKANA SA +1F202 FE0F ; emoji style; # (6.0) SQUARED KATAKANA SA +1F21A FE0E ; text style; # (5.2) SQUARED CJK UNIFIED IDEOGRAPH-7121 +1F21A FE0F ; emoji style; # (5.2) SQUARED CJK UNIFIED IDEOGRAPH-7121 +1F22F FE0E ; text style; # (5.2) SQUARED CJK UNIFIED IDEOGRAPH-6307 +1F22F FE0F ; emoji style; # (5.2) SQUARED CJK UNIFIED IDEOGRAPH-6307 +1F237 FE0E ; text style; # (6.0) SQUARED CJK UNIFIED IDEOGRAPH-6708 +1F237 FE0F ; emoji style; # (6.0) SQUARED CJK UNIFIED IDEOGRAPH-6708 +1F30D FE0E ; text style; # (6.0) EARTH GLOBE EUROPE-AFRICA +1F30D 
FE0F ; emoji style; # (6.0) EARTH GLOBE EUROPE-AFRICA +1F30E FE0E ; text style; # (6.0) EARTH GLOBE AMERICAS +1F30E FE0F ; emoji style; # (6.0) EARTH GLOBE AMERICAS +1F30F FE0E ; text style; # (6.0) EARTH GLOBE ASIA-AUSTRALIA +1F30F FE0F ; emoji style; # (6.0) EARTH GLOBE ASIA-AUSTRALIA +1F315 FE0E ; text style; # (6.0) FULL MOON SYMBOL +1F315 FE0F ; emoji style; # (6.0) FULL MOON SYMBOL +1F31C FE0E ; text style; # (6.0) LAST QUARTER MOON WITH FACE +1F31C FE0F ; emoji style; # (6.0) LAST QUARTER MOON WITH FACE +1F321 FE0E ; text style; # (7.0) THERMOMETER +1F321 FE0F ; emoji style; # (7.0) THERMOMETER +1F324 FE0E ; text style; # (7.0) WHITE SUN WITH SMALL CLOUD +1F324 FE0F ; emoji style; # (7.0) WHITE SUN WITH SMALL CLOUD +1F325 FE0E ; text style; # (7.0) WHITE SUN BEHIND CLOUD +1F325 FE0F ; emoji style; # (7.0) WHITE SUN BEHIND CLOUD +1F326 FE0E ; text style; # (7.0) WHITE SUN BEHIND CLOUD WITH RAIN +1F326 FE0F ; emoji style; # (7.0) WHITE SUN BEHIND CLOUD WITH RAIN +1F327 FE0E ; text style; # (7.0) CLOUD WITH RAIN +1F327 FE0F ; emoji style; # (7.0) CLOUD WITH RAIN +1F328 FE0E ; text style; # (7.0) CLOUD WITH SNOW +1F328 FE0F ; emoji style; # (7.0) CLOUD WITH SNOW +1F329 FE0E ; text style; # (7.0) CLOUD WITH LIGHTNING +1F329 FE0F ; emoji style; # (7.0) CLOUD WITH LIGHTNING +1F32A FE0E ; text style; # (7.0) CLOUD WITH TORNADO +1F32A FE0F ; emoji style; # (7.0) CLOUD WITH TORNADO +1F32B FE0E ; text style; # (7.0) FOG +1F32B FE0F ; emoji style; # (7.0) FOG +1F32C FE0E ; text style; # (7.0) WIND BLOWING FACE +1F32C FE0F ; emoji style; # (7.0) WIND BLOWING FACE +1F336 FE0E ; text style; # (7.0) HOT PEPPER +1F336 FE0F ; emoji style; # (7.0) HOT PEPPER +1F378 FE0E ; text style; # (6.0) COCKTAIL GLASS +1F378 FE0F ; emoji style; # (6.0) COCKTAIL GLASS +1F37D FE0E ; text style; # (7.0) FORK AND KNIFE WITH PLATE +1F37D FE0F ; emoji style; # (7.0) FORK AND KNIFE WITH PLATE +1F393 FE0E ; text style; # (6.0) GRADUATION CAP +1F393 FE0F ; emoji style; # (6.0) GRADUATION CAP 
+1F396 FE0E ; text style; # (7.0) MILITARY MEDAL +1F396 FE0F ; emoji style; # (7.0) MILITARY MEDAL +1F397 FE0E ; text style; # (7.0) REMINDER RIBBON +1F397 FE0F ; emoji style; # (7.0) REMINDER RIBBON +1F399 FE0E ; text style; # (7.0) STUDIO MICROPHONE +1F399 FE0F ; emoji style; # (7.0) STUDIO MICROPHONE +1F39A FE0E ; text style; # (7.0) LEVEL SLIDER +1F39A FE0F ; emoji style; # (7.0) LEVEL SLIDER +1F39B FE0E ; text style; # (7.0) CONTROL KNOBS +1F39B FE0F ; emoji style; # (7.0) CONTROL KNOBS +1F39E FE0E ; text style; # (7.0) FILM FRAMES +1F39E FE0F ; emoji style; # (7.0) FILM FRAMES +1F39F FE0E ; text style; # (7.0) ADMISSION TICKETS +1F39F FE0F ; emoji style; # (7.0) ADMISSION TICKETS +1F3A7 FE0E ; text style; # (6.0) HEADPHONE +1F3A7 FE0F ; emoji style; # (6.0) HEADPHONE +1F3AC FE0E ; text style; # (6.0) CLAPPER BOARD +1F3AC FE0F ; emoji style; # (6.0) CLAPPER BOARD +1F3AD FE0E ; text style; # (6.0) PERFORMING ARTS +1F3AD FE0F ; emoji style; # (6.0) PERFORMING ARTS +1F3AE FE0E ; text style; # (6.0) VIDEO GAME +1F3AE FE0F ; emoji style; # (6.0) VIDEO GAME +1F3C2 FE0E ; text style; # (6.0) SNOWBOARDER +1F3C2 FE0F ; emoji style; # (6.0) SNOWBOARDER +1F3C4 FE0E ; text style; # (6.0) SURFER +1F3C4 FE0F ; emoji style; # (6.0) SURFER +1F3C6 FE0E ; text style; # (6.0) TROPHY +1F3C6 FE0F ; emoji style; # (6.0) TROPHY +1F3CA FE0E ; text style; # (6.0) SWIMMER +1F3CA FE0F ; emoji style; # (6.0) SWIMMER +1F3CB FE0E ; text style; # (7.0) WEIGHT LIFTER +1F3CB FE0F ; emoji style; # (7.0) WEIGHT LIFTER +1F3CC FE0E ; text style; # (7.0) GOLFER +1F3CC FE0F ; emoji style; # (7.0) GOLFER +1F3CD FE0E ; text style; # (7.0) RACING MOTORCYCLE +1F3CD FE0F ; emoji style; # (7.0) RACING MOTORCYCLE +1F3CE FE0E ; text style; # (7.0) RACING CAR +1F3CE FE0F ; emoji style; # (7.0) RACING CAR +1F3D4 FE0E ; text style; # (7.0) SNOW CAPPED MOUNTAIN +1F3D4 FE0F ; emoji style; # (7.0) SNOW CAPPED MOUNTAIN +1F3D5 FE0E ; text style; # (7.0) CAMPING +1F3D5 FE0F ; emoji style; # (7.0) CAMPING +1F3D6 
FE0E ; text style; # (7.0) BEACH WITH UMBRELLA +1F3D6 FE0F ; emoji style; # (7.0) BEACH WITH UMBRELLA +1F3D7 FE0E ; text style; # (7.0) BUILDING CONSTRUCTION +1F3D7 FE0F ; emoji style; # (7.0) BUILDING CONSTRUCTION +1F3D8 FE0E ; text style; # (7.0) HOUSE BUILDINGS +1F3D8 FE0F ; emoji style; # (7.0) HOUSE BUILDINGS +1F3D9 FE0E ; text style; # (7.0) CITYSCAPE +1F3D9 FE0F ; emoji style; # (7.0) CITYSCAPE +1F3DA FE0E ; text style; # (7.0) DERELICT HOUSE BUILDING +1F3DA FE0F ; emoji style; # (7.0) DERELICT HOUSE BUILDING +1F3DB FE0E ; text style; # (7.0) CLASSICAL BUILDING +1F3DB FE0F ; emoji style; # (7.0) CLASSICAL BUILDING +1F3DC FE0E ; text style; # (7.0) DESERT +1F3DC FE0F ; emoji style; # (7.0) DESERT +1F3DD FE0E ; text style; # (7.0) DESERT ISLAND +1F3DD FE0F ; emoji style; # (7.0) DESERT ISLAND +1F3DE FE0E ; text style; # (7.0) NATIONAL PARK +1F3DE FE0F ; emoji style; # (7.0) NATIONAL PARK +1F3DF FE0E ; text style; # (7.0) STADIUM +1F3DF FE0F ; emoji style; # (7.0) STADIUM +1F3E0 FE0E ; text style; # (6.0) HOUSE BUILDING +1F3E0 FE0F ; emoji style; # (6.0) HOUSE BUILDING +1F3ED FE0E ; text style; # (6.0) FACTORY +1F3ED FE0F ; emoji style; # (6.0) FACTORY +1F3F3 FE0E ; text style; # (7.0) WAVING WHITE FLAG +1F3F3 FE0F ; emoji style; # (7.0) WAVING WHITE FLAG +1F3F5 FE0E ; text style; # (7.0) ROSETTE +1F3F5 FE0F ; emoji style; # (7.0) ROSETTE +1F3F7 FE0E ; text style; # (7.0) LABEL +1F3F7 FE0F ; emoji style; # (7.0) LABEL +1F408 FE0E ; text style; # (6.0) CAT +1F408 FE0F ; emoji style; # (6.0) CAT +1F415 FE0E ; text style; # (6.0) DOG +1F415 FE0F ; emoji style; # (6.0) DOG +1F41F FE0E ; text style; # (6.0) FISH +1F41F FE0F ; emoji style; # (6.0) FISH +1F426 FE0E ; text style; # (6.0) BIRD +1F426 FE0F ; emoji style; # (6.0) BIRD +1F43F FE0E ; text style; # (7.0) CHIPMUNK +1F43F FE0F ; emoji style; # (7.0) CHIPMUNK +1F441 FE0E ; text style; # (7.0) EYE +1F441 FE0F ; emoji style; # (7.0) EYE +1F442 FE0E ; text style; # (6.0) EAR +1F442 FE0F ; emoji style; # (6.0) EAR 
+1F446 FE0E ; text style; # (6.0) WHITE UP POINTING BACKHAND INDEX +1F446 FE0F ; emoji style; # (6.0) WHITE UP POINTING BACKHAND INDEX +1F447 FE0E ; text style; # (6.0) WHITE DOWN POINTING BACKHAND INDEX +1F447 FE0F ; emoji style; # (6.0) WHITE DOWN POINTING BACKHAND INDEX +1F448 FE0E ; text style; # (6.0) WHITE LEFT POINTING BACKHAND INDEX +1F448 FE0F ; emoji style; # (6.0) WHITE LEFT POINTING BACKHAND INDEX +1F449 FE0E ; text style; # (6.0) WHITE RIGHT POINTING BACKHAND INDEX +1F449 FE0F ; emoji style; # (6.0) WHITE RIGHT POINTING BACKHAND INDEX +1F44D FE0E ; text style; # (6.0) THUMBS UP SIGN +1F44D FE0F ; emoji style; # (6.0) THUMBS UP SIGN +1F44E FE0E ; text style; # (6.0) THUMBS DOWN SIGN +1F44E FE0F ; emoji style; # (6.0) THUMBS DOWN SIGN +1F453 FE0E ; text style; # (6.0) EYEGLASSES +1F453 FE0F ; emoji style; # (6.0) EYEGLASSES +1F46A FE0E ; text style; # (6.0) FAMILY +1F46A FE0F ; emoji style; # (6.0) FAMILY +1F47D FE0E ; text style; # (6.0) EXTRATERRESTRIAL ALIEN +1F47D FE0F ; emoji style; # (6.0) EXTRATERRESTRIAL ALIEN +1F4A3 FE0E ; text style; # (6.0) BOMB +1F4A3 FE0F ; emoji style; # (6.0) BOMB +1F4B0 FE0E ; text style; # (6.0) MONEY BAG +1F4B0 FE0F ; emoji style; # (6.0) MONEY BAG +1F4B3 FE0E ; text style; # (6.0) CREDIT CARD +1F4B3 FE0F ; emoji style; # (6.0) CREDIT CARD +1F4BB FE0E ; text style; # (6.0) PERSONAL COMPUTER +1F4BB FE0F ; emoji style; # (6.0) PERSONAL COMPUTER +1F4BF FE0E ; text style; # (6.0) OPTICAL DISC +1F4BF FE0F ; emoji style; # (6.0) OPTICAL DISC +1F4CB FE0E ; text style; # (6.0) CLIPBOARD +1F4CB FE0F ; emoji style; # (6.0) CLIPBOARD +1F4DA FE0E ; text style; # (6.0) BOOKS +1F4DA FE0F ; emoji style; # (6.0) BOOKS +1F4DF FE0E ; text style; # (6.0) PAGER +1F4DF FE0F ; emoji style; # (6.0) PAGER +1F4E4 FE0E ; text style; # (6.0) OUTBOX TRAY +1F4E4 FE0F ; emoji style; # (6.0) OUTBOX TRAY +1F4E5 FE0E ; text style; # (6.0) INBOX TRAY +1F4E5 FE0F ; emoji style; # (6.0) INBOX TRAY +1F4E6 FE0E ; text style; # (6.0) PACKAGE +1F4E6 FE0F ; 
emoji style; # (6.0) PACKAGE +1F4EA FE0E ; text style; # (6.0) CLOSED MAILBOX WITH LOWERED FLAG +1F4EA FE0F ; emoji style; # (6.0) CLOSED MAILBOX WITH LOWERED FLAG +1F4EB FE0E ; text style; # (6.0) CLOSED MAILBOX WITH RAISED FLAG +1F4EB FE0F ; emoji style; # (6.0) CLOSED MAILBOX WITH RAISED FLAG +1F4EC FE0E ; text style; # (6.0) OPEN MAILBOX WITH RAISED FLAG +1F4EC FE0F ; emoji style; # (6.0) OPEN MAILBOX WITH RAISED FLAG +1F4ED FE0E ; text style; # (6.0) OPEN MAILBOX WITH LOWERED FLAG +1F4ED FE0F ; emoji style; # (6.0) OPEN MAILBOX WITH LOWERED FLAG +1F4F7 FE0E ; text style; # (6.0) CAMERA +1F4F7 FE0F ; emoji style; # (6.0) CAMERA +1F4F9 FE0E ; text style; # (6.0) VIDEO CAMERA +1F4F9 FE0F ; emoji style; # (6.0) VIDEO CAMERA +1F4FA FE0E ; text style; # (6.0) TELEVISION +1F4FA FE0F ; emoji style; # (6.0) TELEVISION +1F4FB FE0E ; text style; # (6.0) RADIO +1F4FB FE0F ; emoji style; # (6.0) RADIO +1F4FD FE0E ; text style; # (7.0) FILM PROJECTOR +1F4FD FE0F ; emoji style; # (7.0) FILM PROJECTOR +1F508 FE0E ; text style; # (6.0) SPEAKER +1F508 FE0F ; emoji style; # (6.0) SPEAKER +1F50D FE0E ; text style; # (6.0) LEFT-POINTING MAGNIFYING GLASS +1F50D FE0F ; emoji style; # (6.0) LEFT-POINTING MAGNIFYING GLASS +1F512 FE0E ; text style; # (6.0) LOCK +1F512 FE0F ; emoji style; # (6.0) LOCK +1F513 FE0E ; text style; # (6.0) OPEN LOCK +1F513 FE0F ; emoji style; # (6.0) OPEN LOCK +1F549 FE0E ; text style; # (7.0) OM SYMBOL +1F549 FE0F ; emoji style; # (7.0) OM SYMBOL +1F54A FE0E ; text style; # (7.0) DOVE OF PEACE +1F54A FE0F ; emoji style; # (7.0) DOVE OF PEACE +1F550 FE0E ; text style; # (6.0) CLOCK FACE ONE OCLOCK +1F550 FE0F ; emoji style; # (6.0) CLOCK FACE ONE OCLOCK +1F551 FE0E ; text style; # (6.0) CLOCK FACE TWO OCLOCK +1F551 FE0F ; emoji style; # (6.0) CLOCK FACE TWO OCLOCK +1F552 FE0E ; text style; # (6.0) CLOCK FACE THREE OCLOCK +1F552 FE0F ; emoji style; # (6.0) CLOCK FACE THREE OCLOCK +1F553 FE0E ; text style; # (6.0) CLOCK FACE FOUR OCLOCK +1F553 FE0F ; emoji 
style; # (6.0) CLOCK FACE FOUR OCLOCK +1F554 FE0E ; text style; # (6.0) CLOCK FACE FIVE OCLOCK +1F554 FE0F ; emoji style; # (6.0) CLOCK FACE FIVE OCLOCK +1F555 FE0E ; text style; # (6.0) CLOCK FACE SIX OCLOCK +1F555 FE0F ; emoji style; # (6.0) CLOCK FACE SIX OCLOCK +1F556 FE0E ; text style; # (6.0) CLOCK FACE SEVEN OCLOCK +1F556 FE0F ; emoji style; # (6.0) CLOCK FACE SEVEN OCLOCK +1F557 FE0E ; text style; # (6.0) CLOCK FACE EIGHT OCLOCK +1F557 FE0F ; emoji style; # (6.0) CLOCK FACE EIGHT OCLOCK +1F558 FE0E ; text style; # (6.0) CLOCK FACE NINE OCLOCK +1F558 FE0F ; emoji style; # (6.0) CLOCK FACE NINE OCLOCK +1F559 FE0E ; text style; # (6.0) CLOCK FACE TEN OCLOCK +1F559 FE0F ; emoji style; # (6.0) CLOCK FACE TEN OCLOCK +1F55A FE0E ; text style; # (6.0) CLOCK FACE ELEVEN OCLOCK +1F55A FE0F ; emoji style; # (6.0) CLOCK FACE ELEVEN OCLOCK +1F55B FE0E ; text style; # (6.0) CLOCK FACE TWELVE OCLOCK +1F55B FE0F ; emoji style; # (6.0) CLOCK FACE TWELVE OCLOCK +1F55C FE0E ; text style; # (6.0) CLOCK FACE ONE-THIRTY +1F55C FE0F ; emoji style; # (6.0) CLOCK FACE ONE-THIRTY +1F55D FE0E ; text style; # (6.0) CLOCK FACE TWO-THIRTY +1F55D FE0F ; emoji style; # (6.0) CLOCK FACE TWO-THIRTY +1F55E FE0E ; text style; # (6.0) CLOCK FACE THREE-THIRTY +1F55E FE0F ; emoji style; # (6.0) CLOCK FACE THREE-THIRTY +1F55F FE0E ; text style; # (6.0) CLOCK FACE FOUR-THIRTY +1F55F FE0F ; emoji style; # (6.0) CLOCK FACE FOUR-THIRTY +1F560 FE0E ; text style; # (6.0) CLOCK FACE FIVE-THIRTY +1F560 FE0F ; emoji style; # (6.0) CLOCK FACE FIVE-THIRTY +1F561 FE0E ; text style; # (6.0) CLOCK FACE SIX-THIRTY +1F561 FE0F ; emoji style; # (6.0) CLOCK FACE SIX-THIRTY +1F562 FE0E ; text style; # (6.0) CLOCK FACE SEVEN-THIRTY +1F562 FE0F ; emoji style; # (6.0) CLOCK FACE SEVEN-THIRTY +1F563 FE0E ; text style; # (6.0) CLOCK FACE EIGHT-THIRTY +1F563 FE0F ; emoji style; # (6.0) CLOCK FACE EIGHT-THIRTY +1F564 FE0E ; text style; # (6.0) CLOCK FACE NINE-THIRTY +1F564 FE0F ; emoji style; # (6.0) CLOCK FACE 
NINE-THIRTY +1F565 FE0E ; text style; # (6.0) CLOCK FACE TEN-THIRTY +1F565 FE0F ; emoji style; # (6.0) CLOCK FACE TEN-THIRTY +1F566 FE0E ; text style; # (6.0) CLOCK FACE ELEVEN-THIRTY +1F566 FE0F ; emoji style; # (6.0) CLOCK FACE ELEVEN-THIRTY +1F567 FE0E ; text style; # (6.0) CLOCK FACE TWELVE-THIRTY +1F567 FE0F ; emoji style; # (6.0) CLOCK FACE TWELVE-THIRTY +1F56F FE0E ; text style; # (7.0) CANDLE +1F56F FE0F ; emoji style; # (7.0) CANDLE +1F570 FE0E ; text style; # (7.0) MANTELPIECE CLOCK +1F570 FE0F ; emoji style; # (7.0) MANTELPIECE CLOCK +1F573 FE0E ; text style; # (7.0) HOLE +1F573 FE0F ; emoji style; # (7.0) HOLE +1F574 FE0E ; text style; # (7.0) MAN IN BUSINESS SUIT LEVITATING +1F574 FE0F ; emoji style; # (7.0) MAN IN BUSINESS SUIT LEVITATING +1F575 FE0E ; text style; # (7.0) SLEUTH OR SPY +1F575 FE0F ; emoji style; # (7.0) SLEUTH OR SPY +1F576 FE0E ; text style; # (7.0) DARK SUNGLASSES +1F576 FE0F ; emoji style; # (7.0) DARK SUNGLASSES +1F577 FE0E ; text style; # (7.0) SPIDER +1F577 FE0F ; emoji style; # (7.0) SPIDER +1F578 FE0E ; text style; # (7.0) SPIDER WEB +1F578 FE0F ; emoji style; # (7.0) SPIDER WEB +1F579 FE0E ; text style; # (7.0) JOYSTICK +1F579 FE0F ; emoji style; # (7.0) JOYSTICK +1F587 FE0E ; text style; # (7.0) LINKED PAPERCLIPS +1F587 FE0F ; emoji style; # (7.0) LINKED PAPERCLIPS +1F58A FE0E ; text style; # (7.0) LOWER LEFT BALLPOINT PEN +1F58A FE0F ; emoji style; # (7.0) LOWER LEFT BALLPOINT PEN +1F58B FE0E ; text style; # (7.0) LOWER LEFT FOUNTAIN PEN +1F58B FE0F ; emoji style; # (7.0) LOWER LEFT FOUNTAIN PEN +1F58C FE0E ; text style; # (7.0) LOWER LEFT PAINTBRUSH +1F58C FE0F ; emoji style; # (7.0) LOWER LEFT PAINTBRUSH +1F58D FE0E ; text style; # (7.0) LOWER LEFT CRAYON +1F58D FE0F ; emoji style; # (7.0) LOWER LEFT CRAYON +1F590 FE0E ; text style; # (7.0) RAISED HAND WITH FINGERS SPLAYED +1F590 FE0F ; emoji style; # (7.0) RAISED HAND WITH FINGERS SPLAYED +1F5A5 FE0E ; text style; # (7.0) DESKTOP COMPUTER +1F5A5 FE0F ; emoji style; # 
(7.0) DESKTOP COMPUTER +1F5A8 FE0E ; text style; # (7.0) PRINTER +1F5A8 FE0F ; emoji style; # (7.0) PRINTER +1F5B1 FE0E ; text style; # (7.0) THREE BUTTON MOUSE +1F5B1 FE0F ; emoji style; # (7.0) THREE BUTTON MOUSE +1F5B2 FE0E ; text style; # (7.0) TRACKBALL +1F5B2 FE0F ; emoji style; # (7.0) TRACKBALL +1F5BC FE0E ; text style; # (7.0) FRAME WITH PICTURE +1F5BC FE0F ; emoji style; # (7.0) FRAME WITH PICTURE +1F5C2 FE0E ; text style; # (7.0) CARD INDEX DIVIDERS +1F5C2 FE0F ; emoji style; # (7.0) CARD INDEX DIVIDERS +1F5C3 FE0E ; text style; # (7.0) CARD FILE BOX +1F5C3 FE0F ; emoji style; # (7.0) CARD FILE BOX +1F5C4 FE0E ; text style; # (7.0) FILE CABINET +1F5C4 FE0F ; emoji style; # (7.0) FILE CABINET +1F5D1 FE0E ; text style; # (7.0) WASTEBASKET +1F5D1 FE0F ; emoji style; # (7.0) WASTEBASKET +1F5D2 FE0E ; text style; # (7.0) SPIRAL NOTE PAD +1F5D2 FE0F ; emoji style; # (7.0) SPIRAL NOTE PAD +1F5D3 FE0E ; text style; # (7.0) SPIRAL CALENDAR PAD +1F5D3 FE0F ; emoji style; # (7.0) SPIRAL CALENDAR PAD +1F5DC FE0E ; text style; # (7.0) COMPRESSION +1F5DC FE0F ; emoji style; # (7.0) COMPRESSION +1F5DD FE0E ; text style; # (7.0) OLD KEY +1F5DD FE0F ; emoji style; # (7.0) OLD KEY +1F5DE FE0E ; text style; # (7.0) ROLLED-UP NEWSPAPER +1F5DE FE0F ; emoji style; # (7.0) ROLLED-UP NEWSPAPER +1F5E1 FE0E ; text style; # (7.0) DAGGER KNIFE +1F5E1 FE0F ; emoji style; # (7.0) DAGGER KNIFE +1F5E3 FE0E ; text style; # (7.0) SPEAKING HEAD IN SILHOUETTE +1F5E3 FE0F ; emoji style; # (7.0) SPEAKING HEAD IN SILHOUETTE +1F5E8 FE0E ; text style; # (7.0) LEFT SPEECH BUBBLE +1F5E8 FE0F ; emoji style; # (7.0) LEFT SPEECH BUBBLE +1F5EF FE0E ; text style; # (7.0) RIGHT ANGER BUBBLE +1F5EF FE0F ; emoji style; # (7.0) RIGHT ANGER BUBBLE +1F5F3 FE0E ; text style; # (7.0) BALLOT BOX WITH BALLOT +1F5F3 FE0F ; emoji style; # (7.0) BALLOT BOX WITH BALLOT +1F5FA FE0E ; text style; # (7.0) WORLD MAP +1F5FA FE0F ; emoji style; # (7.0) WORLD MAP +1F610 FE0E ; text style; # (6.0) NEUTRAL FACE +1F610 FE0F 
; emoji style; # (6.0) NEUTRAL FACE +1F687 FE0E ; text style; # (6.0) METRO +1F687 FE0F ; emoji style; # (6.0) METRO +1F68D FE0E ; text style; # (6.0) ONCOMING BUS +1F68D FE0F ; emoji style; # (6.0) ONCOMING BUS +1F691 FE0E ; text style; # (6.0) AMBULANCE +1F691 FE0F ; emoji style; # (6.0) AMBULANCE +1F694 FE0E ; text style; # (6.0) ONCOMING POLICE CAR +1F694 FE0F ; emoji style; # (6.0) ONCOMING POLICE CAR +1F698 FE0E ; text style; # (6.0) ONCOMING AUTOMOBILE +1F698 FE0F ; emoji style; # (6.0) ONCOMING AUTOMOBILE +1F6AD FE0E ; text style; # (6.0) NO SMOKING SYMBOL +1F6AD FE0F ; emoji style; # (6.0) NO SMOKING SYMBOL +1F6B2 FE0E ; text style; # (6.0) BICYCLE +1F6B2 FE0F ; emoji style; # (6.0) BICYCLE +1F6B9 FE0E ; text style; # (6.0) MENS SYMBOL +1F6B9 FE0F ; emoji style; # (6.0) MENS SYMBOL +1F6BA FE0E ; text style; # (6.0) WOMENS SYMBOL +1F6BA FE0F ; emoji style; # (6.0) WOMENS SYMBOL +1F6BC FE0E ; text style; # (6.0) BABY SYMBOL +1F6BC FE0F ; emoji style; # (6.0) BABY SYMBOL +1F6CB FE0E ; text style; # (7.0) COUCH AND LAMP +1F6CB FE0F ; emoji style; # (7.0) COUCH AND LAMP +1F6CD FE0E ; text style; # (7.0) SHOPPING BAGS +1F6CD FE0F ; emoji style; # (7.0) SHOPPING BAGS +1F6CE FE0E ; text style; # (7.0) BELLHOP BELL +1F6CE FE0F ; emoji style; # (7.0) BELLHOP BELL +1F6CF FE0E ; text style; # (7.0) BED +1F6CF FE0F ; emoji style; # (7.0) BED +1F6E0 FE0E ; text style; # (7.0) HAMMER AND WRENCH +1F6E0 FE0F ; emoji style; # (7.0) HAMMER AND WRENCH +1F6E1 FE0E ; text style; # (7.0) SHIELD +1F6E1 FE0F ; emoji style; # (7.0) SHIELD +1F6E2 FE0E ; text style; # (7.0) OIL DRUM +1F6E2 FE0F ; emoji style; # (7.0) OIL DRUM +1F6E3 FE0E ; text style; # (7.0) MOTORWAY +1F6E3 FE0F ; emoji style; # (7.0) MOTORWAY +1F6E4 FE0E ; text style; # (7.0) RAILWAY TRACK +1F6E4 FE0F ; emoji style; # (7.0) RAILWAY TRACK +1F6E5 FE0E ; text style; # (7.0) MOTOR BOAT +1F6E5 FE0F ; emoji style; # (7.0) MOTOR BOAT +1F6E9 FE0E ; text style; # (7.0) SMALL AIRPLANE +1F6E9 FE0F ; emoji style; # (7.0) SMALL 
AIRPLANE +1F6F0 FE0E ; text style; # (7.0) SATELLITE +1F6F0 FE0F ; emoji style; # (7.0) SATELLITE +1F6F3 FE0E ; text style; # (7.0) PASSENGER SHIP +1F6F3 FE0F ; emoji style; # (7.0) PASSENGER SHIP + +#Total sequences: 371 + +#EOF diff --git c/tests/emoji-zwj-sequences.txt i/tests/emoji-zwj-sequences.txt new file mode 100644 index 0000000..25f8b61 --- /dev/null +++ i/tests/emoji-zwj-sequences.txt @@ -0,0 +1,1529 @@ +# emoji-zwj-sequences.txt +# Date: 2023-06-05, 20:04:50 GMT +# © 2023 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countr…
1 parent f4368e3 commit c577a41

16 files changed

+2922
-208
lines changed

bin/update-tables.py

Lines changed: 281 additions & 102 deletions
Large diffs are not rendered by default.

docs/intro.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,10 @@ Other Languages
216216
=======
217217
History
218218
=======
219+
0.2.10 *2023-11-08*
220+
* **Bugfix** account for Narrow characters that become Wide in wcswidth
221+
when combined with U+FE0F Variation Selector 16 (`PR #XX`)
222+
219223
0.2.9 *2023-10-30*
220224
* **Bugfix** zero-width characters used in Emoji ZWJ sequences, Balinese,
221225
Jamo, Devanagari, Tamil, Kannada and others (`PR #91`_).

docs/specs.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,6 @@ Category codes of Nonspacing Mark (``Mn``) and Spacing Mark (``Mc``).
5252

5353
Any characters of Modifier Symbol category, ``'Sk'`` where ``'FULLWIDTH'`` is
5454
present in comment of unicode data file, approx. 3 characters.
55+
56+
Any character in sequence with U+FE0F (Variation Selector 16) defined by
57+
``emoji-variation-sequences.txt`` as ``emoji style``.

docs/unicode_version.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,9 @@ release files:
121121
``EastAsianWidth-15.1.0.txt``
122122
*Date: 2023-07-28, 23:34:08 GMT*
123123

124+
``emoji-variation-sequences-12.0.0.txt``
125+
*Date: 2019-01-15, 12:10:05 GMT*
126+
127+
``emoji-variation-sequences-15.1.0.txt``
128+
*Date: 2023-02-01, 02:22:54 GMT*
129+

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def main():
4444
setuptools.setup(
4545
name='wcwidth',
4646
# NOTE: manually manage __version__ in wcwidth/__init__.py !
47-
version='0.2.9',
47+
version='0.2.10',
4848
description=(
4949
"Measures the displayed width of unicode strings in a terminal"),
5050
long_description=codecs.open(

tests/emoji-variation-sequences.txt

Lines changed: 757 additions & 0 deletions
Large diffs are not rendered by default.

tests/emoji-zwj-sequences.txt

Lines changed: 1529 additions & 0 deletions
Large diffs are not rendered by default.

tests/test_core.py

Lines changed: 0 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,6 @@
1010
# local
1111
import wcwidth
1212

13-
# 3rd party
14-
import pytest
15-
1613
# some tests cannot be done on some builds of python, where the internal
1714
# unicode structure is limited to 0x10000 for memory conservation,
1815
# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)"
@@ -22,12 +19,6 @@
2219
except NameError:
2320
# python 3
2421
unichr = chr
25-
try:
26-
unichr(0x2fffe)
27-
NARROW_ONLY = False
28-
except ValueError:
29-
NARROW_ONLY = True
30-
3122

3223
def test_package_version():
3324
"""wcwidth.__version__ is expected value."""
@@ -254,97 +245,6 @@ def test_kr_jamo_filler():
254245
assert length_phrase == expect_length_phrase
255246

256247

257-
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
258-
def emoji_zwj_sequence():
259-
u"""
260-
Emoji zwj sequence of four codepoints is just 2 cells.
261-
"""
262-
phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
263-
u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
264-
u"\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
265-
u"\U0001f4bb") # Fused, Category So, East Asian Width property 'W' -- PERSONAL COMPUTER
266-
# This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
267-
expect_length_each = (2, 0, 0, 2)
268-
expect_length_phrase = 2
269-
270-
# exercise,
271-
length_each = tuple(map(wcwidth.wcwidth, phrase))
272-
length_phrase = wcwidth.wcswidth(phrase)
273-
274-
# verify.
275-
assert length_each == expect_length_each
276-
assert length_phrase == expect_length_phrase
277-
278-
279-
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
280-
def test_unfinished_zwj_sequence():
281-
u"""
282-
Ensure index-out-of-bounds does not occur for zero-width joiner without any following character
283-
"""
284-
phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
285-
u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
286-
u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
287-
expect_length_each = (2, 0, 0)
288-
expect_length_phrase = 2
289-
290-
# exercise,
291-
length_each = tuple(map(wcwidth.wcwidth, phrase))
292-
length_phrase = wcwidth.wcswidth(phrase)
293-
294-
# verify.
295-
assert length_each == expect_length_each
296-
assert length_phrase == expect_length_phrase
297-
298-
299-
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
300-
def test_non_recommended_zwj_sequence():
301-
"""
302-
Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify
303-
"""
304-
phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
305-
u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
306-
u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
307-
expect_length_each = (2, 0, 0)
308-
expect_length_phrase = 2
309-
310-
# exercise,
311-
length_each = tuple(map(wcwidth.wcwidth, phrase))
312-
length_phrase = wcwidth.wcswidth(phrase)
313-
314-
# verify.
315-
assert length_each == expect_length_each
316-
assert length_phrase == expect_length_phrase
317-
318-
319-
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
320-
def test_longer_emoji_zwj_sequence():
321-
"""
322-
A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells!
323-
"""
324-
# 'Category Code', 'East Asian Width property' -- 'description'
325-
phrase = (u"\U0001F9D1" # 'So', 'W' -- ADULT
326-
u"\U0001F3FB" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
327-
u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
328-
u"\u2764" # 'So', 'N' -- HEAVY BLACK HEART
329-
u"\uFE0F" # 'Mn', 'A' -- VARIATION SELECTOR-16
330-
u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
331-
u"\U0001F48B" # 'So', 'W' -- KISS MARK
332-
u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
333-
u"\U0001F9D1" # 'So', 'W' -- ADULT
334-
u"\U0001F3FD") # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4
335-
336-
# This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
337-
expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0)
338-
expect_length_phrase = 2
339-
340-
# exercise,
341-
length_each = tuple(map(wcwidth.wcwidth, phrase))
342-
length_phrase = wcwidth.wcswidth(phrase)
343-
344-
# verify.
345-
assert length_each == expect_length_each
346-
assert length_phrase == expect_length_phrase
347-
348248

349249
def test_devanagari_script():
350250
"""

tests/test_emoji_zwj.py

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
import os
2+
3+
import pytest
4+
5+
try:
6+
# python 2
7+
_ = unichr
8+
except NameError:
9+
# python 3
10+
unichr = chr
11+
try:
12+
unichr(0x2fffe)
13+
NARROW_ONLY = False
14+
except ValueError:
15+
NARROW_ONLY = True
16+
17+
import wcwidth
18+
19+
def make_sequence_from_line(line):
20+
# convert '002A FE0F ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f)
21+
return ''.join(chr(int(cp, 16)) for cp in line.split(';', 1)[0].strip().split())
22+
23+
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
24+
def emoji_zwj_sequence():
25+
u"""
26+
Emoji zwj sequence of four codepoints is just 2 cells.
27+
"""
28+
phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
29+
u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
30+
u"\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
31+
u"\U0001f4bb") # Fused, Category So, East Asian Width property 'W' -- PERSONAL COMPUTER
32+
# This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
33+
expect_length_each = (2, 0, 0, 2)
34+
expect_length_phrase = 2
35+
36+
# exercise,
37+
length_each = tuple(map(wcwidth.wcwidth, phrase))
38+
length_phrase = wcwidth.wcswidth(phrase)
39+
40+
# verify.
41+
assert length_each == expect_length_each
42+
assert length_phrase == expect_length_phrase
43+
44+
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
45+
def test_unfinished_zwj_sequence():
46+
u"""
47+
Ensure index-out-of-bounds does not occur for zero-width joiner without any following character
48+
"""
49+
phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
50+
u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
51+
u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
52+
expect_length_each = (2, 0, 0)
53+
expect_length_phrase = 2
54+
55+
# exercise,
56+
length_each = tuple(map(wcwidth.wcwidth, phrase))
57+
length_phrase = wcwidth.wcswidth(phrase)
58+
59+
# verify.
60+
assert length_each == expect_length_each
61+
assert length_phrase == expect_length_phrase
62+
63+
64+
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
65+
def test_non_recommended_zwj_sequence():
66+
"""
67+
Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify
68+
"""
69+
phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
70+
u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
71+
u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
72+
expect_length_each = (2, 0, 0)
73+
expect_length_phrase = 2
74+
75+
# exercise,
76+
length_each = tuple(map(wcwidth.wcwidth, phrase))
77+
length_phrase = wcwidth.wcswidth(phrase)
78+
79+
# verify.
80+
assert length_each == expect_length_each
81+
assert length_phrase == expect_length_phrase
82+
83+
84+
def test_another_emoji_zwj_sequence():
85+
phrase = (
86+
u"\u26F9" # PERSON WITH BALL
87+
u"\U0001F3FB" # EMOJI MODIFIER FITZPATRICK TYPE-1-2
88+
u"\u200D" # ZERO WIDTH JOINER
89+
u"\u2640" # FEMALE SIGN
90+
u"\uFE0F") # VARIATION SELECTOR-16
91+
expect_length_each = (1, 0, 0, 1, 0)
92+
expect_length_phrase = 2
93+
94+
# exercise,
95+
length_each = tuple(map(wcwidth.wcwidth, phrase))
96+
length_phrase = wcwidth.wcswidth(phrase)
97+
98+
# verify.
99+
assert length_each == expect_length_each
100+
assert length_phrase == expect_length_phrase
101+
102+
103+
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
104+
def test_longer_emoji_zwj_sequence():
105+
"""
106+
A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells!
107+
"""
108+
# 'Category Code', 'East Asian Width property' -- 'description'
109+
phrase = (u"\U0001F9D1" # 'So', 'W' -- ADULT
110+
u"\U0001F3FB" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
111+
u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
112+
u"\u2764" # 'So', 'N' -- HEAVY BLACK HEART
113+
u"\uFE0F" # 'Mn', 'A' -- VARIATION SELECTOR-16
114+
u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
115+
u"\U0001F48B" # 'So', 'W' -- KISS MARK
116+
u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
117+
u"\U0001F9D1" # 'So', 'W' -- ADULT
118+
u"\U0001F3FD") # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4
119+
120+
# This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
121+
expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0)
122+
expect_length_phrase = 2
123+
124+
# exercise,
125+
length_each = tuple(map(wcwidth.wcwidth, phrase))
126+
length_phrase = wcwidth.wcswidth(phrase)
127+
128+
# verify.
129+
assert length_each == expect_length_each
130+
assert length_phrase == expect_length_phrase
131+
132+
133+
def read_sequences_from_file(filename):
134+
with open(os.path.join(os.path.dirname(__file__), filename)) as f:
135+
lines = [line.strip()
136+
for line in f.readlines()
137+
if not line.startswith('#') and line.strip()]
138+
sequences = [make_sequence_from_line(line) for line in lines]
139+
return lines, sequences
140+
141+
142+
def test_recommended_emoji_zwj_sequences():
143+
"""
144+
Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt
145+
"""
146+
# given,
147+
lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt')
148+
149+
errors = []
150+
# Exercise, track by zipping with original text file line, a debugging aid
151+
num = 0
152+
for sequence, line in zip(sequences, lines):
153+
num += 1
154+
measured_width = wcwidth.wcswidth(sequence)
155+
if measured_width != 2:
156+
errors.append({
157+
'expected_width': 2,
158+
'line': line,
159+
'measured_width': measured_width,
160+
'sequence': sequence,
161+
})
162+
163+
# verify
164+
assert errors == []
165+
assert num > 1000, num
166+
167+
168+
def test_recommended_variation_16_sequences():
169+
"""
170+
Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt
171+
"""
172+
# given,
173+
lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt')
174+
175+
errors = []
176+
num = 0
177+
for sequence, line in zip(sequences, lines):
178+
num += 1
179+
if '\ufe0f' not in sequence:
180+
# filter for only \uFE0F (VS-16)
181+
continue
182+
measured_width = wcwidth.wcswidth(sequence)
183+
if measured_width != 2:
184+
errors.append({
185+
'expected_width': 2,
186+
'line': line,
187+
'measured_width': wcwidth.wcswidth(sequence),
188+
'sequence': sequence,
189+
})
190+
191+
# verify
192+
assert errors == []
193+
assert num > 1000, num

tox.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ deps = -r requirements-tests37.in
123123
basepython = python3.12
124124
usedevelop = true
125125
deps = -r requirements-update.txt
126-
commands = python {toxinidir}/bin/update-tables.py {posargs}
126+
commands = python {toxinidir}/bin/update-tables.py {posargs:--no-check-last-modified}
127127

128128
[testenv:autopep8]
129129
basepython = python3.11

0 commit comments

Comments
 (0)