Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 37 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,39 @@ env:
PIP_DISABLE_PIP_VERSION_CHECK: "1"

jobs:
lint:
name: ${{ matrix.label }}
runs-on: ubuntu-latest
continue-on-error: true # Allow linting to fail without blocking CI

strategy:
fail-fast: false
matrix:
include:
# docformatter on 3.13 (not yet compatible with 3.14)
- python-version: '3.13'
toxenv: docformatter_check
label: docformatter

# All other linting tools on 3.13
- python-version: '3.13'
toxenv: flake8,flake8_tests,isort_check,pydocstyle,pylint,pylint_tests,codespell,docs
label: Linting

steps:
- uses: actions/checkout@v6

- uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
cache: pip

- name: Install tox
run: python -Im pip install tox

- name: Run linting
run: python -Im tox -e ${{ matrix.toxenv }}

tests:
strategy:
fail-fast: false
Expand Down Expand Up @@ -140,8 +173,10 @@ jobs:

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{secrets.CODECOV_TOKEN}}
with:
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: true
verbose: true

- name: Fail if coverage is <100%.
run: |
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ htmlcov
data
.DS_Store
.idea
.python-version

19 changes: 9 additions & 10 deletions bin/update-tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,12 +429,9 @@ def fetch_table_ambiguous_data() -> UnicodeTableRenderCtx:
"""
Fetch east-asian ambiguous character table for the latest Unicode version.

East Asian Ambiguous (A) characters can display as either 1 cell (narrow)
or 2 cells (wide) depending on the terminal's configuration. This table
allows users to opt-in to treating these characters as wide by passing
ambiguous_width=2 to wcwidth/wcswidth.

See https://www.unicode.org/reports/tr11/ for the specification.
East Asian Ambiguous (A) characters can display as either 1 cell (narrow) or 2 cells (wide)
depending on the terminal's configuration. This table allows users to opt-in to treating these
characters as wide by passing ambiguous_width=2 to wcwidth/wcswidth.
"""
table: dict[UnicodeVersion, TableDef] = {}
version = fetch_unicode_versions()[-1]
Expand Down Expand Up @@ -883,10 +880,11 @@ def filenames() -> list[str]:


def replace_if_modified(new_filename: str, original_filename: str) -> None:
"""Replace original file with new file only if there are significant changes.
"""
Replace original file with new file only if there are significant changes.

If only the 'This code generated' timestamp line differs, discard the new file.
If there are other changes or the original doesn't exist, replace it.
If only the 'This code generated' timestamp line differs, discard the new file. If there are
other changes or the original doesn't exist, replace it.
"""
if os.path.exists(original_filename):
with open(original_filename, encoding='utf-8') as f1, \
Expand Down Expand Up @@ -921,7 +919,8 @@ def replace_if_modified(new_filename: str, original_filename: str) -> None:


def fetch_all_emoji_files() -> None:
"""Fetch emoji variation sequences and ZWJ sequences for all versions.
"""
Fetch emoji variation sequences and ZWJ sequences for all versions.

URL locations:
- Variation sequences (5.0-12.1): /Public/emoji/{version}/
Expand Down
6 changes: 4 additions & 2 deletions docs/intro.rst
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ Use `iter_graphemes()`_ to iterate over *grapheme clusters* of a string.
['o', 'k', '👨\u200d👩\u200d👧']

A grapheme cluster is what a user perceives as a single character, even if it is composed of
multiple Unicode codepoints. This function implements Unicode Standard `Annex #29`_ grapheme cluster
multiple Unicode codepoints. This function implements `Unicode Standard Annex #29`_ grapheme cluster
boundary rules.

ljust()
Expand Down Expand Up @@ -244,7 +244,7 @@ sequence-aware wrapping with full control over wrapping behavior.
clip()
------

Use `clip()`_ to extract a substring by the column positions displayed, preserving terminal sequences.
Use `clip()`_ to extract a substring by column positions, preserving terminal sequences.

.. code-block:: python

Expand Down Expand Up @@ -639,6 +639,8 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
.. _`clip()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.clip
.. _`strip_sequences()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.strip_sequences
.. _`iter_sequences()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.iter_sequences
.. _`Unicode Standard Annex #29`: https://www.unicode.org/reports/tr29/
.. _`Terminal.detect_ambiguous_width()`: https://blessed.readthedocs.io/en/latest/api/terminal.html#blessed.terminal.Terminal.detect_ambiguous_width
.. |pypi_downloads| image:: https://img.shields.io/pypi/dm/wcwidth.svg?logo=pypi
:alt: Downloads
:target: https://pypi.org/project/wcwidth/
Expand Down
11 changes: 5 additions & 6 deletions tests/test_ambiguous.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
# local
import wcwidth


AMBIGUOUS_CHARS = [
('\u00a1', 'INVERTED_EXCLAMATION'),
('\u00a7', 'SECTION_SIGN'),
Expand Down Expand Up @@ -40,34 +39,34 @@ def test_wcswidth_mixed_ambiguous_and_wide():


def test_width_ambiguous():
    """Verify width() honors the ambiguous_width parameter."""
    # U+00B1 PLUS-MINUS SIGN measures 1 cell by default and 2 cells when
    # ambiguous characters are requested to be treated as wide.
    assert wcwidth.width('\u00b1') == 1
    assert wcwidth.width('\u00b1', ambiguous_width=2) == 2


def test_ljust_ambiguous():
    """Verify ljust() honors the ambiguous_width parameter."""
    text = '\u00b1'
    # Narrow (1 cell): three fill spaces are needed to reach 4 columns.
    assert wcwidth.ljust(text, 4) == '\u00b1   '
    # Wide (2 cells): only two fill spaces are needed to reach 4 columns.
    assert wcwidth.ljust(text, 4, ambiguous_width=2) == '\u00b1  '


def test_rjust_ambiguous():
    """Verify rjust() honors the ambiguous_width parameter."""
    text = '\u00b1'
    # Narrow (1 cell): three fill spaces precede the character in 4 columns.
    assert wcwidth.rjust(text, 4) == '   \u00b1'
    # Wide (2 cells): only two fill spaces precede the character in 4 columns.
    assert wcwidth.rjust(text, 4, ambiguous_width=2) == '  \u00b1'


def test_center_ambiguous():
    """Verify center() honors the ambiguous_width parameter."""
    text = '\u00b1'
    # Narrow (1 cell) in 5 columns: four fill spaces, two on each side.
    assert wcwidth.center(text, 5) == '  \u00b1  '
    # Wide (2 cells) in 6 columns: again four fill spaces, two on each side.
    assert wcwidth.center(text, 6, ambiguous_width=2) == '  \u00b1  '


def test_wrap_ambiguous():
"""wrap respects ambiguous_width parameter."""
"""Wrap respects ambiguous_width parameter."""
text = '\u00b1' * 5 # 5 ambiguous characters
assert wcwidth.wrap(text, 4) == ['\u00b1\u00b1\u00b1\u00b1', '\u00b1']
assert wcwidth.wrap(text, 4, ambiguous_width=2) == ['\u00b1\u00b1', '\u00b1\u00b1', '\u00b1']
Expand Down
5 changes: 3 additions & 2 deletions tests/test_clip.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Tests for clip() and strip_sequences() functions."""
# 3rd party
import pytest

from wcwidth import clip, strip_sequences, width

# local
from wcwidth import clip, width, strip_sequences

STRIP_SEQUENCES_CASES = [
('', ''),
Expand Down
23 changes: 12 additions & 11 deletions tests/test_core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Core tests for wcwidth module. isort:skip_file"""
"""Core tests for wcwidth module."""
# std imports
import importlib.metadata

# local
Expand Down Expand Up @@ -38,7 +39,7 @@ def test_empty_string():

def basic_string_type():
"""
This is a python 2-specific test of the basic "string type"
This is a python 2-specific test of the basic "string type".

Such strings cannot contain anything but ascii in python2.
"""
Expand Down Expand Up @@ -117,14 +118,14 @@ def test_null_width_0():


def test_control_c0_width_negative_1():
"""How the API reacts to CSI (Control sequence initiate).
"""
How the API reacts to CSI (Control Sequence Introducer).

An example of bad fortune, this terminal sequence is a width of 0
on all terminals, but wcwidth doesn't parse Control-Sequence-Inducer
(CSI) sequences.
An example of bad fortune, this terminal sequence is a width of 0 on all terminals, but wcwidth
doesn't parse Control Sequence Introducer (CSI) sequences.

Also the "legacy" posix functions wcwidth and wcswidth return -1 for
any string containing the C1 control character \x1b (ESC).
Also the "legacy" POSIX functions wcwidth and wcswidth return -1 for any string containing the
C0 control character \x1b (ESC).
"""
# given,
phrase = '\x1b[0m'
Expand Down Expand Up @@ -190,8 +191,8 @@ def test_balinese_script():
"""
Balinese kapal (ship) is length 3.

This may be an example that is not yet correctly rendered by any terminal so
far, like devanagari.
This may be an example that is not yet correctly rendered by any terminal so far, like
devanagari.
"""
phrase = ("\u1B13" # Category 'Lo', EAW 'N' -- BALINESE LETTER KA
"\u1B28" # Category 'Lo', EAW 'N' -- BALINESE LETTER PA KAPAL
Expand All @@ -211,7 +212,7 @@ def test_balinese_script():

def test_kr_jamo():
"""
Test basic combining of HANGUL CHOSEONG and JUNGSEONG
Test basic combining of HANGUL CHOSEONG and JUNGSEONG.

Example from Raymond Chen's blog post,
https://devblogs.microsoft.com/oldnewthing/20201009-00/?p=104351
Expand Down
50 changes: 20 additions & 30 deletions tests/test_emojis.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
"""Tests for emoji width measurement and ZWJ sequences."""
# std imports
import os

# 3rd party
import pytest

# local
import wcwidth

# some tests cannot be done on some builds of python, where the internal
# unicode structure is limited to 0x10000 for memory conservation,
# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)"
Expand All @@ -13,9 +17,6 @@
except ValueError:
NARROW_ONLY = True

# local
import wcwidth


def make_sequence_from_line(line):
# convert '002A FE0F ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f)
Expand All @@ -24,13 +25,11 @@ def make_sequence_from_line(line):

@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def emoji_zwj_sequence():
"""
Emoji zwj sequence of four codepoints is just 2 cells.
"""
"""Emoji zwj sequence of four codepoints is just 2 cells."""
phrase = ("\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
"\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
"\U0001f4bb") # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER
"\U0001f4bb") # Fused, Category So, East Asian Width property 'W' -- PERSONAL COMPUTER
# This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
expect_length_each = (2, 0, 0, 2)
expect_length_phrase = 2
Expand All @@ -46,9 +45,7 @@ def emoji_zwj_sequence():

@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_unfinished_zwj_sequence():
"""
Ensure index-out-of-bounds does not occur for zero-width joiner without any following character
"""
"""Ensure index-out-of-bounds does not occur for ZWJ without any following character."""
phrase = ("\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
Expand All @@ -66,9 +63,7 @@ def test_unfinished_zwj_sequence():

@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_non_recommended_zwj_sequence():
"""
Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify
"""
"""Verify a non-joinable ZWJ sequence is still measured as joined; wcwidth does not validate."""
phrase = ("\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
Expand Down Expand Up @@ -109,8 +104,8 @@ def test_longer_emoji_zwj_sequence():
"""
A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells!

Also test the same sequence in duplicate, verifying multiple VS-16 sequences
in a single function call.
Also test the same sequence in duplicate, verifying multiple VS-16 sequences in a single
function call.
"""
# 'Category Code', 'East Asian Width property' -- 'description'
phrase = ("\U0001F9D1" # 'So', 'W' -- ADULT
Expand Down Expand Up @@ -138,20 +133,17 @@ def test_longer_emoji_zwj_sequence():


def read_sequences_from_file(filename):
    """
    Read a data file located beside this test module and parse its entries.

    Lines that begin with ``#`` and lines that are empty after stripping are ignored.

    :param filename: name of a file in the same directory as this module.
    :returns: tuple ``(lines, sequences)`` of the stripped remaining lines and the
        ``make_sequence_from_line()`` result for each one, in file order.
    """
    path = os.path.join(os.path.dirname(__file__), filename)
    # The context manager guarantees the handle is closed even if reading raises.
    with open(path, encoding='utf-8') as fp:
        lines = [line.strip()
                 for line in fp
                 if not line.startswith('#') and line.strip()]
    sequences = [make_sequence_from_line(line) for line in lines]
    return lines, sequences


@pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds")
def test_recommended_emoji_zwj_sequences():
"""
Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt
"""
"""Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt."""
# given,
lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt')

Expand All @@ -170,14 +162,12 @@ def test_recommended_emoji_zwj_sequences():
})

# verify
assert errors == []
assert not errors
assert num >= 1468


def test_recommended_variation_16_sequences():
"""
Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt
"""
"""Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt."""
# given,
lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt')

Expand All @@ -198,12 +188,12 @@ def test_recommended_variation_16_sequences():
})

# verify
assert errors == []
assert not errors
assert num >= 742


def test_unicode_9_vs16():
"""Verify effect of VS-16 on unicode_version 9.0 and later"""
"""Verify effect of VS-16 on unicode_version 9.0 and later."""
phrase = ("\u2640" # FEMALE SIGN
"\uFE0F") # VARIATION SELECTOR-16

Expand All @@ -220,7 +210,7 @@ def test_unicode_9_vs16():


def test_unicode_8_vs16():
"""Verify that VS-16 has no effect on unicode_version 8.0 and earler"""
"""Verify that VS-16 has no effect on unicode_version 8.0 and earlier."""
phrase = ("\u2640" # FEMALE SIGN
"\uFE0F") # VARIATION SELECTOR-16

Expand Down
4 changes: 1 addition & 3 deletions tests/test_table_integrity.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
"""
Executes verify-table-integrity.py as a unit test.
"""
"""Executes verify-table-integrity.py as a unit test."""
# std imports
import os
import sys
Expand Down
Loading
Loading