From 43209e4ba0b6493f8cd4828e719f0603db00f3b6 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Mon, 19 Jan 2026 19:43:32 -0500 Subject: [PATCH 1/6] CI Fixes, linting, formatting, etc. - Fix codecov "Token required because branch is protected" error I think codecov stopped working with #156 maybe earlier though - Add linting steps to CI (previously missing) - Add missing tox.ini targets referenced in envlist I expect this build to be marked failed, I will merge it anyway and address linting issues as I go .. --- .github/workflows/ci.yml | 39 ++++++++++++++++++-- tox.ini | 77 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 113 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0588d6dd..cc672022 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,6 +18,39 @@ env: PIP_DISABLE_PIP_VERSION_CHECK: "1" jobs: + lint: + name: ${{ matrix.label }} + runs-on: ubuntu-latest + continue-on-error: true # Allow linting to fail without blocking CI + + strategy: + fail-fast: false + matrix: + include: + # docformatter on 3.13 (not yet compatible with 3.14) + - python-version: '3.13' + toxenv: docformatter_check + label: docformatter + + # All other linting tools on 3.13 + - python-version: '3.13' + toxenv: flake8,flake8_tests,isort_check,pydocstyle,pylint,pylint_tests,codespell,docs + label: Linting + + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + cache: pip + + - name: Install tox + run: python -Im pip install tox + + - name: Run linting + run: python -Im tox -e ${{ matrix.toxenv }} + tests: strategy: fail-fast: false @@ -140,8 +173,10 @@ jobs: - name: Upload coverage to Codecov uses: codecov/codecov-action@v5 - env: - CODECOV_TOKEN: ${{secrets.CODECOV_TOKEN}} + with: + token: ${{ secrets.CODECOV_TOKEN }} + fail_ci_if_error: true + verbose: true - name: Fail if coverage is <100%. run: | diff --git a/tox.ini b/tox.ini index 748eab52..1cc8c03a 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = update, compile, autopep8, docformatter, isort, pylint, flake8, pydocstyle, docs, verify_tables, py{38, 39, 310, 311, 312, 313, 314}, pypy{38, 39, 310, 311} +envlist = update, compile, autopep8, docformatter, docformatter_check, isort, isort_check, pylint, pylint_tests, flake8, flake8_tests, pydocstyle, mypy, codespell, lint, docs, verify_tables, py{38, 39, 310, 311, 312, 313, 314}, pypy{38, 39, 310, 311} skip_missing_interpreters = true [base] @@ -150,3 +150,78 @@ basepython = python{env:TOXPYTHON:{env:TRAVIS_PYTHON_VERSION:3.10}} passenv = TOXENV,CI,TRAVIS,TRAVIS_*,CODECOV_* deps = codecov commands = codecov -e TOXENV + +[testenv:pydocstyle] +basepython = python3.13 +deps = pydocstyle + restructuredtext_lint + doc8 + pygments +commands = pydocstyle --source --explain {toxinidir}/wcwidth + rst-lint README.rst + doc8 --ignore-path docs/_build --ignore D000 docs + +[testenv:docformatter] +basepython = python3.13 +deps = docformatter>=1.7.7 + untokenize +commands = docformatter --in-place --recursive --pre-summary-newline \ + --wrap-summaries=100 --wrap-descriptions=100 \ + {toxinidir}/wcwidth/ {toxinidir}/bin {toxinidir}/tests/ + +[testenv:docformatter_check] +basepython = python3.13 +deps = {[testenv:docformatter]deps} +commands = docformatter --check --diff --recursive --pre-summary-newline \ + --wrap-summaries=100 --wrap-descriptions=100 \ + {toxinidir}/wcwidth/ {toxinidir}/bin {toxinidir}/tests/ + +[testenv:isort_check] +basepython = python3.13 +deps = isort +commands = isort --diff --check-only wcwidth tests bin + +[testenv:flake8_tests] +basepython = python3.13 +deps = flake8 +commands = flake8 --ignore=W504,F401 tests/ + +[testenv:pylint_tests] +basepython = python3.13 +deps = pytest + pylint +commands = pylint --rcfile={toxinidir}/.pylintrc \ + --disable=invalid-name,import-outside-toplevel,protected-access,unused-argument \ + tests + +[testenv:mypy] +basepython = python3.13 +deps = mypy +commands = mypy --strict wcwidth + +[mypy] +warn_unused_configs = true +warn_redundant_casts = true +warn_unused_ignores = true + +[testenv:codespell] +basepython = python3.13 +deps = codespell +commands = codespell --skip="*.pyc,htmlcov,_build,build,*.egg-info,.tox" \ + --uri-ignore-words-list '*' \ + --summary --count + +[testenv:lint] +basepython = python3.13 +deps = {[testenv:flake8]deps} + {[testenv:isort_check]deps} + {[testenv:pydocstyle]deps} + {[testenv:pylint]deps} + {[testenv:codespell]deps} +commands = {[testenv:flake8]commands} + {[testenv:flake8_tests]commands} + {[testenv:isort_check]commands} + {[testenv:pydocstyle]commands} + {[testenv:pylint]commands} + {[testenv:pylint_tests]commands} + {[testenv:codespell]commands} From 73983a6b1c35a6b4613c5f1fe86506c00faa432e Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Mon, 19 Jan 2026 19:58:07 -0500 Subject: [PATCH 2/6] tox -eformat --- bin/update-tables.py | 19 +++++++++---------- tests/test_ambiguous.py | 11 +++++------ tests/test_clip.py | 5 +++-- tests/test_core.py | 23 ++++++++++++----------- tests/test_emojis.py | 28 +++++++++------------------- tests/test_table_integrity.py | 4 +--- tests/test_textwrap.py | 8 ++++---- tests/test_ucslevel.py | 12 ++++++++++-- tox.ini | 23 ++++++++++++++++------- wcwidth/__init__.py | 6 +++--- wcwidth/control_codes.py | 4 ++-- wcwidth/escape_sequences.py | 7 +++---- wcwidth/grapheme.py | 8 ++++---- wcwidth/table_grapheme.py | 4 ++-- wcwidth/textwrap.py | 21 ++++++++++----------- wcwidth/wcwidth.py | 3 +++ 16 files changed, 96 insertions(+), 90 deletions(-) diff --git a/bin/update-tables.py b/bin/update-tables.py index af27088b..84f6404e 100644 --- a/bin/update-tables.py +++ b/bin/update-tables.py @@ -429,12 +429,9 @@ def fetch_table_ambiguous_data() -> UnicodeTableRenderCtx: """ Fetch east-asian ambiguous character table for the latest Unicode version. - East Asian Ambiguous (A) characters can display as either 1 cell (narrow) - or 2 cells (wide) depending on the terminal's configuration. This table - allows users to opt-in to treating these characters as wide by passing - ambiguous_width=2 to wcwidth/wcswidth. - - See https://www.unicode.org/reports/tr11/ for the specification. + East Asian Ambiguous (A) characters can display as either 1 cell (narrow) or 2 cells (wide) + depending on the terminal's configuration. This table allows users to opt-in to treating these + characters as wide by passing ambiguous_width=2 to wcwidth/wcswidth. """ table: dict[UnicodeVersion, TableDef] = {} version = fetch_unicode_versions()[-1] @@ -883,10 +880,11 @@ def filenames() -> list[str]: def replace_if_modified(new_filename: str, original_filename: str) -> None: - """Replace original file with new file only if there are significant changes. + """ + Replace original file with new file only if there are significant changes. - If only the 'This code generated' timestamp line differs, discard the new file. - If there are other changes or the original doesn't exist, replace it. + If only the 'This code generated' timestamp line differs, discard the new file. If there are + other changes or the original doesn't exist, replace it. """ if os.path.exists(original_filename): with open(original_filename, encoding='utf-8') as f1, \ @@ -921,7 +919,8 @@ def replace_if_modified(new_filename: str, original_filename: str) -> None: def fetch_all_emoji_files() -> None: - """Fetch emoji variation sequences and ZWJ sequences for all versions. + """ + Fetch emoji variation sequences and ZWJ sequences for all versions. URL locations: - Variation sequences (5.0-12.1): /Public/emoji/{version}/ diff --git a/tests/test_ambiguous.py b/tests/test_ambiguous.py index 77fb895b..0c61cdac 100644 --- a/tests/test_ambiguous.py +++ b/tests/test_ambiguous.py @@ -5,7 +5,6 @@ # local import wcwidth - AMBIGUOUS_CHARS = [ ('\u00a1', 'INVERTED_EXCLAMATION'), ('\u00a7', 'SECTION_SIGN'), @@ -40,34 +39,34 @@ def test_wcswidth_mixed_ambiguous_and_wide(): def test_width_ambiguous(): - """width() respects ambiguous_width parameter.""" + """Width() respects ambiguous_width parameter.""" assert wcwidth.width('\u00b1') == 1 assert wcwidth.width('\u00b1', ambiguous_width=2) == 2 def test_ljust_ambiguous(): - """ljust respects ambiguous_width parameter.""" + """Ljust respects ambiguous_width parameter.""" text = '\u00b1' assert wcwidth.ljust(text, 4) == '\u00b1 ' assert wcwidth.ljust(text, 4, ambiguous_width=2) == '\u00b1 ' def test_rjust_ambiguous(): - """rjust respects ambiguous_width parameter.""" + """Rjust respects ambiguous_width parameter.""" text = '\u00b1' assert wcwidth.rjust(text, 4) == ' \u00b1' assert wcwidth.rjust(text, 4, ambiguous_width=2) == ' \u00b1' def test_center_ambiguous(): - """center respects ambiguous_width parameter.""" + """Center respects ambiguous_width parameter.""" text = '\u00b1' assert wcwidth.center(text, 5) == ' \u00b1 ' assert wcwidth.center(text, 6, ambiguous_width=2) == ' \u00b1 ' def test_wrap_ambiguous(): - """wrap respects ambiguous_width parameter.""" + """Wrap respects ambiguous_width parameter.""" text = '\u00b1' * 5 # 5 ambiguous characters assert wcwidth.wrap(text, 4) == ['\u00b1\u00b1\u00b1\u00b1', '\u00b1'] assert wcwidth.wrap(text, 4, ambiguous_width=2) == ['\u00b1\u00b1', '\u00b1\u00b1', '\u00b1'] diff --git a/tests/test_clip.py b/tests/test_clip.py index b8070cf9..4f453659 100644 --- a/tests/test_clip.py +++ b/tests/test_clip.py @@ -1,8 +1,9 @@ """Tests for clip() and strip_sequences() functions.""" +# 3rd party import pytest -from wcwidth import clip, strip_sequences, width - +# local +from wcwidth import clip, width, strip_sequences STRIP_SEQUENCES_CASES = [ ('', ''), diff --git a/tests/test_core.py b/tests/test_core.py index 75c65a83..08e59e6c 100755 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,4 +1,5 @@ -"""Core tests for wcwidth module. isort:skip_file""" +"""Core tests for wcwidth module.""" +# std imports import importlib.metadata # local @@ -38,7 +39,7 @@ def test_empty_string(): def basic_string_type(): """ - This is a python 2-specific test of the basic "string type" + This is a python 2-specific test of the basic "string type". Such strings cannot contain anything but ascii in python2. """ @@ -117,14 +118,14 @@ def test_null_width_0(): def test_control_c0_width_negative_1(): - """How the API reacts to CSI (Control sequence initiate). + """ + How the API reacts to CSI (Control sequence initiate). - An example of bad fortune, this terminal sequence is a width of 0 - on all terminals, but wcwidth doesn't parse Control-Sequence-Inducer - (CSI) sequences. + An example of bad fortune, this terminal sequence is a width of 0 on all terminals, but wcwidth + doesn't parse Control-Sequence-Inducer (CSI) sequences. - Also the "legacy" posix functions wcwidth and wcswidth return -1 for - any string containing the C1 control character \x1b (ESC). + Also the "legacy" posix functions wcwidth and wcswidth return -1 for any string containing the + C1 control character \x1b (ESC). """ # given, phrase = '\x1b[0m' @@ -190,8 +191,8 @@ def test_balinese_script(): """ Balinese kapal (ship) is length 3. - This may be an example that is not yet correctly rendered by any terminal so - far, like devanagari. + This may be an example that is not yet correctly rendered by any terminal so far, like + devanagari. """ phrase = ("\u1B13" # Category 'Lo', EAW 'N' -- BALINESE LETTER KA "\u1B28" # Category 'Lo', EAW 'N' -- BALINESE LETTER PA KAPAL @@ -211,7 +212,7 @@ def test_balinese_script(): def test_kr_jamo(): """ - Test basic combining of HANGUL CHOSEONG and JUNGSEONG + Test basic combining of HANGUL CHOSEONG and JUNGSEONG. Example and from Raymond Chen's blog post, https://devblogs.microsoft.com/oldnewthing/20201009-00/?p=104351 diff --git a/tests/test_emojis.py b/tests/test_emojis.py index 9df6b34d..5072027b 100644 --- a/tests/test_emojis.py +++ b/tests/test_emojis.py @@ -24,9 +24,7 @@ def make_sequence_from_line(line): @pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") def emoji_zwj_sequence(): - """ - Emoji zwj sequence of four codepoints is just 2 cells. - """ + """Emoji zwj sequence of four codepoints is just 2 cells.""" phrase = ("\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN "\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 "\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER @@ -46,9 +44,7 @@ def emoji_zwj_sequence(): @pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") def test_unfinished_zwj_sequence(): - """ - Ensure index-out-of-bounds does not occur for zero-width joiner without any following character - """ + """Ensure index-out-of-bounds does not occur for ZWJ without any following character.""" phrase = ("\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN "\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 "\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER @@ -66,9 +62,7 @@ def test_unfinished_zwj_sequence(): @pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") def test_non_recommended_zwj_sequence(): - """ - Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify - """ + """Verify ZWJ with characters that cannot be joined, wcwidth does not verify.""" phrase = ("\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN "\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 "\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER @@ -109,8 +103,8 @@ def test_longer_emoji_zwj_sequence(): """ A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells! - Also test the same sequence in duplicate, verifying multiple VS-16 sequences - in a single function call. + Also test the same sequence in duplicate, verifying multiple VS-16 sequences in a single + function call. """ # 'Category Code', 'East Asian Width property' -- 'description' phrase = ("\U0001F9D1" # 'So', 'W' -- ADULT @@ -149,9 +143,7 @@ def read_sequences_from_file(filename): @pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds") def test_recommended_emoji_zwj_sequences(): - """ - Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt - """ + """Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt.""" # given, lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt') @@ -175,9 +167,7 @@ def test_recommended_emoji_zwj_sequences(): def test_recommended_variation_16_sequences(): - """ - Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt - """ + """Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt.""" # given, lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt') @@ -203,7 +193,7 @@ def test_recommended_variation_16_sequences(): def test_unicode_9_vs16(): - """Verify effect of VS-16 on unicode_version 9.0 and later""" + """Verify effect of VS-16 on unicode_version 9.0 and later.""" phrase = ("\u2640" # FEMALE SIGN "\uFE0F") # VARIATION SELECTOR-16 @@ -220,7 +210,7 @@ def test_unicode_9_vs16(): def test_unicode_8_vs16(): - """Verify that VS-16 has no effect on unicode_version 8.0 and earler""" + """Verify that VS-16 has no effect on unicode_version 8.0 and earler.""" phrase = ("\u2640" # FEMALE SIGN "\uFE0F") # VARIATION SELECTOR-16 diff --git a/tests/test_table_integrity.py b/tests/test_table_integrity.py index e6804981..8dbfe169 100644 --- a/tests/test_table_integrity.py +++ b/tests/test_table_integrity.py @@ -1,6 +1,4 @@ -""" -Executes verify-table-integrity.py as a unit test. -""" +"""Executes verify-table-integrity.py as a unit test.""" # std imports import os import sys diff --git a/tests/test_textwrap.py b/tests/test_textwrap.py index 33879f37..5e8ca461 100644 --- a/tests/test_textwrap.py +++ b/tests/test_textwrap.py @@ -1,7 +1,7 @@ """Tests for sequence-aware text wrapping functions.""" # std imports -import platform import sys +import platform import textwrap # 3rd party @@ -38,9 +38,9 @@ def _adjust_stdlib_result(expected, kwargs): """ Adjust stdlib textwrap result for known bugs in older Python versions. - CPython #140627: Older versions leave trailing whitespace and preceding - all-whitespace lines when drop_whitespace=True. Fixed in 3.13.11+, 3.14.2+, - and 3.15+. We always strip to normalize across versions. + CPython #140627: Older versions leave trailing whitespace and preceding all-whitespace lines + when drop_whitespace=True. Fixed in 3.13.11+, 3.14.2+, and 3.15+. We always strip to normalize + across versions. """ if not expected: return expected diff --git a/tests/test_ucslevel.py b/tests/test_ucslevel.py index b15fb5f5..a907db2c 100644 --- a/tests/test_ucslevel.py +++ b/tests/test_ucslevel.py @@ -46,7 +46,11 @@ def test_exact_410_unicode(): def test_nearest_505_str(): - """wcwidth._wcmatch_version('5.0.5') returns nearest '5.0.0'. (str)""" + """ + wcwidth._wcmatch_version('5.0.5') returns nearest '5.0.0'. + + (str) + """ # given given, expected = '5.0.5', '5.0.0' @@ -58,7 +62,11 @@ def test_nearest_505_str(): def test_nearest_505_unicode(): - """wcwidth._wcmatch_version(u'5.0.5') returns nearest u'5.0.0'. (unicode)""" + """ + wcwidth._wcmatch_version(u'5.0.5') returns nearest u'5.0.0'. + + (unicode) + """ # given given, expected = '5.0.5', '5.0.0' diff --git a/tox.ini b/tox.ini index 1cc8c03a..0a4bf6f7 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = update, compile, autopep8, docformatter, docformatter_check, isort, isort_check, pylint, pylint_tests, flake8, flake8_tests, pydocstyle, mypy, codespell, lint, docs, verify_tables, py{38, 39, 310, 311, 312, 313, 314}, pypy{38, 39, 310, 311} +envlist = update, compile, autopep8, docformatter, docformatter_check, isort, isort_check, pylint, pylint_tests, flake8, flake8_tests, pydocstyle, mypy, codespell, format, lint, docs, verify_tables, py{38, 39, 310, 311, 312, 313, 314}, pypy{38, 39, 310, 311} skip_missing_interpreters = true [base] @@ -106,7 +106,7 @@ commands = python {toxinidir}/bin/update-tables.py {posargs:--fetch-all-versions basepython = python3.11 deps = autopep8 commands = - {envbindir}/autopep8 \ + autopep8 \ --in-place \ --recursive \ --aggressive \ @@ -116,25 +116,25 @@ commands = [testenv:isort] deps = isort basepython = python3.13 -commands = {envbindir}/isort --quiet --apply --recursive wcwidth tests bin +commands = isort --quiet --apply --recursive wcwidth tests bin [testenv:pylint] basepython = python3.13 deps = pylint -commands = {envbindir}/pylint --rcfile={toxinidir}/.pylintrc \ +commands = pylint --rcfile={toxinidir}/.pylintrc \ --ignore=tests,docs,conf.py,build,distutils,.pyenv,.git,.tox \ {posargs:{toxinidir}}/wcwidth [testenv:flake8] basepython = python3.13 deps = flake8 -commands = {envbindir}/flake8 --exclude=tests docs/ wcwidth/ bin/ tests/ +commands = flake8 --exclude=tests docs/ wcwidth/ bin/ tests/ [testenv:docs] # matches .readthedocs.yaml and environment basepython = python3.12 deps = -r {toxinidir}/docs/requirements.txt -commands = sphinx-build docs/ build/sphinx +commands = sphinx-build -W docs/ build/sphinx [testenv:verify_tables] basepython = python3.13 @@ -143,7 +143,7 @@ commands = python {toxinidir}/bin/verify-table-integrity.py [testenv:linkcheck] basepython = python3.11 deps = -r {toxinidir}/docs/requirements.txt -commands = {envbindir}/sphinx-build -v -W -d {toxinidir}/docs/_build/doctrees -b linkcheck docs docs/_build/linkcheck +commands = sphinx-build -v -W -d {toxinidir}/docs/_build/doctrees -b linkcheck docs docs/_build/linkcheck [testenv:codecov] basepython = python{env:TOXPYTHON:{env:TRAVIS_PYTHON_VERSION:3.10}} @@ -211,6 +211,15 @@ commands = codespell --skip="*.pyc,htmlcov,_build,build,*.egg-info,.tox" \ --uri-ignore-words-list '*' \ --summary --count +[testenv:format] +basepython = python3.13 +deps = {[testenv:isort]deps} + {[testenv:docformatter]deps} + {[testenv:autopep8]deps} +commands = {[testenv:isort]commands} + {[testenv:docformatter]commands} + {[testenv:autopep8]commands} + [testenv:lint] basepython = python3.13 deps = {[testenv:flake8]deps} diff --git a/wcwidth/__init__.py b/wcwidth/__init__.py index d10fe501..106816aa 100644 --- a/wcwidth/__init__.py +++ b/wcwidth/__init__.py @@ -11,18 +11,18 @@ # local from .wcwidth import ZERO_WIDTH # noqa from .wcwidth import (WIDE_EASTASIAN, - VS16_NARROW_TO_WIDE, AMBIGUOUS_EASTASIAN, + VS16_NARROW_TO_WIDE, + clip, ljust, rjust, width, center, wcwidth, wcswidth, - clip, - strip_sequences, list_versions, iter_sequences, + strip_sequences, _wcmatch_version, _wcversion_value) from .bisearch import bisearch as _bisearch diff --git a/wcwidth/control_codes.py b/wcwidth/control_codes.py index a084121a..3a6fff76 100644 --- a/wcwidth/control_codes.py +++ b/wcwidth/control_codes.py @@ -1,8 +1,8 @@ """ Control character sets for terminal handling. -This module provides the control character sets used by the width() function -to handle terminal control characters. +This module provides the control character sets used by the width() function to handle terminal +control characters. """ # Illegal C0/C1 control characters. diff --git a/wcwidth/escape_sequences.py b/wcwidth/escape_sequences.py index fa871d3c..7d7dc36b 100644 --- a/wcwidth/escape_sequences.py +++ b/wcwidth/escape_sequences.py @@ -1,10 +1,9 @@ """ Terminal escape sequence patterns. -This module provides regex patterns for matching terminal escape sequences. -All patterns match sequences that begin with ESC (\\x1b). Before calling -re.match with these patterns, callers should first check that the character -at the current position is ESC for optimal performance. +This module provides regex patterns for matching terminal escape sequences. All patterns match +sequences that begin with ESC (\\x1b). Before calling re.match with these patterns, callers should +first check that the character at the current position is ESC for optimal performance. """ # std imports import re diff --git a/wcwidth/grapheme.py b/wcwidth/grapheme.py index aa466270..6b3296d3 100644 --- a/wcwidth/grapheme.py +++ b/wcwidth/grapheme.py @@ -1,8 +1,8 @@ """ Grapheme cluster segmentation following Unicode Standard Annex #29. -This module provides pure-Python implementation of the grapheme cluster -boundary algorithm as defined in UAX #29: Unicode Text Segmentation. +This module provides pure-Python implementation of the grapheme cluster boundary algorithm as +defined in UAX #29: Unicode Text Segmentation. https://www.unicode.org/reports/tr29/ """ @@ -118,8 +118,8 @@ def _simple_break_check(prev_gcb: GCB, curr_gcb: GCB) -> Optional[BreakResult]: """ Check simple GCB-pair-based break rules (cacheable). - Returns BreakResult for rules that can be determined from GCB properties alone, - or None if complex lookback rules (GB9c, GB11) need to be checked. + Returns BreakResult for rules that can be determined from GCB properties alone, or None if + complex lookback rules (GB9c, GB11) need to be checked. """ # GB3: CR x LF if prev_gcb == GCB.CR and curr_gcb == GCB.LF: diff --git a/wcwidth/table_grapheme.py b/wcwidth/table_grapheme.py index 346590bf..9a52ce9e 100644 --- a/wcwidth/table_grapheme.py +++ b/wcwidth/table_grapheme.py @@ -1,8 +1,8 @@ """ Exports grapheme cluster break property tables for Unicode version 17.0.0. -This module provides lookup tables for Unicode grapheme cluster break properties -as defined in UAX #29: Unicode Text Segmentation. +This module provides lookup tables for Unicode grapheme cluster break properties as defined in UAX +#29: Unicode Text Segmentation. This code generated by wcwidth/bin/update-tables.py on 2026-01-14 17:07:21 UTC. """ diff --git a/wcwidth/textwrap.py b/wcwidth/textwrap.py index c4300019..f8a5f256 100644 --- a/wcwidth/textwrap.py +++ b/wcwidth/textwrap.py @@ -1,9 +1,8 @@ """ Sequence-aware text wrapping functions. -This module provides functions for wrapping text that may contain -terminal escape sequences, with proper handling of Unicode grapheme -clusters and character display widths. +This module provides functions for wrapping text that may contain terminal escape sequences, with +proper handling of Unicode grapheme clusters and character display widths. """ # std imports import textwrap @@ -76,10 +75,10 @@ def _split(self, text: str) -> List[str]: """ Sequence-aware variant of :meth:`textwrap.TextWrapper._split`. - This method ensures that terminal escape sequences don't interfere - with the text splitting logic, particularly for hyphen-based word - breaking. It builds a position mapping from stripped text to original - text, calls the parent's _split on stripped text, then maps chunks back. + This method ensures that terminal escape sequences don't interfere with the text splitting + logic, particularly for hyphen-based word breaking. It builds a position mapping from + stripped text to original text, calls the parent's _split on stripped text, then maps chunks + back. """ # Build a mapping from stripped text positions to original text positions. # We track where each character ENDS so that sequences between characters @@ -140,8 +139,8 @@ def _wrap_chunks(self, chunks: List[str]) -> List[str]: """ Wrap chunks into lines using sequence-aware width. - Override TextWrapper._wrap_chunks to use _width instead of len. - Follows stdlib's algorithm: greedily fill lines, handle long words. + Override TextWrapper._wrap_chunks to use _width instead of len. Follows stdlib's algorithm: + greedily fill lines, handle long words. """ if not chunks: return [] @@ -220,8 +219,8 @@ def _handle_long_word(self, reversed_chunks: List[str], """ Sequence-aware :meth:`textwrap.TextWrapper._handle_long_word`. - This method ensures that word boundaries are not broken mid-sequence, - and respects grapheme cluster boundaries when breaking long words. + This method ensures that word boundaries are not broken mid-sequence, and respects grapheme + cluster boundaries when breaking long words. """ if width < 1: space_left = 1 diff --git a/wcwidth/wcwidth.py b/wcwidth/wcwidth.py index 277d4e0d..6bd5fe57 100644 --- a/wcwidth/wcwidth.py +++ b/wcwidth/wcwidth.py @@ -72,7 +72,9 @@ from .table_wide import WIDE_EASTASIAN from .table_zero import ZERO_WIDTH from .table_ambiguous import AMBIGUOUS_EASTASIAN + _AMBIGUOUS_TABLE = AMBIGUOUS_EASTASIAN[next(iter(AMBIGUOUS_EASTASIAN))] +# local from .control_codes import ILLEGAL_CTRL, VERTICAL_CTRL, HORIZONTAL_CTRL, ZERO_WIDTH_CTRL from .escape_sequences import (ZERO_WIDTH_PATTERN, CURSOR_LEFT_SEQUENCE, @@ -738,6 +740,7 @@ def clip(text, start, end, fillchar=' ', tabsize=8, ambiguous_width=1): >>> clip('a\\tb', 0, 10) # Tab expanded to spaces 'a b' """ + # local from .grapheme import iter_graphemes if start < 0: From 206d14a659f7549e5d3f5a1100d1b626ca0c745d Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Mon, 19 Jan 2026 20:09:28 -0500 Subject: [PATCH 3/6] linting, formating --- docs/intro.rst | 2 +- tests/test_textwrap.py | 2 +- tox.ini | 6 ++---- wcwidth/escape_sequences.py | 2 +- wcwidth/grapheme.py | 4 +++- wcwidth/textwrap.py | 2 +- wcwidth/wcwidth.py | 22 +++++++++++----------- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/intro.rst b/docs/intro.rst index bf8649f2..75d70f95 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -244,7 +244,7 @@ sequence-aware wrapping with full control over wrapping behavior. clip() ------ -Use `clip()`_ to extract a substring by the column positions displayed, preserving terminal sequences. +Use `clip()`_ to extract a substring by column positions, preserving terminal sequences. .. code-block:: python diff --git a/tests/test_textwrap.py b/tests/test_textwrap.py index 5e8ca461..1f62e29a 100644 --- a/tests/test_textwrap.py +++ b/tests/test_textwrap.py @@ -8,7 +8,7 @@ import pytest # local -from wcwidth import width, iter_sequences +from wcwidth import iter_sequences from wcwidth.textwrap import SequenceTextWrapper, wrap SGR_RED = '\x1b[31m' diff --git a/tox.ini b/tox.ini index 0a4bf6f7..cecf04a5 100644 --- a/tox.ini +++ b/tox.ini @@ -154,12 +154,10 @@ commands = codecov -e TOXENV [testenv:pydocstyle] basepython = python3.13 deps = pydocstyle - restructuredtext_lint doc8 pygments commands = pydocstyle --source --explain {toxinidir}/wcwidth - rst-lint README.rst - doc8 --ignore-path docs/_build --ignore D000 docs + doc8 --ignore-path docs/_build --ignore-path docs/requirements.txt --ignore D000 --max-line-length 100 docs [testenv:docformatter] basepython = python3.13 @@ -184,7 +182,7 @@ commands = isort --diff --check-only wcwidth tests bin [testenv:flake8_tests] basepython = python3.13 deps = flake8 -commands = flake8 --ignore=W504,F401 tests/ +commands = flake8 --ignore=E501,W504,F401 tests/ [testenv:pylint_tests] basepython = python3.13 diff --git a/wcwidth/escape_sequences.py b/wcwidth/escape_sequences.py index 7d7dc36b..ec51bd3b 100644 --- a/wcwidth/escape_sequences.py +++ b/wcwidth/escape_sequences.py @@ -1,4 +1,4 @@ -""" +r""" Terminal escape sequence patterns. This module provides regex patterns for matching terminal escape sequences. All patterns match diff --git a/wcwidth/grapheme.py b/wcwidth/grapheme.py index 6b3296d3..fed1b018 100644 --- a/wcwidth/grapheme.py +++ b/wcwidth/grapheme.py @@ -32,6 +32,7 @@ class GCB(IntEnum): """Grapheme Cluster Break property values.""" + OTHER = 0 CR = 1 LF = 2 @@ -109,6 +110,7 @@ def _is_incb_extend(ucs: int) -> bool: class BreakResult(NamedTuple): """Result of grapheme cluster break decision.""" + should_break: bool ri_count: int @@ -234,7 +236,7 @@ def iter_graphemes( start: int = 0, end: Optional[int] = None, ) -> Iterator[str]: - """ + r""" Iterate over grapheme clusters in a Unicode string. Grapheme clusters are "user-perceived characters" - what a user would diff --git a/wcwidth/textwrap.py b/wcwidth/textwrap.py index f8a5f256..50998ae1 100644 --- a/wcwidth/textwrap.py +++ b/wcwidth/textwrap.py @@ -322,7 +322,7 @@ def wrap(text: str, width: int = 70, *, subsequent_indent: str = '', break_long_words: bool = True, break_on_hyphens: bool = True) -> List[str]: - """ + r""" Wrap text to fit within given width, returning a list of wrapped lines. Like :func:`textwrap.wrap`, but measures width in display cells rather than diff --git a/wcwidth/wcwidth.py b/wcwidth/wcwidth.py index 6bd5fe57..59126f66 100644 --- a/wcwidth/wcwidth.py +++ b/wcwidth/wcwidth.py @@ -71,16 +71,16 @@ from .table_vs16 import VS16_NARROW_TO_WIDE from .table_wide import WIDE_EASTASIAN from .table_zero import ZERO_WIDTH -from .table_ambiguous import AMBIGUOUS_EASTASIAN - -_AMBIGUOUS_TABLE = AMBIGUOUS_EASTASIAN[next(iter(AMBIGUOUS_EASTASIAN))] -# local from .control_codes import ILLEGAL_CTRL, VERTICAL_CTRL, HORIZONTAL_CTRL, ZERO_WIDTH_CTRL +from .table_ambiguous import AMBIGUOUS_EASTASIAN from .escape_sequences import (ZERO_WIDTH_PATTERN, CURSOR_LEFT_SEQUENCE, CURSOR_RIGHT_SEQUENCE, INDETERMINATE_EFFECT_SEQUENCE) from .unicode_versions import list_versions +from .grapheme import iter_graphemes + +_AMBIGUOUS_TABLE = AMBIGUOUS_EASTASIAN[next(iter(AMBIGUOUS_EASTASIAN))] # Translation table to strip C0/C1 control characters for fast 'ignore' mode. _CONTROL_CHAR_TABLE = str.maketrans('', '', ( @@ -349,7 +349,7 @@ def _wcmatch_version(given_version): def iter_sequences(text): - """ + r""" Iterate through text, yielding segments with sequence identification. This generator yields tuples of ``(segment, is_sequence)`` for each part @@ -414,7 +414,7 @@ def _width_ignored_codes(text, ambiguous_width=1): def width(text, control_codes='parse', tabsize=8, ambiguous_width=1): - """ + r""" Return printable width of text containing many kinds of control codes and sequences. Unlike :func:`wcswidth`, this function handles most control characters and many popular terminal @@ -566,7 +566,7 @@ def width(text, control_codes='parse', tabsize=8, ambiguous_width=1): def ljust(text, dest_width, fillchar=' ', control_codes='parse', ambiguous_width=1): - """ + r""" Return text left-justified in a string of given display width. :param str text: String to justify, may contain terminal sequences. @@ -601,7 +601,7 @@ def ljust(text, dest_width, fillchar=' ', control_codes='parse', ambiguous_width def rjust(text, dest_width, fillchar=' ', control_codes='parse', ambiguous_width=1): - """ + r""" Return text right-justified in a string of given display width. :param str text: String to justify, may contain terminal sequences. @@ -636,7 +636,7 @@ def rjust(text, dest_width, fillchar=' ', control_codes='parse', ambiguous_width def center(text, dest_width, fillchar=' ', control_codes='parse', ambiguous_width=1): - """ + r""" Return text centered in a string of given display width. :param str text: String to center, may contain terminal sequences. @@ -676,7 +676,7 @@ def center(text, dest_width, fillchar=' ', control_codes='parse', ambiguous_widt def strip_sequences(text): - """ + r""" Return text with all terminal escape sequences removed. This is a simple wrapper around :func:`iter_sequences` that concatenates @@ -701,7 +701,7 @@ def strip_sequences(text): def clip(text, start, end, fillchar=' ', tabsize=8, ambiguous_width=1): - """ + r""" Clip text to display columns ``(start, end)`` while preserving all terminal sequences. This function extracts a substring based on visible column positions rather than From 3123e47461ec8744ff070b86383d8c9b7e731ed2 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Mon, 19 Jan 2026 20:40:30 -0500 Subject: [PATCH 4/6] linting --- tests/test_emojis.py | 24 ++++++++++++------------ tests/test_width.py | 2 +- tox.ini | 5 +++-- wcwidth/textwrap.py | 5 +++-- wcwidth/wcwidth.py | 8 +++----- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/test_emojis.py b/tests/test_emojis.py index 5072027b..20245049 100644 --- a/tests/test_emojis.py +++ b/tests/test_emojis.py @@ -1,9 +1,13 @@ +"""Tests for emoji width measurement and ZWJ sequences.""" # std imports import os # 3rd party import pytest +# local +import wcwidth + # some tests cannot be done on some builds of python, where the internal # unicode structure is limited to 0x10000 for memory conservation, # "ValueError: unichr() arg not in range(0x10000) (narrow Python build)" @@ -13,9 +17,6 @@ except ValueError: NARROW_ONLY = True -# local -import wcwidth - def make_sequence_from_line(line): # convert '002A FE0F ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f) @@ -28,7 +29,7 @@ def emoji_zwj_sequence(): phrase = ("\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN "\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 "\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER - "\U0001f4bb") # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER + "\U0001f4bb") # Fused, Category So, East Asian Width property 'W' -- PERSONAL COMPUTER # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf expect_length_each = (2, 0, 0, 2) expect_length_phrase = 2 @@ -132,11 +133,10 @@ def test_longer_emoji_zwj_sequence(): def read_sequences_from_file(filename): - fp = open(os.path.join(os.path.dirname(__file__), filename), encoding='utf-8') - lines = [line.strip() - for line in fp.readlines() - if not line.startswith('#') and line.strip()] - fp.close() + with open(os.path.join(os.path.dirname(__file__), filename), encoding='utf-8') as fp: + lines = [line.strip() + for line in fp.readlines() + if not line.startswith('#') and line.strip()] sequences = [make_sequence_from_line(line) for line in lines] return lines, sequences @@ -162,7 +162,7 @@ def test_recommended_emoji_zwj_sequences(): }) # verify - assert errors == [] + assert not errors assert num >= 1468 @@ -188,7 +188,7 @@ def test_recommended_variation_16_sequences(): }) # verify - assert errors == [] + assert not errors assert num >= 742 @@ -210,7 +210,7 @@ def test_unicode_9_vs16(): def test_unicode_8_vs16(): - """Verify that VS-16 has no effect on unicode_version 8.0 and earler.""" + """Verify that VS-16 has no effect on unicode_version 8.0 and earlier.""" phrase = ("\u2640" # FEMALE SIGN "\uFE0F") # VARIATION SELECTOR-16 diff --git a/tests/test_width.py b/tests/test_width.py index f16a78c6..38477092 100644 --- a/tests/test_width.py +++ b/tests/test_width.py @@ -4,6 +4,7 @@ # local import wcwidth +from wcwidth.escape_sequences import ZERO_WIDTH_PATTERN BASIC_WIDTH_CASES = [ ('', 0, 'empty'), @@ -253,7 +254,6 @@ def test_cursor_right_unparameterized(): def test_indeterminate_caps_covered_by_term_seq_pattern(seq, cap_name): """Verify all INDETERMINATE_CAPS sequences are matched by ZERO_WIDTH_PATTERN.""" # local - from wcwidth.escape_sequences import ZERO_WIDTH_PATTERN assert ZERO_WIDTH_PATTERN.match(seq) assert wcwidth.width(seq) == 0 diff --git a/tox.ini b/tox.ini index cecf04a5..24632b29 100644 --- a/tox.ini +++ b/tox.ini @@ -189,7 +189,7 @@ basepython = python3.13 deps = pytest pylint commands = pylint --rcfile={toxinidir}/.pylintrc \ - --disable=invalid-name,import-outside-toplevel,protected-access,unused-argument \ + --disable=protected-access,unused-argument,missing-function-docstring,line-too-long \ tests [testenv:mypy] @@ -205,7 +205,8 @@ warn_unused_ignores = true [testenv:codespell] basepython = python3.13 deps = codespell -commands = codespell --skip="*.pyc,htmlcov,_build,build,*.egg-info,.tox" \ +commands = codespell --skip="*.pyc,htmlcov,_build,build,*.egg-info,.tox,data,*.txt,*.csv,*.ods,table_*.py,docs/specs.rst" \ + --ignore-words-list="thirdparty,claus,oclock,womens,aprox" \ --uri-ignore-words-list '*' \ --summary --count diff --git a/wcwidth/textwrap.py b/wcwidth/textwrap.py index 50998ae1..95b4c388 100644 --- a/wcwidth/textwrap.py +++ b/wcwidth/textwrap.py @@ -71,7 +71,7 @@ def _extract_sequences(self, text: str) -> str: result.append(segment) return ''.join(result) - def _split(self, text: str) -> List[str]: + def _split(self, text: str) -> List[str]: # pylint: disable=too-many-locals """ Sequence-aware variant of :meth:`textwrap.TextWrapper._split`. @@ -104,6 +104,7 @@ def _split(self, text: str) -> List[str]: char_end.append(original_pos) # Use parent's _split on the stripped text + # pylint: disable-next=protected-access stripped_chunks = textwrap.TextWrapper._split(self, stripped_text) # Handle text that contains only sequences (no visible characters). @@ -135,7 +136,7 @@ def _split(self, text: str) -> List[str]: return result - def _wrap_chunks(self, chunks: List[str]) -> List[str]: + def _wrap_chunks(self, chunks: List[str]) -> List[str]: # pylint: disable=too-many-branches """ Wrap chunks into lines using sequence-aware width. diff --git a/wcwidth/wcwidth.py b/wcwidth/wcwidth.py index 59126f66..451cb9ef 100644 --- a/wcwidth/wcwidth.py +++ b/wcwidth/wcwidth.py @@ -68,6 +68,7 @@ # local from .bisearch import bisearch as _bisearch +from .grapheme import iter_graphemes from .table_vs16 import VS16_NARROW_TO_WIDE from .table_wide import WIDE_EASTASIAN from .table_zero import ZERO_WIDTH @@ -78,7 +79,6 @@ CURSOR_RIGHT_SEQUENCE, INDETERMINATE_EFFECT_SEQUENCE) from .unicode_versions import list_versions -from .grapheme import iter_graphemes _AMBIGUOUS_TABLE = AMBIGUOUS_EASTASIAN[next(iter(AMBIGUOUS_EASTASIAN))] @@ -97,7 +97,7 @@ def wcwidth(wc, unicode_version='auto', ambiguous_width=1): :param str wc: A single Unicode character. :param str unicode_version: A Unicode version number, such as - ``'6.0.0'``. A list of version levels suported by wcwidth + ``'6.0.0'``. A list of version levels supported by wcwidth is returned by :func:`list_versions`. Any version string may be specified without error -- the nearest @@ -740,9 +740,7 @@ def clip(text, start, end, fillchar=' ', tabsize=8, ambiguous_width=1): >>> clip('a\\tb', 0, 10) # Tab expanded to spaces 'a b' """ - # local - from .grapheme import iter_graphemes - + # pylint: disable=too-complex,too-many-locals,too-many-branches,too-many-positional-arguments,consider-using-max-builtin if start < 0: start = 0 if end <= start: From 4cb0c08325e05deec5ba162d1e5ef31ff7c2527d Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Mon, 19 Jan 2026 20:44:30 -0500 Subject: [PATCH 5/6] link to detect_ambiguous_width() --- docs/intro.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/intro.rst b/docs/intro.rst index 75d70f95..6eb0ba8e 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -178,7 +178,7 @@ Use `iter_graphemes()`_ to iterate over *grapheme clusters* of a string. ['o', 'k', '👨\u200d👩\u200d👧'] A grapheme cluster is what a user perceives as a single character, even if it is composed of -multiple Unicode codepoints. This function implements Unicode Standard `Annex #29`_ grapheme cluster +multiple Unicode codepoints. This function implements `Unicode Standard Annex #29`_ grapheme cluster boundary rules. ljust() @@ -639,6 +639,8 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: .. _`clip()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.clip .. _`strip_sequences()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.strip_sequences .. _`iter_sequences()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.iter_sequences +.. _`Unicode Standard Annex #29`: https://www.unicode.org/reports/tr29/ +.. _`Terminal.detect_ambiguous_width()`: https://blessed.readthedocs.io/en/latest/api/terminal.html#blessed.terminal.Terminal.detect_ambiguous_width .. |pypi_downloads| image:: https://img.shields.io/pypi/dm/wcwidth.svg?logo=pypi :alt: Downloads :target: https://pypi.org/project/wcwidth/ From af7e754b2430a79617310e05b5b81d16e7e5b164 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Tue, 20 Jan 2026 10:43:06 -0500 Subject: [PATCH 6/6] small changes --- .gitignore | 2 ++ tox.ini | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e8bf2cc4..c4be467e 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ htmlcov data .DS_Store .idea +.python-version + diff --git a/tox.ini b/tox.ini index 24632b29..fef4af1b 100644 --- a/tox.ini +++ b/tox.ini @@ -224,7 +224,7 @@ basepython = python3.13 deps = {[testenv:flake8]deps} {[testenv:isort_check]deps} {[testenv:pydocstyle]deps} - {[testenv:pylint]deps} + {[testenv:pylint_tests]deps} {[testenv:codespell]deps} commands = {[testenv:flake8]commands} {[testenv:flake8_tests]commands}