diff --git a/docs/vars.rst b/docs/vars.rst index b186bfa63..7759418ad 100644 --- a/docs/vars.rst +++ b/docs/vars.rst @@ -275,6 +275,10 @@ For the PyMuPDF programmer, some combination (using Python's `|` operator, or si 32768 -- Request collecting text **decoration** properties. This includes text underlining and strikeout. In contrast to public awareness, these are not font properties, but are drawn separately as vector graphics or annotations on top of the text. In addition, the flag bit will also cause MuPDF to detect "fake bold" text. In many cases, Document creators **simulate bold** text by printing the same text multiple times with slight offsets. If this flag is set, such text will be marked as bold in the resulting text spans. +.. py:data:: TEXT_LAZY_VECTORS + + 1048576 -- Delay vector blocks in the extraction slightly to avoid breaking what would otherwise be continuous lines of text. This constant is only defined when PyMuPDF runs with MuPDF 1.27.1 or later. + The following constants represent the default combinations of the above for text extraction and searching: .. py:data:: TEXTFLAGS_TEXT diff --git a/src/__init__.py b/src/__init__.py index 7824eb522..874e59cc4 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -17597,6 +17597,8 @@ def width(self): TEXT_COLLECT_VECTORS = mupdf.FZ_STEXT_COLLECT_VECTORS TEXT_IGNORE_ACTUALTEXT = mupdf.FZ_STEXT_IGNORE_ACTUALTEXT TEXT_SEGMENT = mupdf.FZ_STEXT_SEGMENT +if mupdf_version_tuple >= (1, 27, 1): + TEXT_LAZY_VECTORS = mupdf.FZ_STEXT_LAZY_VECTORS if mupdf_version_tuple >= (1, 26): TEXT_PARAGRAPH_BREAK = mupdf.FZ_STEXT_PARAGRAPH_BREAK diff --git a/tests/test_2548.py b/tests/test_2548.py index 4275a1444..24261fc28 100644 --- a/tests/test_2548.py +++ b/tests/test_2548.py @@ -32,7 +32,7 @@ def test_2548(): # This checks that PyMuPDF 1.23.7 fixes this bug, and also that earlier # versions with updated MuPDF also fix the bug. 
rebased = hasattr(pymupdf, 'mupdf') - if pymupdf.mupdf_version_tuple >= (1, 28): + if pymupdf.mupdf_version_tuple >= (1, 27, 1): expected = '' elif pymupdf.mupdf_version_tuple >= (1, 27): expected = 'format error: No common ancestor in structure tree\nstructure tree broken, assume tree is missing' diff --git a/tests/test_textextract.py b/tests/test_textextract.py index 3553715ee..b5ae3e901 100644 --- a/tests/test_textextract.py +++ b/tests/test_textextract.py @@ -387,7 +387,7 @@ def get_all_page_from_pdf(document, last_page=None): assert texts1 == texts0 wt = pymupdf.TOOLS.mupdf_warnings() - if pymupdf.mupdf_version_tuple >= (1, 28): + if pymupdf.mupdf_version_tuple >= (1, 27, 1): expected = '' assert wt == expected elif pymupdf.mupdf_version_tuple >= (1, 27):