Skip to content
Draft
16 changes: 6 additions & 10 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ def build():
)
log( f'build(): mupdf_build_dir={mupdf_build_dir!r}')

# Build rebased `extra` module.
# Build `extra` module.
#
if 'p' in PYMUPDF_SETUP_FLAVOUR:
path_so_leaf = _build_extension(
Expand Down Expand Up @@ -687,9 +687,8 @@ def add(flavour, from_, to_):
# Add Windows .lib files.
mupdf_build_dir2 = _windows_lib_directory(mupdf_local, build_type)
add('d', f'{mupdf_build_dir2}/mupdfcpp{wp.cpu.windows_suffix}.lib', f'{to_dir_d}/lib/')
if mupdf_version_tuple >= (1, 26):
# MuPDF-1.25+ language bindings build also builds libmuthreads.
add('d', f'{mupdf_build_dir2}/libmuthreads.lib', f'{to_dir_d}/lib/')
# MuPDF-1.25+ language bindings build also builds libmuthreads.
add('d', f'{mupdf_build_dir2}/libmuthreads.lib', f'{to_dir_d}/lib/')
elif darwin:
add('p', f'{mupdf_build_dir}/_mupdf.so', to_dir)
add('b', f'{mupdf_build_dir}/libmupdfcpp.so', to_dir)
Expand Down Expand Up @@ -983,10 +982,8 @@ def build_mupdf_unix(
# a system limit, not the actual limit of the current shell, and there
# doesn't seem to be a way to find the current shell's limit.
#
build_prefix = f'PyMuPDF-'
if mupdf_version_tuple >= (1, 26):
# Avoid link command length problems seen on musllinux.
build_prefix = ''
# Avoid link command length problems seen on musllinux.
build_prefix = ''
if pyodide:
build_prefix += 'pyodide-'
else:
Expand Down Expand Up @@ -1094,8 +1091,7 @@ def _build_extension( mupdf_local, mupdf_build_dir, build_type, g_py_limited_api
f'{mupdf_local}/include',
)

# Build rebased extension module.
log('Building PyMuPDF rebased.')
log('Building PyMuPDF extension.')
compile_extra_cpp = ''
if darwin:
# Avoids `error: cannot pass object of non-POD type
Expand Down
50 changes: 19 additions & 31 deletions src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6784,12 +6784,9 @@ def select(self, pyliste):
pdf = _as_pdf_document(self)
# create page sub-pdf via pdf_rearrange_pages2().
#
if mupdf_version_tuple >= (1, 25, 3):
# We use PDF_CLEAN_STRUCTURE_KEEP otherwise we lose structure tree
# which, for example, breaks test_3705.
mupdf.pdf_rearrange_pages2(pdf, pyliste, mupdf.PDF_CLEAN_STRUCTURE_KEEP)
else:
mupdf.pdf_rearrange_pages2(pdf, pyliste)
# We use PDF_CLEAN_STRUCTURE_KEEP because otherwise we lose the structure
# tree, which, for example, breaks test_3705.
mupdf.pdf_rearrange_pages2(pdf, pyliste, mupdf.PDF_CLEAN_STRUCTURE_KEEP)

# remove any existing pages with their kids
self._reset_page_refs()
Expand Down Expand Up @@ -17600,14 +17597,13 @@ def width(self):
if mupdf_version_tuple >= (1, 27, 1):
TEXT_LAZY_VECTORS = mupdf.FZ_STEXT_LAZY_VECTORS

if mupdf_version_tuple >= (1, 26):
TEXT_PARAGRAPH_BREAK = mupdf.FZ_STEXT_PARAGRAPH_BREAK
TEXT_TABLE_HUNT = mupdf.FZ_STEXT_TABLE_HUNT
TEXT_COLLECT_STYLES = mupdf.FZ_STEXT_COLLECT_STYLES
TEXT_USE_GID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_GID_FOR_UNKNOWN_UNICODE
TEXT_CLIP_RECT = mupdf.FZ_STEXT_CLIP_RECT
TEXT_ACCURATE_ASCENDERS = mupdf.FZ_STEXT_ACCURATE_ASCENDERS
TEXT_ACCURATE_SIDE_BEARINGS = mupdf.FZ_STEXT_ACCURATE_SIDE_BEARINGS
TEXT_PARAGRAPH_BREAK = mupdf.FZ_STEXT_PARAGRAPH_BREAK
TEXT_TABLE_HUNT = mupdf.FZ_STEXT_TABLE_HUNT
TEXT_COLLECT_STYLES = mupdf.FZ_STEXT_COLLECT_STYLES
TEXT_USE_GID_FOR_UNKNOWN_UNICODE = mupdf.FZ_STEXT_USE_GID_FOR_UNKNOWN_UNICODE
TEXT_CLIP_RECT = mupdf.FZ_STEXT_CLIP_RECT
TEXT_ACCURATE_ASCENDERS = mupdf.FZ_STEXT_ACCURATE_ASCENDERS
TEXT_ACCURATE_SIDE_BEARINGS = mupdf.FZ_STEXT_ACCURATE_SIDE_BEARINGS

# 2025-05-07: Non-standard names preserved for backwards compatibility.
TEXT_STEXT_SEGMENT = TEXT_SEGMENT
Expand Down Expand Up @@ -20532,8 +20528,7 @@ def __init__(self, rhs=None):
if rhs:
self.size = rhs.size
self.flags = rhs.flags
if mupdf_version_tuple >= (1, 25, 2):
self.char_flags = rhs.char_flags
self.char_flags = rhs.char_flags
self.font = rhs.font
self.argb = rhs.argb
self.asc = rhs.asc
Expand All @@ -20542,17 +20537,15 @@ def __init__(self, rhs=None):
else:
self.size = -1
self.flags = -1
if mupdf_version_tuple >= (1, 25, 2):
self.char_flags = -1
self.char_flags = -1
self.font = ''
self.argb = -1
self.asc = 0
self.desc = 0
self.bidi = 0
def __str__(self):
ret = f'{self.size} {self.flags}'
if mupdf_version_tuple >= (1, 25, 2):
ret += f' {self.char_flags}'
ret += f' {self.char_flags}'
ret += f' {self.font} {self.color} {self.asc} {self.desc}'
return ret

Expand Down Expand Up @@ -20580,9 +20573,8 @@ def __str__(self):
origin = mupdf.FzPoint(ch.m_internal.origin)
style.size = ch.m_internal.size
style.flags = flags
if mupdf_version_tuple >= (1, 25, 2):
# FZ_STEXT_SYNTHETIC is per-char, not per-span.
style.char_flags = ch.m_internal.flags & ~mupdf.FZ_STEXT_SYNTHETIC
# FZ_STEXT_SYNTHETIC is per-char, not per-span.
style.char_flags = ch.m_internal.flags & ~mupdf.FZ_STEXT_SYNTHETIC
style.font = JM_font_name(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
style.argb = ch.m_internal.argb
style.asc = JM_font_ascender(mupdf.FzFont(mupdf.ll_fz_keep_font(ch.m_internal.font)))
Expand All @@ -20591,9 +20583,7 @@ def __str__(self):

if (style.size != old_style.size
or style.flags != old_style.flags
or (mupdf_version_tuple >= (1, 25, 2)
and (style.char_flags != old_style.char_flags)
)
or (style.char_flags != old_style.char_flags)
or style.argb != old_style.argb
or style.font != old_style.font
or style.bidi != old_style.bidi
Expand Down Expand Up @@ -20625,12 +20615,10 @@ def __str__(self):
span[dictkey_size] = style.size
span[dictkey_flags] = style.flags
span[dictkey_bidi] = style.bidi
if mupdf_version_tuple >= (1, 25, 2):
span[dictkey_char_flags] = style.char_flags
span[dictkey_char_flags] = style.char_flags
span[dictkey_font] = JM_EscapeStrFromStr(style.font)
span[dictkey_color] = style.argb & 0xffffff
if mupdf_version_tuple >= (1, 25, 0):
span['alpha'] = style.argb >> 24
span['alpha'] = style.argb >> 24
span["ascender"] = asc
span["descender"] = desc

Expand Down Expand Up @@ -25896,6 +25884,6 @@ def deprecated_function( *args, **kwargs):

__version__ = VersionBind
__doc__ = (
f'PyMuPDF {VersionBind}: Python bindings for the MuPDF {VersionFitz} library (rebased implementation).\n'
f'PyMuPDF {VersionBind}: Python bindings for the MuPDF {VersionFitz} library.\n'
f'Python {sys.version_info[0]}.{sys.version_info[1]} running on {sys.platform} ({64 if sys.maxsize > 2**32 else 32}-bit).\n'
)
18 changes: 2 additions & 16 deletions src/extra.i
Original file line number Diff line number Diff line change
Expand Up @@ -1337,9 +1337,9 @@ static PyObject *lll_JM_get_annot_xref_list(pdf_obj *page_obj)
//------------------------------------------------------------------------
static PyObject* JM_get_annot_xref_list(const mupdf::PdfObj& page_obj)
{
PyObject* names = PyList_New(0);
if (!page_obj.m_internal)
{
PyObject* names = PyList_New(0);
return names;
}
return lll_JM_get_annot_xref_list( page_obj.m_internal);
Expand Down Expand Up @@ -3081,7 +3081,6 @@ mupdf::FzRect JM_make_spanlist(
float size = -1;
unsigned flags = 0;

#if MUPDF_VERSION_GE(1, 25, 2)
/* From mupdf:include/mupdf/fitz/structured-text.h:fz_stext_char::flags, which
uses anonymous enum values:
FZ_STEXT_STRIKEOUT = 1,
Expand All @@ -3092,7 +3091,6 @@ mupdf::FzRect JM_make_spanlist(
FZ_STEXT_CLIPPED = 64
*/
unsigned char_flags = 0;
#endif

const char *font = "";
unsigned argb = 0;
Expand Down Expand Up @@ -3121,25 +3119,17 @@ mupdf::FzRect JM_make_spanlist(
fz_point origin = ch.m_internal->origin;
style.size = ch.m_internal->size;
style.flags = flags;
#if MUPDF_VERSION_GE(1, 25, 2)
/* FZ_STEXT_SYNTHETIC is per-char, not per-span. */
style.char_flags = ch.m_internal->flags & ~FZ_STEXT_SYNTHETIC;
#endif
style.font = JM_font_name(ch.m_internal->font);
#if MUPDF_VERSION_GE(1, 25, 0)
style.argb = ch.m_internal->argb;
#else
style.argb = ch.m_internal->color;
#endif
style.argb = ch.m_internal->argb;
style.asc = JM_font_ascender(ch.m_internal->font);
style.desc = JM_font_descender(ch.m_internal->font);

if (0
|| style.size != old_style.size
|| style.flags != old_style.flags
#if MUPDF_VERSION_GE(1, 25, 2)
|| style.char_flags != old_style.char_flags
#endif
|| style.argb != old_style.argb
|| strcmp(style.font, old_style.font) != 0
|| style.bidi != old_style.bidi
Expand Down Expand Up @@ -3179,14 +3169,10 @@ mupdf::FzRect JM_make_spanlist(
DICT_SETITEM_DROP(span, dictkey_size, Py_BuildValue("f", style.size));
DICT_SETITEM_DROP(span, dictkey_flags, Py_BuildValue("I", style.flags));
DICT_SETITEM_DROP(span, dictkey_bidi, Py_BuildValue("I", style.bidi));
#if MUPDF_VERSION_GE(1, 25, 2)
DICT_SETITEM_DROP(span, dictkey_char_flags, Py_BuildValue("I", style.char_flags));
#endif
DICT_SETITEM_DROP(span, dictkey_font, JM_EscapeStrFromStr(style.font));
DICT_SETITEM_DROP(span, dictkey_color, Py_BuildValue("I", style.argb & 0xffffff));
#if MUPDF_VERSION_GE(1, 25, 0)
DICT_SETITEMSTR_DROP(span, "alpha", Py_BuildValue("I", style.argb >> 24));
#endif
DICT_SETITEMSTR_DROP(span, "ascender", Py_BuildValue("f", asc));
DICT_SETITEMSTR_DROP(span, "descender", Py_BuildValue("f", desc));

Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def install_required_packages():
# We can't run child processes, so rely on required test packages
# already being installed, e.g. in our wheel's <requires_dist>.
return
packages = 'pytest fontTools pymupdf-fonts flake8 pylint codespell'
packages = 'pytest fontTools pymupdf-fonts flake8 pylint codespell mypy'
if platform.system() == 'Windows' and int.bit_length(sys.maxsize+1) == 32:
# No pillow wheel available, and doesn't build easily.
pass
Expand Down
Binary file added tests/resources/test_4751.pdf
Binary file not shown.
11 changes: 2 additions & 9 deletions tests/test_2548.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,20 @@ def test_2548():
_ = page.get_text()
except Exception as ee:
print(f'test_2548: {ee=}')
if hasattr(pymupdf, 'mupdf'):
# Rebased.
expected = "RuntimeError('code=2: cycle in structure tree')"
else:
# Classic.
expected = "RuntimeError('cycle in structure tree')"
expected = "RuntimeError('code=2: cycle in structure tree')"
assert repr(ee) == expected, f'Expected {expected=} but got {repr(ee)=}.'
e = True
wt = pymupdf.TOOLS.mupdf_warnings()
print(f'test_2548(): {wt=}')

# This checks that PyMuPDF 1.23.7 fixes this bug, and that earlier
# versions with an updated MuPDF also fix it.
rebased = hasattr(pymupdf, 'mupdf')
if pymupdf.mupdf_version_tuple >= (1, 27, 1):
expected = ''
elif pymupdf.mupdf_version_tuple >= (1, 27):
expected = 'format error: No common ancestor in structure tree\nstructure tree broken, assume tree is missing'
expected = '\n'.join([expected] * 5)
else:
expected = 'format error: cycle in structure tree\nstructure tree broken, assume tree is missing'
if rebased:
assert wt == expected, f'expected:\n {expected!r}\nwt:\n {wt!r}\n'
assert wt == expected, f'expected:\n {expected!r}\nwt:\n {wt!r}\n'
assert not e
7 changes: 1 addition & 6 deletions tests/test_2904.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,7 @@ def test_2904():
print(f'{pymupdf.mupdf_version_tuple=}: {page_id=} {i=} {e=} {img=}:')
if page_id == 5 and i==3:
assert e
if hasattr(pymupdf, 'mupdf'):
# rebased.
assert str(e) == 'code=8: Failed to read JPX header'
else:
# classic
assert str(e) == 'Failed to read JPX header'
assert str(e) == 'code=8: Failed to read JPX header'
else:
assert not e

Expand Down
23 changes: 8 additions & 15 deletions tests/test_annots.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,16 +308,13 @@ def test_2270():
print(f'{text=}')
print(f'{getattr(textpage, "parent")=}')

if pymupdf.mupdf_version_tuple >= (1, 26):
# Check Annotation.get_textpage()'s <clip> arg.
clip = textBox.rect
clip.x1 = clip.x0 + (clip.x1 - clip.x0) / 3
textpage2 = textBox.get_textpage(clip=clip)
text = textpage2.extractText()
print(f'With {clip=}: {text=}')
assert text == 'ab\n'
else:
assert not hasattr(pymupdf.mupdf, 'FZ_STEXT_CLIP_RECT')
# Check Annotation.get_textpage()'s <clip> arg.
clip = textBox.rect
clip.x1 = clip.x0 + (clip.x1 - clip.x0) / 3
textpage2 = textBox.get_textpage(clip=clip)
text = textpage2.extractText()
print(f'With {clip=}: {text=}')
assert text == 'ab\n'


def test_2934_add_redact_annot():
Expand Down Expand Up @@ -492,11 +489,7 @@ def test_4047():

def test_4079():
path = os.path.normpath(f'{__file__}/../../tests/resources/test_4079.pdf')
if pymupdf.mupdf_version_tuple >= (1, 25, 5):
path_after = os.path.normpath(f'{__file__}/../../tests/resources/test_4079_after.pdf')
else:
# 2024-11-27 Expect incorrect behaviour.
path_after = os.path.normpath(f'{__file__}/../../tests/resources/test_4079_after_1.25.pdf')
path_after = os.path.normpath(f'{__file__}/../../tests/resources/test_4079_after.pdf')

path_out = os.path.normpath(f'{__file__}/../../tests/test_4079_out')
with pymupdf.open(path_after) as document_after:
Expand Down
3 changes: 0 additions & 3 deletions tests/test_barcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@


def test_barcode():
if pymupdf.mupdf_version_tuple < (1, 26):
print(f'Not testing barcode because {pymupdf.mupdf_version=} < 1.26')
return
path = os.path.normpath(f'{__file__}/../../tests/test_barcode_out.pdf')

url = 'http://artifex.com'
Expand Down
6 changes: 1 addition & 5 deletions tests/test_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,12 @@

def test_codespell():
'''
Check rebased Python code with codespell.
Check Python code with codespell.
'''
if os.environ.get('PYODIDE_ROOT'):
print('test_codespell(): not running on Pyodide - cannot run child processes.')
return

if not hasattr(pymupdf, 'mupdf'):
print('Not running codespell with classic implementation.')
return

if platform.system() == 'Windows':
# Git commands seem to fail on Github Windows runners.
print(f'test_codespell(): Not running on Windows')
Expand Down
5 changes: 1 addition & 4 deletions tests/test_flake8.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,12 @@

def test_flake8():
'''
Check rebased Python code with flake8.
Check Python code with flake8.
'''
if os.environ.get('PYODIDE_ROOT'):
print('test_flake8(): not running on Pyodide - cannot run child processes.')
return

if not hasattr(pymupdf, 'mupdf'):
print(f'Not running flake8 with classic implementation.')
return
ignores = (
'E123', # closing bracket does not match indentation of opening bracket's line
'E124', # closing bracket does not match visual indentation
Expand Down
11 changes: 2 additions & 9 deletions tests/test_font.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,8 @@ def test_font1():
# Also check we can get font's bbox.
bbox1 = font.bbox
print(f'{bbox1=}')
if hasattr(pymupdf, 'mupdf'):
bbox2 = font.this.fz_font_bbox()
assert bbox2 == bbox1
bbox2 = font.this.fz_font_bbox()
assert bbox2 == bbox1


def test_font2():
Expand Down Expand Up @@ -105,9 +104,6 @@ def test_fontarchive():
]

def test_load_system_font():
if not hasattr(pymupdf, 'mupdf'):
print(f'test_load_system_font(): Not running on classic.')
return
trace = list()
def font_f(name, bold, italic, needs_exact_metrics):
trace.append((name, bold, italic, needs_exact_metrics))
Expand All @@ -130,9 +126,6 @@ def f_fallback(script, language, serif, bold, italic):


def test_mupdf_subset_fonts2():
if not hasattr(pymupdf, 'mupdf'):
print('Not running on rebased.')
return
path = os.path.abspath(f'{__file__}/../../tests/resources/2.pdf')
with pymupdf.open(path) as doc:
n = len(doc)
Expand Down
Loading