diff --git a/.gitignore b/.gitignore index 64f3f48..fd0d2e5 100644 --- a/.gitignore +++ b/.gitignore @@ -159,4 +159,6 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -.idea/ \ No newline at end of file +.idea/ +/.vscode/settings.json +/tests/testresources/pdfs/private/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..8bd9bac --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,8 @@ +## [unreleased] + +### ๐Ÿš€ Features + +- Added possibility to use subfields in show utils and added padding option to the YoloOnnxDetector +- Added LineOrientation detector model to the TesseractRecognizer +- Added FaceDetector, SignatureDetector +- Added PdfAssembler diff --git a/cliff.toml b/cliff.toml new file mode 100644 index 0000000..b278c49 --- /dev/null +++ b/cliff.toml @@ -0,0 +1,92 @@ +# git-cliff ~ configuration file +# https://git-cliff.org/docs/configuration + + +[changelog] +# A Tera template to be rendered for each release in the changelog. +# See https://keats.github.io/tera/docs/#introduction +body = """ +{% if version %}\ + ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }} +{% else %}\ + ## [unreleased] +{% endif %}\ +{% for group, commits in commits | group_by(attribute="group") %} + ### {{ group | striptags | trim | upper_first }} + {% for commit in commits %} + - {% if commit.scope %}*({{ commit.scope }})* {% endif %}\ + {% if commit.breaking %}[**breaking**] {% endif %}\ + {{ commit.message | upper_first }}\ + {% endfor %} +{% endfor %} +""" +# Remove leading and trailing whitespaces from the changelog's body. +trim = true +# Render body even when there are no releases to process. +render_always = true +# An array of regex based postprocessors to modify the changelog. 
+postprocessors = [ + # Replace the placeholder with a URL. + #{ pattern = '', replace = "https://github.com/orhun/git-cliff" }, +] +# render body even when there are no releases to process +# render_always = true +# output file path +# output = "test.md" + +[git] +# Parse commits according to the conventional commits specification. +# See https://www.conventionalcommits.org +conventional_commits = true +# Exclude commits that do not match the conventional commits specification. +filter_unconventional = true +# Require all commits to be conventional. +# Takes precedence over filter_unconventional. +require_conventional = false +# Split commits on newlines, treating each line as an individual commit. +split_commits = false +# An array of regex based parsers to modify commit messages prior to further processing. +commit_preprocessors = [ + # Replace issue numbers with link templates to be updated in `changelog.postprocessors`. + #{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](/issues/${2}))"}, + # Check spelling of the commit message using https://github.com/crate-ci/typos. + # If the spelling is incorrect, it will be fixed automatically. + #{ pattern = '.*', replace_command = 'typos --write-changes -' }, +] +# Prevent commits that are breaking from being excluded by commit parsers. +protect_breaking_commits = false +# An array of regex based parsers for extracting data from the commit message. +# Assigns commits to groups. +# Optionally sets the commit's scope and can decide to exclude commits from further processing. 
+commit_parsers = [ + { message = "^feat", group = "๐Ÿš€ Features" }, + { message = "^fix", group = "๐Ÿ› Bug Fixes" }, + { message = "^doc", group = "๐Ÿ“š Documentation" }, + { message = "^perf", group = "โšก Performance" }, + { message = "^refactor", group = "๐Ÿšœ Refactor" }, + { message = "^style", group = "๐ŸŽจ Styling" }, + { message = "^test", group = "๐Ÿงช Testing" }, + { message = "^chore\\(release\\): prepare for", skip = true }, + { message = "^chore\\(deps.*\\)", skip = true }, + { message = "^chore\\(pr\\)", skip = true }, + { message = "^chore\\(pull\\)", skip = true }, + { message = "^chore|^ci", group = "โš™๏ธ Miscellaneous Tasks" }, + { body = ".*security", group = "๐Ÿ›ก๏ธ Security" }, + { message = "^revert", group = "โ—€๏ธ Revert" }, + { message = ".*", group = "๐Ÿ’ผ Other" }, +] +# Exclude commits that are not matched by any commit parser. +filter_commits = false +# An array of link parsers for extracting external references, and turning them into URLs, using regex. +link_parsers = [] +# Include only the tags that belong to the current branch. +use_branch_tags = false +# Order releases topologically instead of chronologically. +topo_order = false +# Order commits topologically instead of chronologically. +topo_order_commits = true +# Order of commits in each group/release within the changelog. 
+# Allowed values: newest, oldest +sort_commits = "oldest" +# Process submodules commits +recurse_submodules = false diff --git a/poetry.lock b/poetry.lock index 4ac458f..6a0b811 100644 --- a/poetry.lock +++ b/poetry.lock @@ -526,7 +526,7 @@ files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -markers = {main = "sys_platform == \"win32\" or platform_system == \"Windows\""} +markers = {main = "platform_system == \"Windows\" or sys_platform == \"win32\""} [[package]] name = "coloredlogs" @@ -1140,6 +1140,26 @@ tqdm = "*" [package.extras] test = ["build", "mypy", "pytest", "pytest-xdist", "ruff", "twine", "types-requests", "types-setuptools"] +[[package]] +name = "git-cliff" +version = "2.10.1" +description = "" +optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "git_cliff-2.10.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:49e5808150d526ad6b728313b77636fe962c7ee6729409f2d42aa6cbe323506b"}, + {file = "git_cliff-2.10.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:973962f2486d33ddbb624aa2d2e2d370e03721a164c471601736afce75e0935d"}, + {file = "git_cliff-2.10.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:704ff6cea09fd90dd524a8235143897a2d1c8ef9fb045070275f4cf6b0040616"}, + {file = "git_cliff-2.10.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d72c0bf314014f30ec4287a2dee9f1a14f4c67f73b75991684391e2a98d9b9b"}, + {file = "git_cliff-2.10.1-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:49157fdb81384d4282f918a5a533418c943aae1cfc3cde572adb95cfabcb55a8"}, + {file = "git_cliff-2.10.1-py3-none-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:36e3537450fb2c295fa8b1b6400c72a166022241de294c24c7bee3ae43284a78"}, + {file = 
"git_cliff-2.10.1-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:56f54211a51a8ce3208f3871ee09899607ec539d62aba900b23a53b0be442591"}, + {file = "git_cliff-2.10.1-py3-none-win32.whl", hash = "sha256:c121069d52136889c7e2f8a93ed878f5fdffe5707f76a0badc098c5b6b71fc97"}, + {file = "git_cliff-2.10.1-py3-none-win_amd64.whl", hash = "sha256:893f595bfbea536668eaeb7959025982a577d37ccfab4f6cbc0b9d6e265da93f"}, + {file = "git_cliff-2.10.1.tar.gz", hash = "sha256:2f288e732584e2aff65e86990a12ffeb58898931db96f9b219e016335492da97"}, +] + [[package]] name = "h11" version = "0.14.0" @@ -1191,6 +1211,28 @@ files = [ [package.dependencies] numpy = ">=1.19.3" +[[package]] +name = "hf-xet" +version = "1.1.10" +description = "Fast transfer of large files with the Hugging Face Hub." +optional = false +python-versions = ">=3.8" +groups = ["main", "test"] +markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\"" +files = [ + {file = "hf_xet-1.1.10-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:686083aca1a6669bc85c21c0563551cbcdaa5cf7876a91f3d074a030b577231d"}, + {file = "hf_xet-1.1.10-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:71081925383b66b24eedff3013f8e6bbd41215c3338be4b94ba75fd75b21513b"}, + {file = "hf_xet-1.1.10-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b6bceb6361c80c1cc42b5a7b4e3efd90e64630bcf11224dcac50ef30a47e435"}, + {file = "hf_xet-1.1.10-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:eae7c1fc8a664e54753ffc235e11427ca61f4b0477d757cc4eb9ae374b69f09c"}, + {file = "hf_xet-1.1.10-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0a0005fd08f002180f7a12d4e13b22be277725bc23ed0529f8add5c7a6309c06"}, + {file = "hf_xet-1.1.10-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f900481cf6e362a6c549c61ff77468bd59d6dd082f3170a36acfef2eb6a6793f"}, + {file = "hf_xet-1.1.10-cp37-abi3-win_amd64.whl", hash = 
"sha256:5f54b19cc347c13235ae7ee98b330c26dd65ef1df47e5316ffb1e87713ca7045"}, + {file = "hf_xet-1.1.10.tar.gz", hash = "sha256:408aef343800a2102374a883f283ff29068055c111f003ff840733d3b715bb97"}, +] + +[package.extras] +tests = ["pytest"] + [[package]] name = "httpcore" version = "1.0.7" @@ -1240,19 +1282,20 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "huggingface-hub" -version = "0.28.1" +version = "0.35.3" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" groups = ["main", "test"] files = [ - {file = "huggingface_hub-0.28.1-py3-none-any.whl", hash = "sha256:aa6b9a3ffdae939b72c464dbb0d7f99f56e649b55c3d52406f49e0a5a620c0a7"}, - {file = "huggingface_hub-0.28.1.tar.gz", hash = "sha256:893471090c98e3b6efbdfdacafe4052b20b84d59866fb6f54c33d9af18c303ae"}, + {file = "huggingface_hub-0.35.3-py3-none-any.whl", hash = "sha256:0e3a01829c19d86d03793e4577816fe3bdfc1602ac62c7fb220d593d351224ba"}, + {file = "huggingface_hub-0.35.3.tar.gz", hash = "sha256:350932eaa5cc6a4747efae85126ee220e4ef1b54e29d31c3b45c5612ddf0b32a"}, ] [package.dependencies] filelock = "*" fsspec = ">=2023.5.0" +hf-xet = {version = ">=1.1.3,<2.0.0", markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\""} packaging = ">=20.9" pyyaml = ">=5.1" requests = "*" @@ -1260,16 +1303,19 @@ tqdm = ">=4.42.1" typing-extensions = ">=3.7.4.3" [package.extras] -all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "libcst (==1.4.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] 
+all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "libcst (>=1.4.0)", "mypy (==1.15.0) ; python_version >= \"3.9\"", "mypy (>=1.14.1,<1.15.0) ; python_version == \"3.8\"", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures (<16.0)", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "ty", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] cli = ["InquirerPy (==0.3.4)"] -dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "libcst (==1.4.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "libcst (>=1.4.0)", "mypy (==1.15.0) ; python_version >= \"3.9\"", "mypy (>=1.14.1,<1.15.0) ; python_version == \"3.8\"", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures (<16.0)", "pytest-vcr", "pytest-xdist", "ruff (>=0.9.0)", "soundfile", "ty", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] hf-transfer = ["hf-transfer (>=0.1.4)"] +hf-xet = ["hf-xet (>=1.1.2,<2.0.0)"] inference = ["aiohttp"] -quality = ["libcst (==1.4.0)", "mypy (==1.5.1)", "ruff (>=0.9.0)"] +mcp = ["aiohttp", "mcp (>=1.8.0)", 
"typer"] +oauth = ["authlib (>=1.3.2)", "fastapi", "httpx", "itsdangerous"] +quality = ["libcst (>=1.4.0)", "mypy (==1.15.0) ; python_version >= \"3.9\"", "mypy (>=1.14.1,<1.15.0) ; python_version == \"3.8\"", "ruff (>=0.9.0)", "ty"] tensorflow = ["graphviz", "pydot", "tensorflow"] tensorflow-testing = ["keras (<3.0)", "tensorflow"] -testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "authlib (>=1.3.2)", "fastapi", "gradio (>=4.0.0)", "httpx", "itsdangerous", "jedi", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures (<16.0)", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] torch = ["safetensors[torch]", "torch"] typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"] @@ -2799,6 +2845,174 @@ files = [ {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, ] +[[package]] +name = "nvidia-cublas-cu12" +version = "12.1.3.1" +description = "CUBLAS native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main", "test"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"darwin\"" +files = [ + {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"}, + {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-win_amd64.whl", hash = "sha256:2b964d60e8cf11b5e1073d179d85fa340c120e99b3067558f3cf98dd69d02906"}, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.1.105" +description = "CUDA 
profiling tools runtime libs." +optional = false +python-versions = ">=3" +groups = ["main", "test"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"darwin\"" +files = [ + {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"}, + {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:bea8236d13a0ac7190bd2919c3e8e6ce1e402104276e6f9694479e48bb0eb2a4"}, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.1.105" +description = "NVRTC native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main", "test"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"darwin\"" +files = [ + {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"}, + {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:0a98a522d9ff138b96c010a65e145dc1b4850e9ecb75a0172371793752fd46ed"}, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.1.105" +description = "CUDA Runtime native Libraries" +optional = false +python-versions = ">=3" +groups = ["main", "test"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"darwin\"" +files = [ + {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"}, + {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:dfb46ef84d73fababab44cf03e3b83f80700d27ca300e537f85f636fac474344"}, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "9.1.0.70" +description = "cuDNN runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main", "test"] +markers = "platform_system == \"Linux\" and 
platform_machine == \"x86_64\" and sys_platform == \"darwin\"" +files = [ + {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f"}, + {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-win_amd64.whl", hash = "sha256:6278562929433d68365a07a4a1546c237ba2849852c0d4b2262a486e805b977a"}, +] + +[package.dependencies] +nvidia-cublas-cu12 = "*" + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.0.2.54" +description = "CUFFT native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main", "test"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"darwin\"" +files = [ + {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"}, + {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-win_amd64.whl", hash = "sha256:d9ac353f78ff89951da4af698f80870b1534ed69993f10a4cf1d96f21357e253"}, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.2.106" +description = "CURAND native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main", "test"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"darwin\"" +files = [ + {file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"}, + {file = "nvidia_curand_cu12-10.3.2.106-py3-none-win_amd64.whl", hash = "sha256:75b6b0c574c0037839121317e17fd01f8a69fd2ef8e25853d826fec30bdba74a"}, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.4.5.107" +description = "CUDA solver native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main", "test"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"darwin\"" +files = [ + {file = 
"nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"}, + {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-win_amd64.whl", hash = "sha256:74e0c3a24c78612192a74fcd90dd117f1cf21dea4822e66d89e8ea80e3cd2da5"}, +] + +[package.dependencies] +nvidia-cublas-cu12 = "*" +nvidia-cusparse-cu12 = "*" +nvidia-nvjitlink-cu12 = "*" + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.1.0.106" +description = "CUSPARSE native runtime libraries" +optional = false +python-versions = ">=3" +groups = ["main", "test"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"darwin\"" +files = [ + {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"}, + {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-win_amd64.whl", hash = "sha256:b798237e81b9719373e8fae8d4f091b70a0cf09d9d85c95a557e11df2d8e9a5a"}, +] + +[package.dependencies] +nvidia-nvjitlink-cu12 = "*" + +[[package]] +name = "nvidia-nccl-cu12" +version = "2.20.5" +description = "NVIDIA Collective Communication Library (NCCL) Runtime" +optional = false +python-versions = ">=3" +groups = ["main", "test"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"darwin\"" +files = [ + {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01"}, + {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56"}, +] + +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.4.127" +description = "Nvidia JIT LTO Library" +optional = false +python-versions = ">=3" +groups = ["main", "test"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == 
\"darwin\"" +files = [ + {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4abe7fef64914ccfa909bc2ba39739670ecc9e820c83ccc7a6ed414122599b83"}, + {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57"}, + {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:fd9020c501d27d135f983c6d3e244b197a7ccad769e34df53a42e276b0e25fa1"}, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.1.105" +description = "NVIDIA Tools Extension" +optional = false +python-versions = ">=3" +groups = ["main", "test"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"darwin\"" +files = [ + {file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"}, + {file = "nvidia_nvtx_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82"}, +] + [[package]] name = "onnxruntime" version = "1.22.0" @@ -3385,7 +3599,7 @@ description = "Run a subprocess in a pseudo terminal" optional = false python-versions = "*" groups = ["test"] -markers = "os_name != \"nt\" or sys_platform != \"win32\" and sys_platform != \"emscripten\"" +markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\" or os_name != \"nt\"" files = [ {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, @@ -4986,7 +5200,7 @@ files = [ {file = "setuptools-78.1.0-py3-none-any.whl", hash = "sha256:3e386e96793c8702ae83d17b853fb93d3e09ef82ec62722e61da5cd22376dcd8"}, {file = "setuptools-78.1.0.tar.gz", hash = 
"sha256:18fd474d4a82a5f83dac888df697af65afa82dec7323d09c3e37d1f14288da54"}, ] -markers = {main = "python_version >= \"3.12\""} +markers = {main = "sys_platform == \"darwin\" or python_version >= \"3.12\""} [package.extras] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""] @@ -5384,6 +5598,62 @@ files = [ {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, ] +[[package]] +name = "torch" +version = "2.4.1" +description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +optional = false +python-versions = ">=3.8.0" +groups = ["main", "test"] +markers = "sys_platform == \"darwin\"" +files = [ + {file = "torch-2.4.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:362f82e23a4cd46341daabb76fba08f04cd646df9bfaf5da50af97cb60ca4971"}, + {file = "torch-2.4.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:e8ac1985c3ff0f60d85b991954cfc2cc25f79c84545aead422763148ed2759e3"}, + {file = "torch-2.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:91e326e2ccfb1496e3bee58f70ef605aeb27bd26be07ba64f37dcaac3d070ada"}, + {file = "torch-2.4.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:d36a8ef100f5bff3e9c3cea934b9e0d7ea277cb8210c7152d34a9a6c5830eadd"}, + {file = "torch-2.4.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:0b5f88afdfa05a335d80351e3cea57d38e578c8689f751d35e0ff36bce872113"}, + {file = "torch-2.4.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:ef503165f2341942bfdf2bd520152f19540d0c0e34961232f134dc59ad435be8"}, + {file = "torch-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:092e7c2280c860eff762ac08c4bdcd53d701677851670695e0c22d6d345b269c"}, + {file = "torch-2.4.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:ddddbd8b066e743934a4200b3d54267a46db02106876d21cf31f7da7a96f98ea"}, + {file = "torch-2.4.1-cp312-cp312-manylinux1_x86_64.whl", hash = 
"sha256:fdc4fe11db3eb93c1115d3e973a27ac7c1a8318af8934ffa36b0370efe28e042"}, + {file = "torch-2.4.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:18835374f599207a9e82c262153c20ddf42ea49bc76b6eadad8e5f49729f6e4d"}, + {file = "torch-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:ebea70ff30544fc021d441ce6b219a88b67524f01170b1c538d7d3ebb5e7f56c"}, + {file = "torch-2.4.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:72b484d5b6cec1a735bf3fa5a1c4883d01748698c5e9cfdbeb4ffab7c7987e0d"}, + {file = "torch-2.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:c99e1db4bf0c5347107845d715b4aa1097e601bdc36343d758963055e9599d93"}, + {file = "torch-2.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:b57f07e92858db78c5b72857b4f0b33a65b00dc5d68e7948a8494b0314efb880"}, + {file = "torch-2.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:f18197f3f7c15cde2115892b64f17c80dbf01ed72b008020e7da339902742cf6"}, + {file = "torch-2.4.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:5fc1d4d7ed265ef853579caf272686d1ed87cebdcd04f2a498f800ffc53dab71"}, + {file = "torch-2.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:40f6d3fe3bae74efcf08cb7f8295eaddd8a838ce89e9d26929d4edd6d5e4329d"}, + {file = "torch-2.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:c9299c16c9743001ecef515536ac45900247f4338ecdf70746f2461f9e4831db"}, + {file = "torch-2.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:6bce130f2cd2d52ba4e2c6ada461808de7e5eccbac692525337cfb4c19421846"}, + {file = "torch-2.4.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:a38de2803ee6050309aac032676536c3d3b6a9804248537e38e098d0e14817ec"}, +] + +[package.dependencies] +filelock = "*" +fsspec = "*" +jinja2 = "*" +networkx = "*" +nvidia-cublas-cu12 = {version = "12.1.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-cupti-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-nvrtc-cu12 = {version = "12.1.105", 
markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-runtime-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cudnn-cu12 = {version = "9.1.0.70", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cufft-cu12 = {version = "11.0.2.54", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-curand-cu12 = {version = "10.3.2.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +setuptools = "*" +sympy = "*" +triton = {version = "3.0.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\""} +typing-extensions = ">=4.8.0" + +[package.extras] +opt-einsum = ["opt-einsum (>=3.3)"] +optree = ["optree (>=0.11.0)"] + [[package]] name = "torch" version = "2.6.0+cpu" @@ -5391,6 +5661,7 @@ description = "Tensors and Dynamic neural networks in Python with strong GPU acc optional = false python-versions = ">=3.9.0" groups = ["main", "test"] +markers = "sys_platform != \"darwin\"" files = [ {file = "torch-2.6.0+cpu-cp310-cp310-linux_x86_64.whl", hash = "sha256:35a9e78b7e4096968b54c1a198687b981569c50ae93e661aa430f9fd208da102"}, {file = "torch-2.6.0+cpu-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:90832f4d118c566b8652a2196ac695fc1f14cf420db27b5a1b41c7eaaf2141e9"}, @@ -5429,6 +5700,46 @@ type = "legacy" url = "https://download.pytorch.org/whl/cpu" 
reference = "pytorch_cpu" +[[package]] +name = "torchvision" +version = "0.19.1" +description = "image and video datasets and models for torch deep learning" +optional = false +python-versions = ">=3.8" +groups = ["main", "test"] +markers = "sys_platform == \"darwin\"" +files = [ + {file = "torchvision-0.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:54e8513099e6f586356c70f809d34f391af71ad182fe071cc328a28af2c40608"}, + {file = "torchvision-0.19.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:20a1f5e02bfdad7714e55fa3fa698347c11d829fa65e11e5a84df07d93350eed"}, + {file = "torchvision-0.19.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:7b063116164be52fc6deb4762de7f8c90bfa3a65f8d5caf17f8e2d5aadc75a04"}, + {file = "torchvision-0.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:f40b6acabfa886da1bc3768f47679c61feee6bde90deb979d9f300df8c8a0145"}, + {file = "torchvision-0.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:40514282b4896d62765b8e26d7091c32e17c35817d00ec4be2362ea3ba3d1787"}, + {file = "torchvision-0.19.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:5a91be061ae5d6d5b95e833b93e57ca4d3c56c5a57444dd15da2e3e7fba96050"}, + {file = "torchvision-0.19.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:d71a6a6fe3a5281ca3487d4c56ad4aad20ff70f82f1d7c79bcb6e7b0c2af00c8"}, + {file = "torchvision-0.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:70dea324174f5e9981b68e4b7cd524512c106ba64aedef560a86a0bbf2fbf62c"}, + {file = "torchvision-0.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:27ece277ff0f6cdc7fed0627279c632dcb2e58187da771eca24b0fbcf3f8590d"}, + {file = "torchvision-0.19.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:c659ff92a61f188a1a7baef2850f3c0b6c85685447453c03d0e645ba8f1dcc1c"}, + {file = "torchvision-0.19.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:c07bf43c2a145d792ecd9d0503d6c73577147ece508d45600d8aac77e4cdfcf9"}, + {file = "torchvision-0.19.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:b4283d283675556bb0eae31d29996f53861b17cbdcdf3509e6bc050414ac9289"}, + {file = "torchvision-0.19.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4c4e4f5b24ea6b087b02ed492ab1e21bba3352c4577e2def14248cfc60732338"}, + {file = "torchvision-0.19.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:9281d63ead929bb19143731154cd1d8bf0b5e9873dff8578a40e90a6bec3c6fa"}, + {file = "torchvision-0.19.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:4d10bc9083c4d5fadd7edd7b729700a7be48dab4f62278df3bc73fa48e48a155"}, + {file = "torchvision-0.19.1-cp38-cp38-win_amd64.whl", hash = "sha256:ccf085ef1824fb9e16f1901285bf89c298c62dfd93267a39e8ee42c71255242f"}, + {file = "torchvision-0.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:731f434d91586769e255b5d70ed1a4457e0a1394a95f4aacf0e1e7e21f80c098"}, + {file = "torchvision-0.19.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:febe4f14d4afcb47cc861d8be7760ab6a123cd0817f97faf5771488cb6aa90f4"}, + {file = "torchvision-0.19.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:e328309b8670a2e889b2fe76a1c2744a099c11c984da9a822357bd9debd699a5"}, + {file = "torchvision-0.19.1-cp39-cp39-win_amd64.whl", hash = "sha256:6616f12e00a22e7f3fedbd0fccb0804c05e8fe22871668f10eae65cf3f283614"}, +] + +[package.dependencies] +numpy = "*" +pillow = ">=5.3.0,<8.3.dev0 || >=8.4.dev0" +torch = "2.4.1" + +[package.extras] +gdown = ["gdown (>=4.7.3)"] +scipy = ["scipy"] + [[package]] name = "torchvision" version = "0.21.0+cpu" @@ -5436,6 +5747,7 @@ description = "image and video datasets and models for torch deep learning" optional = false python-versions = ">=3.9" groups = ["main", "test"] +markers = "sys_platform != \"darwin\"" files = [ {file = "torchvision-0.21.0+cpu-cp310-cp310-linux_x86_64.whl", hash = "sha256:4ed0a1be50676a7c589ba83b62c9dc0267a87e852b8cd9b7d6db27ab36c6d552"}, {file = "torchvision-0.21.0+cpu-cp310-cp310-win_amd64.whl", hash = "sha256:554ca0f5948ac89911299f8bfb6f23936d867387ea213ab235adc2814b510d0c"}, @@ -5594,6 +5906,30 
@@ torchhub = ["filelock", "huggingface-hub (>=0.26.0,<1.0)", "importlib-metadata", video = ["av"] vision = ["Pillow (>=10.0.1,<=15.0)"] +[[package]] +name = "triton" +version = "3.0.0" +description = "A language and compiler for custom Deep Learning operations" +optional = false +python-versions = "*" +groups = ["main", "test"] +markers = "python_version <= \"3.12\" and platform_system == \"Linux\" and platform_machine == \"x86_64\" and sys_platform == \"darwin\"" +files = [ + {file = "triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1efef76935b2febc365bfadf74bcb65a6f959a9872e5bddf44cc9e0adce1e1a"}, + {file = "triton-3.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5ce8520437c602fb633f1324cc3871c47bee3b67acf9756c1a66309b60e3216c"}, + {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"}, + {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"}, + {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"}, +] + +[package.dependencies] +filelock = "*" + +[package.extras] +build = ["cmake (>=3.20)", "lit"] +tests = ["autopep8", "flake8", "isort", "llnl-hatchet", "numpy", "pytest", "scipy (>=1.7.1)"] +tutorials = ["matplotlib", "pandas", "tabulate"] + [[package]] name = "types-python-dateutil" version = "2.9.0.20241206" @@ -5670,8 +6006,8 @@ requests = ">=2.23.0" scipy = ">=1.4.1" seaborn = ">=0.11.0" torch = [ - {version = ">=1.8.0,<2.4.0 || >2.4.0", markers = "sys_platform == \"win32\""}, {version = ">=1.8.0", markers = "sys_platform != \"win32\""}, + {version = ">=1.8.0,<2.4.0 || >2.4.0", markers = "sys_platform == \"win32\""}, ] torchvision = ">=0.9.0" tqdm = 
">=4.64.0" @@ -5910,11 +6246,11 @@ files = [ [extras] llm = [] -ml = ["torch", "torchvision", "transformers"] +ml = ["transformers"] ocr = ["easyocr", "python-doctr", "surya-ocr"] paddle = [] [metadata] lock-version = "2.1" python-versions = "^3.10" -content-hash = "dbc223fa004895653ea3ee28ab16deef00cc87824450ddc9149056d6bd549ff0" +content-hash = "8dd2983084085f5f23e928b2644fde1da1ae8dcd8a783ade3fd3f450ccf2db65" diff --git a/pyproject.toml b/pyproject.toml index 8a13f38..dc67e5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "scaledp" -version = "0.2.4rc5" +version = "0.2.4rc11" description = "ScaleDP is a library for processing documents using Apache Spark and LLMs" authors = ["Mykola Melnyk "] repository = "https://github.com/StabRise/scaledp" @@ -20,21 +20,24 @@ pytesseract = "0.3.13" pytest = "^7.4.4" PyMuPDF = "1.24.11" numpy = "^1.26.4" -pyarrow = "17.0.0" #18.1.0 +pyarrow = ">=17.0.0" #18.1.0 filelock = "*" surya-ocr = {version = "0.8.1", optional = true} easyocr = {version = "1.7.2", optional = true} python-doctr = {version = "0.10.0", optional = true} transformers = {version = "^4.36.0", optional = true} -torchvision= {version= ">=0.18.0", source = "pytorch_cpu", optional = true} +torchvision= [ + {version = ">=0.18.0", markers="sys_platform == 'darwin'", optional = true }, + {version= ">=0.18.0", markers="sys_platform != 'darwin'", source = "pytorch_cpu", optional = true} + ] torch = [ - #{version = "==2.2.0", platform = "darwin", optional = true }, - {version = ">=2.4.1", source = "pytorch_cpu", optional = true} + {version = "==2.4.1", markers="sys_platform == 'darwin'", optional = true }, + {version = ">=2.4.1", markers="sys_platform != 'darwin'", source = "pytorch_cpu", optional = true} ] #dspy = {version = "2.5.43", optional = true} levenshtein = "^0.27.1" pydantic = ">=1.8.0" -huggingface-hub = "^0.28.1" +huggingface-hub = "^0.35.3" tenacity = ">=8.2.3" openai = ">=1.58.0" sparkdantic = "^2.0.0" @@ -46,7 +49,10 
@@ onnxruntime = "1.22.0" [tool.poetry.extras] -ml = ["transformers", "torch", "torchvision"] +ml = ["transformers", + #"torch", + #"torchvision" +] ocr = ["easyocr", "python-doctr", "surya-ocr"] llm = ["dspy"] paddle = ["paddleocr", "paddlepaddle",] @@ -67,8 +73,12 @@ pytest-resource-path = "1.3.0" coverage = "7.6.4" pytest-cov="5.0.0" transformers = "^4.36.0" -torchvision= {version= ">=0.18.0", source = "pytorch_cpu"} -torch = {version= ">=2.4.1", source = "pytorch_cpu"} +#torchvision= {version= ">=0.18.0", source = "pytorch_cpu"} +#torch = {version= ">=2.4.1", source = "pytorch_cpu"} +torch = [ + {version = "==2.4.1", markers="sys_platform == 'darwin'", optional = true }, + {version = ">=2.4.1", markers="sys_platform != 'darwin'", source = "pytorch_cpu", optional = true} +] python-doctr = "0.10.0" surya-ocr = "0.8.1" black = "^24.10.0" @@ -77,6 +87,10 @@ pre-commit = "^3.7.1" ruff = "^0.5.0" craft-text-detector-updated = "^0.4.7" + +[tool.poetry.group.dev.dependencies] +git-cliff = "^2.10.1" + [build-system] #requires = ["poetry-core<2.0.0"] requires = ["poetry-core>=1.0.0"] diff --git a/scaledp/README.md b/scaledp/README.md index 74db88f..974c514 100644 --- a/scaledp/README.md +++ b/scaledp/README.md @@ -37,9 +37,13 @@ ```bash poetry version patch - poetry publish --build ``` +### Publish + +```bash +poetry publish --build +``` ## Pre-commit @@ -53,3 +57,12 @@ To run pre-commit on all files: pre-commit run --all-files ``` +## Update changelogs + +```bash + poetry run git cliff --unreleased -o +``` + +## Deps + +crafter diff --git a/scaledp/__init__.py b/scaledp/__init__.py index 74644e5..192ebdb 100644 --- a/scaledp/__init__.py +++ b/scaledp/__init__.py @@ -13,6 +13,7 @@ from scaledp.image.ImageCropBoxes import ImageCropBoxes from scaledp.image.ImageDrawBoxes import ImageDrawBoxes from scaledp.models.detectors.DocTRTextDetector import DocTRTextDetector +from scaledp.models.detectors.FaceDetector import FaceDetector from scaledp.models.detectors.LayoutDetector 
import LayoutDetector from scaledp.models.detectors.SignatureDetector import SignatureDetector from scaledp.models.detectors.YoloDetector import YoloDetector @@ -28,7 +29,12 @@ from scaledp.models.recognizers.SuryaOcr import SuryaOcr from scaledp.models.recognizers.TesseractOcr import TesseractOcr from scaledp.models.recognizers.TesseractRecognizer import TesseractRecognizer +from scaledp.pdf.PdfAddTextLayer import PdfAddTextLayer +from scaledp.pdf.PdfAssembler import PdfAssembler +from scaledp.pdf.PdfDataToDocument import PdfDataToDocument from scaledp.pdf.PdfDataToImage import PdfDataToImage +from scaledp.pdf.PdfDataToSingleImage import PdfDataToSingleImage +from scaledp.pdf.SingleImageToPdf import SingleImageToPdf from scaledp.text.TextToDocument import TextToDocument from scaledp.utils.show_utils import ( show_image, @@ -216,6 +222,7 @@ def ScaleDPSession( "YoloDetector", "YoloOnnxDetector", "SignatureDetector", + "FaceDetector", "ImageCropBoxes", "DSPyExtractor", "TesseractRecognizer", @@ -224,6 +231,11 @@ def ScaleDPSession( "LLMExtractor", "LLMOcr", "LLMNer", + "PdfDataToDocument", + "PdfDataToSingleImage", + "PdfAddTextLayer", + "PdfAssembler", + "SingleImageToPdf", "__version__", "files", *dir(enums), diff --git a/scaledp/image/DataToImage.py b/scaledp/image/DataToImage.py index ede7374..75ead2e 100644 --- a/scaledp/image/DataToImage.py +++ b/scaledp/image/DataToImage.py @@ -80,7 +80,10 @@ def transform_udf(self, input, path, resolution): def _transform(self, dataset): out_col = self.getOutputCol() input_col = self._validate(self.getInputCol(), dataset) - path_col = self._validate(self.getPathCol(), dataset) + try: + path_col = self._validate(self.getPathCol(), dataset) + except Exception: + path_col = lit("memory") resolution = ( dataset["resolution"] if "resolution" in dataset.columns else lit(0) ) diff --git a/scaledp/models/detectors/BaseDetector.py b/scaledp/models/detectors/BaseDetector.py index e96eefe..8fbfa95 100644 --- 
a/scaledp/models/detectors/BaseDetector.py +++ b/scaledp/models/detectors/BaseDetector.py @@ -122,7 +122,6 @@ def transform_udf(self, image, params=None): logging.info("Call detector on image") result = self.call_detector([(resized_image, image.path)], params) except Exception as e: - raise e exception = traceback.format_exc() exception = ( f"{self.uid}: Error in object detection: {exception}, {image.exception}" diff --git a/scaledp/models/detectors/FaceDetector.py b/scaledp/models/detectors/FaceDetector.py new file mode 100644 index 0000000..48f1776 --- /dev/null +++ b/scaledp/models/detectors/FaceDetector.py @@ -0,0 +1,26 @@ +from types import MappingProxyType + +from scaledp.enums import Device +from scaledp.models.detectors.YoloOnnxDetector import YoloOnnxDetector + + +class FaceDetector(YoloOnnxDetector): + defaultParams = MappingProxyType( + { + "inputCol": "image", + "outputCol": "boxes", + "keepInputData": False, + "scaleFactor": 1.0, + "scoreThreshold": 0.2, + "device": Device.CPU, + "batchSize": 2, + "partitionMap": False, + "numPartitions": 0, + "pageCol": "page", + "pathCol": "path", + "propagateError": False, + "task": "detect", + "onlyRotated": False, + "model": "StabRise/face_detection", + }, + ) diff --git a/scaledp/models/detectors/SignatureDetector.py b/scaledp/models/detectors/SignatureDetector.py index 8c8613e..5d4c8eb 100644 --- a/scaledp/models/detectors/SignatureDetector.py +++ b/scaledp/models/detectors/SignatureDetector.py @@ -1,5 +1,26 @@ -from scaledp import YoloOnnxDetector +from types import MappingProxyType + +from scaledp.enums import Device +from scaledp.models.detectors.YoloOnnxDetector import YoloOnnxDetector class SignatureDetector(YoloOnnxDetector): - pass + defaultParams = MappingProxyType( + { + "inputCol": "image", + "outputCol": "signatures", + "keepInputData": False, + "scaleFactor": 1.0, + "scoreThreshold": 0.2, + "device": Device.CPU, + "batchSize": 2, + "partitionMap": False, + "numPartitions": 0, + "pageCol": "page", + 
"pathCol": "path", + "propagateError": False, + "task": "detect", + "onlyRotated": False, + "model": "StabRise/signature_detection", + }, + ) diff --git a/scaledp/models/detectors/YoloOnnxDetector.py b/scaledp/models/detectors/YoloOnnxDetector.py index 90ab708..0580c02 100644 --- a/scaledp/models/detectors/YoloOnnxDetector.py +++ b/scaledp/models/detectors/YoloOnnxDetector.py @@ -27,6 +27,14 @@ class YoloOnnxDetector(BaseDetector, HasDevice, HasBatchSize): typeConverter=TypeConverters.toString, ) + # Add padding param: integer percent to expand detected boxes + padding = Param( + Params._dummy(), + "padding", + "Padding percent to expand detected boxes (integer).", + typeConverter=TypeConverters.toInt, + ) + defaultParams = MappingProxyType( { "inputCol": "image", @@ -43,6 +51,7 @@ class YoloOnnxDetector(BaseDetector, HasDevice, HasBatchSize): "propagateError": False, "task": "detect", "onlyRotated": False, + "padding": 0, # default padding percent }, ) @@ -88,9 +97,30 @@ def call_detector(cls, images, params): # Convert PIL to NumPy (RGB) image_np = np.array(image) raw_boxes, scores, class_ids = detector.detect_objects(image_np) - + # Expand boxes by padding percent if provided + pad_percent = int(params.get("padding", 0)) if params is not None else 0 + h_img, w_img = image_np.shape[:2] for box in raw_boxes: - boxes.append(Box.from_bbox(box)) + # Assume box format is [x1, y1, x2, y2] + if pad_percent and len(box) >= 4: + x1, y1, x2, y2 = ( + float(box[0]), + float(box[1]), + float(box[2]), + float(box[3]), + ) + w = x2 - x1 + h = y2 - y1 + dx = (pad_percent / 100.0) * w + dy = (pad_percent / 100.0) * h + x1_new = max(0.0, x1 - dx) + y1_new = max(0.0, y1 - dy) + x2_new = min(float(w_img - 1), x2 + dx) + y2_new = min(float(h_img - 1), y2 + dy) + expanded_box = [x1_new, y1_new, x2_new, y2_new] + else: + expanded_box = box + boxes.append(Box.from_bbox(expanded_box)) results_final.append( DetectorOutput(path=image_path, type="yolo", bboxes=boxes), ) diff --git 
a/scaledp/models/detectors/yolo/yolo.py b/scaledp/models/detectors/yolo/yolo.py index dcc2027..7542c1e 100644 --- a/scaledp/models/detectors/yolo/yolo.py +++ b/scaledp/models/detectors/yolo/yolo.py @@ -132,9 +132,6 @@ def prepare_input(self, image): self.img_height, self.img_width = image.shape[:2] input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - print(input_img.shape) - print(self.input_shape) - print(f"Input width: {self.input_width}, Input height: {self.input_height}") # Rescale image with padding instead of simple resize input_img = self.rescale_image_with_padding( diff --git a/scaledp/params.py b/scaledp/params.py index d508686..ea1bca8 100644 --- a/scaledp/params.py +++ b/scaledp/params.py @@ -525,6 +525,14 @@ def _validate(self, column_name: str, dataset: Any) -> Any: Validate input schema. """ if column_name not in dataset.columns: + if len(column_name.split(".")) > 1: + root_col = column_name.split(".")[0] + if root_col not in dataset.columns: + raise ValueError( + f"Missing input column in transformer {self.uid}: " + f"Column '{root_col}' is not present.", + ) + return dataset[column_name] raise ValueError( f"Missing input column in transformer {self.uid}: " f"Column '{column_name}' is not present.", diff --git a/scaledp/pdf/PdfAddTextLayer.py b/scaledp/pdf/PdfAddTextLayer.py new file mode 100644 index 0000000..14b9f07 --- /dev/null +++ b/scaledp/pdf/PdfAddTextLayer.py @@ -0,0 +1,174 @@ +import io +import logging +import traceback +from types import MappingProxyType +from typing import Any + +import fitz +from pyspark import keyword_only +from pyspark.ml import Transformer +from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable +from pyspark.sql.functions import udf + +from scaledp.params import HasColumnValidator, HasInputCols, HasOutputCol, HasResolution +from scaledp.schemas.PdfDocument import PdfDocument + + +class PdfAddTextLayer( + Transformer, + DefaultParamsReadable, + DefaultParamsWritable, + HasOutputCol, + HasInputCols, + 
HasColumnValidator, + HasResolution, +): + """Add text layer to PDF document using text from Document schema.""" + + DEFAULT_PARAMS = MappingProxyType( + { + "inputCols": ["pdf", "text"], + "outputCol": "pdf_with_text", + "resolution": 300, # DPI for coordinate transformation + }, + ) + + @keyword_only + def __init__(self, **kwargs: Any) -> None: + super(PdfAddTextLayer, self).__init__() + self._setDefault(**self.DEFAULT_PARAMS) + self._set(**kwargs) + + def transform_udf(self, pdf_doc, text_doc): + """Transform PDF and text documents to create PDF with text layer.""" + try: + # Check for exceptions in input documents + if pdf_doc.exception != "": + return PdfDocument( + path=pdf_doc.path, + data=bytes(), + width=pdf_doc.width, + height=pdf_doc.height, + exception=pdf_doc.exception, + ) + + if text_doc.exception != "": + return PdfDocument( + path=pdf_doc.path, + data=bytes(), + width=pdf_doc.width, + height=pdf_doc.height, + exception=text_doc.exception, + ) + + if not pdf_doc.data: + return PdfDocument( + path=pdf_doc.path, + data=bytes(), + width=pdf_doc.width, + height=pdf_doc.height, + exception="PDF document has no data", + ) + + # Open the PDF document + pdf_document = fitz.open(stream=pdf_doc.data, filetype="pdf") + + if len(pdf_document) == 0: + pdf_document.close() + return PdfDocument( + path=pdf_doc.path, + data=bytes(), + width=pdf_doc.width, + height=pdf_doc.height, + exception="PDF document has no pages", + ) + + # Get the first page (assuming single page PDF as per requirement) + page = pdf_document[0] + + # Calculate scale factor from image coordinates to PDF coordinates + pdf_dpi = 72.0 # PDF native DPI + scale_factor = pdf_dpi / self.getResolution() + + # Add text layer using bounding boxes from Document + if text_doc.bboxes: + for bbox in text_doc.bboxes: + # Convert image coordinates to PDF coordinates + # Image coordinates: origin at top-left, y increases downward + # PDF coordinates: origin at bottom-left, y increases upward + + pdf_x = 
bbox.x * scale_factor + pdf_y = (bbox.y - 0.2 * bbox.height) * scale_factor # Flip Y axis + pdf_height = bbox.height * scale_factor + + # Insert text at the specified position using the correct PyMuPDF method + page.insert_text( + point=fitz.Point( + pdf_x, + pdf_y + pdf_height, + ), # Bottom-left of text + text=bbox.text, + fontsize=max(8, pdf_height * 0.8), # Scale font size + # with bbox height + color=(0, 0, 0), # Black text + overlay=False, + ) + + # Save the modified PDF to bytes + output_buffer = io.BytesIO() + pdf_document.save(output_buffer) + pdf_bytes = output_buffer.getvalue() + pdf_document.close() + + return PdfDocument( + path=pdf_doc.path, + data=pdf_bytes, + width=pdf_doc.width, + height=pdf_doc.height, + exception="", + ) + + except Exception: + exception = traceback.format_exc() + exception = f"PdfAddTextLayer: {exception}" + logging.warning(exception) + return PdfDocument( + path=pdf_doc.path if pdf_doc else "", + data=bytes(), + width=pdf_doc.width if pdf_doc else None, + height=pdf_doc.height if pdf_doc else None, + exception=exception, + ) + + def _transform(self, dataset): + """Transform the dataset by adding text layer to PDF documents.""" + output_col = self.getOutputCol() + input_cols = self.getInputCols() + + # Validate that we have exactly 2 input columns + if len(input_cols) != 2: + raise ValueError( + f"PdfAddTextLayer requires exactly 2 input columns " + f"(PDF and text), got {len(input_cols)}", + ) + + pdf_col, text_col = input_cols + + # Validate input columns exist + if pdf_col not in dataset.columns: + raise ValueError( + f"PDF input column '{pdf_col}' is not present in the DataFrame.", + ) + + if text_col not in dataset.columns: + raise ValueError( + f"Text input column '{text_col}' is not present in the DataFrame.", + ) + + pdf_column = dataset[pdf_col] + text_column = dataset[text_col] + + return dataset.withColumn( + output_col, + udf(self.transform_udf, PdfDocument.get_schema())(pdf_column, text_column), + ) diff --git 
a/scaledp/pdf/PdfAssembler.py b/scaledp/pdf/PdfAssembler.py new file mode 100644 index 0000000..55fea7d --- /dev/null +++ b/scaledp/pdf/PdfAssembler.py @@ -0,0 +1,218 @@ +import logging +import traceback +from types import MappingProxyType +from typing import Any, List + +import fitz +import pandas as pd +from pyspark import keyword_only +from pyspark.ml import Transformer +from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable +from pyspark.sql.functions import udf + +from scaledp.params import ( + HasInputCol, + HasOutputCol, + Param, + Params, + TypeConverters, +) +from scaledp.schemas.PdfDocument import PdfDocument + + +class HasGroupByCol(Params): + """ + Mixin for param groupByCol: column name to group by. + """ + + groupByCol: "Param[str]" = Param( + Params._dummy(), + "groupByCol", + "column name to group by.", + typeConverter=TypeConverters.toString, + ) + + def __init__(self) -> None: + super(HasGroupByCol, self).__init__() + + def getGroupByCol(self) -> str: + """ + Gets the value of groupByCol or its default value. + """ + return self.getOrDefault(self.groupByCol) + + def setGroupByCol(self, value): + """ + Sets the value of :py:attr:`groupByCol`. + """ + return self._set(groupByCol=value) + + +class PdfAssembler( + Transformer, + HasInputCol, + HasOutputCol, + HasGroupByCol, + DefaultParamsReadable, + DefaultParamsWritable, +): + """ + Assembles single-page PDFs into a single PDF document. + + Takes a column containing single-page PDF documents, groups them by origin, + and creates a single PDF using PyMuPDF (fitz). 
+ """ + + DEFAULT_PARAMS = MappingProxyType( + { + "inputCol": "pdf", + "outputCol": "assembled_pdf", + "groupByCol": "path", + }, + ) + + @keyword_only + def __init__(self, **kwargs: Any) -> None: + super(PdfAssembler, self).__init__() + self._setDefault(**self.DEFAULT_PARAMS) + self._set(**kwargs) + + def convert_to_pdf(self, pdfs: List[PdfDocument]) -> PdfDocument: + """ + Convert a list of single-page PDF documents into a single PDF. + + Args: + pdfs: List of PdfDocument objects representing single pages + + Returns: + PdfDocument: A single PDF document containing all pages + """ + try: + if not pdfs or len(pdfs) == 0: + return PdfDocument( + path="", + data=bytes(), + exception="No PDFs to assemble", + ) + + # Filter out invalid PDFs and sort by page if available + valid_pdfs = [] + for pdf_page in pdfs: + if ( + pdf_page.data is not None + and len(pdf_page.data) > 0 + and pdf_page.exception == "" + ): + valid_pdfs.append(pdf_page) + + if not valid_pdfs: + return PdfDocument( + path=pdfs[0].path if pdfs else "", + data=bytes(), + exception="No valid PDFs to assemble", + ) + + # Create new PDF document + pdf = fitz.open() + + for pdf_page in valid_pdfs: + try: + # Open the single page PDF + page_doc = fitz.open("pdf", pdf_page.data) + # Insert all pages from this document (should be just one) + pdf.insert_pdf(page_doc) + page_doc.close() + except Exception as e: + logging.warning(f"Failed to insert page from {pdf_page.path}: {e}") + continue + + # Write the assembled PDF to bytes + pdf_bytes = pdf.write() + pdf.close() + + return PdfDocument( + path=valid_pdfs[0].path, + data=pdf_bytes, + exception="", + ) + + except Exception: + exception = traceback.format_exc() + exception = f"PdfAssembler: {exception}" + logging.warning(exception) + return PdfDocument( + path=pdfs[0].path if pdfs and len(pdfs) > 0 else "", + data=bytes(), + exception=exception, + ) + + def _transform(self, dataset): + """ + Transform the dataset by grouping single-page PDFs and assembling 
them. + """ + output_col = self.getOutputCol() + input_col = self.getInputCol() + group_by_col = self.getGroupByCol() + + if input_col not in dataset.columns: + raise ValueError( + f"Input column '{input_col}' is not present in the DataFrame.", + ) + + if group_by_col not in dataset.columns: + raise ValueError( + f"Group by column '{group_by_col}' is not present in the DataFrame.", + ) + + # Check if we're working with pandas DataFrame (PandasPipeline) + if isinstance(dataset, pd.DataFrame): + # Pandas DataFrame approach + # Sort by path and page number if available + sort_columns = [] + if "path" in dataset.columns: + sort_columns.append("path") + if "page_number" in dataset.columns: + sort_columns.append("page_number") + + if sort_columns: + dataset = dataset.sort_values(sort_columns).reset_index(drop=True) + + # Group by the specified column and collect PDFs for each group + grouped = dataset.groupby(group_by_col)[input_col].apply(list).reset_index() + grouped.columns = [group_by_col, "pdfs"] + + # Apply the conversion function to each group + assembled_pdfs = [] + for _, row in grouped.iterrows(): + assembled_pdf = self.convert_to_pdf(row["pdfs"]) + assembled_pdfs.append(assembled_pdf) + + # Create result DataFrame + result = grouped[[group_by_col]].copy() + result[output_col] = assembled_pdfs + + else: + # Spark DataFrame approach (original implementation) + # Sort by path and page number if available, then group by the specified column + sorted_dataset = dataset + if "path" in dataset.columns: + sorted_dataset = dataset.orderBy("path") + if "page_number" in dataset.columns: + sorted_dataset = sorted_dataset.orderBy("page_number") + + # Group by the specified column and collect all PDFs for each group + from pyspark.sql.functions import collect_list + + grouped_dataset = sorted_dataset.groupBy(dataset[group_by_col]).agg( + collect_list(dataset[input_col]).alias("pdfs"), + ) + + # Apply the UDF to assemble PDFs for each group + result_dataset = 
grouped_dataset.withColumn( + output_col, + udf(self.convert_to_pdf, PdfDocument.get_schema())("pdfs"), + ) + + return result_dataset + + return result diff --git a/scaledp/pdf/PdfDataToImage.py b/scaledp/pdf/PdfDataToImage.py index 12841ef..c087165 100644 --- a/scaledp/pdf/PdfDataToImage.py +++ b/scaledp/pdf/PdfDataToImage.py @@ -8,7 +8,7 @@ from pyspark.ml import Transformer from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable from pyspark.pandas import DataFrame -from pyspark.sql.functions import udf +from pyspark.sql.functions import lit, udf from pyspark.sql.types import ArrayType, Row from scaledp.enums import ImageType @@ -110,7 +110,10 @@ def transform_udf(self, input: Row, path: Row) -> list[Image]: def _transform(self, dataset: DataFrame) -> DataFrame: out_col = self.getOutputCol() input_col = self._validate(self.getInputCol(), dataset) - path_col = dataset[self.getPathCol()] + try: + path_col = self._validate(self.getPathCol(), dataset) + except Exception: + path_col = lit("memory") df_1 = dataset.withColumn( "temp_data", diff --git a/scaledp/pdf/__init__.py b/scaledp/pdf/__init__.py index 8b13789..46f8837 100644 --- a/scaledp/pdf/__init__.py +++ b/scaledp/pdf/__init__.py @@ -1 +1,17 @@ +from scaledp.pdf.PdfAddTextLayer import PdfAddTextLayer +from scaledp.pdf.PdfAssembler import PdfAssembler +from scaledp.pdf.PdfDataToDocument import PdfDataToDocument +from scaledp.pdf.PdfDataToImage import PdfDataToImage +from scaledp.pdf.PdfDataToSingleImage import PdfDataToSingleImage +from scaledp.pdf.PdfDataToText import PdfDataToText +from scaledp.pdf.SingleImageToPdf import SingleImageToPdf +__all__ = [ + "PdfDataToImage", + "PdfDataToSingleImage", + "PdfAddTextLayer", + "PdfAssembler", + "PdfDataToDocument", + "PdfDataToText", + "SingleImageToPdf", +] diff --git a/scaledp/pipeline/PandasPipeline.py b/scaledp/pipeline/PandasPipeline.py index 974abb1..f76f62a 100644 --- a/scaledp/pipeline/PandasPipeline.py +++ 
b/scaledp/pipeline/PandasPipeline.py @@ -149,7 +149,7 @@ def __init__(self, stages) -> None: self.setStages(stages) def fromFile(self, filename: str) -> Any: - with Path.open(filename, "rb") as f: + with Path(filename).open("rb") as f: data = f.read() data = DatasetPd({"content": [data], "path": [filename], "resolution": [0]}) diff --git a/scaledp/utils/show_utils.py b/scaledp/utils/show_utils.py index aeb9dac..bcda3dc 100644 --- a/scaledp/utils/show_utils.py +++ b/scaledp/utils/show_utils.py @@ -184,6 +184,9 @@ def show_pdf( if column_type == "binary": df = PdfDataToImage(inputCol=column).transform(df) column = "image" + elif "struct" in column_type and "data" in column_type: + df = PdfDataToImage(inputCol="data").transform(df.select(f"{column}.data")) + column = "image" else: raise ValueError("Column must be binary") for id_, row in enumerate(df.limit(limit).select(column).collect()): diff --git a/tests/conftest.py b/tests/conftest.py index 70440f3..4f69427 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,7 +25,7 @@ def image_file(resource_path_root): @pytest.fixture def image_rotated_text_file(resource_path_root): - return (resource_path_root / "images/RotatedText1.png").absolute().as_posix() + return (resource_path_root / "images/RotatedText.png").absolute().as_posix() @pytest.fixture @@ -115,15 +115,22 @@ def image_pdf_df(spark_session, resource_path_root): @pytest.fixture -def signatures_pdf_df(spark_session, resource_path_root): +def signatures_pdf_file(resource_path_root): + return ( + (resource_path_root / "pdfs" / "SampleWithSignatures.pdf").absolute().as_posix() + ) + + +@pytest.fixture +def signatures_pdf_df(spark_session, signatures_pdf_file): return spark_session.read.format("binaryFile").load( - (resource_path_root / "pdfs" / "signatures.pdf").absolute().as_posix(), + signatures_pdf_file, ) @pytest.fixture -def signatures_pdf_file(spark_session, resource_path_root): - return (resource_path_root / "pdfs" / 
"signatures.pdf").absolute().as_posix() +def face_pdf_file(spark_session, resource_path_root): + return (resource_path_root / "pdfs" / "SampleWithFace.pdf").absolute().as_posix() @pytest.fixture @@ -190,6 +197,17 @@ def image_signature_df(spark_session, resource_path_root): return bin_to_image.transform(df) +@pytest.fixture +def image_face_df(spark_session, resource_path_root): + df = spark_session.read.format("binaryFile").load( + (resource_path_root / "images" / "document_with_face.png") + .absolute() + .as_posix(), + ) + bin_to_image = DataToImage().setImageType(ImageType.WEBP.value) + return bin_to_image.transform(df) + + @pytest.fixture def receipt_json(receipt_json_path: Path) -> Path: return receipt_json_path.open("r").read() diff --git a/tests/image/test_data_to_image.py b/tests/image/test_data_to_image.py index 0ef8756..8aca14a 100644 --- a/tests/image/test_data_to_image.py +++ b/tests/image/test_data_to_image.py @@ -12,6 +12,33 @@ def test_data_to_image(raw_image_df): assert result[0].image.path == result[0].path assert result[0].image.exception == "" + to_image = DataToImage() + result = to_image.transform(raw_image_df) + result1 = ( + DataToImage(inputCol="image.data", outputCol="image1") + .transform(result) + .collect() + ) + + assert len(result1) == 1 + # present image field + assert hasattr(result1[0], "image") + # image has right path field + assert result1[0].image.path == result1[0].path + assert result1[0].image.exception == "" + + +def test_data_to_image_without_path(raw_image_df): + to_image = DataToImage() + result = to_image.transform(raw_image_df.drop("path")).collect() + + assert len(result) == 1 + # present image field + assert hasattr(result[0], "image") + # image has right path field + assert result[0].image.path == "memory" + assert result[0].image.exception == "" + def test_wrong_data_to_image(binary_pdf_df): to_image = DataToImage() diff --git a/tests/models/detectors/test_face_detector.py 
b/tests/models/detectors/test_face_detector.py new file mode 100644 index 0000000..66f9867 --- /dev/null +++ b/tests/models/detectors/test_face_detector.py @@ -0,0 +1,90 @@ +import tempfile + +from pyspark.ml import PipelineModel + +from scaledp import ImageDrawBoxes, PdfDataToImage +from scaledp.enums import Device +from scaledp.models.detectors.FaceDetector import FaceDetector +from scaledp.pipeline.PandasPipeline import PandasPipeline + + +def test_face_detector(image_face_df): + + detector = FaceDetector( + device=Device.CPU, + keepInputData=True, + partitionMap=True, + numPartitions=0, + scoreThreshold=0.25, + task="detect", + padding=20, + ) + + draw = ImageDrawBoxes( + keepInputData=True, + inputCols=["image", "boxes"], + filled=False, + color="green", + lineWidth=5, + displayDataList=[], + ) + # Transform the image dataframe through the OCR stage + pipeline = PipelineModel(stages=[detector, draw]) + result = pipeline.transform(image_face_df) + + data = result.select("image_with_boxes", "boxes").collect() + + # Verify the pipeline result + assert len(data) == 1, "Expected exactly one result" + + # # Check that exceptions is empty + assert data[0].boxes.exception == "" + + # Save the output image to a temporary file for verification + with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp: + temp.write(data[0].image_with_boxes.data) + temp.close() + + # Print the path to the temporary file + print("file://" + temp.name) + + +def test_face_pdf_detector_pandas(face_pdf_file, patch_spark): + + pdf = PdfDataToImage( + inputCol="content", + outputCol="image", + pageLimit=1, + ) + + detector = FaceDetector( + device=Device.CPU, + keepInputData=True, + partitionMap=False, + numPartitions=0, + scoreThreshold=0.25, + task="detect", + ) + + draw = ImageDrawBoxes( + keepInputData=True, + inputCols=["image", "boxes"], + filled=False, + color="green", + lineWidth=5, + displayDataList=["score", "angle"], + ) + # Transform the image dataframe through the OCR 
stage + pipeline = PandasPipeline(stages=[pdf, detector, draw]) + data = pipeline.fromFile(face_pdf_file) + + # Verify the pipeline result + assert len(data) == 1, "Expected exactly one result" + + # Save the output image to a temporary file for verification + with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp: + temp.write(data["image_with_boxes"][0].data) + temp.close() + + # Print the path to the temporary file + print("file://" + temp.name) diff --git a/tests/models/detectors/test_layout_detector.py b/tests/models/detectors/test_layout_detector.py index 8b982d6..243e260 100644 --- a/tests/models/detectors/test_layout_detector.py +++ b/tests/models/detectors/test_layout_detector.py @@ -36,16 +36,6 @@ def layout_detector(): ) -def test_layout_detector_initialization(layout_detector): - """Test that LayoutDetector initializes correctly.""" - assert layout_detector.getInputCol() == "image" - assert layout_detector.getOutputCol() == "layout_boxes" - assert layout_detector.getScoreThreshold() == 0.5 - assert layout_detector.getDevice() == Device.CPU - assert layout_detector.getWhiteList() == [] - assert layout_detector.getModel() == "PP-DocLayout_plus-L" - - def test_layout_detector_with_drawn_boxes(image_df): """Test LayoutDetector with drawn boxes on the original image.""" detector = LayoutDetector( @@ -114,15 +104,3 @@ def test_layout_detector_with_custom_layout_types(): assert detector.getWhiteList() == ["text", "table"] assert detector.getModel() == "PP-DocLayout-M" - - -def test_layout_detector_output_schema(layout_detector): - """Test that the output schema is correct.""" - schema = layout_detector.outputSchema() - - # Check that the schema has the expected fields - field_names = [field.name for field in schema.fields] - expected_fields = ["path", "type", "bboxes", "exception"] - - for field in expected_fields: - assert field in field_names diff --git a/tests/models/detectors/test_signature_detector.py 
b/tests/models/detectors/test_signature_detector.py index da3f966..9432c6b 100644 --- a/tests/models/detectors/test_signature_detector.py +++ b/tests/models/detectors/test_signature_detector.py @@ -1,7 +1,5 @@ import tempfile -import pyspark -from pipeline.PandasPipeline import PandasPipeline, pathSparkFunctions from pyspark.ml import PipelineModel from scaledp import ( @@ -10,7 +8,7 @@ SignatureDetector, ) from scaledp.enums import Device -from scaledp.pdf.PdfDataToSingleImage import PdfDataToSingleImage +from scaledp.pipeline.PandasPipeline import PandasPipeline def test_signature_detector(image_signature_df): @@ -20,14 +18,12 @@ def test_signature_detector(image_signature_df): keepInputData=True, partitionMap=True, numPartitions=0, - scoreThreshold=0.25, task="detect", - model="/home/mykola/PycharmProjects/scaledp-models/detection/document/signature/detector_yolo_1cls.onnx", ) draw = ImageDrawBoxes( keepInputData=True, - inputCols=["image", "boxes"], + inputCols=["image", "signatures"], filled=False, color="green", lineWidth=5, @@ -43,7 +39,7 @@ def test_signature_detector(image_signature_df): assert len(data) == 1, "Expected exactly one result" # # Check that exceptions is empty - assert data[0].boxes.exception == "" + assert data[0].signatures.exception == "" # Save the output image to a temporary file for verification with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp: @@ -56,28 +52,23 @@ def test_signature_detector(image_signature_df): def test_signature_pdf_detector(signatures_pdf_df): - pdf = PdfDataToSingleImage(outputCol="image", keepInputData=True) - - detector = SignatureDetector( - device=Device.CPU, - keepInputData=True, - partitionMap=False, - numPartitions=0, - scoreThreshold=0.25, - task="detect", - model="/home/mykola/PycharmProjects/scaledp-models/detection/document/signature/detector_yolo_1cls.onnx", + pipeline = PipelineModel( + stages=[ + PdfDataToImage(outputCol="image"), + SignatureDetector( + device=Device.CPU, + 
keepInputData=True, + outputCol="signatures", + scoreThreshold=0.20, + ), + ImageDrawBoxes( + keepInputData=True, + inputCols=["image", "signatures"], + filled=True, + color="black", + ), + ], ) - - draw = ImageDrawBoxes( - keepInputData=True, - inputCols=["image", "boxes"], - filled=False, - color="green", - lineWidth=5, - displayDataList=["score", "angle"], - ) - # Transform the image dataframe through the OCR stage - pipeline = PipelineModel(stages=[pdf, detector, draw]) result = pipeline.transform(signatures_pdf_df) data = result.collect() @@ -86,7 +77,7 @@ def test_signature_pdf_detector(signatures_pdf_df): assert len(data) == 1, "Expected exactly one result" # # Check that exceptions is empty - assert data[0].boxes.exception == "" + assert data[0].signatures.exception == "" # Save the output image to a temporary file for verification with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp: @@ -97,11 +88,7 @@ def test_signature_pdf_detector(signatures_pdf_df): print("file://" + temp.name) -def test_signature_pdf_detector_pandas(signatures_pdf_file): - pathSparkFunctions(pyspark) - - # pdf = PdfDataToSingleImage(inputCol="content", outputCol="image", - # keepInputData=True) +def test_signature_pdf_detector_pandas(signatures_pdf_file, patch_spark): pdf = PdfDataToImage( inputCol="content", @@ -121,7 +108,7 @@ def test_signature_pdf_detector_pandas(signatures_pdf_file): draw = ImageDrawBoxes( keepInputData=True, - inputCols=["image", "boxes"], + inputCols=["image", "signatures"], filled=False, color="green", lineWidth=5, diff --git a/tests/pdf/test_single_image_to_pdf.py b/tests/pdf/test_single_image_to_pdf.py index 2ce87da..b6e5523 100644 --- a/tests/pdf/test_single_image_to_pdf.py +++ b/tests/pdf/test_single_image_to_pdf.py @@ -41,3 +41,23 @@ def test_image_to_pdf(image_df): # Write PDF data to temporary file temp.write(result[0].pdf.data) + + +def test_image_to_pdf_show(image_df): + """ + Test function to convert image DataFrame to PDF format. 
+ + Args: + image_df: DataFrame containing image data to be converted + + Side Effects: + - Creates a temporary PDF file with the converted image + - Prints the local file path of the created PDF + """ + # Initialize the PDF converter + image_to_pdf = SingleImageToPdf() + + # Transform the image DataFrame to PDF format and cache for performance + result_df = image_to_pdf.transform(image_df).cache() + + result_df.show_pdf(column="pdf", limit=1) diff --git a/tests/pytest.ini b/tests/pytest.ini index 9e1fe95..7284a33 100644 --- a/tests/pytest.ini +++ b/tests/pytest.ini @@ -5,3 +5,4 @@ spark_options = addopts = --log-cli-level=INFO -s env = PYARROW_IGNORE_TIMEZONE = 1 + OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES diff --git a/tests/testresources/images/document_with_face.png b/tests/testresources/images/document_with_face.png new file mode 100644 index 0000000..2d3f77e Binary files /dev/null and b/tests/testresources/images/document_with_face.png differ diff --git a/tests/testresources/pdfs/SampleWithFace.pdf b/tests/testresources/pdfs/SampleWithFace.pdf new file mode 100644 index 0000000..56e359e Binary files /dev/null and b/tests/testresources/pdfs/SampleWithFace.pdf differ diff --git a/tests/testresources/pdfs/SampleWithQRCode.pdf b/tests/testresources/pdfs/SampleWithQRCode.pdf new file mode 100644 index 0000000..98db21f Binary files /dev/null and b/tests/testresources/pdfs/SampleWithQRCode.pdf differ diff --git a/tests/testresources/pdfs/SampleWithRotatedText.pdf b/tests/testresources/pdfs/SampleWithRotatedText.pdf new file mode 100644 index 0000000..ab7b0b3 Binary files /dev/null and b/tests/testresources/pdfs/SampleWithRotatedText.pdf differ diff --git a/tests/testresources/pdfs/signatures.pdf b/tests/testresources/pdfs/SampleWithSignatures.pdf similarity index 91% rename from tests/testresources/pdfs/signatures.pdf rename to tests/testresources/pdfs/SampleWithSignatures.pdf index f59c38e..669ded0 100644 Binary files a/tests/testresources/pdfs/signatures.pdf and 
b/tests/testresources/pdfs/SampleWithSignatures.pdf differ