3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+- feat: Add chat completion handler for Granite-Docling (and SmolVLM)
+- feat: Add `special` argument to keep special tokens in chat completion output
+
 ## [0.3.16]

 - feat: Update llama.cpp to ggerganov/llama.cpp@4227c9be4268ac844921b90f31595f81236bd317
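The new `special` flag controls whether special tokens survive detokenization in chat completion output. A minimal sketch of how the argument is used (the model path and prompt below are placeholders for illustration, not part of this PR):

```python
from llama_cpp import Llama

# Hypothetical local model; any chat-capable GGUF works for this sketch.
llama = Llama(model_path="./model.gguf", n_ctx=4096)

response = llama.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello."}],
    special=True,  # keep special tokens (e.g. end-of-text markers) in the text
)
print(response["choices"][0]["message"]["content"])
```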
9 changes: 9 additions & 0 deletions CMakeLists.txt
@@ -153,6 +153,15 @@ if (LLAMA_BUILD)
         add_compile_definitions(GGML_USE_METAL)
     endif()

+    # Set version for mtmd (required by upstream CMakeLists.txt)
+    # NOTE: This is a workaround for mtmd build requirements.
+    # Version is set to 0.0.0 for local builds. If upstream adds version
+    # compatibility checks, this may need to match llama.cpp version.
+    if (NOT DEFINED LLAMA_BUILD_NUMBER)
+        set(LLAMA_BUILD_NUMBER 0)
+    endif()
+    set(LLAMA_INSTALL_VERSION 0.0.${LLAMA_BUILD_NUMBER})
+
     # Building llava
     add_subdirectory(vendor/llama.cpp/tools/mtmd)
35 changes: 35 additions & 0 deletions examples/granite_docling/main.py
@@ -0,0 +1,35 @@
from llama_cpp import Llama
from llama_cpp.llama_chat_format import GraniteDoclingChatHandler


chat_handler = GraniteDoclingChatHandler.from_pretrained(
    repo_id="ggml-org/granite-docling-258M-GGUF",
    filename="mmproj*Q8_0*",
)
llama = Llama.from_pretrained(
    repo_id="ggml-org/granite-docling-258M-GGUF",
    filename="granite*Q8_0*",
    chat_handler=chat_handler,
    n_ctx=8192,
)
response = llama.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": "https://huggingface.co/spaces/ibm-granite/granite-docling-258m-demo/resolve/main/data/images/new_arxiv.png"},
                {"type": "text", "text": "Convert this page to docling."},
            ],
        }
    ],
    stream=True,
    special=True,
)

for chunk in response:
    delta = chunk["choices"][0]["delta"]
    if "content" not in delta:
        continue
    print(delta["content"], end="", flush=True)

print()
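For reference, the same request without streaming returns the complete Docling output in a single response object. A sketch reusing the `llama` handle and arguments from the example above:

```python
# Non-streaming variant: one response object instead of a chunk iterator.
result = llama.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": "https://huggingface.co/spaces/ibm-granite/granite-docling-258m-demo/resolve/main/data/images/new_arxiv.png"},
                {"type": "text", "text": "Convert this page to docling."},
            ],
        }
    ],
    special=True,
)
print(result["choices"][0]["message"]["content"])
```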
2 changes: 1 addition & 1 deletion llama_cpp/__init__.py
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *

-__version__ = "0.3.16"
+__version__ = "0.4.0"