Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ third_party/runtime/
!third_party/runtime/CMakeLists.txt
__pycache__/
.pytest_cache/
**/NimbleSDK
models/**/data
4 changes: 4 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[submodule "third_party/tokenizers-cpp"]
path = third_party/tokenizers-cpp
url = https://github.com/NimbleEdge/tokenizers-cpp.git

6 changes: 4 additions & 2 deletions coreruntime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ endif()

# set(DEBUGFLAGS " -Werror -Wno-write-strings -Weffc++ -Wall -Wuninitialized -Wnon-virtual-dtor -Wshadow -Werror=format-security -Wunused-member-function -Wunused-function ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror=switch -Werror=return-type -Werror=implicit-fallthrough \
-Werror=non-virtual-dtor -Werror=format -Werror=format-security -Werror=unused-member-function -Werror=unused-function -Werror=writable-strings")
-Werror=non-virtual-dtor -Werror=format -Werror=format-security -Werror=unused-function -Werror=write-strings")

# string(CONCAT RELEASEFLAGS ${DEBUGFLAGS} " -fstack-protector-strong -ffunction-sections -fdata-sections ")
# #
Expand Down Expand Up @@ -84,6 +84,7 @@ add_subdirectory(nimblenet)
add_subdirectory(delitepy)
add_subdirectory("../third_party/json" "${CMAKE_BINARY_DIR}/third_party/json")
add_subdirectory("../third_party/SPSCQueue" "${CMAKE_BINARY_DIR}/third_party/SPSCQueue")
add_subdirectory("../third_party/tokenizers-cpp" "${CMAKE_BINARY_DIR}/third_party/tokenizers-cpp")
if (GENAI)
add_subdirectory("../third_party/miniz" "${CMAKE_BINARY_DIR}/third_party/miniz")
endif()
Expand Down Expand Up @@ -182,6 +183,7 @@ else()
target_compile_definitions(nimblenet PUBLIC -DIOS_PLATFORM="mac")
add_subdirectory(platform/unix) # produces ${CLIENT_INCLUDES}
add_subdirectory("../third_party/runtime" "${CMAKE_BINARY_DIR}/third_party/runtime") # -> produces ${BACKEND_LIBS} ${BACKEND_DIR} ${BACKED_INCLUDES}
# Use system curl library instead of conda environment
target_link_libraries(nimblenet ${VISIBILITY} curl)

# target_link_libraries(nimblenet ${VISIBILITY} clientlib)
Expand All @@ -194,7 +196,7 @@ if(NOT ANDROID_ABI)
list(APPEND ADDITIONAL_LIBS ZLIB::ZLIB)
endif()

target_link_libraries(nimblenet PRIVATE nlohmann_json::nlohmann_json ${VISIBILITY} SPSCQueue ${VISIBILITY} ${BACKEND_LIBS} ${VISIBILITY} ${ADDITIONAL_LIBS})
target_link_libraries(nimblenet PRIVATE nlohmann_json::nlohmann_json ${VISIBILITY} SPSCQueue ${VISIBILITY} tokenizers_cpp ${VISIBILITY} ${BACKEND_LIBS} ${VISIBILITY} ${ADDITIONAL_LIBS})
if (GENAI)
target_link_libraries(nimblenet PRIVATE miniz)
endif()
Expand Down
14 changes: 9 additions & 5 deletions coreruntime/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def main():
if "-DCMAKE_BUILD_TYPE=Release" in cmake_args:
STRIP = 1

CMAKE_CXX_FLAGS = ""
CMAKE_CXX_FLAGS = "-Wno-unused-member-function -Wno-implicit-fallthrough "
if args.testing:
cmake_args += " -DTESTING=1 "

Expand All @@ -61,16 +61,20 @@ def main():
COMMON_FLAGS = (
f"-B{os.getcwd()}/build/ "
f"{cmake_args} "
"-DCMAKE_POLICY_VERSION_MINIMUM=3.5 "
"-DCMAKE_CXX_FLAGS_RELEASE='-Wno-unused-function -Wno-implicit-fallthrough -DNDEBUG -O3' "
"-DCMAKE_CXX_FLAGS_DEBUG='-Wno-unused-function -Wno-implicit-fallthrough -g' "
)

# Determine compiler settings based on architecture
if arch == "arm":
cmake_command = f"cmake CMakeLists.txt {COMMON_FLAGS} -DCMAKE_CXX_COMPILER=g++ -DMACOS=1 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_CXX_FLAGS='{CMAKE_CXX_FLAGS}'"
elif arch == "x86_64":
CMAKE_CXX_FLAGS += " -stdlib=libstdc++ "
# Replace clang-specific flags with g++ compatible ones
CMAKE_CXX_FLAGS = CMAKE_CXX_FLAGS.replace("-Wno-unused-member-function", "-Wno-unused-function")
cmake_command = (
f"cmake CMakeLists.txt {COMMON_FLAGS} "
f"-DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CXX_FLAGS='{CMAKE_CXX_FLAGS}'"
f"-DCMAKE_CXX_COMPILER=g++ -DCMAKE_CXX_FLAGS='{CMAKE_CXX_FLAGS}'"
)
else:
cmake_command = f"cmake CMakeLists.txt {COMMON_FLAGS} -DMACOS=1"
Expand Down Expand Up @@ -103,14 +107,14 @@ def main():
if args.simulator:
if not args.ci_build:
# re-install deliteai
subprocess.run(f"python{python_version} -m pip uninstall deliteai", shell=True, check=True)
subprocess.run(f"python{python_version} -m pip uninstall -y deliteai", shell=True, check=True)
subprocess.run("rm -rf dist deliteai*", shell=True, check=True)
subprocess.run(f"python{python_version} setup.py bdist_wheel", shell=True, check=True)
subprocess.run(f"python{python_version} -m pip install dist/*", shell=True, check=True)

# re-install delitepy-library-stubs
subprocess.run(
f"python{python_version} -m pip uninstall delitepy-library-stubs",
f"python{python_version} -m pip uninstall -y delitepy-library-stubs",
shell=True,
check=True,
)
Expand Down
7 changes: 1 addition & 6 deletions coreruntime/delitepy/library_stubs/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,7 @@ def render_src_template() -> None:
check=True,
)
subprocess.run(
[
f"{delitepy_dir}/scripts/render_jinja2_templates.py",
f"{library_stubs_dir}/src_template",
f"{library_stubs_dir}/src_gen",
coreruntime_dir,
],
["cp", "-r", f"{library_stubs_dir}/src_template", f"{library_stubs_dir}/src_gen"],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Is this an accidental change?
  2. cp -R is the portable form, compared to cp -r.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree

check=True,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
#
# SPDX-License-Identifier: Apache-2.0

"""Package delitepy containing modules nimblenet and ne_re."""
"""Package delitepy containing modules nimblenet, ne_re, and tokenizers."""

from delitepy.nimblenet import *
from delitepy.ne_re import *
from delitepy.tokenizers import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# SPDX-FileCopyrightText: (C) 2025 DeliteAI Authors
#
# SPDX-License-Identifier: Apache-2.0

"""Package delitepy.tokenizers for tokenizer functionality."""

from typing import List, Union
from delitepy.nimblenet.tensor import Tensor

def from_pretrained(model_name_or_path: str) -> str:
    """Load a pre-trained tokenizer from HuggingFace Hub or a local file.

    This definition only declares the signature and contract; its body is
    ``pass`` and contains no loading logic.

    Args:
        model_name_or_path: Path to a tokenizer.json file or a HuggingFace
            model name.

    Returns:
        Tokenizer handle (opaque string identifier).

    Example:
        >>> tokenizer = tokenizers.from_pretrained("bert-base-uncased")
        >>> tokenizer = tokenizers.from_pretrained("/path/to/tokenizer.json")
    """
    pass

def from_file(file_path: str) -> str:
    """Load a tokenizer from a file path.

    This definition only declares the signature and contract; its body is
    ``pass`` and contains no loading logic.

    Args:
        file_path: Path to a tokenizer.json or .model file.

    Returns:
        Tokenizer handle (opaque string identifier).

    Example:
        >>> tokenizer = tokenizers.from_file("tokenizer.json")
        >>> tokenizer = tokenizers.from_file("model.spm")
    """
    pass

def from_json(json_str: str) -> str:
    """Create a tokenizer from a JSON string.

    This definition only declares the signature and contract; its body is
    ``pass`` and contains no parsing logic.

    Args:
        json_str: JSON string containing the tokenizer configuration.

    Returns:
        Tokenizer handle (opaque string identifier).

    Example:
        >>> json_config = '{"model": {...}, "normalizer": {...}}'
        >>> tokenizer = tokenizers.from_json(json_config)
    """
    pass

def from_sentencepiece(model_path: str) -> str:
    """Load a SentencePiece tokenizer from a .model file.

    This definition only declares the signature and contract; its body is
    ``pass`` and contains no loading logic.

    Args:
        model_path: Path to a SentencePiece .model file.

    Returns:
        Tokenizer handle (opaque string identifier).

    Example:
        >>> tokenizer = tokenizers.from_sentencepiece("tokenizer.model")
    """
    pass

def encode(tokenizer: str, text: str) -> Tensor:
    """Encode text into token IDs.

    This definition only declares the signature and contract; its body is
    ``pass`` and contains no encoding logic.

    Args:
        tokenizer: Tokenizer handle obtained from from_pretrained, from_file,
            from_json, or from_sentencepiece.
        text: Text to encode.

    Returns:
        Tensor containing token IDs (INT32).

    Example:
        >>> tokenizer = tokenizers.from_pretrained("bert-base-uncased")
        >>> token_ids = tokenizers.encode(tokenizer, "Hello world!")
        >>> print(token_ids.shape)  # [num_tokens]
    """
    pass

def decode(tokenizer: str, token_ids: Tensor) -> str:
    """Decode token IDs back to text.

    This definition only declares the signature and contract; its body is
    ``pass`` and contains no decoding logic.

    Args:
        tokenizer: Tokenizer handle.
        token_ids: Tensor containing token IDs (INT32).

    Returns:
        Decoded text string.

    Example:
        >>> tokenizer = tokenizers.from_pretrained("bert-base-uncased")
        >>> token_ids = tokenizers.encode(tokenizer, "Hello world!")
        >>> text = tokenizers.decode(tokenizer, token_ids)
        >>> print(text)  # "Hello world!"
    """
    pass

def get_vocab_size(tokenizer: str) -> int:
    """Get the vocabulary size of the tokenizer.

    This definition only declares the signature and contract; its body is
    ``pass`` and contains no lookup logic.

    Args:
        tokenizer: Tokenizer handle.

    Returns:
        Size of the vocabulary.

    Example:
        >>> tokenizer = tokenizers.from_pretrained("bert-base-uncased")
        >>> vocab_size = tokenizers.get_vocab_size(tokenizer)
        >>> print(vocab_size)  # 30522
    """
    pass

def token_to_id(tokenizer: str, token: str) -> int:
    """Convert a token string to its ID.

    This definition only declares the signature and contract; its body is
    ``pass`` and contains no lookup logic.

    Args:
        tokenizer: Tokenizer handle.
        token: Token string.

    Returns:
        Token ID, or -1 if the token is not found.

    Example:
        >>> tokenizer = tokenizers.from_pretrained("bert-base-uncased")
        >>> token_id = tokenizers.token_to_id(tokenizer, "[CLS]")
        >>> print(token_id)  # 101
    """
    pass

def id_to_token(tokenizer: str, token_id: int) -> str:
    """Convert a token ID to its string representation.

    This definition only declares the signature and contract; its body is
    ``pass`` and contains no lookup logic.

    Args:
        tokenizer: Tokenizer handle.
        token_id: Token ID.

    Returns:
        Token string, or an empty string if the ID is not found.

    Example:
        >>> tokenizer = tokenizers.from_pretrained("bert-base-uncased")
        >>> token = tokenizers.id_to_token(tokenizer, 101)
        >>> print(token)  # "[CLS]"
    """
    pass
74 changes: 0 additions & 74 deletions coreruntime/delitepy/scripts/render_jinja2_templates.py

This file was deleted.

1 change: 1 addition & 0 deletions coreruntime/nimblenet/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ set(BASE
data_variable/src/pre_processor_nimble_net_variable.cpp
data_variable/src/raw_event_store_data_variable.cpp
data_variable/src/regex_data_variable.cpp
data_variable/src/tokenizers_data_variable.cpp
data_variable/src/single_variable.cpp
data_variable/src/tensor_data_variable.cpp
job_scheduler/src/base_job.cpp
Expand Down
4 changes: 4 additions & 0 deletions coreruntime/nimblenet/asset_manager/src/asset_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ std::string Asset::get_file_name_on_device() const {
case AssetType::LLM:
return name + version + rmconstants::LLMFolderName;
#endif // GENAI
default:
return name + version;
}
}

Expand Down Expand Up @@ -96,6 +98,8 @@ std::string get_string_from_asset_type(const AssetType& assetType) {
case AssetType::LLM:
return "llm";
#endif // GENAI
default:
return "unknown";
}
}

Expand Down
2 changes: 2 additions & 0 deletions coreruntime/nimblenet/core_sdk/src/core_sdk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,8 @@ std::pair<CloudConfigResponse, Deployment> CoreSDK::get_cloud_config_and_update_
}
case CloudConfigState::Unmodified:
return {cloudConfig, deployment};
default:
return {cloudConfig, deployment};
}
}

Expand Down
Loading