diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..958627b
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,19 @@
+# see https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+---
+Language: Cpp
+Standard: c++11
+
+SortIncludes: false
+
+IndentWidth: 2
+TabWidth: 2
+UseTab: Never
+ColumnLimit: 160
+BreakBeforeBraces: Attach
+IndentExternBlock: Indent
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AllowShortFunctionsOnASingleLine: None
+IndentCaseLabels: true
+PointerAlignment: Right
+SpaceBeforeParens: ControlStatements
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 28873b7..a641c78 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,37 +10,43 @@ jobs:
os: [linux, macos, macos-arm64]
lua: [lua=5.1, lua=5.2, lua=5.3, lua=5.4, luajit=2.0, luajit=2.1]
include:
- - os: linux
- runner: ubuntu-latest
- - os: macos
- runner: macos-15-intel
- - os: macos-arm64
- runner: macos-latest
+ - os: linux
+ runner: ubuntu-latest
+ - os: macos
+ runner: macos-15-intel
+ - os: macos-arm64
+ runner: macos-latest
exclude:
- - os: macos-arm64
- lua: luajit=2.0
+ - os: macos-arm64
+ lua: luajit=2.0
name: ${{ matrix.os }} (${{ matrix.lua }})
runs-on: ${{ matrix.runner }}
steps:
# Checks-out the repository under $GITHUB_WORKSPACE.
- uses: actions/checkout@v6
- - name: Install libreadline
+ - name: Install libreadline
if: runner.os == 'Linux'
run: |
sudo apt-get install -y libreadline-dev
- name: Install Lua (${{ matrix.lua }})
run: |
- pip install git+https://github.com/luarocks/hererocks
+ pipx install git+https://github.com/luarocks/hererocks
+ pipx ensurepath
+ export PATH=$PATH:/root/.local/bin:$HOME/.local/bin
hererocks lua_install -r^ --${{ matrix.lua }}
env:
MACOSX_DEPLOYMENT_TARGET: 11.0
- name: Build lua-simdjson
+ shell: bash
run: |
+ set -e
source lua_install/bin/activate
luarocks make
- name: Run tests
+ shell: bash
run: |
+ set -e
source lua_install/bin/activate
luarocks install lua-cjson2
luarocks install busted
@@ -51,7 +57,7 @@ jobs:
fail-fast: false
matrix:
lua: [lua=5.1, lua=5.2, lua=5.3, lua=5.4, luajit=2.0, luajit=2.1]
- target: [mingw,vs]
+ target: [mingw, vs]
runs-on: windows-2022
steps:
# Checks-out the repository under $GITHUB_WORKSPACE.
diff --git a/Makefile.win b/Makefile.win
index 4eafad4..83c6592 100644
--- a/Makefile.win
+++ b/Makefile.win
@@ -24,4 +24,4 @@ clean:
del *.dll src\*.obj *.lib *.exp 2>nul
install: $(TARGET)
- copy $(TARGET) $(INST_LIBDIR)
+ copy $(TARGET) $(INST_LIBDIR)
\ No newline at end of file
diff --git a/README.md b/README.md
index 98e345a..5ebd3bf 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
# lua-simdjson
+
[](https://github.com/FourierTransformer/lua-simdjson/actions?query=branch%3Amaster)
A basic Lua binding to [simdjson](https://simdjson.org). The simdjson library is an incredibly fast JSON parser that uses SIMD instructions and fancy algorithms to parse JSON very quickly. It's been tested with LuaJIT 2.0/2.1 and Lua 5.1, 5.2, 5.3, and 5.4 on linux/osx/windows. It has a general parsing mode and a lazy mode that uses a JSON pointer.
@@ -6,26 +7,32 @@ A basic Lua binding to [simdjson](https://simdjson.org). The simdjson library is
Current simdjson version: 4.2.3
## Installation
+
If all the requirements are met, lua-simdjson can be install via luarocks with:
-```
+```bash
luarocks install lua-simdjson
```
+
Otherwise it can be installed manually by pulling the repo and running luarocks make.
## Requirements
- * lua-simdjson only works on 64bit systems.
- * a Lua build environment with support for C++11
- * g++ version 7+ and clang++ version 6+ or newer should work!
+
+* lua-simdjson only works on 64bit systems.
+* a Lua build environment with support for C++11
+ * g++ version 7+ and clang++ version 6+ or newer should work!
## Parsing
+
There are two main ways to parse JSON in lua-simdjson:
+
1. With `parse`: this parses JSON and returns a Lua table with the parsed values
2. With `open`: this reads in the JSON and keeps it in simdjson's internal format. The values can then be accessed using a JSON pointer (examples below)
Both of these methods also have support to read files on disc with `parseFile` and `openFile` respectively. If handling JSON from disk, these methods should be used and are incredibly fast.
## Typing
+
* lua-simdjson uses `simdjson.null` to represent `null` values from parsed JSON.
* Any application should use that for comparison as needed.
* it uses `lua_pushnumber` and `lua_pushinteger` for JSON floats and ints respectively, so your Lua version may handle that slightly differently.
@@ -33,7 +40,9 @@ Both of these methods also have support to read files on disc with `parseFile` a
* All other types map as expected.
### Parse some JSON
+
The `parse` methods will return a normal Lua table that can be interacted with.
+
```lua
local simdjson = require("simdjson")
local response = simdjson.parse([[
@@ -61,7 +70,9 @@ print(fileResponse["statuses"][1]["id"])
```
### Open some json
+
The `open` methods currently require the use of a JSON pointer, but are very quick. They are best used when you only need a part of a response. In the example below, it could be useful for just getting the `Thumnail` object with `:atPointer("/Image/Thumbnail")` which will then only create a Lua table with those specific values.
+
```lua
local simdjson = require("simdjson")
local response = simdjson.open([[
@@ -93,10 +104,81 @@ The `open` and `parse` codeblocks should print out the same values. It's worth n
This lazy style of using the simdjson data structure could also be used with array access in the future.
+## Encoding
+
+The `encode` method converts Lua tables into JSON strings. It supports nested tables, arrays, and all standard JSON types.
+
+```lua
+local simdjson = require("simdjson")
+
+-- Encode a simple table
+local data = {
+ name = "John Doe",
+ age = 30,
+ active = true,
+ score = 95.5
+}
+local json = simdjson.encode(data)
+print(json) -- e.g. {"name":"John Doe","age":30,"active":true,"score":95.5} (key order is not guaranteed)
+
+-- Encode nested structures
+local complex = {
+ user = {
+ id = 123,
+ tags = {"lua", "json", "fast"}
+ },
+ metadata = {
+ created = "2024-01-01",
+ count = 42
+ }
+}
+local json = simdjson.encode(complex)
+
+-- Use simdjson.null for JSON null values
+local withNull = {
+ value = simdjson.null,
+ name = "test"
+}
+local json = simdjson.encode(withNull) -- e.g. {"value":null,"name":"test"} (key order may vary)
+
+-- Optional: specify encoding options with a configuration table
+local deepData = { level1 = { level2 = { level3 = "value" } } }
+local json = simdjson.encode(deepData, {maxDepth = 10}) -- max depth of 10
+
+-- You can also specify bufferSize per-call (default: 16KB)
+local json = simdjson.encode(data, {bufferSize = 32 * 1024}) -- 32KB buffer
+
+-- Or combine both options
+local json = simdjson.encode(deepData, {maxDepth = 10, bufferSize = 8192})
+```
+
+You can also configure global encoding settings:
+
+```lua
+-- Set maximum nesting depth globally (default: 1024)
+simdjson.setMaxEncodeDepth(512)
+local currentDepth = simdjson.getMaxEncodeDepth()
+
+-- Set encode buffer size in bytes (default: 16KB)
+simdjson.setEncodeBufferSize(32 * 1024) -- 32KB
+local currentSize = simdjson.getEncodeBufferSize()
+```
+
+**Encoding behavior:**
+
+* Tables with consecutive integer keys starting at 1 are encoded as JSON arrays
+* All other tables are encoded as JSON objects
+* Numbers are formatted as integers when possible, or floats with 14 digits of precision
+* Integers larger than 2^53 are encoded in scientific notation for JSON compatibility
+* Strings are automatically escaped according to JSON specifications
+* `simdjson.null` represents JSON `null`
+
## Error Handling
+
lua-simdjson will error out with any errors from simdjson encountered while parsing. They are very good at helping identify what has gone wrong during parsing.
## Benchmarks
+
I ran some benchmarks against lua-cjson, rapidjson, and dkjson. For each test, I loaded the JSON into memory, and then had the parsers go through each file 100 times and took the average time it took to parse to a Lua table. You can see all the results in the [benchmark](benchmark/) folder. I've included a sample output run via Lua (the LuaJIT graph looks very similar, also in the benchmark folder). The y-axis is logarithmic, so every half step down is twice as fast.

@@ -116,7 +198,7 @@ lua-simdjson, like the simdjson library performs better on more modern hardware.
* since it's an external module, it's not quite as easy to just grab the file and go (dkjson has you covered here!)
## Philosophy
-I plan to keep it fairly inline with what the original simdjson library is capable of doing, which really means not adding too many additional options. The big _thing_ that's missing so far is encoding a lua table to JSON. I may add in an encoder at some point.
+I plan to keep it fairly inline with what the original simdjson library is capable of doing, which really means not adding too many additional options.
## Licenses
* The jsonexamples, src/simdjson.cpp, src/simdjson.h are unmodified from the released version simdjson under the Apache License 2.0.
diff --git a/spec/compile_spec.lua b/spec/compile_spec.lua
index d9627e2..f5a46fa 100644
--- a/spec/compile_spec.lua
+++ b/spec/compile_spec.lua
@@ -81,9 +81,8 @@ end)
local major, minor = _VERSION:match('([%d]+)%.(%d+)')
if tonumber(major) >= 5 and tonumber(minor) >= 3 then
- describe("Make sure ints and floats parse correctly", function ()
+ describe("Make sure ints and floats parse correctly", function()
it("should handle decoding numbers appropriately", function()
-
local numberCheck = simdjson.parse([[
{
"float": 1.2,
@@ -101,7 +100,6 @@ if tonumber(major) >= 5 and tonumber(minor) >= 3 then
assert.are.same("float", math.type(numberCheck["one_above_max_signed_integer"]))
assert.are.same("integer", math.type(numberCheck["min_unsigned_integer"]))
assert.are.same("float", math.type(numberCheck["max_unsigned_integer"]))
-
end)
end)
end
@@ -129,3 +127,19 @@ describe("Make sure invalid files are not accepted", function()
end)
end
end)
+
+describe("Active implementation function", function()
+  it("should return a valid implementation name", function()
+    local active = simdjson.activeImplementation()
+    assert.is_not_nil(active)
+    assert.is_string(active)
+    assert.is_truthy(string.match(active, "%w+")) -- at least one alphanumeric character
+    assert.is_true(active ~= "") -- and therefore not the empty string
+  end)
+
+  it("should contain implementation details", function()
+    -- Expected shape is "<name> (<description>)", e.g. "arm64 (ARM NEON)" or "haswell (Intel AVX2)"
+    local active = simdjson.activeImplementation()
+    assert.is_truthy(string.find(active, "%(.*%)")) -- a parenthesised description is present
+  end)
+end)
diff --git a/spec/encode_security_spec.lua b/spec/encode_security_spec.lua
new file mode 100644
index 0000000..83e7853
--- /dev/null
+++ b/spec/encode_security_spec.lua
@@ -0,0 +1,396 @@
+local simdjson = require("simdjson")
+local cjson = require("cjson")
+
+describe("encode() security and edge cases", function()
+ describe("String injection and escaping", function()
+ it("should properly escape quote characters", function()
+ local data = { value = 'test"with"quotes' }
+ local encoded = simdjson.encode(data)
+ assert.is_true(encoded:find('\\"') ~= nil)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(data.value, decoded.value)
+ end)
+
+ it("should properly escape backslashes", function()
+ local data = { value = 'test\\with\\backslashes' }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(data.value, decoded.value)
+ end)
+
+ it("should properly escape control characters", function()
+ local test_cases = {
+ { str = "line1\nline2", name = "newline" },
+ { str = "tab\there", name = "tab" },
+ { str = "return\rhere", name = "carriage return" },
+ { str = "backspace\bhere", name = "backspace" },
+ { str = "form\ffeed", name = "form feed" },
+ }
+
+ for _, test in ipairs(test_cases) do
+ local data = { value = test.str }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(data.value, decoded.value)
+ end
+ end)
+
+ it("should handle strings with null bytes", function()
+ -- Note: null bytes may be truncated as C strings are null-terminated
+ local data = { value = "before\x00after" }
+ local encoded = simdjson.encode(data)
+ -- Verify encoding doesn't crash and produces valid JSON
+ assert.is_true(encoded:find("before") ~= nil)
+ local decoded = simdjson.parse(encoded)
+ -- String may be truncated at null byte
+ assert.is_true(decoded.value == "before" or decoded.value == "before\x00after")
+ end)
+
+    it("should handle common control characters safely", function()
+      -- Control characters that JSON has a short escape for; `escape` is that conventional spelling.
+      local test_chars = {
+        { char = "\t", name = "tab", escape = "\\t" },
+        { char = "\n", name = "newline", escape = "\\n" },
+        { char = "\r", name = "carriage return", escape = "\\r" },
+        { char = "\b", name = "backspace", escape = "\\b" },
+        { char = "\f", name = "form feed", escape = "\\f" },
+      }
+
+      for _, test in ipairs(test_chars) do
+        local data = { value = "before" .. test.char .. "after" }
+        local encoded = simdjson.encode(data)
+        -- RFC 8259 forbids raw bytes below 0x20 inside strings: a plain-text search must find none.
+        assert.is_nil(encoded:find(test.char, 1, true), test.name .. " was emitted unescaped")
+        local decoded = simdjson.parse(encoded)
+        assert.are.same(data.value, decoded.value)
+      end
+    end)
+ end)
+
+ describe("Potential XSS and HTML injection", function()
+    it("should handle HTML/XML special characters", function()
+      -- Markup is plain data to a JSON encoder: every string must round-trip byte for byte.
+      local data = {
+        html = "<script>alert('xss')</script>",
+        xml = "<?xml version='1.0'?><root/>",
+        tags = "<div>test</div>",
+        entities = "&lt;&gt;&amp;&quot;&#39;"
+      }
+      local decoded = simdjson.parse(simdjson.encode(data))
+      assert.are.same(data.html, decoded.html)
+      assert.are.same(data.xml, decoded.xml)
+      assert.are.same(data.tags, decoded.tags)
+      assert.are.same(data.entities, decoded.entities)
+    end)
+
+ it("should not execute embedded JavaScript", function()
+ local malicious = {
+ js = "'; alert('xss'); //",
+ comment = "/* comment */ code",
+ injection = "\"); malicious(); //"
+ }
+ local encoded = simdjson.encode(malicious)
+ -- Verify it's properly escaped
+ assert.is_true(encoded:find("alert") ~= nil)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(malicious.js, decoded.js)
+ end)
+ end)
+
+ describe("Key injection and object vulnerabilities", function()
+ it("should handle keys with special characters", function()
+ local data = {
+ ["key'with'quotes"] = "value1",
+ ['key"with"doublequotes'] = "value2",
+ ["key\\with\\backslash"] = "value3",
+ ["key\nwith\nnewline"] = "value4",
+ }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(data["key'with'quotes"], decoded["key'with'quotes"])
+ assert.are.same(data['key"with"doublequotes'], decoded['key"with"doublequotes'])
+ end)
+
+ it("should handle prototype pollution keys", function()
+ -- Common prototype pollution attack keys
+ local data = {
+ ["__proto__"] = "should_be_safe",
+ ["constructor"] = "safe_value",
+ ["prototype"] = "another_safe"
+ }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(data["__proto__"], decoded["__proto__"])
+ assert.are.same(data["constructor"], decoded["constructor"])
+ end)
+
+ it("should handle empty string keys", function()
+ local data = { [""] = "empty_key_value" }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(data[""], decoded[""])
+ end)
+
+ it("should handle very long keys", function()
+ local long_key = string.rep("a", 10000)
+ local data = { [long_key] = "value" }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(data[long_key], decoded[long_key])
+ end)
+ end)
+
+ describe("Number vulnerabilities", function()
+ it("should handle very large integers without overflow", function()
+ local data = {
+ max_int = 9007199254740991, -- Max safe integer in JavaScript
+ min_int = -9007199254740991,
+ large_pos = 9223372036854775807, -- Max int64
+ large_neg = -9223372036854775808, -- Min int64
+ }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ -- Allow for precision loss on very large numbers
+ assert.is_true(math.abs(decoded.max_int - data.max_int) < 1)
+ end)
+
+ it("should handle floating point edge cases", function()
+ local data = {
+ zero = 0.0,
+ very_small = 1e-308,
+ very_large = 1e308,
+ negative = -123.456,
+ }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(data.zero, decoded.zero)
+ end)
+
+ it("should handle many decimal places", function()
+ local data = { pi = 3.14159265358979323846264338327950288 }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ -- Check that precision is maintained reasonably
+ assert.is_true(math.abs(decoded.pi - 3.141592653589793) < 0.000001)
+ end)
+ end)
+
+ describe("Nested structure vulnerabilities", function()
+ it("should enforce max depth to prevent stack overflow", function()
+ -- Create a very deep structure
+ local function create_deep(depth)
+ if depth == 0 then
+ return "bottom"
+ end
+ return { nested = create_deep(depth - 1) }
+ end
+
+ local deep = create_deep(50)
+
+ -- Should succeed with high limit
+ local success1 = pcall(function()
+ simdjson.encode(deep, { maxDepth = 100 })
+ end)
+ assert.is_true(success1)
+
+ -- Should fail with low limit
+ local success2 = pcall(function()
+ simdjson.encode(deep, { maxDepth = 10 })
+ end)
+ assert.is_false(success2)
+ end)
+
+ it("should handle wide objects without issues", function()
+ -- Create object with many keys
+ local wide = {}
+ for i = 1, 1000 do
+ wide["key" .. i] = "value" .. i
+ end
+ local encoded = simdjson.encode(wide)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(wide["key500"], decoded["key500"])
+ end)
+
+ it("should handle wide arrays without issues", function()
+ local wide = {}
+ for i = 1, 1000 do
+ wide[i] = i
+ end
+ local encoded = simdjson.encode(wide)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(#wide, #decoded)
+ assert.are.same(wide[500], decoded[500])
+ end)
+ end)
+
+ describe("Memory and performance vulnerabilities", function()
+ it("should handle very long strings", function()
+ -- Create a 1MB string
+ local long_string = string.rep("x", 1024 * 1024)
+ local data = { large = long_string }
+ local encoded = simdjson.encode(data)
+ assert.is_true(#encoded > 1024 * 1024)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(#long_string, #decoded.large)
+ end)
+
+ it("should handle arrays with many elements", function()
+ local large_array = {}
+ for i = 1, 10000 do
+ large_array[i] = i
+ end
+ local data = { arr = large_array }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(#large_array, #decoded.arr)
+ assert.are.same(large_array[5000], decoded.arr[5000])
+ end)
+
+ it("should handle mixed large structure", function()
+ local data = {
+ strings = {},
+ numbers = {},
+ objects = {}
+ }
+ for i = 1, 100 do
+ data.strings[i] = string.rep("test", 100)
+ data.numbers[i] = i * 1.5
+ data.objects[i] = { id = i, name = "item" .. i }
+ end
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(#data.strings, #decoded.strings)
+ end)
+ end)
+
+ describe("Unicode and encoding vulnerabilities", function()
+ it("should handle various Unicode characters", function()
+ local data = {
+ emoji = "😀🎉🔥💯",
+ chinese = "你好世界",
+ arabic = "مرحبا",
+ russian = "Привет",
+ mixed = "Hello 世界 🌍",
+ }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(data.emoji, decoded.emoji)
+ assert.are.same(data.chinese, decoded.chinese)
+ assert.are.same(data.mixed, decoded.mixed)
+ end)
+
+ it("should handle Unicode escapes", function()
+ -- String with Unicode escape sequences
+ local data = { unicode = "test\\u0041\\u0042\\u0043" }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(data.unicode, decoded.unicode)
+ end)
+
+ it("should handle zero-width and special Unicode", function()
+ local data = {
+ zero_width = "test\226\128\139here", -- Zero-width space (U+200B)
+ rtl_mark = "test\226\128\143mark", -- Right-to-left mark (U+200F)
+ combining = "e\204\129", -- e with acute accent combining (U+0301)
+ }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(data.zero_width, decoded.zero_width)
+ end)
+ end)
+
+ describe("Malformed or unexpected input", function()
+ it("should handle empty structures", function()
+ local data = {
+ empty_object = {},
+ empty_array = {},
+ empty_string = "",
+ }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(type(decoded.empty_object), "table")
+ assert.are.same(decoded.empty_string, "")
+ end)
+
+ it("should handle boolean edge cases", function()
+ local data = {
+ true_val = true,
+ false_val = false,
+ bool_array = { true, false, true, false },
+ }
+ local encoded = simdjson.encode(data)
+ assert.is_true(encoded:find("true") ~= nil)
+ assert.is_true(encoded:find("false") ~= nil)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(data.true_val, decoded.true_val)
+ assert.are.same(data.false_val, decoded.false_val)
+ end)
+
+ it("should consistently handle repeated encoding", function()
+ local data = { test = "value", num = 42 }
+ local encoded1 = simdjson.encode(data)
+ local encoded2 = simdjson.encode(data)
+ local encoded3 = simdjson.encode(data)
+
+ local decoded1 = simdjson.parse(encoded1)
+ local decoded2 = simdjson.parse(encoded2)
+ local decoded3 = simdjson.parse(encoded3)
+
+ assert.are.same(decoded1.test, decoded2.test)
+ assert.are.same(decoded2.test, decoded3.test)
+ end)
+ end)
+
+ describe("SQL and NoSQL injection patterns", function()
+ it("should safely handle SQL injection patterns", function()
+ local injection_patterns = {
+ "'; DROP TABLE users; --",
+ "1' OR '1'='1",
+ "admin'--",
+ "' OR 1=1--",
+ "'; EXEC sp_MSForEachTable 'DROP TABLE ?'; --",
+ }
+
+ for _, pattern in ipairs(injection_patterns) do
+ local data = { query = pattern }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(pattern, decoded.query)
+ end
+ end)
+
+ it("should safely handle NoSQL injection patterns", function()
+ local nosql_patterns = {
+ "{'$gt': ''}",
+ "{'$ne': null}",
+ "{'$where': 'this.password.length > 0'}",
+ }
+
+ for _, pattern in ipairs(nosql_patterns) do
+ local data = { filter = pattern }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(pattern, decoded.filter)
+ end
+ end)
+ end)
+
+ describe("Path traversal and file inclusion", function()
+ it("should handle path traversal strings", function()
+ local paths = {
+ "../../etc/passwd",
+ "..\\..\\windows\\system32",
+ "/etc/passwd",
+ "C:\\Windows\\System32\\config\\SAM",
+ "../../../../../etc/shadow",
+ }
+
+ for _, path in ipairs(paths) do
+ local data = { path = path }
+ local encoded = simdjson.encode(data)
+ local decoded = simdjson.parse(encoded)
+ assert.are.same(path, decoded.path)
+ end
+ end)
+ end)
+end)
diff --git a/spec/encode_spec.lua b/spec/encode_spec.lua
new file mode 100644
index 0000000..dbe9471
--- /dev/null
+++ b/spec/encode_spec.lua
@@ -0,0 +1,414 @@
+local simdjson = require("simdjson")
+local cjson = require("cjson")
+
+
+describe("encode numbers correctly", function()
+ it("should encode numbers the same as cjson", function()
+ local testData = {
+ float = 1.2,
+ min_signed_integer = -9223372036854775808,
+ max_signed_integer = 9223372036854775807,
+ one_above_max_signed_integer = 9223372036854775808,
+ min_unsigned_integer = 0,
+ max_unsigned_integer = 18446744073709551615
+ }
+
+ for k, v in pairs(testData) do
+ local td = { [k] = v }
+ local simdjsonEncoded = simdjson.encode(td)
+ local cjsonEncoded = cjson.encode(td)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end
+
+ local cjsonEncode = cjson.encode(testData)
+ local simdjsonEncode = simdjson.encode(testData)
+ assert.are.same(cjsonEncode, simdjsonEncode)
+ end)
+
+ it("should encode special float values", function()
+ local testCases = {
+ { value = 0.0, name = "zero" },
+ { value = 3.14159265358979, name = "pi" },
+ { value = 2.718281828459045, name = "e" },
+ { value = 1.23e-10, name = "small scientific" },
+ { value = 1.23e10, name = "large scientific" },
+ { value = -123.456, name = "negative float" },
+ }
+
+ for _, test in ipairs(testCases) do
+ local data = { value = test.value }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end
+ end)
+
+ it("should encode array of numbers", function()
+ local numbers = { 1, 2, 3, 4, 5, -1, -2, 0, 1.5, 2.7 }
+ local data = { numbers = numbers }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end)
+end)
+
+describe("encode strings correctly", function()
+ it("should encode simple strings", function()
+ local testCases = {
+ { str = "hello", name = "simple" },
+ { str = "", name = "empty" },
+ { str = "hello world", name = "with space" },
+ { str = "123", name = "numeric string" },
+ }
+
+ for _, test in ipairs(testCases) do
+ local data = { str = test.str }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end
+ end)
+
+ it("should encode strings with special characters", function()
+ local testCases = {
+ { str = "hello\nworld", name = "newline" },
+ { str = "hello\tworld", name = "tab" },
+ { str = "hello\rworld", name = "carriage return" },
+ { str = "hello\"world", name = "quote" },
+ { str = "hello\\world", name = "backslash" },
+ }
+
+ for _, test in ipairs(testCases) do
+ local data = { str = test.str }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end
+ end)
+
+ it("should encode forward slash without escaping", function()
+ -- simdjson doesn't escape forward slashes (which is valid JSON)
+ local data = { str = "hello/world" }
+ local simdjsonEncoded = simdjson.encode(data)
+ assert.are.same('{"str":"hello/world"}', simdjsonEncoded)
+ end)
+
+ it("should encode unicode strings", function()
+ local testCases = {
+ { str = "Hello 世界", name = "chinese" },
+ { str = "Hello मुndi", name = "hindi" },
+ { str = "Hello 🌍", name = "emoji" },
+ { str = "café", name = "accented" },
+ }
+
+ for _, test in ipairs(testCases) do
+ local data = { str = test.str }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end
+ end)
+
+ it("should encode array of strings", function()
+ local strings = { "one", "two", "three", "", "with space" }
+ local data = { strings = strings }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end)
+end)
+
+describe("encode booleans correctly", function()
+ it("should encode boolean values", function()
+ local data1 = { value = true }
+ assert.are.same(cjson.encode(data1), simdjson.encode(data1))
+
+ local data2 = { value = false }
+ assert.are.same(cjson.encode(data2), simdjson.encode(data2))
+ end)
+
+ it("should encode boolean arrays", function()
+ local bools = { true, false, true, false }
+ local data = { bools = bools }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end)
+
+ it("should encode mixed boolean and other types", function()
+ local mixed = { true, 1, "test", false, 2.5 }
+ local data = { mixed = mixed }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end)
+end)
+
+describe("encode arrays correctly", function()
+ it("should encode empty arrays", function()
+ local data = { arr = {} }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end)
+
+ it("should encode nested arrays", function()
+ local data = {
+ nested = {
+ { 1, 2, 3 },
+ { 4, 5, 6 },
+ { 7, 8, 9 }
+ }
+ }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end)
+
+ it("should encode deeply nested arrays", function()
+ local data = { arr = { { { { { 1 } } } } } }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end)
+
+ it("should encode arrays with mixed types", function()
+ local data = {
+ mixed = { 1, "two", 3.0, true, false, { nested = "value" } }
+ }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end)
+end)
+
+describe("encode objects correctly", function()
+ it("should encode empty objects", function()
+ local data = {}
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end)
+
+ it("should encode objects with string keys", function()
+ local data = {
+ key1 = "value1",
+ key2 = "value2",
+ key3 = "value3"
+ }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ -- Note: key order may differ, so we decode and compare
+ local simdjsonDecoded = simdjson.parse(simdjsonEncoded)
+ local cjsonDecoded = cjson.decode(cjsonEncoded)
+ assert.are.same(cjsonDecoded, simdjsonDecoded)
+ end)
+
+ it("should encode objects with numeric keys", function()
+ local data = {
+ ["1"] = "one",
+ ["2"] = "two",
+ ["3"] = "three"
+ }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ local simdjsonDecoded = simdjson.parse(simdjsonEncoded)
+ local cjsonDecoded = cjson.decode(cjsonEncoded)
+ assert.are.same(cjsonDecoded, simdjsonDecoded)
+ end)
+
+ it("should encode nested objects", function()
+ local data = {
+ outer = {
+ middle = {
+ inner = {
+ value = "deep"
+ }
+ }
+ }
+ }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end)
+
+ it("should encode objects with mixed value types", function()
+ local data = {
+ string = "value",
+ number = 42,
+ float = 3.14,
+ bool_true = true,
+ bool_false = false,
+ array = { 1, 2, 3 },
+ object = { nested = "value" }
+ }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ local simdjsonDecoded = simdjson.parse(simdjsonEncoded)
+ local cjsonDecoded = cjson.decode(cjsonEncoded)
+ assert.are.same(cjsonDecoded, simdjsonDecoded)
+ end)
+end)
+
+describe("encode complex json types", function()
+ it("should encode complex json types the same as cjson", function()
+ local testData = {
+ object = {
+ key1 = "value1",
+ key2 = 2,
+ key3 = { nestedKey = "nestedValue" }
+ },
+ mixed = {
+ "string",
+ 123,
+ true,
+ { nestedArray = { 1, 2, 3 } },
+ { nestedObject = { key = "value" } }
+ },
+ mixed_complex = {
+ array = { "abc", 123, true },
+ object = { key = "value", number = 456 },
+ nested_object = {
+ inner_key = { 1, 2, 3, { deep_key = "deep_value" } }
+ }
+ },
+ bools = { true, false, true, false }
+ }
+
+ for k, v in pairs(testData) do
+ local td = { [k] = v }
+ local simdjsonEncoded = simdjson.encode(td)
+ local cjsonEncoded = cjson.encode(td)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end
+ end)
+
+ it("should encode complex nested structures", function()
+ local data = {
+ users = {
+ {
+ id = 1,
+ name = "Alice",
+ active = true,
+ scores = { 95, 87, 92 }
+ },
+ {
+ id = 2,
+ name = "Bob",
+ active = false,
+ scores = { 78, 85, 90 }
+ }
+ },
+ metadata = {
+ version = "1.0",
+ count = 2
+ }
+ }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end)
+
+ it("should handle arrays of objects", function()
+ local data = {
+ items = {
+ { id = 1, name = "Item 1" },
+ { id = 2, name = "Item 2" },
+ { id = 3, name = "Item 3" }
+ }
+ }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end)
+
+ it("should handle objects with array values", function()
+ local data = {
+ numbers = { 1, 2, 3, 4, 5 },
+ strings = { "a", "b", "c" },
+ booleans = { true, false, true }
+ }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ local simdjsonDecoded = simdjson.parse(simdjsonEncoded)
+ local cjsonDecoded = cjson.decode(cjsonEncoded)
+ assert.are.same(cjsonDecoded, simdjsonDecoded)
+ end)
+end)
+
+describe("encode edge cases", function()
+ it("should handle very long strings", function()
+ local longString = string.rep("a", 10000)
+ local data = { str = longString }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end)
+
+ it("should handle large arrays", function()
+ local largeArray = {}
+ for i = 1, 1000 do
+ largeArray[i] = i
+ end
+ local data = { arr = largeArray }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end)
+
+ it("should handle sparse arrays as objects", function()
+ local sparseArray = {}
+ sparseArray[1] = "first"
+ sparseArray[5] = "fifth"
+ sparseArray[10] = "tenth"
+ local data = { sparse = sparseArray }
+
+ -- simdjson treats sparse arrays as objects
+ local simdjsonEncoded = simdjson.encode(data)
+ assert.is_true(simdjsonEncoded:find('"sparse"') ~= nil)
+
+ -- Verify it can be decoded back
+ local decoded = simdjson.parse(simdjsonEncoded)
+ assert.is_not_nil(decoded.sparse)
+ end)
+
+ it("should encode keys with special characters", function()
+ local data = {
+ ["key with spaces"] = "value1",
+ ["key-with-dashes"] = "value2",
+ ["key_with_underscores"] = "value3",
+ ["key.with.dots"] = "value4"
+ }
+ local simdjsonEncoded = simdjson.encode(data)
+ local cjsonEncoded = cjson.encode(data)
+ local simdjsonDecoded = simdjson.parse(simdjsonEncoded)
+ local cjsonDecoded = cjson.decode(cjsonEncoded)
+ assert.are.same(cjsonDecoded, simdjsonDecoded)
+ end)
+
+ it("should roundtrip encode and decode", function()
+ local original = {
+ name = "Test",
+ value = 42,
+ active = true,
+ items = { 1, 2, 3 },
+ nested = { key = "value" }
+ }
+ local encoded = simdjson.encode(original)
+ local decoded = simdjson.parse(encoded)
+
+    -- Spot-check one field of each kind (scalar, array element, nested key) after the round trip
+ assert.are.same(original.name, decoded.name)
+ assert.are.same(original.value, decoded.value)
+ assert.are.same(original.active, decoded.active)
+ assert.are.same(original.items[1], decoded.items[1])
+ assert.are.same(original.nested.key, decoded.nested.key)
+ end)
+
+ it("basic string", function()
+ local original = "test string"
+ local simdjsonEncoded = simdjson.encode(original)
+ local cjsonEncoded = cjson.encode(original)
+ assert.are.same(cjsonEncoded, simdjsonEncoded)
+ end)
+end)
diff --git a/spec/performance_spec.lua b/spec/performance_spec.lua
new file mode 100644
index 0000000..7807a2a
--- /dev/null
+++ b/spec/performance_spec.lua
@@ -0,0 +1,399 @@
+local simdjson = require("simdjson")
+local cjson = require("cjson")
+
+-- Running tallies: incremented by show_comparison for every benchmark and
+-- printed by the teardown hook at the end of the describe block.
+local simdjson_wins = 0
+local cjson_wins = 0
+local total_tests = 0
+-- Repetition count passed to measure_time by the benchmarks below.
+local iterations = 10000
+
+--- Time repeated calls of a function using CPU time (os.clock).
+-- A full garbage collection runs first so that garbage left over from
+-- earlier work is not charged to this measurement.
+-- @param func function invoked with no arguments on every repetition
+-- @param iterations number of repetitions (defaults to 1)
+-- @return total elapsed seconds, followed by the mean seconds per repetition
+local function measure_time(func, iterations)
+  local runs = iterations or 1
+  collectgarbage("collect")
+
+  local started_at = os.clock()
+  for _ = 1, runs do
+    func()
+  end
+  local total = os.clock() - started_at
+
+  return total, total / runs
+end
+
+--- Render a duration given in seconds as a short human-readable string.
+-- Values under one second are shown in milliseconds (six decimals below one
+-- millisecond, three otherwise); anything else is shown in seconds.
+-- @param num duration in seconds
+-- @return formatted string such as "0.500000 ms", "250.000 ms" or "2.000 s"
+local function format_number(num)
+  if num < 0.001 then
+    return string.format("%.6f ms", num * 1000)
+  end
+  if num < 1 then
+    return string.format("%.3f ms", num * 1000)
+  end
+  return string.format("%.3f s", num)
+end
+
+--- Print one benchmark result row and record which library won.
+-- simdjson wins only when it is strictly faster; a tie counts for cjson.
+-- @param name label printed in the first column
+-- @param simdjson_time elapsed seconds measured for simdjson
+-- @param cjson_time elapsed seconds measured for cjson
+local function show_comparison(name, simdjson_time, cjson_time)
+  local speedup = cjson_time / simdjson_time
+
+  local winner, ratio
+  if speedup > 1 then
+    winner, ratio = "simdjson", speedup
+  else
+    winner, ratio = "cjson", 1 / speedup
+  end
+
+  -- Update the module-level tallies.
+  total_tests = total_tests + 1
+  if winner == "simdjson" then
+    simdjson_wins = simdjson_wins + 1
+  else
+    cjson_wins = cjson_wins + 1
+  end
+
+  -- A blank line before the very first row separates it from the test marker.
+  if total_tests == 1 then
+    print()
+  end
+
+  local row = string.format(
+    " %-30s | simdjson: %s | cjson: %s | %s is %.2fx faster", name,
+    format_number(simdjson_time), format_number(cjson_time), winner, ratio)
+  print(row)
+end
+
+describe("Performance Comparison: simdjson vs cjson", function()
+ it(string.format("Simple Object Encoding (%s iterations)", iterations), function()
+   -- Small flat object: one string, one number and one boolean field.
+   local payload = { name = "test", value = 42, active = true }
+   local function with_simdjson() simdjson.encode(payload) end
+   local function with_cjson() cjson.encode(payload) end
+
+   local simdjson_time = measure_time(with_simdjson, iterations)
+   local cjson_time = measure_time(with_cjson, iterations)
+   show_comparison("Simple object", simdjson_time, cjson_time)
+ end)
+
+ it(string.format("Array Encoding (%s iterations)", iterations), function()
+   -- The integers 1..100 stored under a single "numbers" key.
+   local numbers = {}
+   for n = 1, 100 do
+     numbers[n] = n
+   end
+   local payload = { numbers = numbers }
+   local function with_simdjson() simdjson.encode(payload) end
+   local function with_cjson() cjson.encode(payload) end
+
+   local simdjson_time = measure_time(with_simdjson, iterations)
+   local cjson_time = measure_time(with_cjson, iterations)
+   show_comparison("100-element array", simdjson_time, cjson_time)
+ end)
+
+ it(string.format("Nested Object Encoding (%s iterations)", iterations), function()
+   -- Four wrapper objects around an innermost { value = "deep" } object.
+   local innermost = { value = "deep" }
+   local payload = {
+     level1 = { level2 = { level3 = { level4 = innermost } } }
+   }
+   local function with_simdjson() simdjson.encode(payload) end
+   local function with_cjson() cjson.encode(payload) end
+
+   local simdjson_time = measure_time(with_simdjson, iterations)
+   local cjson_time = measure_time(with_cjson, iterations)
+   show_comparison("5-level nesting", simdjson_time, cjson_time)
+ end)
+
+ -- Named differently from the 5-level benchmark above so that the two tests
+ -- can be told apart in reports and selected individually by name.
+ it(string.format("Deeply Nested Object Encoding (%s iterations)", iterations), function()
+   -- Build level1 -> level2 -> ... -> level10 -> { value = "deep" } from the
+   -- inside out instead of spelling out ten levels of literal braces.
+   local nested_data = { value = "deep" }
+   for level = 10, 1, -1 do
+     nested_data = { ["level" .. level] = nested_data }
+   end
+
+   local simdjson_time = measure_time(function()
+     simdjson.encode(nested_data)
+   end, iterations)
+
+   local cjson_time = measure_time(function()
+     cjson.encode(nested_data)
+   end, iterations)
+   show_comparison("10-level nesting", simdjson_time, cjson_time)
+ end)
+
+ it(string.format("String-Heavy Data (%s iterations)", iterations), function()
+   -- Three sentence-length string values and nothing else.
+   local payload = {
+     str1 = "The quick brown fox jumps over the lazy dog",
+     str2 = "Lorem ipsum dolor sit amet, consectetur adipiscing elit",
+     str3 = "Pack my box with five dozen liquor jugs"
+   }
+   local function with_simdjson() simdjson.encode(payload) end
+   local function with_cjson() cjson.encode(payload) end
+
+   local simdjson_time = measure_time(with_simdjson, iterations)
+   local cjson_time = measure_time(with_cjson, iterations)
+   show_comparison("String-heavy object", simdjson_time, cjson_time)
+ end)
+
+ it(string.format("Mixed Type Array (%s iterations)", iterations), function()
+   -- One array holding numbers, a string, booleans and a nested object.
+   local elements = { 1, "two", 3.0, true, false, { nested = "value" } }
+   local payload = { data = elements }
+   local function with_simdjson() simdjson.encode(payload) end
+   local function with_cjson() cjson.encode(payload) end
+
+   local simdjson_time = measure_time(with_simdjson, iterations)
+   local cjson_time = measure_time(with_cjson, iterations)
+   show_comparison("Mixed type array", simdjson_time, cjson_time)
+ end)
+
+ it(string.format("Large Object (%s iterations)", iterations), function()
+   -- 100 string-keyed entries ("key1" = "value1", ...) under a "data" key.
+   local entries = {}
+   for n = 1, 100 do
+     entries["key" .. n] = "value" .. n
+   end
+   local payload = { data = entries }
+   local function with_simdjson() simdjson.encode(payload) end
+   local function with_cjson() cjson.encode(payload) end
+
+   local simdjson_time = measure_time(with_simdjson, iterations)
+   local cjson_time = measure_time(with_cjson, iterations)
+   show_comparison("100-key object", simdjson_time, cjson_time)
+ end)
+
+ it(string.format("Large Array (%s iterations)", iterations), function()
+   -- The integers 1..1000 stored as a sequence under a "data" key.
+   local numbers = {}
+   for n = 1, 1000 do
+     numbers[n] = n
+   end
+   local payload = { data = numbers }
+   local function with_simdjson() simdjson.encode(payload) end
+   local function with_cjson() cjson.encode(payload) end
+
+   local simdjson_time = measure_time(with_simdjson, iterations)
+   local cjson_time = measure_time(with_cjson, iterations)
+
+   show_comparison("1000-element array", simdjson_time, cjson_time)
+ end)
+
+ it(string.format("Large Objects (%s iterations)", iterations), function()
+   -- 1000 string-keyed integer entries ("a1" = 1, ...) under a "data" key.
+   local entries = {}
+   for n = 1, 1000 do
+     entries["a" .. n] = n
+   end
+   local payload = { data = entries }
+   local function with_simdjson() simdjson.encode(payload) end
+   local function with_cjson() cjson.encode(payload) end
+
+   local simdjson_time = measure_time(with_simdjson, iterations)
+   local cjson_time = measure_time(with_cjson, iterations)
+
+   show_comparison("1000-K/V pair object", simdjson_time, cjson_time)
+ end)
+
+ it(string.format("Complex Realistic Data (%s iterations)", iterations), function()
+   -- API-style payload: a list of user records plus metadata and settings.
+   local users = {
+     { id = 1, name = "Alice Smith", email = "alice@example.com", active = true, score = 95.5 },
+     { id = 2, name = "Bob Jones", email = "bob@example.com", active = false, score = 87.3 },
+     { id = 3, name = "Carol White", email = "carol@example.com", active = true, score = 92.1 }
+   }
+   local payload = {
+     users = users,
+     metadata = { version = "1.0", timestamp = 1704197400, count = 3 },
+     settings = { theme = "dark", language = "en", notifications = true }
+   }
+   local function with_simdjson() simdjson.encode(payload) end
+   local function with_cjson() cjson.encode(payload) end
+
+   local simdjson_time = measure_time(with_simdjson, iterations)
+   local cjson_time = measure_time(with_cjson, iterations)
+   show_comparison("Realistic complex data", simdjson_time, cjson_time)
+ end)
+
+ it(string.format("Simple JSON Parsing (%s iterations)", iterations),
+   function()
+     local simple_json = '{"name":"test","value":42,"active":true}'
+
+     local simdjson_time = measure_time(function()
+       simdjson.parse(simple_json)
+     end, iterations)
+
+     -- Both sides must run the same number of repetitions: the totals are
+     -- compared directly, so a hard-coded count here would skew the result
+     -- whenever `iterations` is changed.
+     local cjson_time = measure_time(function()
+       cjson.decode(simple_json)
+     end, iterations)
+
+     show_comparison("Simple parsing", simdjson_time, cjson_time)
+   end)
+
+ it(string.format("Array Parsing (%s iterations)", iterations), function()
+   -- Object with a single 20-element integer array.
+   local document = '{"numbers":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]}'
+   local function with_simdjson() simdjson.parse(document) end
+   local function with_cjson() cjson.decode(document) end
+
+   local simdjson_time = measure_time(with_simdjson, iterations)
+   local cjson_time = measure_time(with_cjson, iterations)
+
+   show_comparison("Array parsing", simdjson_time, cjson_time)
+ end)
+
+ it(string.format("Nested Object Parsing (%s iterations)", iterations), function()
+   -- Four wrapper objects around an innermost {"value":"deep"} object.
+   local document = '{"level1":{"level2":{"level3":{"level4":{"value":"deep"}}}}}'
+   local function with_simdjson() simdjson.parse(document) end
+   local function with_cjson() cjson.decode(document) end
+
+   local simdjson_time = measure_time(with_simdjson, iterations)
+   local cjson_time = measure_time(with_cjson, iterations)
+   show_comparison("Nested parsing", simdjson_time, cjson_time)
+ end)
+
+ it(string.format("Large JSON Parsing (%s iterations)", iterations), function()
+   -- The input document is produced once with cjson from a 100-key object,
+   -- then parsed repeatedly by both libraries.
+   local entries = {}
+   for n = 1, 100 do
+     entries["key" .. n] = "value" .. n
+   end
+   local document = cjson.encode({ data = entries })
+   local function with_simdjson() simdjson.parse(document) end
+   local function with_cjson() cjson.decode(document) end
+
+   local simdjson_time = measure_time(with_simdjson, iterations)
+   local cjson_time = measure_time(with_cjson, iterations)
+   show_comparison("Large object parsing", simdjson_time, cjson_time)
+ end)
+
+ it(string.format("Round-trip: Encode + Parse (%s iterations)", iterations), function()
+   -- Each repetition encodes the table and immediately parses the result
+   -- with the same library.
+   local payload = {
+     id = 123,
+     name = "Test User",
+     values = { 1, 2, 3, 4, 5 },
+     metadata = { active = true, score = 95.5 }
+   }
+   local function with_simdjson()
+     simdjson.parse(simdjson.encode(payload))
+   end
+   local function with_cjson()
+     cjson.decode(cjson.encode(payload))
+   end
+
+   local simdjson_time = measure_time(with_simdjson, iterations)
+   local cjson_time = measure_time(with_cjson, iterations)
+
+   show_comparison("Round-trip", simdjson_time, cjson_time)
+ end)
+
+ it(string.format("Special Characters (%s iterations)", iterations), function()
+   -- One value that needs escaping (quotes, newlines, tab) and one value
+   -- containing multi-byte UTF-8 text.
+   local payload = {
+     escaped = 'test"with"quotes\nand\nnewlines\ttabs',
+     unicode = "Hello 世界 🌍"
+   }
+   local function with_simdjson() simdjson.encode(payload) end
+   local function with_cjson() cjson.encode(payload) end
+
+   local simdjson_time = measure_time(with_simdjson, iterations)
+   local cjson_time = measure_time(with_cjson, iterations)
+   show_comparison("Special characters", simdjson_time, cjson_time)
+ end)
+
+ it(string.format("Boolean Arrays (%s iterations)", iterations), function()
+   -- Eight alternating booleans under a "flags" key.
+   local flags = { true, false, true, false, true, false, true, false }
+   local payload = { flags = flags }
+   local function with_simdjson() simdjson.encode(payload) end
+   local function with_cjson() cjson.encode(payload) end
+
+   local simdjson_time = measure_time(with_simdjson, iterations)
+   local cjson_time = measure_time(with_cjson, iterations)
+
+   show_comparison("Boolean arrays", simdjson_time, cjson_time)
+ end)
+
+ it(string.format("Large Boolean Array (%s iterations)", iterations), function()
+   -- 1000 booleans picked with math.random; the array itself is the
+   -- top-level value being encoded.
+   local choices = { true, false }
+   local payload = {}
+   for n = 1, 1000 do
+     payload[n] = choices[math.random(2)]
+   end
+   local function with_simdjson() simdjson.encode(payload) end
+   local function with_cjson() cjson.encode(payload) end
+
+   local simdjson_time = measure_time(with_simdjson, iterations)
+   local cjson_time = measure_time(with_cjson, iterations)
+
+   show_comparison("Large boolean arrays", simdjson_time, cjson_time)
+ end)
+
+ -- NOTE(review): this hook does nothing; it is kept unchanged - confirm
+ -- whether it can simply be removed.
+ after_each(function() end)
+
+ -- Print the win/loss summary once the benchmarks in this block have run.
+ teardown(function()
+   -- Share of all recorded benchmarks, as a percentage. Reports 0 when no
+   -- benchmark recorded a result instead of dividing zero by zero.
+   local function percent_of_total(wins)
+     if total_tests == 0 then
+       return 0
+     end
+     return (wins / total_tests) * 100
+   end
+
+   local rule = string.rep("=", 80)
+   print("\n" .. rule)
+   print("Using SIMD implementation: " .. simdjson.activeImplementation())
+   print(string.format("Performance Summary: %d total tests", total_tests))
+   print(rule)
+   print(string.format(" simdjson wins: %d (%.1f%%)", simdjson_wins,
+     percent_of_total(simdjson_wins)))
+   print(string.format(" cjson wins: %d (%.1f%%)", cjson_wins,
+     percent_of_total(cjson_wins)))
+   print(rule)
+ end)
+end)
diff --git a/src/luasimdjson.cpp b/src/luasimdjson.cpp
index 4004b88..1fd6bef 100644
--- a/src/luasimdjson.cpp
+++ b/src/luasimdjson.cpp
@@ -1,3 +1,4 @@
+#include
#include
#include
@@ -11,12 +12,23 @@
#define NDEBUG
#define __OPTIMIZE__ 1
-#include "simdjson.h"
#include "luasimdjson.h"
+#include "simdjson.h"
#define LUA_SIMDJSON_NAME "simdjson"
#define LUA_SIMDJSON_VERSION "0.0.8"
+// keys encode max depth configuration.
+#define LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY "simdjson.maxEncodeDepth"
+#define DEFAULT_MAX_ENCODE_DEPTH simdjson::DEFAULT_MAX_DEPTH
+
+// Encode buffer size reservation configuration
+#define LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY "simdjson.encodeBufferSize"
+#define DEFAULT_ENCODE_BUFFER_SIZE (16 * 1024) // 16KB
+#define DEFAULT_MAX_ENCODE_BUFFER_SIZE simdjson::SIMDJSON_MAXSIZE_BYTES
+// Max size for number to string conversion buffer
+#define ENCODE_NUMBER_BUFFER_SIZE 32
+
using namespace simdjson;
#if !defined(luaL_newlibtable) && (!defined LUA_VERSION_NUM || LUA_VERSION_NUM <= 501)
@@ -25,11 +37,9 @@ using namespace simdjson;
** Stolen from: http://lua-users.org/wiki/CompatibilityWithLuaFive
** Adapted from Lua 5.2.0
*/
-static void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup)
-{
+static void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup) {
luaL_checkstack(L, nup + 1, "too many upvalues");
- for (; l->name != NULL; l++)
- { /* fill the table with given functions */
+ for (; l->name != NULL; l++) { /* fill the table with given functions */
int i;
lua_pushstring(L, l->name);
for (i = 0; i < nup; i++) /* copy upvalues to the top */
@@ -43,112 +53,102 @@ static void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup)
ondemand::parser ondemand_parser;
simdjson::padded_string jsonbuffer;
+thread_local simdjson::builder::string_builder *encode_buffer = nullptr; // Reused across encode() calls
+thread_local size_t encode_buffer_size = 0; // Track current buffer size
-template
-void convert_ondemand_element_to_table(lua_State *L, T &element)
-{
+template void convert_ondemand_element_to_table(lua_State *L, T &element) {
static_assert(std::is_base_of::value || std::is_base_of::value, "type parameter must be document or value");
- switch (element.type())
- {
-
- case ondemand::json_type::array:
- {
- int count = 1;
- lua_newtable(L);
-
- for (ondemand::value child : element.get_array())
- {
- lua_pushinteger(L, count);
- convert_ondemand_element_to_table(L, child);
- lua_settable(L, -3);
- count = count + 1;
- }
- break;
- }
-
- case ondemand::json_type::object:
- lua_newtable(L);
- for (ondemand::field field : element.get_object())
- {
- std::string_view s = field.unescaped_key();
- lua_pushlstring(L, s.data(), s.size());
- convert_ondemand_element_to_table(L, field.value());
- lua_settable(L, -3);
- }
- break;
+ switch (element.type()) {
+ case ondemand::json_type::array: {
+ int count = 1;
+ lua_newtable(L);
- case ondemand::json_type::number:
- {
- ondemand::number number = element.get_number();
- ondemand::number_type number_type = number.get_number_type();
- switch (number_type)
- {
- case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::floating_point_number:
- lua_pushnumber(L, element.get_double());
+ for (ondemand::value child : element.get_array()) {
+ lua_pushinteger(L, count);
+ convert_ondemand_element_to_table(L, child);
+ lua_settable(L, -3);
+ count = count + 1;
+ }
break;
+ }
- case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::signed_integer:
- lua_pushinteger(L, element.get_int64());
+ case ondemand::json_type::object:
+ lua_newtable(L);
+ for (ondemand::field field : element.get_object()) {
+ std::string_view s = field.unescaped_key();
+ lua_pushlstring(L, s.data(), s.size());
+ convert_ondemand_element_to_table(L, field.value());
+ lua_settable(L, -3);
+ }
break;
- case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::unsigned_integer:
- {
-// a uint64 can be greater than an int64, so we must check how large and pass as a number
-// if larger but LUA_MAXINTEGER (which is only defined in 5.3+)
+ case ondemand::json_type::number: {
+ ondemand::number number = element.get_number();
+ ondemand::number_type number_type = number.get_number_type();
+ switch (number_type) {
+ case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::floating_point_number:
+ lua_pushnumber(L, element.get_double());
+ break;
+
+ case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::signed_integer:
+ lua_pushinteger(L, element.get_int64());
+ break;
+
+ case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::unsigned_integer: {
+// a uint64 can be greater than an int64, so we must check how large and pass as
+// a number if larger but LUA_MAXINTEGER (which is only defined in 5.3+)
#if defined(LUA_MAXINTEGER)
- uint64_t actual_value = element.get_uint64();
- if (actual_value > LUA_MAXINTEGER)
- {
- lua_pushnumber(L, actual_value);
- }
- else
- {
- lua_pushinteger(L, actual_value);
- }
+ uint64_t actual_value = element.get_uint64();
+ if (actual_value > LUA_MAXINTEGER) {
+ lua_pushnumber(L, actual_value);
+ } else {
+ lua_pushinteger(L, actual_value);
+ }
#else
- lua_pushnumber(L, element.get_double());
+ lua_pushnumber(L, element.get_double());
#endif
+ break;
+ }
+
+ case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::big_integer:
+ lua_pushnumber(L, element.get_double());
+ break;
+ }
break;
}
- case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::big_integer:
- lua_pushnumber(L, element.get_double());
+ case ondemand::json_type::string: {
+ std::string_view s = element.get_string();
+ lua_pushlstring(L, s.data(), s.size());
break;
}
- break;
- }
- case ondemand::json_type::string:
- {
- std::string_view s = element.get_string();
- lua_pushlstring(L, s.data(), s.size());
- break;
- }
-
- case ondemand::json_type::boolean:
- lua_pushboolean(L, element.get_bool());
- break;
+ case ondemand::json_type::boolean:
+ lua_pushboolean(L, element.get_bool());
+ break;
- case ondemand::json_type::null:
- // calling is_null().value() will trigger an exception if the value is invalid
- if (element.is_null().value())
- {
- lua_pushlightuserdata(L, NULL);
- }
- break;
+ case ondemand::json_type::null:
+ // calling is_null().value() will trigger an exception if the value
+ // is invalid
+ if (element.is_null().value()) {
+ lua_pushlightuserdata(L, NULL);
+ }
+ break;
- case ondemand::json_type::unknown:
- default:
- luaL_error(L, "simdjson::ondemand::json_type::unknown or unsupported type encountered");
- break;
+ case ondemand::json_type::unknown:
+ default:
+ luaL_error(L, "simdjson::ondemand::json_type::unknown or unsupported "
+ "type "
+ "encountered");
+ break;
}
}
-// from https://github.com/simdjson/simdjson/blob/master/doc/performance.md#free-padding
+// from
+// https://github.com/simdjson/simdjson/blob/master/doc/performance.md#free-padding
// Returns the default size of the page in bytes on this system.
-long page_size()
-{
+long page_size() {
#ifdef _WIN32
SYSTEM_INFO sysInfo;
GetSystemInfo(&sysInfo);
@@ -162,71 +162,54 @@ long page_size()
// allows us to reuse a json buffer pretty safely
// Returns true if the buffer + len + simdjson::SIMDJSON_PADDING crosses the
// page boundary.
-bool need_allocation(const char *buf, size_t len)
-{
- return ((reinterpret_cast(buf + len - 1) % page_size()) <
- simdjson::SIMDJSON_PADDING);
+bool need_allocation(const char *buf, size_t len) {
+ return ((reinterpret_cast(buf + len - 1) % page_size()) < simdjson::SIMDJSON_PADDING);
}
-simdjson::padded_string_view get_padded_string_view(const char *buf, size_t len,
- simdjson::padded_string &jsonbuffer)
-{
- if (need_allocation(buf, len))
- { // unlikely case
+simdjson::padded_string_view get_padded_string_view(const char *buf, size_t len, simdjson::padded_string &jsonbuffer) {
+ if (need_allocation(buf, len)) { // unlikely case
jsonbuffer = simdjson::padded_string(buf, len);
return jsonbuffer;
- }
- else
- { // no reallcation needed (very likely)
- return simdjson::padded_string_view(buf, len,
- len + simdjson::SIMDJSON_PADDING);
+ } else { // no reallcation needed (very likely)
+ return simdjson::padded_string_view(buf, len, len + simdjson::SIMDJSON_PADDING);
}
}
-static int parse(lua_State *L)
-{
+static int parse(lua_State *L) {
size_t json_str_len;
const char *json_str = luaL_checklstring(L, 1, &json_str_len);
ondemand::document doc;
- try
- {
+ try {
// makes a padded_string_view for a bit of quickness!
doc = ondemand_parser.iterate(get_padded_string_view(json_str, json_str_len, jsonbuffer));
convert_ondemand_element_to_table(L, doc);
- }
- catch (simdjson::simdjson_error &error)
- {
+ } catch (simdjson::simdjson_error &error) {
luaL_error(L, error.what());
}
return 1;
}
-static int parse_file(lua_State *L)
-{
+static int parse_file(lua_State *L) {
const char *json_file = luaL_checkstring(L, 1);
padded_string json_string;
ondemand::document doc;
- try
- {
+ try {
json_string = padded_string::load(json_file);
doc = ondemand_parser.iterate(json_string);
convert_ondemand_element_to_table(L, doc);
- }
- catch (simdjson::simdjson_error &error)
- {
+ } catch (simdjson::simdjson_error &error) {
luaL_error(L, error.what());
}
return 1;
}
-static int active_implementation(lua_State *L)
-{
+static int active_implementation(lua_State *L) {
const auto &implementation = simdjson::get_active_implementation();
std::string name = implementation->name();
const std::string description = implementation->description();
@@ -237,113 +220,503 @@ static int active_implementation(lua_State *L)
return 1;
}
+// Forward declaration: serialize_append_array() and serialize_append_object() below call
+// serialize_data() for nested values before its definition appears.
+static void serialize_data(lua_State *L, int current_depth, int max_depth, simdjson::builder::string_builder &builder);
+
+// Read the optional per-call encode settings from the Lua table at `table_index`.
+//
+// Recognised fields:
+//   maxDepth   - number >= 1; overwrites `max_depth` when present.
+//   bufferSize - number in [1, DEFAULT_MAX_ENCODE_BUFFER_SIZE]; overwrites `desired_buffer_size` when present.
+// Fields that are absent (nil) leave the corresponding output untouched.
+// Any invalid value raises a Lua error (luaL_error does not return).
+static void parse_encode_options(lua_State *L, int table_index, int &max_depth, size_t &desired_buffer_size) {
+  // Check for maxDepth in options table
+  lua_getfield(L, table_index, "maxDepth");
+  if (!lua_isnil(L, -1)) {
+    if (!lua_isnumber(L, -1)) {
+      luaL_error(L, "maxDepth option must be a number");
+    }
+    max_depth = lua_tointeger(L, -1);
+    if (max_depth < 1) {
+      luaL_error(L, "maxDepth must be at least 1");
+    }
+  }
+  lua_pop(L, 1);
+
+  // Check for bufferSize in options table
+  lua_getfield(L, table_index, "bufferSize");
+  if (!lua_isnil(L, -1)) {
+    if (!lua_isnumber(L, -1)) {
+      luaL_error(L, "bufferSize option must be a number");
+    }
+    // Keep the full lua_Integer width: narrowing to int first would wrap large
+    // requests before the range checks below could reject them.
+    const lua_Integer buffer_size = lua_tointeger(L, -1);
+    if (buffer_size < 1) {
+      luaL_error(L, "bufferSize must be at least 1");
+    }
+    if (static_cast<unsigned long long>(buffer_size) > DEFAULT_MAX_ENCODE_BUFFER_SIZE) {
+      // luaL_error only understands a small set of conversions and "%zu" is not
+      // one of them, so the limit is formatted here and passed through "%s".
+      char limit[32];
+      snprintf(limit, sizeof(limit), "%llu", static_cast<unsigned long long>(DEFAULT_MAX_ENCODE_BUFFER_SIZE));
+      luaL_error(L, "bufferSize must not exceed %s", limit);
+    }
+    desired_buffer_size = static_cast<size_t>(buffer_size);
+  }
+  lua_pop(L, 1);
+}
+
+// Read the default maximum encode depth from the Lua registry.
+// Falls back to DEFAULT_MAX_ENCODE_DEPTH when no numeric value is stored under
+// LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY. The registry is consulted on every call.
+static int get_max_depth(lua_State *L) {
+  lua_getfield(L, LUA_REGISTRYINDEX, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY);
+
+  int configured = DEFAULT_MAX_ENCODE_DEPTH;
+  const bool has_value = lua_isnumber(L, -1) != 0;
+  if (has_value) {
+    configured = lua_tointeger(L, -1);
+  }
+
+  lua_pop(L, 1); // drop the registry value again
+  return configured;
+}
+
+// Store `max_depth` in the Lua registry under LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY,
+// where get_max_depth() reads it back.
+static void set_max_depth(lua_State *L, int max_depth) {
+  lua_pushinteger(L, max_depth);
+  lua_setfield(L, LUA_REGISTRYINDEX, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY); // pops the value
+}
+
+// Read the default encode buffer size from the Lua registry.
+// Falls back to DEFAULT_ENCODE_BUFFER_SIZE when no numeric value is stored under
+// LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY. The registry is consulted on every call.
+static size_t get_encode_buffer_size(lua_State *L) {
+  lua_getfield(L, LUA_REGISTRYINDEX, LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY);
+
+  size_t configured = DEFAULT_ENCODE_BUFFER_SIZE;
+  const bool has_value = lua_isnumber(L, -1) != 0;
+  if (has_value) {
+    configured = lua_tointeger(L, -1);
+  }
+
+  lua_pop(L, 1); // drop the registry value again
+  return configured;
+}
+
+// Store `buffer_size` in the Lua registry under LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY,
+// where get_encode_buffer_size() reads it back.
+static void set_encode_buffer_size(lua_State *L, size_t buffer_size) {
+  lua_pushinteger(L, buffer_size);
+  lua_setfield(L, LUA_REGISTRYINDEX, LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY); // pops the value
+}
+
+// Scan the table on top of the Lua stack and decide whether it can be encoded as a JSON array.
+// Returns -1 as soon as a key is found that is not a number with a whole value >= 1.
+// Otherwise returns the largest key seen, which is 0 for an empty table; gaps between
+// keys are allowed (sparse arrays). The stack is left as it was on entry.
+// NOTE(review): the largest key is narrowed to int; very large keys presumably overflow - confirm.
+static int get_table_array_size(lua_State *L) {
+ double key_num;
+ int max_index = 0;
+
+ // nil as the initial key starts the lua_next traversal
+ lua_pushnil(L);
+ while (lua_next(L, -2) != 0) {
+ // Check if key is a number
+ if (lua_type(L, -2) == LUA_TNUMBER) {
+ key_num = lua_tonumber(L, -2);
+ // Check if it's a positive integer
+ if (std::floor(key_num) == key_num && key_num >= 1) {
+ if (static_cast(key_num) > max_index) {
+ max_index = static_cast(key_num);
+ }
+ // Pop the value only; the key stays on the stack for the next lua_next call
+ lua_pop(L, 1);
+ continue;
+ }
+ }
+
+ // Non-integer key found - not an array
+ // Pop both key and value because the traversal is abandoned here
+ lua_pop(L, 2);
+ return -1;
+ }
+
+ // Return max_index if we found any valid integer keys (allows sparse arrays)
+ return max_index;
+}
+
+// Format the Lua number at stack position `index` as text.
+// Returns a pointer to a thread-local buffer together with the text length; the buffer is
+// overwritten by the next call on the same thread, so the result must be consumed first.
+// Whole values inside the "safe" range are written as integers; everything else is written
+// with "%.14g".
+// NOTE(review): non-finite values (inf/nan) reach the final snprintf and would not form
+// valid JSON text - confirm whether they should be rejected.
+inline std::pair format_number_as_string(lua_State *L, int index) {
+ thread_local char buffer[ENCODE_NUMBER_BUFFER_SIZE];
+ size_t len;
+
+ // JSON numbers are represented as doubles, which have limited precision
+ // for integers beyond 2^53. Check this first regardless of Lua version.
+ // NOTE(review): when LUA_MAXINTEGER is defined the limit used is LUA_MAXINTEGER,
+ // not 2^53 - confirm this is intended.
+#if defined(LUA_MAXINTEGER)
+ const double max_safe_int = LUA_MAXINTEGER;
+#else
+ const double max_safe_int = 9007199254740992.0; // 2^53
+#endif
+
+#if LUA_VERSION_NUM >= 503
+ // Lua 5.3+ has native integer type
+ if (lua_isinteger(L, index)) {
+ lua_Integer num = lua_tointeger(L, index);
+ // Check if the integer fits safely in a JSON number (double)
+ if (num > -max_safe_int && num < max_safe_int) {
+ // Optimized: Use std::to_string for faster integer conversion
+ std::string str = std::to_string(num);
+ len = str.size();
+ if (len < sizeof(buffer)) {
+ memcpy(buffer, str.c_str(), len + 1); // Include null terminator for safety
+ return {buffer, len};
+ } else {
+ // Fallback for very large numbers (rare)
+ len = snprintf(buffer, sizeof(buffer), "%lld", (long long)num);
+ return {buffer, len};
+ }
+ }
+ // Too large for safe integer representation, format as float
+ len = snprintf(buffer, sizeof(buffer), "%.14g", (double)num);
+ return {buffer, len};
+ }
+#else
+ // For Lua 5.1/5.2, check if the number is an integer value
+ {
+ double num = lua_tonumber(L, index);
+ if (std::floor(num) == num && num <= LLONG_MAX && num >= LLONG_MIN) {
+ if (num > -max_safe_int && num < max_safe_int) {
+ // Optimized: Use std::to_string for integers
+ std::string str = std::to_string(static_cast(num));
+ len = str.size();
+ if (len < sizeof(buffer)) {
+ memcpy(buffer, str.c_str(), len + 1);
+ return {buffer, len};
+ } else {
+ len = snprintf(buffer, sizeof(buffer), "%lld", static_cast(num));
+ return {buffer, len};
+ }
+ }
+ }
+ }
+#endif
+
+ // Reached for non-integer values and for whole values outside the safe range.
+ // For floats: Use snprintf to maintain original formatting (e.g., preserve trailing zeros)
+ lua_Number num = lua_tonumber(L, index);
+ len = snprintf(buffer, sizeof(buffer), "%.14g", num);
+ return {buffer, len};
+}
+
+// Append the Lua value at `lindex` as a JSON boolean.
+// A value that is not a Lua boolean is written as JSON null instead.
+inline void serialize_append_bool(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int lindex) {
+  if (!lua_isboolean(L, lindex)) {
+    builder.append_null();
+    return;
+  }
+
+  const bool truthy = lua_toboolean(L, lindex) != 0;
+#if __cplusplus >= 202002L
+  builder.append(truthy);
+#else
+  // Write the literal in a single append_raw call rather than character by character.
+  if (truthy) {
+    builder.append_raw(std::string_view("true", 4));
+  } else {
+    builder.append_raw(std::string_view("false", 5));
+  }
+#endif
+}
+
+// Append the Lua number at `lindex` as a JSON number.
+// The text comes from format_number_as_string() and is written unquoted and unescaped.
+static void serialize_append_number(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int lindex) {
+  const auto formatted = format_number_as_string(L, lindex);
+  builder.append_raw(std::string_view(formatted.first, formatted.second));
+}
+
+// Append the Lua string at `lindex` as a quoted, escaped JSON string.
+// The explicit length reported by lua_tolstring is passed on, so a string that
+// contains an embedded '\0' byte is written in full instead of being cut off at
+// the first NUL (which is what happens when only the char pointer is passed).
+static void serialize_append_string(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int lindex) {
+  size_t len = 0;
+  const char *str = lua_tolstring(L, lindex, &len);
+  builder.escape_and_append_with_quotes(std::string_view(str, len));
+}
+
+// Write the table at `table_index` as a JSON array with exactly `array_size` elements.
+// Indices 1..array_size are read with lua_rawgeti; an index holding nil is written as null,
+// which is how holes in a sparse table are filled.
+static void serialize_append_array(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int table_index, int array_size,
+                                   int current_depth, int max_depth) {
+  // Values are pushed inside the loop, so a relative index would drift; turn it into an
+  // absolute one up front (the registry pseudo-index is left alone).
+  const bool is_relative = table_index < 0 && table_index != LUA_REGISTRYINDEX;
+  const int absolute_index = is_relative ? lua_gettop(L) + table_index + 1 : table_index;
+
+  builder.start_array();
+  for (int slot = 1; slot <= array_size; ++slot) {
+    if (slot > 1) {
+      builder.append_comma();
+    }
+
+    lua_rawgeti(L, absolute_index, slot); // pushes the element, or nil for a hole
+    if (lua_isnil(L, -1)) {
+      builder.append_null();
+    } else {
+      serialize_data(L, current_depth, max_depth, builder);
+    }
+    lua_pop(L, 1); // discard the element
+  }
+  builder.end_array();
+}
+
+// Write the table on top of the Lua stack as a JSON object.
+// String keys are escaped and quoted; numeric keys are formatted as text and quoted.
+// Any other key type raises a Lua error.
+static void serialize_append_object(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int current_depth, int max_depth) {
+  builder.start_object();
+
+  bool need_comma = false;
+  // lua_next leaves key at -2 and value at -1; popping only the value after each
+  // entry keeps the key in place for the next lua_next call.
+  for (lua_pushnil(L); lua_next(L, -2) != 0; lua_pop(L, 1)) {
+    if (need_comma) {
+      builder.append_comma();
+    }
+    need_comma = true;
+
+    const int key_type = lua_type(L, -2);
+    switch (key_type) {
+      case LUA_TSTRING: {
+        size_t key_len;
+        const char *key = lua_tolstring(L, -2, &key_len);
+        builder.escape_and_append_with_quotes(std::string_view(key, key_len));
+      } break;
+      case LUA_TNUMBER: {
+        // Formatted without converting the key in place on the Lua stack.
+        auto key_text = format_number_as_string(L, -2);
+        builder.escape_and_append_with_quotes(std::string_view(key_text.first, key_text.second));
+      } break;
+      default: {
+        const char *type_name = lua_typename(L, key_type);
+        luaL_error(L, "unsupported key type in table for serialization: %s", type_name);
+      } break;
+    }
+
+    builder.append_colon();
+
+    // The value is already on top of the stack.
+    serialize_data(L, current_depth, max_depth, builder);
+  }
+
+  builder.end_object();
+}
+
+// Main serialization dispatcher: writes the Lua value on top of the stack to `builder` as JSON.
+//
+// Strings, numbers and booleans are written directly. Tables are written as a JSON array when
+// get_table_array_size() reports a positive size and as a JSON object otherwise. nil and a NULL
+// lightuserdata are written as null. Every other type raises a Lua error.
+//
+// `current_depth` is the nesting level of the value being written; a Lua error is raised once
+// it exceeds `max_depth`. The value itself is left on the stack for the caller to pop.
+static void serialize_data(lua_State *L, int current_depth, int max_depth, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder) {
+  // Check depth to prevent stack overflow
+  if (current_depth > max_depth) {
+    luaL_error(L, "maximum nesting depth exceeded (limit: %d)", max_depth);
+  }
+
+  switch (lua_type(L, -1)) {
+    case LUA_TSTRING: {
+      serialize_append_string(L, builder, -1);
+    } break;
+    case LUA_TNUMBER: {
+      serialize_append_number(L, builder, -1);
+    } break;
+    case LUA_TBOOLEAN: {
+      serialize_append_bool(L, builder, -1);
+    } break;
+    case LUA_TTABLE: {
+      // Every nested table keeps extra Lua stack slots alive (the traversal key plus the
+      // current value) while its children are written. Lua only guarantees a small number
+      // of free slots to a C function, so room has to be requested per level; without this
+      // a deeply nested table could run past the end of the Lua stack.
+      luaL_checkstack(L, 4, "table nesting too deep to serialize");
+
+      current_depth++;
+      int array_size = get_table_array_size(L);
+      if (array_size > 0) {
+        // Handle as array
+        serialize_append_array(L, builder, -1, array_size, current_depth, max_depth);
+      } else {
+        // Handle as object (this includes the empty table)
+        serialize_append_object(L, builder, current_depth, max_depth);
+      }
+    } break;
+    case LUA_TNIL: {
+      // Treat Lua nil as JSON null
+      builder.append_null();
+    } break;
+    case LUA_TLIGHTUSERDATA: {
+      // Treat lightuserdata NULL as JSON null
+      if (lua_touserdata(L, -1) == NULL) {
+        builder.append_null();
+      } else {
+        luaL_error(L, "unsupported lightuserdata value for serialization");
+      }
+    } break;
+    default: {
+      const char *type_name = lua_typename(L, lua_type(L, -1));
+      luaL_error(L, "unsupported Lua data type for serialization: %s", type_name);
+    }
+  }
+}
+
+// Lua entry point: encode(value [, options]) -> JSON string.
+// `value` is the Lua value to serialize. `options` is an optional table read by
+// parse_encode_options() (maxDepth, bufferSize) that overrides the registry defaults
+// for this call. Raises a Lua error on invalid arguments, on serialization failure,
+// or when the produced text is not valid UTF-8.
+static int encode(lua_State *L) {
+ // the output string once the building is done.
+ std::string_view json;
+
+ int num_args = lua_gettop(L);
+ // NOTE(review): the third argument of luaL_argcheck is the argument position being
+ // reported; the argument count is passed here - confirm this is intended.
+ luaL_argcheck(L, num_args >= 1 && num_args <= 2, num_args, "expected 1 or 2 arguments");
+
+ // Get max_depth and buffer_size from options table if provided, otherwise use global settings
+ int max_depth = get_max_depth(L);
+ size_t desired_buffer_size = get_encode_buffer_size(L);
+
+ if (num_args == 2) {
+ luaL_checktype(L, 2, LUA_TTABLE);
+ parse_encode_options(L, 2, max_depth, desired_buffer_size);
+ lua_pop(L, 1); // Remove options table, leaving data on top
+ }
+
+ // Get desired buffer size and recreate buffer if size changed
+ // The builder is shared by all encode() calls on this thread, so a per-call
+ // bufferSize that differs from the current one replaces the shared builder.
+ if (encode_buffer == nullptr || encode_buffer_size != desired_buffer_size) {
+ if (encode_buffer != nullptr) {
+ delete encode_buffer;
+ }
+ encode_buffer = new SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder(desired_buffer_size);
+ encode_buffer_size = desired_buffer_size;
+ }
+
+ // Reuse buffer - clear it but retain capacity, this should mean successive calls
+ // are efficient in most cases.
+ encode_buffer->clear();
+
+ // The value to encode is on top of the stack; nesting depth starts at 0.
+ serialize_data(L, 0, max_depth, *encode_buffer);
+ auto v_err = encode_buffer->view().get(json);
+ if (v_err) {
+ return luaL_error(L, "failed to get JSON view from buffer: %s", simdjson::error_message(v_err));
+ }
+
+ // validate utf-8
+ if (!encode_buffer->validate_unicode()) {
+ return luaL_error(L, "encoded JSON contains invalid UTF-8 sequences");
+ }
+
+ // Copy the result into a Lua string; the shared builder is cleared again on the next call.
+ lua_pushlstring(L, json.data(), json.size());
+ return 1;
+};
+
+// Lua entry point: setMaxEncodeDepth(depth).
+// Stores the default maximum nesting depth used by encode(); raises a Lua error
+// when the argument is not an integer or is smaller than 1. Returns nothing to Lua.
+static int setMaxEncodeDepth(lua_State *L) {
+  const int requested = luaL_checkinteger(L, 1);
+  if (requested >= 1) {
+    set_max_depth(L, requested);
+    return 0;
+  }
+  return luaL_error(L, "Maximum encode depth must be at least 1");
+}
+
+// Lua entry point: getMaxEncodeDepth() -> integer.
+// Returns the default maximum nesting depth currently used by encode().
+static int getMaxEncodeDepth(lua_State *L) {
+  const int depth = get_max_depth(L);
+  lua_pushinteger(L, depth);
+  return 1;
+}
+
+// Lua entry point: setEncodeBufferSize(bytes).
+// Stores the default initial capacity of the encode buffer. Raises a Lua error when the
+// argument is not an integer, is smaller than 1, or exceeds DEFAULT_MAX_ENCODE_BUFFER_SIZE.
+// Returns nothing to Lua.
+static int setEncodeBufferSize(lua_State *L) {
+  // Keep the full lua_Integer width: narrowing to int first would wrap large
+  // requests before the range checks below could reject them.
+  const lua_Integer buffer_size = luaL_checkinteger(L, 1);
+  if (buffer_size < 1) {
+    return luaL_error(L, "Encode buffer size must be at least 1");
+  }
+  if (static_cast<unsigned long long>(buffer_size) > DEFAULT_MAX_ENCODE_BUFFER_SIZE) {
+    // luaL_error only understands a small set of conversions and "%zu" is not
+    // one of them, so the limit is formatted here and passed through "%s".
+    char limit[32];
+    snprintf(limit, sizeof(limit), "%llu", static_cast<unsigned long long>(DEFAULT_MAX_ENCODE_BUFFER_SIZE));
+    return luaL_error(L, "Encode buffer size must not exceed %s", limit);
+  }
+  set_encode_buffer_size(L, static_cast<size_t>(buffer_size));
+  return 0;
+}
+
+// Lua entry point: getEncodeBufferSize() -> integer.
+// Returns the default initial capacity, in bytes, used for the encode buffer.
+static int getEncodeBufferSize(lua_State *L) {
+  const size_t capacity = get_encode_buffer_size(L);
+  lua_pushinteger(L, capacity);
+  return 1;
+}
+
// ParsedObject as C++ class
#define LUA_MYOBJECT "ParsedObject"
-class ParsedObject
-{
+class ParsedObject { // bundles the padded JSON text, the on-demand parser and the document obtained by iterating that text
private:
simdjson::padded_string json_string;
ondemand::document doc;
std::unique_ptr parser;
public:
- ParsedObject(const char *json_file)
- : json_string(padded_string::load(json_file)),
- parser(new ondemand::parser{})
- {
+ ParsedObject(const char *json_file) : json_string(padded_string::load(json_file)), parser(new ondemand::parser{}) { // file constructor: json_string comes from padded_string::load(json_file)
this->doc = this->parser.get()->iterate(json_string);
}
- ParsedObject(const char *json_str, size_t json_str_len)
- : json_string(json_str, json_str_len),
- parser(new ondemand::parser{})
- {
+ ParsedObject(const char *json_str, size_t json_str_len) : json_string(json_str, json_str_len), parser(new ondemand::parser{}) { // buffer constructor: json_string is built from json_str / json_str_len
this->doc = this->parser.get()->iterate(json_string);
}
- ~ParsedObject() {}
- ondemand::document *get_doc() { return &(this->doc); }
+ ~ParsedObject() { // empty body: no explicit cleanup is performed here
+ }
+ ondemand::document *get_doc() { // hands out the address of the member doc, not a copy
+ return &(this->doc);
+ }
};
-static int ParsedObject_delete(lua_State *L)
-{
+static int ParsedObject_delete(lua_State *L) { // listed as "__gc" in arraylib_m: deletes the object whose pointer is stored in the userdata at index 1
delete *reinterpret_cast(lua_touserdata(L, 1));
return 0;
}
-static int ParsedObject_open(lua_State *L)
-{
+static int ParsedObject_open(lua_State *L) { // exposed to Lua as "open": builds a ParsedObject from the string at argument 1 and returns it as userdata
size_t json_str_len;
const char *json_str = luaL_checklstring(L, 1, &json_str_len);
- try
- {
- ParsedObject **parsedObject =
- (ParsedObject **)(lua_newuserdata(L, sizeof(ParsedObject *)));
+ try {
+ ParsedObject **parsedObject = (ParsedObject **)(lua_newuserdata(L, sizeof(ParsedObject *))); // the userdata holds only a pointer; the object is allocated with new on the next line
*parsedObject = new ParsedObject(json_str, json_str_len);
luaL_getmetatable(L, LUA_MYOBJECT);
lua_setmetatable(L, -2);
- }
- catch (simdjson::simdjson_error &error)
- {
+ } catch (simdjson::simdjson_error &error) { // NOTE(review): what() is passed below as luaL_error's format string, so a '%' in it would be read as a conversion
luaL_error(L, error.what());
}
return 1;
}
-static int ParsedObject_open_file(lua_State *L)
-{
+static int ParsedObject_open_file(lua_State *L) { // exposed to Lua as "openFile": builds a ParsedObject from the file path at argument 1 and returns it as userdata
const char *json_file = luaL_checkstring(L, 1);
- try
- {
- ParsedObject **parsedObject =
- (ParsedObject **)(lua_newuserdata(L, sizeof(ParsedObject *)));
+ try {
+ ParsedObject **parsedObject = (ParsedObject **)(lua_newuserdata(L, sizeof(ParsedObject *))); // the userdata holds only a pointer; the object is allocated with new on the next line
*parsedObject = new ParsedObject(json_file);
luaL_getmetatable(L, LUA_MYOBJECT);
lua_setmetatable(L, -2);
- }
- catch (simdjson::simdjson_error &error)
- {
+ } catch (simdjson::simdjson_error &error) { // NOTE(review): what() is passed below as luaL_error's format string, so a '%' in it would be read as a conversion
luaL_error(L, error.what());
}
return 1;
}
-static int ParsedObject_atPointer(lua_State *L)
-{
- ondemand::document *document =
- (*reinterpret_cast(luaL_checkudata(L, 1, LUA_MYOBJECT)))
- ->get_doc();
+static int ParsedObject_atPointer(lua_State *L) { // "at"/"atPointer" method: looks up the pointer string at argument 2 in the object's document
+ ondemand::document *document = (*reinterpret_cast(luaL_checkudata(L, 1, LUA_MYOBJECT)))->get_doc(); // argument 1 is checked to be LUA_MYOBJECT userdata
const char *pointer = luaL_checkstring(L, 2);
- try
- {
+ try {
ondemand::value returned_element = document->at_pointer(pointer);
convert_ondemand_element_to_table(L, returned_element);
- }
- catch (simdjson::simdjson_error &error)
- {
+ } catch (simdjson::simdjson_error &error) { // NOTE(review): what() is passed below as luaL_error's format string, so a '%' in it would be read as a conversion
luaL_error(L, error.what());
}
return 1;
}
-static int ParsedObject_newindex(lua_State *L)
-{
- luaL_error(L, "This should be treated as a read-only table. We may one day add array access for the elements, and it'll likely not be modifiable.");
+static int ParsedObject_newindex(lua_State *L) { // listed as "__newindex" in arraylib_m: unconditionally raises a Lua error
+ luaL_error(L, "This should be treated as a read-only table. We may one day "
+ "add array "
+ "access for the elements, and it'll likely not be modifiable.");
return 1;
}
static const struct luaL_Reg arraylib_m[] = {
- {"at", ParsedObject_atPointer},
- {"atPointer", ParsedObject_atPointer},
- {"__newindex", ParsedObject_newindex},
- {"__gc", ParsedObject_delete},
- {NULL, NULL}};
-
-int luaopen_simdjson(lua_State *L)
-{
+ {"at", ParsedObject_atPointer}, {"atPointer", ParsedObject_atPointer}, {"__newindex", ParsedObject_newindex}, {"__gc", ParsedObject_delete}, {NULL, NULL}}; // "at" and "atPointer" name the same function; {NULL, NULL} ends the list
+
+int luaopen_simdjson(lua_State *L) {
luaL_newmetatable(L, LUA_MYOBJECT);
lua_pushvalue(L, -1); /* duplicates the metatable */
lua_setfield(L, -2, "__index");
diff --git a/src/luasimdjson.h b/src/luasimdjson.h
index 7f92718..85d91af 100644
--- a/src/luasimdjson.h
+++ b/src/luasimdjson.h
@@ -7,20 +7,30 @@
#endif
extern "C" {
- static int parse(lua_State*);
- static int parse_file(lua_State*);
- static int active_implementation(lua_State*);
- static int ParsedObject_open(lua_State*);
- static int ParsedObject_open_file(lua_State*);
+ static int parse(lua_State *); // forward declarations of the C functions listed in luasimdjson[] below
+ static int parse_file(lua_State *);
+ static int active_implementation(lua_State *);
+ static int ParsedObject_open(lua_State *);
+ static int ParsedObject_open_file(lua_State *);
+ static int encode(lua_State *);
+ static int setMaxEncodeDepth(lua_State *);
+ static int getMaxEncodeDepth(lua_State *);
+ static int setEncodeBufferSize(lua_State *);
+ static int getEncodeBufferSize(lua_State *);
- static const struct luaL_Reg luasimdjson[] = {
- {"parse", parse},
- {"parseFile", parse_file},
- {"activeImplementation", active_implementation},
- {"open", ParsedObject_open},
- {"openFile", ParsedObject_open_file},
+ static const struct luaL_Reg luasimdjson[] = { // pairs each name exposed to Lua with the C function implementing it
+ {"parse", parse},
+ {"parseFile", parse_file},
+ {"activeImplementation", active_implementation},
+ {"open", ParsedObject_open},
+ {"openFile", ParsedObject_open_file},
+ {"encode", encode},
+ {"setMaxEncodeDepth", setMaxEncodeDepth},
+ {"getMaxEncodeDepth", getMaxEncodeDepth},
+ {"setEncodeBufferSize", setEncodeBufferSize},
+ {"getEncodeBufferSize", getEncodeBufferSize},
- {NULL, NULL},
- };
- LUASIMDJSON_EXPORT int luaopen_simdjson(lua_State*);
+ {NULL, NULL}, // sentinel entry that terminates the list
+ };
+ LUASIMDJSON_EXPORT int luaopen_simdjson(lua_State *); // the only non-static declaration in this block
}