diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..958627b --- /dev/null +++ b/.clang-format @@ -0,0 +1,19 @@ +# see https://clang.llvm.org/docs/ClangFormatStyleOptions.html +--- +Language: Cpp +Standard: c++11 + +SortIncludes: false + +IndentWidth: 2 +TabWidth: 2 +UseTab: Never +ColumnLimit: 160 +BreakBeforeBraces: Attach +IndentExternBlock: Indent +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AllowShortFunctionsOnASingleLine: None +IndentCaseLabels: true +PointerAlignment: Right +SpaceBeforeParens: ControlStatements diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 28873b7..a641c78 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,37 +10,43 @@ jobs: os: [linux, macos, macos-arm64] lua: [lua=5.1, lua=5.2, lua=5.3, lua=5.4, luajit=2.0, luajit=2.1] include: - - os: linux - runner: ubuntu-latest - - os: macos - runner: macos-15-intel - - os: macos-arm64 - runner: macos-latest + - os: linux + runner: ubuntu-latest + - os: macos + runner: macos-15-intel + - os: macos-arm64 + runner: macos-latest exclude: - - os: macos-arm64 - lua: luajit=2.0 + - os: macos-arm64 + lua: luajit=2.0 name: ${{ matrix.os }} (${{ matrix.lua }}) runs-on: ${{ matrix.runner }} steps: # Checks-out the repository under $GITHUB_WORKSPACE. 
- uses: actions/checkout@v6 - - name: Install libreadline + - name: Install libreadline if: runner.os == 'Linux' run: | sudo apt-get install -y libreadline-dev - name: Install Lua (${{ matrix.lua }}) run: | - pip install git+https://github.com/luarocks/hererocks + pipx install git+https://github.com/luarocks/hererocks + pipx ensurepath + export PATH=$PATH:/root/.local/bin:$HOME/.local/bin hererocks lua_install -r^ --${{ matrix.lua }} env: MACOSX_DEPLOYMENT_TARGET: 11.0 - name: Build lua-simdjson + shell: bash run: | + set -e source lua_install/bin/activate luarocks make - name: Run tests + shell: bash run: | + set -e source lua_install/bin/activate luarocks install lua-cjson2 luarocks install busted @@ -51,7 +57,7 @@ jobs: fail-fast: false matrix: lua: [lua=5.1, lua=5.2, lua=5.3, lua=5.4, luajit=2.0, luajit=2.1] - target: [mingw,vs] + target: [mingw, vs] runs-on: windows-2022 steps: # Checks-out the repository under $GITHUB_WORKSPACE. diff --git a/Makefile.win b/Makefile.win index 4eafad4..83c6592 100644 --- a/Makefile.win +++ b/Makefile.win @@ -24,4 +24,4 @@ clean: del *.dll src\*.obj *.lib *.exp 2>nul install: $(TARGET) - copy $(TARGET) $(INST_LIBDIR) + copy $(TARGET) $(INST_LIBDIR) \ No newline at end of file diff --git a/README.md b/README.md index 98e345a..5ebd3bf 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # lua-simdjson + [![Build Status](https://github.com/FourierTransformer/lua-simdjson/actions/workflows/ci.yml/badge.svg?branch=master)](https://github.com/FourierTransformer/lua-simdjson/actions?query=branch%3Amaster) A basic Lua binding to [simdjson](https://simdjson.org). The simdjson library is an incredibly fast JSON parser that uses SIMD instructions and fancy algorithms to parse JSON very quickly. It's been tested with LuaJIT 2.0/2.1 and Lua 5.1, 5.2, 5.3, and 5.4 on linux/osx/windows. It has a general parsing mode and a lazy mode that uses a JSON pointer. @@ -6,26 +7,32 @@ A basic Lua binding to [simdjson](https://simdjson.org). 
The simdjson library is Current simdjson version: 4.2.3 ## Installation + If all the requirements are met, lua-simdjson can be install via luarocks with: -``` +```bash luarocks install lua-simdjson ``` + Otherwise it can be installed manually by pulling the repo and running luarocks make. ## Requirements - * lua-simdjson only works on 64bit systems. - * a Lua build environment with support for C++11 - * g++ version 7+ and clang++ version 6+ or newer should work! + +* lua-simdjson only works on 64bit systems. +* a Lua build environment with support for C++11 + * g++ version 7+ and clang++ version 6+ or newer should work! ## Parsing + There are two main ways to parse JSON in lua-simdjson: + 1. With `parse`: this parses JSON and returns a Lua table with the parsed values 2. With `open`: this reads in the JSON and keeps it in simdjson's internal format. The values can then be accessed using a JSON pointer (examples below) Both of these methods also have support to read files on disc with `parseFile` and `openFile` respectively. If handling JSON from disk, these methods should be used and are incredibly fast. ## Typing + * lua-simdjson uses `simdjson.null` to represent `null` values from parsed JSON. * Any application should use that for comparison as needed. * it uses `lua_pushnumber` and `lua_pushinteger` for JSON floats and ints respectively, so your Lua version may handle that slightly differently. @@ -33,7 +40,9 @@ Both of these methods also have support to read files on disc with `parseFile` a * All other types map as expected. ### Parse some JSON + The `parse` methods will return a normal Lua table that can be interacted with. + ```lua local simdjson = require("simdjson") local response = simdjson.parse([[ @@ -61,7 +70,9 @@ print(fileResponse["statuses"][1]["id"]) ``` ### Open some json + The `open` methods currently require the use of a JSON pointer, but are very quick. They are best used when you only need a part of a response. 
In the example below, it could be useful for just getting the `Thumnail` object with `:atPointer("/Image/Thumbnail")` which will then only create a Lua table with those specific values. + ```lua local simdjson = require("simdjson") local response = simdjson.open([[ @@ -93,10 +104,81 @@ The `open` and `parse` codeblocks should print out the same values. It's worth n This lazy style of using the simdjson data structure could also be used with array access in the future. +## Encoding + +The `encode` method converts Lua tables into JSON strings. It supports nested tables, arrays, and all standard JSON types. + +```lua +local simdjson = require("simdjson") + +-- Encode a simple table +local data = { + name = "John Doe", + age = 30, + active = true, + score = 95.5 +} +local json = simdjson.encode(data) +print(json) -- e.g. {"name":"John Doe","age":30,"active":true,"score":95.5} (key order is not guaranteed) + +-- Encode nested structures +local complex = { + user = { + id = 123, + tags = {"lua", "json", "fast"} + }, + metadata = { + created = "2024-01-01", + count = 42 + } +} +local json = simdjson.encode(complex) + +-- Use simdjson.null for JSON null values +local withNull = { + value = simdjson.null, + name = "test" +} +local json = simdjson.encode(withNull) -- e.g. {"value":null,"name":"test"} (key order is not guaranteed) + +-- Optional: specify encoding options with a configuration table +local deepData = { level1 = { level2 = { level3 = "value" } } } +local json = simdjson.encode(deepData, {maxDepth = 10}) -- max depth of 10 + +-- You can also specify bufferSize per-call (default: 16KB) +local json = simdjson.encode(data, {bufferSize = 32 * 1024}) -- 32KB buffer + +-- Or combine both options +local json = simdjson.encode(deepData, {maxDepth = 10, bufferSize = 8192}) +``` + +You can also configure global encoding settings: + +```lua +-- Set maximum nesting depth globally (default: 1024) +simdjson.setMaxEncodeDepth(512) +local currentDepth = simdjson.getMaxEncodeDepth() + +-- Set encode buffer size in bytes (default: 16KB) 
+simdjson.setEncodeBufferSize(32 * 1024) -- 32KB +local currentSize = simdjson.getEncodeBufferSize() +``` + +**Encoding behavior:** + +* Tables with consecutive integer keys starting at 1 are encoded as JSON arrays +* All other tables are encoded as JSON objects +* Numbers are formatted as integers when possible, or floats with 14 digits of precision +* Integers larger than 2^53 are encoded in scientific notation for JSON compatibility +* Strings are automatically escaped according to JSON specifications +* `simdjson.null` represents JSON `null` + ## Error Handling + lua-simdjson will error out with any errors from simdjson encountered while parsing. They are very good at helping identify what has gone wrong during parsing. ## Benchmarks + I ran some benchmarks against lua-cjson, rapidjson, and dkjson. For each test, I loaded the JSON into memory, and then had the parsers go through each file 100 times and took the average time it took to parse to a Lua table. You can see all the results in the [benchmark](benchmark/) folder. I've included a sample output run via Lua (the LuaJIT graph looks very similar, also in the benchmark folder). The y-axis is logarithmic, so every half step down is twice as fast. ![Lua Performance Column Chart](benchmark/lua-perf.png) @@ -116,7 +198,7 @@ lua-simdjson, like the simdjson library performs better on more modern hardware. * since it's an external module, it's not quite as easy to just grab the file and go (dkjson has you covered here!) ## Philosophy -I plan to keep it fairly inline with what the original simdjson library is capable of doing, which really means not adding too many additional options. The big _thing_ that's missing so far is encoding a lua table to JSON. I may add in an encoder at some point. +I plan to keep it fairly inline with what the original simdjson library is capable of doing, which really means not adding too many additional options. 
## Licenses * The jsonexamples, src/simdjson.cpp, src/simdjson.h are unmodified from the released version simdjson under the Apache License 2.0. diff --git a/spec/compile_spec.lua b/spec/compile_spec.lua index d9627e2..f5a46fa 100644 --- a/spec/compile_spec.lua +++ b/spec/compile_spec.lua @@ -81,9 +81,8 @@ end) local major, minor = _VERSION:match('([%d]+)%.(%d+)') if tonumber(major) >= 5 and tonumber(minor) >= 3 then - describe("Make sure ints and floats parse correctly", function () + describe("Make sure ints and floats parse correctly", function() it("should handle decoding numbers appropriately", function() - local numberCheck = simdjson.parse([[ { "float": 1.2, @@ -101,7 +100,6 @@ if tonumber(major) >= 5 and tonumber(minor) >= 3 then assert.are.same("float", math.type(numberCheck["one_above_max_signed_integer"])) assert.are.same("integer", math.type(numberCheck["min_unsigned_integer"])) assert.are.same("float", math.type(numberCheck["max_unsigned_integer"])) - end) end) end @@ -129,3 +127,19 @@ describe("Make sure invalid files are not accepted", function() end) end end) + +describe("Active implementation function", function() + it("should return a valid implementation name", function() + local impl = simdjson.activeImplementation() + assert.is_not_nil(impl) + assert.is_string(impl) + assert.is_truthy(impl:match("%w+")) -- Contains at least one word character + assert.is_true(#impl > 0) -- Non-empty string + end) + + it("should contain implementation details", function() + local impl = simdjson.activeImplementation() + -- Implementation string should have format like "arm64 (ARM NEON)" or "haswell (Intel AVX2)" + assert.is_truthy(impl:match("%(.*%)")) -- Contains parentheses with description + end) +end) diff --git a/spec/encode_security_spec.lua b/spec/encode_security_spec.lua new file mode 100644 index 0000000..83e7853 --- /dev/null +++ b/spec/encode_security_spec.lua @@ -0,0 +1,396 @@ +local simdjson = require("simdjson") +local cjson = require("cjson") + 
+describe("encode() security and edge cases", function() + describe("String injection and escaping", function() + it("should properly escape quote characters", function() + local data = { value = 'test"with"quotes' } + local encoded = simdjson.encode(data) + assert.is_true(encoded:find('\\"') ~= nil) + local decoded = simdjson.parse(encoded) + assert.are.same(data.value, decoded.value) + end) + + it("should properly escape backslashes", function() + local data = { value = 'test\\with\\backslashes' } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data.value, decoded.value) + end) + + it("should properly escape control characters", function() + local test_cases = { + { str = "line1\nline2", name = "newline" }, + { str = "tab\there", name = "tab" }, + { str = "return\rhere", name = "carriage return" }, + { str = "backspace\bhere", name = "backspace" }, + { str = "form\ffeed", name = "form feed" }, + } + + for _, test in ipairs(test_cases) do + local data = { value = test.str } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data.value, decoded.value) + end + end) + + it("should handle strings with null bytes", function() + -- Note: null bytes may be truncated as C strings are null-terminated + local data = { value = "before\x00after" } + local encoded = simdjson.encode(data) + -- Verify encoding doesn't crash and produces valid JSON + assert.is_true(encoded:find("before") ~= nil) + local decoded = simdjson.parse(encoded) + -- String may be truncated at null byte + assert.is_true(decoded.value == "before" or decoded.value == "before\x00after") + end) + + it("should handle common control characters safely", function() + -- Test specific control characters that should be properly escaped + local test_chars = { + { char = "\t", name = "tab", escape = "\\t" }, + { char = "\n", name = "newline", escape = "\\n" }, + { char = "\r", name = "carriage return", escape = "\\r" 
}, + { char = "\b", name = "backspace", escape = "\\b" }, + { char = "\f", name = "form feed", escape = "\\f" }, + } + + for _, test in ipairs(test_chars) do + local data = { value = "before" .. test.char .. "after" } + local encoded = simdjson.encode(data) + -- Verify the character is properly escaped in JSON + assert.is_true(encoded:find("before") ~= nil) + local decoded = simdjson.parse(encoded) + assert.are.same(data.value, decoded.value) + end + end) + end) + + describe("Potential XSS and HTML injection", function() + it("should handle HTML/XML special characters", function() + local data = { + html = "<script>alert('xss')</script>", + xml = "<?xml version='1.0'?>", + tags = "<div>test</div>", + entities = "&lt;&gt;&amp;&quot;&apos;" + } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data.html, decoded.html) + assert.are.same(data.xml, decoded.xml) + assert.are.same(data.tags, decoded.tags) + assert.are.same(data.entities, decoded.entities) + end) + + it("should not execute embedded JavaScript", function() + local malicious = { + js = "'; alert('xss'); //", + comment = "/* comment */ code", + injection = "\"); malicious(); //" + } + local encoded = simdjson.encode(malicious) + -- Verify it's properly escaped + assert.is_true(encoded:find("alert") ~= nil) + local decoded = simdjson.parse(encoded) + assert.are.same(malicious.js, decoded.js) + end) + end) + + describe("Key injection and object vulnerabilities", function() + it("should handle keys with special characters", function() + local data = { + ["key'with'quotes"] = "value1", + ['key"with"doublequotes'] = "value2", + ["key\\with\\backslash"] = "value3", + ["key\nwith\nnewline"] = "value4", + } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data["key'with'quotes"], decoded["key'with'quotes"]) + assert.are.same(data['key"with"doublequotes'], decoded['key"with"doublequotes']) + end) + + it("should handle prototype pollution keys", function() + -- Common prototype pollution attack keys + local data = { + ["__proto__"] = "should_be_safe", + ["constructor"] = "safe_value", + ["prototype"] = "another_safe" + } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data["__proto__"], decoded["__proto__"]) + assert.are.same(data["constructor"], decoded["constructor"]) + end) + + it("should handle empty string keys", function() + local data = { [""] = "empty_key_value" } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data[""], decoded[""]) + end) + + it("should handle very long keys", function() + local long_key = 
string.rep("a", 10000) + local data = { [long_key] = "value" } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data[long_key], decoded[long_key]) + end) + end) + + describe("Number vulnerabilities", function() + it("should handle very large integers without overflow", function() + local data = { + max_int = 9007199254740991, -- Max safe integer in JavaScript + min_int = -9007199254740991, + large_pos = 9223372036854775807, -- Max int64 + large_neg = -9223372036854775808, -- Min int64 + } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + -- Allow for precision loss on very large numbers + assert.is_true(math.abs(decoded.max_int - data.max_int) < 1) + end) + + it("should handle floating point edge cases", function() + local data = { + zero = 0.0, + very_small = 1e-308, + very_large = 1e308, + negative = -123.456, + } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data.zero, decoded.zero) + end) + + it("should handle many decimal places", function() + local data = { pi = 3.14159265358979323846264338327950288 } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + -- Check that precision is maintained reasonably + assert.is_true(math.abs(decoded.pi - 3.141592653589793) < 0.000001) + end) + end) + + describe("Nested structure vulnerabilities", function() + it("should enforce max depth to prevent stack overflow", function() + -- Create a very deep structure + local function create_deep(depth) + if depth == 0 then + return "bottom" + end + return { nested = create_deep(depth - 1) } + end + + local deep = create_deep(50) + + -- Should succeed with high limit + local success1 = pcall(function() + simdjson.encode(deep, { maxDepth = 100 }) + end) + assert.is_true(success1) + + -- Should fail with low limit + local success2 = pcall(function() + simdjson.encode(deep, { maxDepth = 10 }) + end) + 
assert.is_false(success2) + end) + + it("should handle wide objects without issues", function() + -- Create object with many keys + local wide = {} + for i = 1, 1000 do + wide["key" .. i] = "value" .. i + end + local encoded = simdjson.encode(wide) + local decoded = simdjson.parse(encoded) + assert.are.same(wide["key500"], decoded["key500"]) + end) + + it("should handle wide arrays without issues", function() + local wide = {} + for i = 1, 1000 do + wide[i] = i + end + local encoded = simdjson.encode(wide) + local decoded = simdjson.parse(encoded) + assert.are.same(#wide, #decoded) + assert.are.same(wide[500], decoded[500]) + end) + end) + + describe("Memory and performance vulnerabilities", function() + it("should handle very long strings", function() + -- Create a 1MB string + local long_string = string.rep("x", 1024 * 1024) + local data = { large = long_string } + local encoded = simdjson.encode(data) + assert.is_true(#encoded > 1024 * 1024) + local decoded = simdjson.parse(encoded) + assert.are.same(#long_string, #decoded.large) + end) + + it("should handle arrays with many elements", function() + local large_array = {} + for i = 1, 10000 do + large_array[i] = i + end + local data = { arr = large_array } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(#large_array, #decoded.arr) + assert.are.same(large_array[5000], decoded.arr[5000]) + end) + + it("should handle mixed large structure", function() + local data = { + strings = {}, + numbers = {}, + objects = {} + } + for i = 1, 100 do + data.strings[i] = string.rep("test", 100) + data.numbers[i] = i * 1.5 + data.objects[i] = { id = i, name = "item" .. 
i } + end + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(#data.strings, #decoded.strings) + end) + end) + + describe("Unicode and encoding vulnerabilities", function() + it("should handle various Unicode characters", function() + local data = { + emoji = "😀🎉🔥💯", + chinese = "你好世界", + arabic = "مرحبا", + russian = "Привет", + mixed = "Hello 世界 🌍", + } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data.emoji, decoded.emoji) + assert.are.same(data.chinese, decoded.chinese) + assert.are.same(data.mixed, decoded.mixed) + end) + + it("should handle Unicode escapes", function() + -- String with Unicode escape sequences + local data = { unicode = "test\\u0041\\u0042\\u0043" } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data.unicode, decoded.unicode) + end) + + it("should handle zero-width and special Unicode", function() + local data = { + zero_width = "test\226\128\139here", -- Zero-width space (U+200B) + rtl_mark = "test\226\128\143mark", -- Right-to-left mark (U+200F) + combining = "e\204\129", -- e with acute accent combining (U+0301) + } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data.zero_width, decoded.zero_width) + end) + end) + + describe("Malformed or unexpected input", function() + it("should handle empty structures", function() + local data = { + empty_object = {}, + empty_array = {}, + empty_string = "", + } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(type(decoded.empty_object), "table") + assert.are.same(decoded.empty_string, "") + end) + + it("should handle boolean edge cases", function() + local data = { + true_val = true, + false_val = false, + bool_array = { true, false, true, false }, + } + local encoded = simdjson.encode(data) + assert.is_true(encoded:find("true") ~= nil) + 
assert.is_true(encoded:find("false") ~= nil) + local decoded = simdjson.parse(encoded) + assert.are.same(data.true_val, decoded.true_val) + assert.are.same(data.false_val, decoded.false_val) + end) + + it("should consistently handle repeated encoding", function() + local data = { test = "value", num = 42 } + local encoded1 = simdjson.encode(data) + local encoded2 = simdjson.encode(data) + local encoded3 = simdjson.encode(data) + + local decoded1 = simdjson.parse(encoded1) + local decoded2 = simdjson.parse(encoded2) + local decoded3 = simdjson.parse(encoded3) + + assert.are.same(decoded1.test, decoded2.test) + assert.are.same(decoded2.test, decoded3.test) + end) + end) + + describe("SQL and NoSQL injection patterns", function() + it("should safely handle SQL injection patterns", function() + local injection_patterns = { + "'; DROP TABLE users; --", + "1' OR '1'='1", + "admin'--", + "' OR 1=1--", + "'; EXEC sp_MSForEachTable 'DROP TABLE ?'; --", + } + + for _, pattern in ipairs(injection_patterns) do + local data = { query = pattern } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(pattern, decoded.query) + end + end) + + it("should safely handle NoSQL injection patterns", function() + local nosql_patterns = { + "{'$gt': ''}", + "{'$ne': null}", + "{'$where': 'this.password.length > 0'}", + } + + for _, pattern in ipairs(nosql_patterns) do + local data = { filter = pattern } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(pattern, decoded.filter) + end + end) + end) + + describe("Path traversal and file inclusion", function() + it("should handle path traversal strings", function() + local paths = { + "../../etc/passwd", + "..\\..\\windows\\system32", + "/etc/passwd", + "C:\\Windows\\System32\\config\\SAM", + "../../../../../etc/shadow", + } + + for _, path in ipairs(paths) do + local data = { path = path } + local encoded = simdjson.encode(data) + local decoded = 
simdjson.parse(encoded) + assert.are.same(path, decoded.path) + end + end) + end) +end) diff --git a/spec/encode_spec.lua b/spec/encode_spec.lua new file mode 100644 index 0000000..dbe9471 --- /dev/null +++ b/spec/encode_spec.lua @@ -0,0 +1,414 @@ +local simdjson = require("simdjson") +local cjson = require("cjson") + + +describe("encode numbers correctly", function() + it("should encode numbers the same as cjson", function() + local testData = { + float = 1.2, + min_signed_integer = -9223372036854775808, + max_signed_integer = 9223372036854775807, + one_above_max_signed_integer = 9223372036854775808, + min_unsigned_integer = 0, + max_unsigned_integer = 18446744073709551615 + } + + for k, v in pairs(testData) do + local td = { [k] = v } + local simdjsonEncoded = simdjson.encode(td) + local cjsonEncoded = cjson.encode(td) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end + + local cjsonEncode = cjson.encode(testData) + local simdjsonEncode = simdjson.encode(testData) + assert.are.same(cjsonEncode, simdjsonEncode) + end) + + it("should encode special float values", function() + local testCases = { + { value = 0.0, name = "zero" }, + { value = 3.14159265358979, name = "pi" }, + { value = 2.718281828459045, name = "e" }, + { value = 1.23e-10, name = "small scientific" }, + { value = 1.23e10, name = "large scientific" }, + { value = -123.456, name = "negative float" }, + } + + for _, test in ipairs(testCases) do + local data = { value = test.value } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end + end) + + it("should encode array of numbers", function() + local numbers = { 1, 2, 3, 4, 5, -1, -2, 0, 1.5, 2.7 } + local data = { numbers = numbers } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) +end) + +describe("encode strings correctly", function() + it("should encode 
simple strings", function() + local testCases = { + { str = "hello", name = "simple" }, + { str = "", name = "empty" }, + { str = "hello world", name = "with space" }, + { str = "123", name = "numeric string" }, + } + + for _, test in ipairs(testCases) do + local data = { str = test.str } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end + end) + + it("should encode strings with special characters", function() + local testCases = { + { str = "hello\nworld", name = "newline" }, + { str = "hello\tworld", name = "tab" }, + { str = "hello\rworld", name = "carriage return" }, + { str = "hello\"world", name = "quote" }, + { str = "hello\\world", name = "backslash" }, + } + + for _, test in ipairs(testCases) do + local data = { str = test.str } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end + end) + + it("should encode forward slash without escaping", function() + -- simdjson doesn't escape forward slashes (which is valid JSON) + local data = { str = "hello/world" } + local simdjsonEncoded = simdjson.encode(data) + assert.are.same('{"str":"hello/world"}', simdjsonEncoded) + end) + + it("should encode unicode strings", function() + local testCases = { + { str = "Hello 世界", name = "chinese" }, + { str = "Hello मुndi", name = "hindi" }, + { str = "Hello 🌍", name = "emoji" }, + { str = "café", name = "accented" }, + } + + for _, test in ipairs(testCases) do + local data = { str = test.str } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end + end) + + it("should encode array of strings", function() + local strings = { "one", "two", "three", "", "with space" } + local data = { strings = strings } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + 
assert.are.same(cjsonEncoded, simdjsonEncoded) + end) +end) + +describe("encode booleans correctly", function() + it("should encode boolean values", function() + local data1 = { value = true } + assert.are.same(cjson.encode(data1), simdjson.encode(data1)) + + local data2 = { value = false } + assert.are.same(cjson.encode(data2), simdjson.encode(data2)) + end) + + it("should encode boolean arrays", function() + local bools = { true, false, true, false } + local data = { bools = bools } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should encode mixed boolean and other types", function() + local mixed = { true, 1, "test", false, 2.5 } + local data = { mixed = mixed } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) +end) + +describe("encode arrays correctly", function() + it("should encode empty arrays", function() + local data = { arr = {} } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should encode nested arrays", function() + local data = { + nested = { + { 1, 2, 3 }, + { 4, 5, 6 }, + { 7, 8, 9 } + } + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should encode deeply nested arrays", function() + local data = { arr = { { { { { 1 } } } } } } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should encode arrays with mixed types", function() + local data = { + mixed = { 1, "two", 3.0, true, false, { nested = "value" } } + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, 
simdjsonEncoded) + end) +end) + +describe("encode objects correctly", function() + it("should encode empty objects", function() + local data = {} + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should encode objects with string keys", function() + local data = { + key1 = "value1", + key2 = "value2", + key3 = "value3" + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + -- Note: key order may differ, so we decode and compare + local simdjsonDecoded = simdjson.parse(simdjsonEncoded) + local cjsonDecoded = cjson.decode(cjsonEncoded) + assert.are.same(cjsonDecoded, simdjsonDecoded) + end) + + it("should encode objects with numeric keys", function() + local data = { + ["1"] = "one", + ["2"] = "two", + ["3"] = "three" + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + local simdjsonDecoded = simdjson.parse(simdjsonEncoded) + local cjsonDecoded = cjson.decode(cjsonEncoded) + assert.are.same(cjsonDecoded, simdjsonDecoded) + end) + + it("should encode nested objects", function() + local data = { + outer = { + middle = { + inner = { + value = "deep" + } + } + } + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should encode objects with mixed value types", function() + local data = { + string = "value", + number = 42, + float = 3.14, + bool_true = true, + bool_false = false, + array = { 1, 2, 3 }, + object = { nested = "value" } + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + local simdjsonDecoded = simdjson.parse(simdjsonEncoded) + local cjsonDecoded = cjson.decode(cjsonEncoded) + assert.are.same(cjsonDecoded, simdjsonDecoded) + end) +end) + +describe("encode complex json types", function() + it("should encode complex json types 
the same as cjson", function() + local testData = { + object = { + key1 = "value1", + key2 = 2, + key3 = { nestedKey = "nestedValue" } + }, + mixed = { + "string", + 123, + true, + { nestedArray = { 1, 2, 3 } }, + { nestedObject = { key = "value" } } + }, + mixed_complex = { + array = { "abc", 123, true }, + object = { key = "value", number = 456 }, + nested_object = { + inner_key = { 1, 2, 3, { deep_key = "deep_value" } } + } + }, + bools = { true, false, true, false } + } + + for k, v in pairs(testData) do + local td = { [k] = v } + local simdjsonEncoded = simdjson.encode(td) + local cjsonEncoded = cjson.encode(td) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end + end) + + it("should encode complex nested structures", function() + local data = { + users = { + { + id = 1, + name = "Alice", + active = true, + scores = { 95, 87, 92 } + }, + { + id = 2, + name = "Bob", + active = false, + scores = { 78, 85, 90 } + } + }, + metadata = { + version = "1.0", + count = 2 + } + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should handle arrays of objects", function() + local data = { + items = { + { id = 1, name = "Item 1" }, + { id = 2, name = "Item 2" }, + { id = 3, name = "Item 3" } + } + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should handle objects with array values", function() + local data = { + numbers = { 1, 2, 3, 4, 5 }, + strings = { "a", "b", "c" }, + booleans = { true, false, true } + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + local simdjsonDecoded = simdjson.parse(simdjsonEncoded) + local cjsonDecoded = cjson.decode(cjsonEncoded) + assert.are.same(cjsonDecoded, simdjsonDecoded) + end) +end) + +describe("encode edge cases", function() + it("should handle very long strings", 
function()
+        local longString = string.rep("a", 10000)
+        local data = { str = longString }
+        local simdjsonEncoded = simdjson.encode(data)
+        local cjsonEncoded = cjson.encode(data)
+        assert.are.same(cjsonEncoded, simdjsonEncoded)
+    end)
+
+    it("should handle large arrays", function()
+        local largeArray = {}
+        for i = 1, 1000 do
+            largeArray[i] = i
+        end
+        local data = { arr = largeArray }
+        local simdjsonEncoded = simdjson.encode(data)
+        local cjsonEncoded = cjson.encode(data)
+        assert.are.same(cjsonEncoded, simdjsonEncoded)
+    end)
+
+    it("should encode sparse arrays as null-padded arrays", function()
+        local sparseArray = {}
+        sparseArray[1] = "first"
+        sparseArray[5] = "fifth"
+        sparseArray[10] = "tenth"
+        local data = { sparse = sparseArray }
+
+        -- A table whose keys are all positive integers is emitted as a JSON array up to its largest index; holes become null
+        local simdjsonEncoded = simdjson.encode(data)
+        assert.is_true(simdjsonEncoded:find('"sparse":[', 1, true) ~= nil)
+
+        -- Verify the populated slots come back at their original indices after decoding
+        local decoded = simdjson.parse(simdjsonEncoded)
+        assert.are.same({ "first", "fifth", "tenth" }, { decoded.sparse[1], decoded.sparse[5], decoded.sparse[10] })
+    end)
+
+    it("should encode keys with special characters", function()
+        local data = {
+            ["key with spaces"] = "value1",
+            ["key-with-dashes"] = "value2",
+            ["key_with_underscores"] = "value3",
+            ["key.with.dots"] = "value4"
+        }
+        local simdjsonEncoded = simdjson.encode(data)
+        local cjsonEncoded = cjson.encode(data)
+        local simdjsonDecoded = simdjson.parse(simdjsonEncoded)
+        local cjsonDecoded = cjson.decode(cjsonEncoded)
+        assert.are.same(cjsonDecoded, simdjsonDecoded)
+    end)
+
+    it("should roundtrip encode and decode", function()
+        local original = {
+            name = "Test",
+            value = 42,
+            active = true,
+            items = { 1, 2, 3 },
+            nested = { key = "value" }
+        }
+        local encoded = simdjson.encode(original)
+        local decoded = simdjson.parse(encoded)
+
+        -- Spot-check one field of each value type so a failure points at the kind of value that broke
+        assert.are.same(original.name, decoded.name)
+        assert.are.same(original.value, decoded.value)
+        assert.are.same(original.active, decoded.active)
+        
assert.are.same(original.items[1], decoded.items[1]) + assert.are.same(original.nested.key, decoded.nested.key) + end) + + it("basic string", function() + local original = "test string" + local simdjsonEncoded = simdjson.encode(original) + local cjsonEncoded = cjson.encode(original) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) +end) diff --git a/spec/performance_spec.lua b/spec/performance_spec.lua new file mode 100644 index 0000000..7807a2a --- /dev/null +++ b/spec/performance_spec.lua @@ -0,0 +1,399 @@ +local simdjson = require("simdjson") +local cjson = require("cjson") + +-- Track wins +local simdjson_wins = 0 +local cjson_wins = 0 +local total_tests = 0 +local iterations = 10000 + +-- Helper function to measure time +local function measure_time(func, iterations) + iterations = iterations or 1 + collectgarbage("collect") -- Clean up before measurement + local start = os.clock() + for i = 1, iterations do func() end + local elapsed = os.clock() - start + return elapsed, elapsed / iterations +end + +-- Helper to format numbers +local function format_number(num) + if num < 0.001 then + return string.format("%.6f ms", num * 1000) + elseif num < 1 then + return string.format("%.3f ms", num * 1000) + else + return string.format("%.3f s", num) + end +end + +-- Helper to show comparison +local function show_comparison(name, simdjson_time, cjson_time) + local speedup = cjson_time / simdjson_time + local winner = speedup > 1 and "simdjson" or "cjson" + local ratio = speedup > 1 and speedup or (1 / speedup) + + -- Track wins + total_tests = total_tests + 1 + if winner == "simdjson" then + simdjson_wins = simdjson_wins + 1 + else + cjson_wins = cjson_wins + 1 + end + + -- Add newline before first result to separate from test marker + if total_tests == 1 then print() end + + print(string.format( + " %-30s | simdjson: %s | cjson: %s | %s is %.2fx faster", name, + format_number(simdjson_time), format_number(cjson_time), winner, + ratio)) +end + 
+describe("Performance Comparison: simdjson vs cjson", function() + it(string.format("Simple Object Encoding (%s iterations)", iterations), + function() + local simple_data = { name = "test", value = 42, active = true } + + local simdjson_time = measure_time(function() + simdjson.encode(simple_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(simple_data) + end, iterations) + show_comparison("Simple object", simdjson_time, cjson_time) + end) + + it(string.format("Array Encoding (%s iterations)", iterations), function() + local array_data = {} + for i = 1, 100 do array_data[i] = i end + array_data = { numbers = array_data } + + local simdjson_time = measure_time(function() + simdjson.encode(array_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(array_data) + end, iterations) + show_comparison("100-element array", simdjson_time, cjson_time) + end) + + it(string.format("Nested Object Encoding (%s iterations)", iterations), + function() + local nested_data = { + level1 = { level2 = { level3 = { level4 = { value = "deep" } } } } + } + + local simdjson_time = measure_time(function() + simdjson.encode(nested_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(nested_data) + end, iterations) + show_comparison("5-level nesting", simdjson_time, cjson_time) + end) + + it(string.format("Nested Object Encoding (%s iterations)", iterations), + function() + local nested_data = { + level1 = { + level2 = { + level3 = { + level4 = { + level5 = { + level6 = { + level7 = { + level8 = { + level9 = { + level10 = { value = "deep" } + } + } + } + } + } + } + } + } + } + } + + local simdjson_time = measure_time(function() + simdjson.encode(nested_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(nested_data) + end, iterations) + show_comparison("10-level nesting", simdjson_time, cjson_time) + end) + + it(string.format("String-Heavy Data (%s 
iterations)", iterations), + function() + local string_data = { + str1 = "The quick brown fox jumps over the lazy dog", + str2 = "Lorem ipsum dolor sit amet, consectetur adipiscing elit", + str3 = "Pack my box with five dozen liquor jugs" + } + + local simdjson_time = measure_time(function() + simdjson.encode(string_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(string_data) + end, iterations) + show_comparison("String-heavy object", simdjson_time, cjson_time) + end) + + it(string.format("Mixed Type Array (%s iterations)", iterations), function() + local mixed_array = { + data = { 1, "two", 3.0, true, false, { nested = "value" } } + } + + local simdjson_time = measure_time(function() + simdjson.encode(mixed_array) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(mixed_array) + end, iterations) + show_comparison("Mixed type array", simdjson_time, cjson_time) + end) + + it(string.format("Large Object (%s iterations)", iterations), function() + local large_object = {} + for i = 1, 100 do large_object["key" .. i] = "value" .. 
i end + large_object = { data = large_object } + + local simdjson_time = measure_time(function() + simdjson.encode(large_object) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(large_object) + end, iterations) + show_comparison("100-key object", simdjson_time, cjson_time) + end) + + it(string.format("Large Array (%s iterations)", iterations), function() + local large_array = {} + for i = 1, 1000 do large_array[i] = i end + large_array = { data = large_array } + + local simdjson_time = measure_time(function() + simdjson.encode(large_array) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(large_array) + end, iterations) + + show_comparison("1000-element array", simdjson_time, cjson_time) + end) + + it(string.format("Large Objects (%s iterations)", iterations), function() + local large_array = {} + for i = 1, 1000 do large_array["a" .. i] = i end + large_array = { data = large_array } + + local simdjson_time = measure_time(function() + simdjson.encode(large_array) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(large_array) + end, iterations) + + show_comparison("1000-K/V pair object", simdjson_time, cjson_time) + end) + + it(string.format("Complex Realistic Data (%s iterations)", iterations), + function() + local realistic_data = { + users = { + { + id = 1, + name = "Alice Smith", + email = "alice@example.com", + active = true, + score = 95.5 + }, { + id = 2, + name = "Bob Jones", + email = "bob@example.com", + active = false, + score = 87.3 + }, { + id = 3, + name = "Carol White", + email = "carol@example.com", + active = true, + score = 92.1 + } + }, + metadata = { version = "1.0", timestamp = 1704197400, count = 3 }, + settings = { theme = "dark", language = "en", notifications = true } + } + + local simdjson_time = measure_time(function() + simdjson.encode(realistic_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(realistic_data) 
+            end, iterations)
+            show_comparison("Realistic complex data", simdjson_time, cjson_time)
+        end)
+
+    it(string.format("Simple JSON Parsing (%s iterations)", iterations),
+        function()
+            local simple_json = '{"name":"test","value":42,"active":true}'
+
+            local simdjson_time = measure_time(function()
+                simdjson.parse(simple_json)
+            end, iterations)
+
+            local cjson_time = measure_time(function()
+                cjson.decode(simple_json)
+            end, iterations) -- same count as the simdjson side: show_comparison compares total elapsed time
+
+            show_comparison("Simple parsing", simdjson_time, cjson_time)
+        end)
+
+    it(string.format("Array Parsing (%s iterations)", iterations), function()
+        local array_json =
+            '{"numbers":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]}'
+
+        local simdjson_time = measure_time(function()
+            simdjson.parse(array_json)
+        end, iterations)
+
+        local cjson_time = measure_time(function()
+            cjson.decode(array_json)
+        end, iterations)
+
+        show_comparison("Array parsing", simdjson_time, cjson_time)
+    end)
+
+    it(string.format("Nested Object Parsing (%s iterations)", iterations),
+        function()
+            local nested_json =
+                '{"level1":{"level2":{"level3":{"level4":{"value":"deep"}}}}}'
+
+            local simdjson_time = measure_time(function()
+                simdjson.parse(nested_json)
+            end, iterations)
+
+            local cjson_time = measure_time(function()
+                cjson.decode(nested_json)
+            end, iterations)
+            show_comparison("Nested parsing", simdjson_time, cjson_time)
+        end)
+
+    it(string.format("Large JSON Parsing (%s iterations)", iterations),
+        function()
+            local large_json_data = {}
+            for i = 1, 100 do large_json_data["key" .. i] = "value" ..
i end + local large_json = cjson.encode({ data = large_json_data }) + + local simdjson_time = measure_time(function() + simdjson.parse(large_json) + end, iterations) + + local cjson_time = measure_time(function() + cjson.decode(large_json) + end, iterations) + show_comparison("Large object parsing", simdjson_time, cjson_time) + end) + + it(string.format("Round-trip: Encode + Parse (%s iterations)", iterations), + function() + local roundtrip_data = { + id = 123, + name = "Test User", + values = { 1, 2, 3, 4, 5 }, + metadata = { active = true, score = 95.5 } + } + + local simdjson_time = measure_time(function() + local encoded = simdjson.encode(roundtrip_data) + simdjson.parse(encoded) + end, iterations) + + local cjson_time = measure_time(function() + local encoded = cjson.encode(roundtrip_data) + cjson.decode(encoded) + end, iterations) + + show_comparison("Round-trip", simdjson_time, cjson_time) + end) + + it(string.format("Special Characters (%s iterations)", iterations), + function() + local special_chars_data = { + escaped = 'test"with"quotes\nand\nnewlines\ttabs', + unicode = "Hello 世界 🌍" + } + + local simdjson_time = measure_time(function() + simdjson.encode(special_chars_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(special_chars_data) + end, iterations) + show_comparison("Special characters", simdjson_time, cjson_time) + end) + + it(string.format("Boolean Arrays (%s iterations)", iterations), function() + local bool_data = { + flags = { true, false, true, false, true, false, true, false } + } + + local simdjson_time = measure_time(function() + simdjson.encode(bool_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(bool_data) + end, iterations) + + show_comparison("Boolean arrays", simdjson_time, cjson_time) + end) + + it(string.format("Large Boolean Array (%s iterations)", iterations), function() + local bool_data = {} + local choices = { true, false } + for i = 1, 1000 do 
bool_data[i] = choices[math.random(2)] end + + + local simdjson_time = measure_time(function() + simdjson.encode(bool_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(bool_data) + end, iterations) + + show_comparison("Large boolean arrays", simdjson_time, cjson_time) + end) + + -- Print summary after all tests + after_each(function() end) -- No-op to ensure we're in test context + + teardown(function() + print("\n" .. string.rep("=", 80)) + print("Using SIMD implementation: " .. simdjson.activeImplementation()) + print(string.format("Performance Summary: %d total tests", total_tests)) + print(string.rep("=", 80)) + print(string.format(" simdjson wins: %d (%.1f%%)", simdjson_wins, + (simdjson_wins / total_tests) * 100)) + print(string.format(" cjson wins: %d (%.1f%%)", cjson_wins, + (cjson_wins / total_tests) * 100)) + print(string.rep("=", 80)) + end) +end) diff --git a/src/luasimdjson.cpp b/src/luasimdjson.cpp index 4004b88..1fd6bef 100644 --- a/src/luasimdjson.cpp +++ b/src/luasimdjson.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -11,12 +12,23 @@ #define NDEBUG #define __OPTIMIZE__ 1 -#include "simdjson.h" #include "luasimdjson.h" +#include "simdjson.h" #define LUA_SIMDJSON_NAME "simdjson" #define LUA_SIMDJSON_VERSION "0.0.8" +// keys encode max depth configuration. 
+#define LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY "simdjson.maxEncodeDepth" +#define DEFAULT_MAX_ENCODE_DEPTH simdjson::DEFAULT_MAX_DEPTH + +// Encode buffer size reservation configuration +#define LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY "simdjson.encodeBufferSize" +#define DEFAULT_ENCODE_BUFFER_SIZE (16 * 1024) // 16KB +#define DEFAULT_MAX_ENCODE_BUFFER_SIZE simdjson::SIMDJSON_MAXSIZE_BYTES +// Max size for number to string conversion buffer +#define ENCODE_NUMBER_BUFFER_SIZE 32 + using namespace simdjson; #if !defined(luaL_newlibtable) && (!defined LUA_VERSION_NUM || LUA_VERSION_NUM <= 501) @@ -25,11 +37,9 @@ using namespace simdjson; ** Stolen from: http://lua-users.org/wiki/CompatibilityWithLuaFive ** Adapted from Lua 5.2.0 */ -static void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup) -{ +static void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup) { luaL_checkstack(L, nup + 1, "too many upvalues"); - for (; l->name != NULL; l++) - { /* fill the table with given functions */ + for (; l->name != NULL; l++) { /* fill the table with given functions */ int i; lua_pushstring(L, l->name); for (i = 0; i < nup; i++) /* copy upvalues to the top */ @@ -43,112 +53,102 @@ static void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup) ondemand::parser ondemand_parser; simdjson::padded_string jsonbuffer; +thread_local simdjson::builder::string_builder *encode_buffer = nullptr; // Reused across encode() calls +thread_local size_t encode_buffer_size = 0; // Track current buffer size -template -void convert_ondemand_element_to_table(lua_State *L, T &element) -{ +template void convert_ondemand_element_to_table(lua_State *L, T &element) { static_assert(std::is_base_of::value || std::is_base_of::value, "type parameter must be document or value"); - switch (element.type()) - { - - case ondemand::json_type::array: - { - int count = 1; - lua_newtable(L); - - for (ondemand::value child : element.get_array()) - { - lua_pushinteger(L, count); - 
convert_ondemand_element_to_table(L, child); - lua_settable(L, -3); - count = count + 1; - } - break; - } - - case ondemand::json_type::object: - lua_newtable(L); - for (ondemand::field field : element.get_object()) - { - std::string_view s = field.unescaped_key(); - lua_pushlstring(L, s.data(), s.size()); - convert_ondemand_element_to_table(L, field.value()); - lua_settable(L, -3); - } - break; + switch (element.type()) { + case ondemand::json_type::array: { + int count = 1; + lua_newtable(L); - case ondemand::json_type::number: - { - ondemand::number number = element.get_number(); - ondemand::number_type number_type = number.get_number_type(); - switch (number_type) - { - case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::floating_point_number: - lua_pushnumber(L, element.get_double()); + for (ondemand::value child : element.get_array()) { + lua_pushinteger(L, count); + convert_ondemand_element_to_table(L, child); + lua_settable(L, -3); + count = count + 1; + } break; + } - case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::signed_integer: - lua_pushinteger(L, element.get_int64()); + case ondemand::json_type::object: + lua_newtable(L); + for (ondemand::field field : element.get_object()) { + std::string_view s = field.unescaped_key(); + lua_pushlstring(L, s.data(), s.size()); + convert_ondemand_element_to_table(L, field.value()); + lua_settable(L, -3); + } break; - case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::unsigned_integer: - { -// a uint64 can be greater than an int64, so we must check how large and pass as a number -// if larger but LUA_MAXINTEGER (which is only defined in 5.3+) + case ondemand::json_type::number: { + ondemand::number number = element.get_number(); + ondemand::number_type number_type = number.get_number_type(); + switch (number_type) { + case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::floating_point_number: + lua_pushnumber(L, element.get_double()); + break; + + case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::signed_integer: + 
lua_pushinteger(L, element.get_int64()); + break; + + case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::unsigned_integer: { +// a uint64 can be greater than an int64, so we must check how large and pass as +// a number if larger but LUA_MAXINTEGER (which is only defined in 5.3+) #if defined(LUA_MAXINTEGER) - uint64_t actual_value = element.get_uint64(); - if (actual_value > LUA_MAXINTEGER) - { - lua_pushnumber(L, actual_value); - } - else - { - lua_pushinteger(L, actual_value); - } + uint64_t actual_value = element.get_uint64(); + if (actual_value > LUA_MAXINTEGER) { + lua_pushnumber(L, actual_value); + } else { + lua_pushinteger(L, actual_value); + } #else - lua_pushnumber(L, element.get_double()); + lua_pushnumber(L, element.get_double()); #endif + break; + } + + case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::big_integer: + lua_pushnumber(L, element.get_double()); + break; + } break; } - case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::big_integer: - lua_pushnumber(L, element.get_double()); + case ondemand::json_type::string: { + std::string_view s = element.get_string(); + lua_pushlstring(L, s.data(), s.size()); break; } - break; - } - case ondemand::json_type::string: - { - std::string_view s = element.get_string(); - lua_pushlstring(L, s.data(), s.size()); - break; - } - - case ondemand::json_type::boolean: - lua_pushboolean(L, element.get_bool()); - break; + case ondemand::json_type::boolean: + lua_pushboolean(L, element.get_bool()); + break; - case ondemand::json_type::null: - // calling is_null().value() will trigger an exception if the value is invalid - if (element.is_null().value()) - { - lua_pushlightuserdata(L, NULL); - } - break; + case ondemand::json_type::null: + // calling is_null().value() will trigger an exception if the value + // is invalid + if (element.is_null().value()) { + lua_pushlightuserdata(L, NULL); + } + break; - case ondemand::json_type::unknown: - default: - luaL_error(L, "simdjson::ondemand::json_type::unknown or unsupported 
type encountered"); - break; + case ondemand::json_type::unknown: + default: + luaL_error(L, "simdjson::ondemand::json_type::unknown or unsupported " + "type " + "encountered"); + break; } } -// from https://github.com/simdjson/simdjson/blob/master/doc/performance.md#free-padding +// from +// https://github.com/simdjson/simdjson/blob/master/doc/performance.md#free-padding // Returns the default size of the page in bytes on this system. -long page_size() -{ +long page_size() { #ifdef _WIN32 SYSTEM_INFO sysInfo; GetSystemInfo(&sysInfo); @@ -162,71 +162,54 @@ long page_size() // allows us to reuse a json buffer pretty safely // Returns true if the buffer + len + simdjson::SIMDJSON_PADDING crosses the // page boundary. -bool need_allocation(const char *buf, size_t len) -{ - return ((reinterpret_cast(buf + len - 1) % page_size()) < - simdjson::SIMDJSON_PADDING); +bool need_allocation(const char *buf, size_t len) { + return ((reinterpret_cast(buf + len - 1) % page_size()) < simdjson::SIMDJSON_PADDING); } -simdjson::padded_string_view get_padded_string_view(const char *buf, size_t len, - simdjson::padded_string &jsonbuffer) -{ - if (need_allocation(buf, len)) - { // unlikely case +simdjson::padded_string_view get_padded_string_view(const char *buf, size_t len, simdjson::padded_string &jsonbuffer) { + if (need_allocation(buf, len)) { // unlikely case jsonbuffer = simdjson::padded_string(buf, len); return jsonbuffer; - } - else - { // no reallcation needed (very likely) - return simdjson::padded_string_view(buf, len, - len + simdjson::SIMDJSON_PADDING); + } else { // no reallcation needed (very likely) + return simdjson::padded_string_view(buf, len, len + simdjson::SIMDJSON_PADDING); } } -static int parse(lua_State *L) -{ +static int parse(lua_State *L) { size_t json_str_len; const char *json_str = luaL_checklstring(L, 1, &json_str_len); ondemand::document doc; - try - { + try { // makes a padded_string_view for a bit of quickness! 
doc = ondemand_parser.iterate(get_padded_string_view(json_str, json_str_len, jsonbuffer)); convert_ondemand_element_to_table(L, doc); - } - catch (simdjson::simdjson_error &error) - { + } catch (simdjson::simdjson_error &error) { luaL_error(L, error.what()); } return 1; } -static int parse_file(lua_State *L) -{ +static int parse_file(lua_State *L) { const char *json_file = luaL_checkstring(L, 1); padded_string json_string; ondemand::document doc; - try - { + try { json_string = padded_string::load(json_file); doc = ondemand_parser.iterate(json_string); convert_ondemand_element_to_table(L, doc); - } - catch (simdjson::simdjson_error &error) - { + } catch (simdjson::simdjson_error &error) { luaL_error(L, error.what()); } return 1; } -static int active_implementation(lua_State *L) -{ +static int active_implementation(lua_State *L) { const auto &implementation = simdjson::get_active_implementation(); std::string name = implementation->name(); const std::string description = implementation->description(); @@ -237,113 +220,503 @@ static int active_implementation(lua_State *L) return 1; } +// Add forward declaration near the top after includes +static void serialize_data(lua_State *L, int current_depth, int max_depth, simdjson::builder::string_builder &builder); + +// Helper function to parse encode options from Lua table +static void parse_encode_options(lua_State *L, int table_index, int &max_depth, size_t &desired_buffer_size) { + // Check for maxDepth in options table + lua_getfield(L, table_index, "maxDepth"); + if (!lua_isnil(L, -1)) { + if (!lua_isnumber(L, -1)) { + luaL_error(L, "maxDepth option must be a number"); + } + max_depth = lua_tointeger(L, -1); + if (max_depth < 1) { + luaL_error(L, "maxDepth must be at least 1"); + } + } + lua_pop(L, 1); + + // Check for bufferSize in options table + lua_getfield(L, table_index, "bufferSize"); + if (!lua_isnil(L, -1)) { + if (!lua_isnumber(L, -1)) { + luaL_error(L, "bufferSize option must be a number"); + } + int 
buffer_size = lua_tointeger(L, -1); + if (buffer_size < 1) { + luaL_error(L, "bufferSize must be at least 1"); + } + if ((size_t)buffer_size > DEFAULT_MAX_ENCODE_BUFFER_SIZE) { + luaL_error(L, "bufferSize must not exceed %zu", (size_t)DEFAULT_MAX_ENCODE_BUFFER_SIZE); + } + desired_buffer_size = buffer_size; + } + lua_pop(L, 1); +} + +// Helper function to get max encode depth from registry (with caching for performance) +static int get_max_depth(lua_State *L) { + lua_pushstring(L, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY); + lua_gettable(L, LUA_REGISTRYINDEX); + + int max_depth = DEFAULT_MAX_ENCODE_DEPTH; + if (lua_isnumber(L, -1)) { + max_depth = lua_tointeger(L, -1); + } + lua_pop(L, 1); + + return max_depth; +} + +// Helper function to set max encode depth in registry (and update cache) +static void set_max_depth(lua_State *L, int max_depth) { + lua_pushstring(L, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY); + lua_pushinteger(L, max_depth); + lua_settable(L, LUA_REGISTRYINDEX); +} + +// Helper function to get encode buffer size from registry (with caching for performance) +static size_t get_encode_buffer_size(lua_State *L) { + lua_pushstring(L, LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY); + lua_gettable(L, LUA_REGISTRYINDEX); + + size_t buffer_size = DEFAULT_ENCODE_BUFFER_SIZE; + if (lua_isnumber(L, -1)) { + buffer_size = lua_tointeger(L, -1); + } + lua_pop(L, 1); + + return buffer_size; +} + +// Helper function to set encode buffer size in registry (and update cache) +static void set_encode_buffer_size(lua_State *L, size_t buffer_size) { + lua_pushstring(L, LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY); + lua_pushinteger(L, buffer_size); + lua_settable(L, LUA_REGISTRYINDEX); +} + +// Check if table on stack top is a valid array and return its length +// Returns -1 if not an array, otherwise returns maximum index +static int get_table_array_size(lua_State *L) { + double key_num; + int max_index = 0; + + lua_pushnil(L); + while (lua_next(L, -2) != 0) { + // Check if key is a number + if 
(lua_type(L, -2) == LUA_TNUMBER) { + key_num = lua_tonumber(L, -2); + // Check if it's a positive integer + if (std::floor(key_num) == key_num && key_num >= 1) { + if (static_cast(key_num) > max_index) { + max_index = static_cast(key_num); + } + lua_pop(L, 1); + continue; + } + } + + // Non-integer key found - not an array + lua_pop(L, 2); + return -1; + } + + // Return max_index if we found any valid integer keys (allows sparse arrays) + return max_index; +} + +// Helper function to format a number as a string +// Returns pointer to thread-local buffer and length +inline std::pair format_number_as_string(lua_State *L, int index) { + thread_local char buffer[ENCODE_NUMBER_BUFFER_SIZE]; + size_t len; + + // JSON numbers are represented as doubles, which have limited precision + // for integers beyond 2^53. Check this first regardless of Lua version. +#if defined(LUA_MAXINTEGER) + const double max_safe_int = LUA_MAXINTEGER; +#else + const double max_safe_int = 9007199254740992.0; // 2^53 +#endif + +#if LUA_VERSION_NUM >= 503 + // Lua 5.3+ has native integer type + if (lua_isinteger(L, index)) { + lua_Integer num = lua_tointeger(L, index); + // Check if the integer fits safely in a JSON number (double) + if (num > -max_safe_int && num < max_safe_int) { + // Optimized: Use std::to_string for faster integer conversion + std::string str = std::to_string(num); + len = str.size(); + if (len < sizeof(buffer)) { + memcpy(buffer, str.c_str(), len + 1); // Include null terminator for safety + return {buffer, len}; + } else { + // Fallback for very large numbers (rare) + len = snprintf(buffer, sizeof(buffer), "%lld", (long long)num); + return {buffer, len}; + } + } + // Too large for safe integer representation, format as float + len = snprintf(buffer, sizeof(buffer), "%.14g", (double)num); + return {buffer, len}; + } +#else + // For Lua 5.1/5.2, check if the number is an integer value + { + double num = lua_tonumber(L, index); + if (std::floor(num) == num && num <= LLONG_MAX 
&& num >= LLONG_MIN) { + if (num > -max_safe_int && num < max_safe_int) { + // Optimized: Use std::to_string for integers + std::string str = std::to_string(static_cast(num)); + len = str.size(); + if (len < sizeof(buffer)) { + memcpy(buffer, str.c_str(), len + 1); + return {buffer, len}; + } else { + len = snprintf(buffer, sizeof(buffer), "%lld", static_cast(num)); + return {buffer, len}; + } + } + } + } +#endif + + // For floats: Use snprintf to maintain original formatting (e.g., preserve trailing zeros) + lua_Number num = lua_tonumber(L, index); + len = snprintf(buffer, sizeof(buffer), "%.14g", num); + return {buffer, len}; +} + +// Serialize a Lua boolean as a JSON boolean +inline void serialize_append_bool(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int lindex) { + // check if it is really a boolean + if (lua_isboolean(L, lindex)) { + if (lua_toboolean(L, lindex)) { +// Use append_raw with string_view for batched append (more efficient than multiple char appends) +#if __cplusplus >= 202002L + builder.append(true); +#else + builder.append_raw(std::string_view("true", 4)); +#endif + } else { +#if __cplusplus >= 202002L + builder.append(false); +#else + builder.append_raw(std::string_view("false", 5)); +#endif + } + } else { + builder.append_null(); + } +}; + +// Serialize a Lua number as a JSON number +static void serialize_append_number(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int lindex) { + auto num_result = format_number_as_string(L, lindex); + const char *num_str = num_result.first; + size_t len = num_result.second; + // Use append_raw with string_view for numbers (no quotes) + builder.append_raw(std::string_view(num_str, len)); +}; + +// Serialize a Lua string with proper JSON escaping +static void serialize_append_string(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int lindex) { + size_t len; + const char *str = lua_tolstring(L, lindex, &len); 
+  builder.escape_and_append_with_quotes(std::string_view(str, len)); // length-aware: Lua strings may contain embedded NUL bytes
+};
+
+// Serialize the table at table_index as a JSON array of array_size elements (indices 1..array_size); missing indices are written as null.
+static void serialize_append_array(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int table_index, int array_size,
+                                   int current_depth, int max_depth) {
+  bool first = true;
+  // Convert a relative index to an absolute one: values pushed below would otherwise shift what it refers to
+  if (table_index < 0 && table_index != LUA_REGISTRYINDEX) {
+    table_index = lua_gettop(L) + table_index + 1;
+  }
+
+  builder.start_array();
+
+  for (int i = 1; i <= array_size; i++) {
+    if (!first) {
+      builder.append_comma();
+    }
+    first = false;
+
+    // Push the value at index i onto the stack (or nil if missing)
+    lua_rawgeti(L, table_index, i);
+
+    // If the value is nil, encode as null; otherwise, serialize normally
+    if (lua_isnil(L, -1)) {
+      builder.append_null();
+    } else {
+      serialize_data(L, current_depth, max_depth, builder);
+    }
+
+    // Pop the value from the stack
+    lua_pop(L, 1);
+  }
+
+  builder.end_array();
+}
+
+// Serialize a Lua table as a JSON object.
+static void serialize_append_object(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int current_depth, int max_depth) {
+  builder.start_object();
+  bool first = true;
+
+  // Start iteration with nil key; the table being serialized is on top of the stack
+  lua_pushnil(L);
+
+  while (lua_next(L, -2) != 0) {
+    if (!first) {
+      builder.append_comma();
+    }
+    first = false;
+
+    // Cache key type to avoid multiple lua_type calls
+    int key_type = lua_type(L, -2);
+
+    // Serialize the key
+    if (key_type == LUA_TSTRING) {
+      size_t key_len;
+      const char *key = lua_tolstring(L, -2, &key_len);
+      // Always use the proper escape function for string keys
+      builder.escape_and_append_with_quotes(std::string_view(key, key_len));
+    } else if (key_type == LUA_TNUMBER) {
+      auto key_result = format_number_as_string(L, -2);
+      // Numeric keys are formatted as strings with quotes
+      builder.escape_and_append_with_quotes(std::string_view(key_result.first, key_result.second));
+    } else {
+      const char *type_name = lua_typename(L, key_type);
+      luaL_error(L, "unsupported key type in table for serialization: %s", type_name);
+    }
+
+    builder.append_colon();
+
+    // Serialize the value (it's already on top of stack)
+    serialize_data(L, current_depth, max_depth, builder);
+    // Pop value, keep key for next iteration
+    lua_pop(L, 1);
+  }
+
+  builder.end_object();
+}
+
+// Main serialization dispatcher: converts the Lua value on top of the stack to JSON based on its type
+static void serialize_data(lua_State *L, int current_depth, int max_depth, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder) {
+  // Check depth, then reserve Lua stack slots: every nested object level leaves a key and a value on the stack
+  if (current_depth > max_depth) {
+    luaL_error(L, "maximum nesting depth exceeded (limit: %d)", max_depth);
+  }
+  luaL_checkstack(L, 4, "too many nested tables to serialize");
+  switch (lua_type(L, -1)) {
+    case LUA_TSTRING: {
+      serialize_append_string(L, builder, -1);
+    } break;
+    case LUA_TNUMBER: {
+      serialize_append_number(L, builder, -1);
+    } break;
+    case LUA_TBOOLEAN: {
+      serialize_append_bool(L, builder, -1);
+    } break;
+    case LUA_TTABLE: {
+      current_depth++;
+      int array_size = get_table_array_size(L);
+      if (array_size > 0) {
+        // Handle as array
+        serialize_append_array(L, builder, -1, array_size, current_depth, max_depth);
+      } else {
+        // Handle as object (this is also the path taken when array_size is 0)
+        serialize_append_object(L, builder, current_depth, max_depth);
+      }
+    } break;
+    case LUA_TNIL: {
+      // Treat Lua nil as JSON null
+      builder.append_null();
+    } break;
+    case LUA_TLIGHTUSERDATA: {
+      // Treat lightuserdata NULL as JSON null
+      if (lua_touserdata(L, -1) == NULL) {
+        builder.append_null();
+      } else {
+        luaL_error(L, "unsupported lightuserdata value for serialization");
+      }
+    } break;
+    default: {
+      const char *type_name = lua_typename(L, lua_type(L, -1));
+      luaL_error(L, "unsupported Lua data type for serialization: %s", type_name);
+    }
+  }
+}
+
+// encode Lua data types into JSON string; argument 1 is the data, optional argument 2 is an options table
+static int encode(lua_State *L) {
+  // the output string once the building is done.
+  std::string_view json;
+
+  int num_args = lua_gettop(L);
+  luaL_argcheck(L, num_args >= 1 && num_args <= 2, num_args, "expected 1 or 2 arguments");
+
+  // Get max_depth and buffer_size from options table if provided, otherwise use global settings
+  int max_depth = get_max_depth(L);
+  size_t desired_buffer_size = get_encode_buffer_size(L);
+
+  if (num_args == 2) {
+    luaL_checktype(L, 2, LUA_TTABLE);
+    parse_encode_options(L, 2, max_depth, desired_buffer_size);
+    lua_pop(L, 1); // Remove options table, leaving data on top
+  }
+
+  // Recreate the shared buffer on first use or when the requested size differs from the current one
+  if (encode_buffer == nullptr || encode_buffer_size != desired_buffer_size) {
+    // delete on nullptr is a no-op; null the global first so a throwing `new` below cannot leave it dangling
+    delete encode_buffer;
+    encode_buffer = nullptr;
+    encode_buffer = new SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder(desired_buffer_size);
+    encode_buffer_size = desired_buffer_size;
+  }
+
+  // Reuse buffer - clear it but retain capacity, this should mean successive calls
+  // are efficient in most cases.
+  encode_buffer->clear();
+
+  serialize_data(L, 0, max_depth, *encode_buffer);
+  auto v_err = encode_buffer->view().get(json);
+  if (v_err) {
+    return luaL_error(L, "failed to get JSON view from buffer: %s", simdjson::error_message(v_err));
+  }
+
+  // validate utf-8
+  if (!encode_buffer->validate_unicode()) {
+    return luaL_error(L, "encoded JSON contains invalid UTF-8 sequences");
+  }
+
+  lua_pushlstring(L, json.data(), json.size());
+  return 1;
+}
+
+// Set maximum nesting depth for encoding
+static int setMaxEncodeDepth(lua_State *L) {
+  int max_depth = luaL_checkinteger(L, 1);
+  if (max_depth < 1) {
+    return luaL_error(L, "Maximum encode depth must be at least 1");
+  }
+  set_max_depth(L, max_depth);
+  return 0;
+}
+
+// Get current maximum nesting depth for encoding
+static int getMaxEncodeDepth(lua_State *L) {
+  lua_pushinteger(L, get_max_depth(L));
+  return 1;
+}
+
+// Set encode buffer initial capacity in bytes (kept as lua_Integer so the range checks see the untruncated value)
+static int setEncodeBufferSize(lua_State *L) {
+  lua_Integer buffer_size = luaL_checkinteger(L, 1);
+  if (buffer_size < 1) {
+    return luaL_error(L, "Encode buffer size must be at least 1");
+  }
+  if ((size_t)buffer_size > DEFAULT_MAX_ENCODE_BUFFER_SIZE) {
+    return luaL_error(L, "Encode buffer size must not exceed %d", (int)DEFAULT_MAX_ENCODE_BUFFER_SIZE); // lua_pushfstring has no %zu; assumes the limit fits in int - TODO confirm
+  }
+  set_encode_buffer_size(L, (size_t)buffer_size);
+  return 0;
+}
+
+// Get encode buffer initial capacity in bytes
+static int getEncodeBufferSize(lua_State *L) {
+  lua_pushinteger(L, get_encode_buffer_size(L));
+  return 1;
+}
+
 // ParsedObject as C++ class
 #define LUA_MYOBJECT "ParsedObject"
-class ParsedObject
-{
+class ParsedObject {
 private:
   simdjson::padded_string json_string;
   ondemand::document doc;
   std::unique_ptr<ondemand::parser> parser;
 
 public:
-  ParsedObject(const char *json_file)
-      : json_string(padded_string::load(json_file)),
-        parser(new ondemand::parser{})
-  {
+  ParsedObject(const char *json_file) : json_string(padded_string::load(json_file)), parser(new ondemand::parser{}) {
     this->doc = this->parser.get()->iterate(json_string);
   }
-  ParsedObject(const char *json_str, size_t json_str_len)
-      : json_string(json_str, json_str_len),
-        parser(new ondemand::parser{})
-  {
+  ParsedObject(const char *json_str, size_t json_str_len) : json_string(json_str, json_str_len), parser(new ondemand::parser{}) {
     this->doc = this->parser.get()->iterate(json_string);
   }
-  ~ParsedObject() {}
-  ondemand::document *get_doc() { return &(this->doc); }
+  ~ParsedObject() {
+  }
+  ondemand::document *get_doc() {
+    return &(this->doc);
+  }
 };
 
-static int ParsedObject_delete(lua_State *L)
-{
+static int ParsedObject_delete(lua_State *L) {
   delete *reinterpret_cast<ParsedObject **>(lua_touserdata(L, 1));
   return 0;
 }
 
-static int ParsedObject_open(lua_State *L)
-{
+static int ParsedObject_open(lua_State *L) {
   size_t json_str_len;
   const char *json_str = luaL_checklstring(L, 1, &json_str_len);
-  try
-  {
-    ParsedObject **parsedObject =
-        (ParsedObject **)(lua_newuserdata(L, sizeof(ParsedObject *)));
+  try {
+    ParsedObject **parsedObject = (ParsedObject **)(lua_newuserdata(L, sizeof(ParsedObject *)));
     *parsedObject = new ParsedObject(json_str, json_str_len);
     luaL_getmetatable(L, LUA_MYOBJECT);
     lua_setmetatable(L, -2);
-  }
-  catch (simdjson::simdjson_error &error)
-  {
+  } catch (simdjson::simdjson_error &error) {
-    luaL_error(L, error.what());
+    luaL_error(L, "%s", error.what()); // never pass a runtime message as the format string
   }
   return 1;
 }
 
-static int ParsedObject_open_file(lua_State *L)
-{
+static int ParsedObject_open_file(lua_State *L) {
   const char *json_file = luaL_checkstring(L, 1);
-  try
-  {
-    ParsedObject **parsedObject =
-        (ParsedObject **)(lua_newuserdata(L, sizeof(ParsedObject *)));
+  try {
+    ParsedObject **parsedObject = (ParsedObject **)(lua_newuserdata(L, sizeof(ParsedObject *)));
     *parsedObject = new ParsedObject(json_file);
     luaL_getmetatable(L, LUA_MYOBJECT);
     lua_setmetatable(L, -2);
-  }
-  catch (simdjson::simdjson_error &error)
-  {
+  } catch (simdjson::simdjson_error &error) {
-    luaL_error(L, error.what());
+    luaL_error(L, "%s", error.what()); // never pass a runtime message as the format string
   }
   return 1;
 }
 
-static int ParsedObject_atPointer(lua_State *L)
-{
-  ondemand::document *document =
-      (*reinterpret_cast<ParsedObject **>(luaL_checkudata(L, 1, LUA_MYOBJECT)))
-          ->get_doc();
+static int ParsedObject_atPointer(lua_State *L) {
+  ondemand::document *document = (*reinterpret_cast<ParsedObject **>(luaL_checkudata(L, 1, LUA_MYOBJECT)))->get_doc();
   const char *pointer = luaL_checkstring(L, 2);
-  try
-  {
+  try {
     ondemand::value returned_element = document->at_pointer(pointer);
     convert_ondemand_element_to_table(L, returned_element);
-  }
-  catch (simdjson::simdjson_error &error)
-  {
+  } catch (simdjson::simdjson_error &error) {
-    luaL_error(L, error.what());
+    luaL_error(L, "%s", error.what()); // never pass a runtime message as the format string
   }
   return 1;
 }
 
-static int ParsedObject_newindex(lua_State *L)
-{
-  luaL_error(L, "This should be treated as a read-only table. We may one day add array access for the elements, and it'll likely not be modifiable.");
+static int ParsedObject_newindex(lua_State *L) {
+  luaL_error(L, "This should be treated as a read-only table. We may one day "
+                "add array "
+                "access for the elements, and it'll likely not be modifiable.");
   return 1;
 }
 
 static const struct luaL_Reg arraylib_m[] = {
-    {"at", ParsedObject_atPointer},
-    {"atPointer", ParsedObject_atPointer},
-    {"__newindex", ParsedObject_newindex},
-    {"__gc", ParsedObject_delete},
-    {NULL, NULL}};
-
-int luaopen_simdjson(lua_State *L)
-{
+    {"at", ParsedObject_atPointer}, {"atPointer", ParsedObject_atPointer}, {"__newindex", ParsedObject_newindex}, {"__gc", ParsedObject_delete}, {NULL, NULL}};
+
+int luaopen_simdjson(lua_State *L) {
   luaL_newmetatable(L, LUA_MYOBJECT);
   lua_pushvalue(L, -1); /* duplicates the metatable */
   lua_setfield(L, -2, "__index");
diff --git a/src/luasimdjson.h b/src/luasimdjson.h
index 7f92718..85d91af 100644
--- a/src/luasimdjson.h
+++ b/src/luasimdjson.h
@@ -7,20 +7,30 @@
 #endif
 
 extern "C" {
-    static int parse(lua_State*);
-    static int parse_file(lua_State*);
-    static int active_implementation(lua_State*);
-    static int ParsedObject_open(lua_State*);
-    static int ParsedObject_open_file(lua_State*);
+  static int parse(lua_State *);
+  static int parse_file(lua_State *);
+  static int active_implementation(lua_State *);
+  static int ParsedObject_open(lua_State *);
+  static int ParsedObject_open_file(lua_State *);
+  static int encode(lua_State *);
+  static int setMaxEncodeDepth(lua_State *);
+  static int getMaxEncodeDepth(lua_State *);
+  static int setEncodeBufferSize(lua_State *);
+  static int getEncodeBufferSize(lua_State *);
 
-    static const struct luaL_Reg luasimdjson[] = {
-        {"parse", parse},
-        {"parseFile", parse_file},
-        {"activeImplementation", active_implementation},
-        {"open", ParsedObject_open},
-        {"openFile", ParsedObject_open_file},
+  static const struct luaL_Reg luasimdjson[] = {
+    {"parse", parse},
+    {"parseFile", parse_file},
+    {"activeImplementation", active_implementation},
+    {"open", ParsedObject_open},
+    {"openFile", ParsedObject_open_file},
+    {"encode", encode},
+    {"setMaxEncodeDepth", setMaxEncodeDepth},
+    {"getMaxEncodeDepth", getMaxEncodeDepth},
+    {"setEncodeBufferSize", setEncodeBufferSize},
+    {"getEncodeBufferSize", getEncodeBufferSize},
 
-        {NULL, NULL},
-    };
-    LUASIMDJSON_EXPORT int luaopen_simdjson(lua_State*);
+    {NULL, NULL},
+  };
+  LUASIMDJSON_EXPORT int luaopen_simdjson(lua_State *);
 }