From 0bc4c374649b01e7f9bcf14b3899d3bd0535e72f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 2 Jan 2026 17:46:52 +1100 Subject: [PATCH 01/21] initial commit for adding encode() using the simdjson::string_builder pattern --- .clang-format | 16 + spec/compile_spec.lua | 20 +- spec/encode_security_spec.lua | 396 +++++++++++++++ spec/encode_spec.lua | 414 +++++++++++++++ spec/performance_spec.lua | 399 +++++++++++++++ src/luasimdjson.cpp | 930 +++++++++++++++++++++++----------- src/luasimdjson.h | 10 + 7 files changed, 1892 insertions(+), 293 deletions(-) create mode 100644 .clang-format create mode 100644 spec/encode_security_spec.lua create mode 100644 spec/encode_spec.lua create mode 100644 spec/performance_spec.lua diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..91c33a0 --- /dev/null +++ b/.clang-format @@ -0,0 +1,16 @@ +# see https://clang.llvm.org/docs/ClangFormatStyleOptions.html +--- +Language: Cpp +Standard: c++11 + +IndentWidth: 4 +TabWidth: 4 +UseTab: Always +ColumnLimit: 100 +BreakBeforeBraces: Attach +IndentExternBlock: Indent +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AllowShortFunctionsOnASingleLine: None +IndentCaseLabels: true +PointerAlignment: Left diff --git a/spec/compile_spec.lua b/spec/compile_spec.lua index d9627e2..f5a46fa 100644 --- a/spec/compile_spec.lua +++ b/spec/compile_spec.lua @@ -81,9 +81,8 @@ end) local major, minor = _VERSION:match('([%d]+)%.(%d+)') if tonumber(major) >= 5 and tonumber(minor) >= 3 then - describe("Make sure ints and floats parse correctly", function () + describe("Make sure ints and floats parse correctly", function() it("should handle decoding numbers appropriately", function() - local numberCheck = simdjson.parse([[ { "float": 1.2, @@ -101,7 +100,6 @@ if tonumber(major) >= 5 and tonumber(minor) >= 3 then assert.are.same("float", math.type(numberCheck["one_above_max_signed_integer"])) assert.are.same("integer", 
math.type(numberCheck["min_unsigned_integer"])) assert.are.same("float", math.type(numberCheck["max_unsigned_integer"])) - end) end) end @@ -129,3 +127,19 @@ describe("Make sure invalid files are not accepted", function() end) end end) + +describe("Active implementation function", function() + it("should return a valid implementation name", function() + local impl = simdjson.activeImplementation() + assert.is_not_nil(impl) + assert.is_string(impl) + assert.is_truthy(impl:match("%w+")) -- Contains at least one word character + assert.is_true(#impl > 0) -- Non-empty string + end) + + it("should contain implementation details", function() + local impl = simdjson.activeImplementation() + -- Implementation string should have format like "arm64 (ARM NEON)" or "haswell (Intel AVX2)" + assert.is_truthy(impl:match("%(.*%)")) -- Contains parentheses with description + end) +end) diff --git a/spec/encode_security_spec.lua b/spec/encode_security_spec.lua new file mode 100644 index 0000000..ebdc810 --- /dev/null +++ b/spec/encode_security_spec.lua @@ -0,0 +1,396 @@ +local simdjson = require("simdjson") +local cjson = require("cjson") + +describe("encode() security and edge cases", function() + describe("String injection and escaping", function() + it("should properly escape quote characters", function() + local data = { value = 'test"with"quotes' } + local encoded = simdjson.encode(data) + assert.is_true(encoded:find('\\"') ~= nil) + local decoded = simdjson.parse(encoded) + assert.are.same(data.value, decoded.value) + end) + + it("should properly escape backslashes", function() + local data = { value = 'test\\with\\backslashes' } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data.value, decoded.value) + end) + + it("should properly escape control characters", function() + local test_cases = { + { str = "line1\nline2", name = "newline" }, + { str = "tab\there", name = "tab" }, + { str = "return\rhere", name = "carriage 
return" }, + { str = "backspace\bhere", name = "backspace" }, + { str = "form\ffeed", name = "form feed" }, + } + + for _, test in ipairs(test_cases) do + local data = { value = test.str } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data.value, decoded.value) + end + end) + + it("should handle strings with null bytes", function() + -- Note: null bytes may be truncated as C strings are null-terminated + local data = { value = "before\x00after" } + local encoded = simdjson.encode(data) + -- Verify encoding doesn't crash and produces valid JSON + assert.is_true(encoded:find("before") ~= nil) + local decoded = simdjson.parse(encoded) + -- String may be truncated at null byte + assert.is_true(decoded.value == "before" or decoded.value == "before\x00after") + end) + + it("should handle common control characters safely", function() + -- Test specific control characters that should be properly escaped + local test_chars = { + { char = "\t", name = "tab", escape = "\\t" }, + { char = "\n", name = "newline", escape = "\\n" }, + { char = "\r", name = "carriage return", escape = "\\r" }, + { char = "\b", name = "backspace", escape = "\\b" }, + { char = "\f", name = "form feed", escape = "\\f" }, + } + + for _, test in ipairs(test_chars) do + local data = { value = "before" .. test.char .. "after" } + local encoded = simdjson.encode(data) + -- Verify the character is properly escaped in JSON + assert.is_true(encoded:find("before") ~= nil) + local decoded = simdjson.parse(encoded) + assert.are.same(data.value, decoded.value) + end + end) + end) + + describe("Potential XSS and HTML injection", function() + it("should handle HTML/XML special characters", function() + local data = { + html = "", + xml = "", + tags = "
test
", + entities = "<>&"'" + } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data.html, decoded.html) + assert.are.same(data.xml, decoded.xml) + assert.are.same(data.tags, decoded.tags) + assert.are.same(data.entities, decoded.entities) + end) + + it("should not execute embedded JavaScript", function() + local malicious = { + js = "'; alert('xss'); //", + comment = "/* comment */ code", + injection = "\"); malicious(); //" + } + local encoded = simdjson.encode(malicious) + -- Verify it's properly escaped + assert.is_true(encoded:find("alert") ~= nil) + local decoded = simdjson.parse(encoded) + assert.are.same(malicious.js, decoded.js) + end) + end) + + describe("Key injection and object vulnerabilities", function() + it("should handle keys with special characters", function() + local data = { + ["key'with'quotes"] = "value1", + ['key"with"doublequotes'] = "value2", + ["key\\with\\backslash"] = "value3", + ["key\nwith\nnewline"] = "value4", + } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data["key'with'quotes"], decoded["key'with'quotes"]) + assert.are.same(data['key"with"doublequotes'], decoded['key"with"doublequotes']) + end) + + it("should handle prototype pollution keys", function() + -- Common prototype pollution attack keys + local data = { + ["__proto__"] = "should_be_safe", + ["constructor"] = "safe_value", + ["prototype"] = "another_safe" + } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data["__proto__"], decoded["__proto__"]) + assert.are.same(data["constructor"], decoded["constructor"]) + end) + + it("should handle empty string keys", function() + local data = { [""] = "empty_key_value" } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data[""], decoded[""]) + end) + + it("should handle very long keys", function() + local long_key = 
string.rep("a", 10000) + local data = { [long_key] = "value" } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data[long_key], decoded[long_key]) + end) + end) + + describe("Number vulnerabilities", function() + it("should handle very large integers without overflow", function() + local data = { + max_int = 9007199254740991, -- Max safe integer in JavaScript + min_int = -9007199254740991, + large_pos = 9223372036854775807, -- Max int64 + large_neg = -9223372036854775808, -- Min int64 + } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + -- Allow for precision loss on very large numbers + assert.is_true(math.abs(decoded.max_int - data.max_int) < 1) + end) + + it("should handle floating point edge cases", function() + local data = { + zero = 0.0, + very_small = 1e-308, + very_large = 1e308, + negative = -123.456, + } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data.zero, decoded.zero) + end) + + it("should handle many decimal places", function() + local data = { pi = 3.14159265358979323846264338327950288 } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + -- Check that precision is maintained reasonably + assert.is_true(math.abs(decoded.pi - 3.141592653589793) < 0.000001) + end) + end) + + describe("Nested structure vulnerabilities", function() + it("should enforce max depth to prevent stack overflow", function() + -- Create a very deep structure + local function create_deep(depth) + if depth == 0 then + return "bottom" + end + return { nested = create_deep(depth - 1) } + end + + local deep = create_deep(50) + + -- Should succeed with high limit + local success1 = pcall(function() + simdjson.encode(deep, 100) + end) + assert.is_true(success1) + + -- Should fail with low limit + local success2 = pcall(function() + simdjson.encode(deep, 10) + end) + assert.is_false(success2) + end) + + it("should 
handle wide objects without issues", function() + -- Create object with many keys + local wide = {} + for i = 1, 1000 do + wide["key" .. i] = "value" .. i + end + local encoded = simdjson.encode(wide) + local decoded = simdjson.parse(encoded) + assert.are.same(wide["key500"], decoded["key500"]) + end) + + it("should handle wide arrays without issues", function() + local wide = {} + for i = 1, 1000 do + wide[i] = i + end + local encoded = simdjson.encode(wide) + local decoded = simdjson.parse(encoded) + assert.are.same(#wide, #decoded) + assert.are.same(wide[500], decoded[500]) + end) + end) + + describe("Memory and performance vulnerabilities", function() + it("should handle very long strings", function() + -- Create a 1MB string + local long_string = string.rep("x", 1024 * 1024) + local data = { large = long_string } + local encoded = simdjson.encode(data) + assert.is_true(#encoded > 1024 * 1024) + local decoded = simdjson.parse(encoded) + assert.are.same(#long_string, #decoded.large) + end) + + it("should handle arrays with many elements", function() + local large_array = {} + for i = 1, 10000 do + large_array[i] = i + end + local data = { arr = large_array } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(#large_array, #decoded.arr) + assert.are.same(large_array[5000], decoded.arr[5000]) + end) + + it("should handle mixed large structure", function() + local data = { + strings = {}, + numbers = {}, + objects = {} + } + for i = 1, 100 do + data.strings[i] = string.rep("test", 100) + data.numbers[i] = i * 1.5 + data.objects[i] = { id = i, name = "item" .. 
i } + end + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(#data.strings, #decoded.strings) + end) + end) + + describe("Unicode and encoding vulnerabilities", function() + it("should handle various Unicode characters", function() + local data = { + emoji = "😀🎉🔥💯", + chinese = "你好世界", + arabic = "مرحبا", + russian = "Привет", + mixed = "Hello 世界 🌍", + } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data.emoji, decoded.emoji) + assert.are.same(data.chinese, decoded.chinese) + assert.are.same(data.mixed, decoded.mixed) + end) + + it("should handle Unicode escapes", function() + -- String with Unicode escape sequences + local data = { unicode = "test\\u0041\\u0042\\u0043" } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data.unicode, decoded.unicode) + end) + + it("should handle zero-width and special Unicode", function() + local data = { + zero_width = "test\u{200B}here", -- Zero-width space + rtl_mark = "test\u{200F}mark", -- Right-to-left mark + combining = "e\u{0301}", -- e with acute accent (combining) + } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(data.zero_width, decoded.zero_width) + end) + end) + + describe("Malformed or unexpected input", function() + it("should handle empty structures", function() + local data = { + empty_object = {}, + empty_array = {}, + empty_string = "", + } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(type(decoded.empty_object), "table") + assert.are.same(decoded.empty_string, "") + end) + + it("should handle boolean edge cases", function() + local data = { + true_val = true, + false_val = false, + bool_array = { true, false, true, false }, + } + local encoded = simdjson.encode(data) + assert.is_true(encoded:find("true") ~= nil) + assert.is_true(encoded:find("false") ~= 
nil) + local decoded = simdjson.parse(encoded) + assert.are.same(data.true_val, decoded.true_val) + assert.are.same(data.false_val, decoded.false_val) + end) + + it("should consistently handle repeated encoding", function() + local data = { test = "value", num = 42 } + local encoded1 = simdjson.encode(data) + local encoded2 = simdjson.encode(data) + local encoded3 = simdjson.encode(data) + + local decoded1 = simdjson.parse(encoded1) + local decoded2 = simdjson.parse(encoded2) + local decoded3 = simdjson.parse(encoded3) + + assert.are.same(decoded1.test, decoded2.test) + assert.are.same(decoded2.test, decoded3.test) + end) + end) + + describe("SQL and NoSQL injection patterns", function() + it("should safely handle SQL injection patterns", function() + local injection_patterns = { + "'; DROP TABLE users; --", + "1' OR '1'='1", + "admin'--", + "' OR 1=1--", + "'; EXEC sp_MSForEachTable 'DROP TABLE ?'; --", + } + + for _, pattern in ipairs(injection_patterns) do + local data = { query = pattern } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(pattern, decoded.query) + end + end) + + it("should safely handle NoSQL injection patterns", function() + local nosql_patterns = { + "{'$gt': ''}", + "{'$ne': null}", + "{'$where': 'this.password.length > 0'}", + } + + for _, pattern in ipairs(nosql_patterns) do + local data = { filter = pattern } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + assert.are.same(pattern, decoded.filter) + end + end) + end) + + describe("Path traversal and file inclusion", function() + it("should handle path traversal strings", function() + local paths = { + "../../etc/passwd", + "..\\..\\windows\\system32", + "/etc/passwd", + "C:\\Windows\\System32\\config\\SAM", + "../../../../../etc/shadow", + } + + for _, path in ipairs(paths) do + local data = { path = path } + local encoded = simdjson.encode(data) + local decoded = simdjson.parse(encoded) + 
assert.are.same(path, decoded.path) + end + end) + end) +end) diff --git a/spec/encode_spec.lua b/spec/encode_spec.lua new file mode 100644 index 0000000..dbe9471 --- /dev/null +++ b/spec/encode_spec.lua @@ -0,0 +1,414 @@ +local simdjson = require("simdjson") +local cjson = require("cjson") + + +describe("encode numbers correctly", function() + it("should encode numbers the same as cjson", function() + local testData = { + float = 1.2, + min_signed_integer = -9223372036854775808, + max_signed_integer = 9223372036854775807, + one_above_max_signed_integer = 9223372036854775808, + min_unsigned_integer = 0, + max_unsigned_integer = 18446744073709551615 + } + + for k, v in pairs(testData) do + local td = { [k] = v } + local simdjsonEncoded = simdjson.encode(td) + local cjsonEncoded = cjson.encode(td) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end + + local cjsonEncode = cjson.encode(testData) + local simdjsonEncode = simdjson.encode(testData) + assert.are.same(cjsonEncode, simdjsonEncode) + end) + + it("should encode special float values", function() + local testCases = { + { value = 0.0, name = "zero" }, + { value = 3.14159265358979, name = "pi" }, + { value = 2.718281828459045, name = "e" }, + { value = 1.23e-10, name = "small scientific" }, + { value = 1.23e10, name = "large scientific" }, + { value = -123.456, name = "negative float" }, + } + + for _, test in ipairs(testCases) do + local data = { value = test.value } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end + end) + + it("should encode array of numbers", function() + local numbers = { 1, 2, 3, 4, 5, -1, -2, 0, 1.5, 2.7 } + local data = { numbers = numbers } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) +end) + +describe("encode strings correctly", function() + it("should encode simple strings", function() + 
local testCases = { + { str = "hello", name = "simple" }, + { str = "", name = "empty" }, + { str = "hello world", name = "with space" }, + { str = "123", name = "numeric string" }, + } + + for _, test in ipairs(testCases) do + local data = { str = test.str } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end + end) + + it("should encode strings with special characters", function() + local testCases = { + { str = "hello\nworld", name = "newline" }, + { str = "hello\tworld", name = "tab" }, + { str = "hello\rworld", name = "carriage return" }, + { str = "hello\"world", name = "quote" }, + { str = "hello\\world", name = "backslash" }, + } + + for _, test in ipairs(testCases) do + local data = { str = test.str } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end + end) + + it("should encode forward slash without escaping", function() + -- simdjson doesn't escape forward slashes (which is valid JSON) + local data = { str = "hello/world" } + local simdjsonEncoded = simdjson.encode(data) + assert.are.same('{"str":"hello/world"}', simdjsonEncoded) + end) + + it("should encode unicode strings", function() + local testCases = { + { str = "Hello 世界", name = "chinese" }, + { str = "Hello मुndi", name = "hindi" }, + { str = "Hello 🌍", name = "emoji" }, + { str = "café", name = "accented" }, + } + + for _, test in ipairs(testCases) do + local data = { str = test.str } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end + end) + + it("should encode array of strings", function() + local strings = { "one", "two", "three", "", "with space" } + local data = { strings = strings } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, 
simdjsonEncoded) + end) +end) + +describe("encode booleans correctly", function() + it("should encode boolean values", function() + local data1 = { value = true } + assert.are.same(cjson.encode(data1), simdjson.encode(data1)) + + local data2 = { value = false } + assert.are.same(cjson.encode(data2), simdjson.encode(data2)) + end) + + it("should encode boolean arrays", function() + local bools = { true, false, true, false } + local data = { bools = bools } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should encode mixed boolean and other types", function() + local mixed = { true, 1, "test", false, 2.5 } + local data = { mixed = mixed } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) +end) + +describe("encode arrays correctly", function() + it("should encode empty arrays", function() + local data = { arr = {} } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should encode nested arrays", function() + local data = { + nested = { + { 1, 2, 3 }, + { 4, 5, 6 }, + { 7, 8, 9 } + } + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should encode deeply nested arrays", function() + local data = { arr = { { { { { 1 } } } } } } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should encode arrays with mixed types", function() + local data = { + mixed = { 1, "two", 3.0, true, false, { nested = "value" } } + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) +end) + 
+describe("encode objects correctly", function() + it("should encode empty objects", function() + local data = {} + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should encode objects with string keys", function() + local data = { + key1 = "value1", + key2 = "value2", + key3 = "value3" + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + -- Note: key order may differ, so we decode and compare + local simdjsonDecoded = simdjson.parse(simdjsonEncoded) + local cjsonDecoded = cjson.decode(cjsonEncoded) + assert.are.same(cjsonDecoded, simdjsonDecoded) + end) + + it("should encode objects with numeric keys", function() + local data = { + ["1"] = "one", + ["2"] = "two", + ["3"] = "three" + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + local simdjsonDecoded = simdjson.parse(simdjsonEncoded) + local cjsonDecoded = cjson.decode(cjsonEncoded) + assert.are.same(cjsonDecoded, simdjsonDecoded) + end) + + it("should encode nested objects", function() + local data = { + outer = { + middle = { + inner = { + value = "deep" + } + } + } + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should encode objects with mixed value types", function() + local data = { + string = "value", + number = 42, + float = 3.14, + bool_true = true, + bool_false = false, + array = { 1, 2, 3 }, + object = { nested = "value" } + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + local simdjsonDecoded = simdjson.parse(simdjsonEncoded) + local cjsonDecoded = cjson.decode(cjsonEncoded) + assert.are.same(cjsonDecoded, simdjsonDecoded) + end) +end) + +describe("encode complex json types", function() + it("should encode complex json types the same as cjson", function() + 
local testData = { + object = { + key1 = "value1", + key2 = 2, + key3 = { nestedKey = "nestedValue" } + }, + mixed = { + "string", + 123, + true, + { nestedArray = { 1, 2, 3 } }, + { nestedObject = { key = "value" } } + }, + mixed_complex = { + array = { "abc", 123, true }, + object = { key = "value", number = 456 }, + nested_object = { + inner_key = { 1, 2, 3, { deep_key = "deep_value" } } + } + }, + bools = { true, false, true, false } + } + + for k, v in pairs(testData) do + local td = { [k] = v } + local simdjsonEncoded = simdjson.encode(td) + local cjsonEncoded = cjson.encode(td) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end + end) + + it("should encode complex nested structures", function() + local data = { + users = { + { + id = 1, + name = "Alice", + active = true, + scores = { 95, 87, 92 } + }, + { + id = 2, + name = "Bob", + active = false, + scores = { 78, 85, 90 } + } + }, + metadata = { + version = "1.0", + count = 2 + } + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should handle arrays of objects", function() + local data = { + items = { + { id = 1, name = "Item 1" }, + { id = 2, name = "Item 2" }, + { id = 3, name = "Item 3" } + } + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should handle objects with array values", function() + local data = { + numbers = { 1, 2, 3, 4, 5 }, + strings = { "a", "b", "c" }, + booleans = { true, false, true } + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + local simdjsonDecoded = simdjson.parse(simdjsonEncoded) + local cjsonDecoded = cjson.decode(cjsonEncoded) + assert.are.same(cjsonDecoded, simdjsonDecoded) + end) +end) + +describe("encode edge cases", function() + it("should handle very long strings", function() + local longString = 
string.rep("a", 10000) + local data = { str = longString } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should handle large arrays", function() + local largeArray = {} + for i = 1, 1000 do + largeArray[i] = i + end + local data = { arr = largeArray } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) + + it("should handle sparse arrays as objects", function() + local sparseArray = {} + sparseArray[1] = "first" + sparseArray[5] = "fifth" + sparseArray[10] = "tenth" + local data = { sparse = sparseArray } + + -- simdjson treats sparse arrays as objects + local simdjsonEncoded = simdjson.encode(data) + assert.is_true(simdjsonEncoded:find('"sparse"') ~= nil) + + -- Verify it can be decoded back + local decoded = simdjson.parse(simdjsonEncoded) + assert.is_not_nil(decoded.sparse) + end) + + it("should encode keys with special characters", function() + local data = { + ["key with spaces"] = "value1", + ["key-with-dashes"] = "value2", + ["key_with_underscores"] = "value3", + ["key.with.dots"] = "value4" + } + local simdjsonEncoded = simdjson.encode(data) + local cjsonEncoded = cjson.encode(data) + local simdjsonDecoded = simdjson.parse(simdjsonEncoded) + local cjsonDecoded = cjson.decode(cjsonEncoded) + assert.are.same(cjsonDecoded, simdjsonDecoded) + end) + + it("should roundtrip encode and decode", function() + local original = { + name = "Test", + value = 42, + active = true, + items = { 1, 2, 3 }, + nested = { key = "value" } + } + local encoded = simdjson.encode(original) + local decoded = simdjson.parse(encoded) + + -- Compare individual fields since table equality is by reference + assert.are.same(original.name, decoded.name) + assert.are.same(original.value, decoded.value) + assert.are.same(original.active, decoded.active) + assert.are.same(original.items[1], 
decoded.items[1]) + assert.are.same(original.nested.key, decoded.nested.key) + end) + + it("basic string", function() + local original = "test string" + local simdjsonEncoded = simdjson.encode(original) + local cjsonEncoded = cjson.encode(original) + assert.are.same(cjsonEncoded, simdjsonEncoded) + end) +end) diff --git a/spec/performance_spec.lua b/spec/performance_spec.lua new file mode 100644 index 0000000..7807a2a --- /dev/null +++ b/spec/performance_spec.lua @@ -0,0 +1,399 @@ +local simdjson = require("simdjson") +local cjson = require("cjson") + +-- Track wins +local simdjson_wins = 0 +local cjson_wins = 0 +local total_tests = 0 +local iterations = 10000 + +-- Helper function to measure time +local function measure_time(func, iterations) + iterations = iterations or 1 + collectgarbage("collect") -- Clean up before measurement + local start = os.clock() + for i = 1, iterations do func() end + local elapsed = os.clock() - start + return elapsed, elapsed / iterations +end + +-- Helper to format numbers +local function format_number(num) + if num < 0.001 then + return string.format("%.6f ms", num * 1000) + elseif num < 1 then + return string.format("%.3f ms", num * 1000) + else + return string.format("%.3f s", num) + end +end + +-- Helper to show comparison +local function show_comparison(name, simdjson_time, cjson_time) + local speedup = cjson_time / simdjson_time + local winner = speedup > 1 and "simdjson" or "cjson" + local ratio = speedup > 1 and speedup or (1 / speedup) + + -- Track wins + total_tests = total_tests + 1 + if winner == "simdjson" then + simdjson_wins = simdjson_wins + 1 + else + cjson_wins = cjson_wins + 1 + end + + -- Add newline before first result to separate from test marker + if total_tests == 1 then print() end + + print(string.format( + " %-30s | simdjson: %s | cjson: %s | %s is %.2fx faster", name, + format_number(simdjson_time), format_number(cjson_time), winner, + ratio)) +end + +describe("Performance Comparison: simdjson vs cjson", 
function() + it(string.format("Simple Object Encoding (%s iterations)", iterations), + function() + local simple_data = { name = "test", value = 42, active = true } + + local simdjson_time = measure_time(function() + simdjson.encode(simple_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(simple_data) + end, iterations) + show_comparison("Simple object", simdjson_time, cjson_time) + end) + + it(string.format("Array Encoding (%s iterations)", iterations), function() + local array_data = {} + for i = 1, 100 do array_data[i] = i end + array_data = { numbers = array_data } + + local simdjson_time = measure_time(function() + simdjson.encode(array_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(array_data) + end, iterations) + show_comparison("100-element array", simdjson_time, cjson_time) + end) + + it(string.format("Nested Object Encoding (%s iterations)", iterations), + function() + local nested_data = { + level1 = { level2 = { level3 = { level4 = { value = "deep" } } } } + } + + local simdjson_time = measure_time(function() + simdjson.encode(nested_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(nested_data) + end, iterations) + show_comparison("5-level nesting", simdjson_time, cjson_time) + end) + + it(string.format("Nested Object Encoding (%s iterations)", iterations), + function() + local nested_data = { + level1 = { + level2 = { + level3 = { + level4 = { + level5 = { + level6 = { + level7 = { + level8 = { + level9 = { + level10 = { value = "deep" } + } + } + } + } + } + } + } + } + } + } + + local simdjson_time = measure_time(function() + simdjson.encode(nested_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(nested_data) + end, iterations) + show_comparison("10-level nesting", simdjson_time, cjson_time) + end) + + it(string.format("String-Heavy Data (%s iterations)", iterations), + function() + local string_data 
= { + str1 = "The quick brown fox jumps over the lazy dog", + str2 = "Lorem ipsum dolor sit amet, consectetur adipiscing elit", + str3 = "Pack my box with five dozen liquor jugs" + } + + local simdjson_time = measure_time(function() + simdjson.encode(string_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(string_data) + end, iterations) + show_comparison("String-heavy object", simdjson_time, cjson_time) + end) + + it(string.format("Mixed Type Array (%s iterations)", iterations), function() + local mixed_array = { + data = { 1, "two", 3.0, true, false, { nested = "value" } } + } + + local simdjson_time = measure_time(function() + simdjson.encode(mixed_array) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(mixed_array) + end, iterations) + show_comparison("Mixed type array", simdjson_time, cjson_time) + end) + + it(string.format("Large Object (%s iterations)", iterations), function() + local large_object = {} + for i = 1, 100 do large_object["key" .. i] = "value" .. i end + large_object = { data = large_object } + + local simdjson_time = measure_time(function() + simdjson.encode(large_object) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(large_object) + end, iterations) + show_comparison("100-key object", simdjson_time, cjson_time) + end) + + it(string.format("Large Array (%s iterations)", iterations), function() + local large_array = {} + for i = 1, 1000 do large_array[i] = i end + large_array = { data = large_array } + + local simdjson_time = measure_time(function() + simdjson.encode(large_array) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(large_array) + end, iterations) + + show_comparison("1000-element array", simdjson_time, cjson_time) + end) + + it(string.format("Large Objects (%s iterations)", iterations), function() + local large_array = {} + for i = 1, 1000 do large_array["a" .. 
i] = i end + large_array = { data = large_array } + + local simdjson_time = measure_time(function() + simdjson.encode(large_array) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(large_array) + end, iterations) + + show_comparison("1000-K/V pair object", simdjson_time, cjson_time) + end) + + it(string.format("Complex Realistic Data (%s iterations)", iterations), + function() + local realistic_data = { + users = { + { + id = 1, + name = "Alice Smith", + email = "alice@example.com", + active = true, + score = 95.5 + }, { + id = 2, + name = "Bob Jones", + email = "bob@example.com", + active = false, + score = 87.3 + }, { + id = 3, + name = "Carol White", + email = "carol@example.com", + active = true, + score = 92.1 + } + }, + metadata = { version = "1.0", timestamp = 1704197400, count = 3 }, + settings = { theme = "dark", language = "en", notifications = true } + } + + local simdjson_time = measure_time(function() + simdjson.encode(realistic_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(realistic_data) + end, iterations) + show_comparison("Realistic complex data", simdjson_time, cjson_time) + end) + + it(string.format("Simple JSON Parsing (%s iterations)", iterations), + function() + local simple_json = '{"name":"test","value":42,"active":true}' + + local simdjson_time = measure_time(function() + simdjson.parse(simple_json) + end, iterations) + + local cjson_time = measure_time(function() + cjson.decode(simple_json) + end, 10000) + + show_comparison("Simple parsing", simdjson_time, cjson_time) + end) + + it(string.format("Array Parsing (%s iterations)", iterations), function() + local array_json = + '{"numbers":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]}' + + local simdjson_time = measure_time(function() + simdjson.parse(array_json) + end, iterations) + + local cjson_time = measure_time(function() + cjson.decode(array_json) + end, iterations) + + show_comparison("Array parsing", 
simdjson_time, cjson_time) + end) + + it(string.format("Nested Object Parsing (%s iterations)", iterations), + function() + local nested_json = + '{"level1":{"level2":{"level3":{"level4":{"value":"deep"}}}}}' + + local simdjson_time = measure_time(function() + simdjson.parse(nested_json) + end, iterations) + + local cjson_time = measure_time(function() + cjson.decode(nested_json) + end, iterations) + show_comparison("Nested parsing", simdjson_time, cjson_time) + end) + + it(string.format("Large JSON Parsing (%s iterations)", iterations), + function() + local large_json_data = {} + for i = 1, 100 do large_json_data["key" .. i] = "value" .. i end + local large_json = cjson.encode({ data = large_json_data }) + + local simdjson_time = measure_time(function() + simdjson.parse(large_json) + end, iterations) + + local cjson_time = measure_time(function() + cjson.decode(large_json) + end, iterations) + show_comparison("Large object parsing", simdjson_time, cjson_time) + end) + + it(string.format("Round-trip: Encode + Parse (%s iterations)", iterations), + function() + local roundtrip_data = { + id = 123, + name = "Test User", + values = { 1, 2, 3, 4, 5 }, + metadata = { active = true, score = 95.5 } + } + + local simdjson_time = measure_time(function() + local encoded = simdjson.encode(roundtrip_data) + simdjson.parse(encoded) + end, iterations) + + local cjson_time = measure_time(function() + local encoded = cjson.encode(roundtrip_data) + cjson.decode(encoded) + end, iterations) + + show_comparison("Round-trip", simdjson_time, cjson_time) + end) + + it(string.format("Special Characters (%s iterations)", iterations), + function() + local special_chars_data = { + escaped = 'test"with"quotes\nand\nnewlines\ttabs', + unicode = "Hello 世界 🌍" + } + + local simdjson_time = measure_time(function() + simdjson.encode(special_chars_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(special_chars_data) + end, iterations) + show_comparison("Special 
characters", simdjson_time, cjson_time) + end) + + it(string.format("Boolean Arrays (%s iterations)", iterations), function() + local bool_data = { + flags = { true, false, true, false, true, false, true, false } + } + + local simdjson_time = measure_time(function() + simdjson.encode(bool_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(bool_data) + end, iterations) + + show_comparison("Boolean arrays", simdjson_time, cjson_time) + end) + + it(string.format("Large Boolean Array (%s iterations)", iterations), function() + local bool_data = {} + local choices = { true, false } + for i = 1, 1000 do bool_data[i] = choices[math.random(2)] end + + + local simdjson_time = measure_time(function() + simdjson.encode(bool_data) + end, iterations) + + local cjson_time = measure_time(function() + cjson.encode(bool_data) + end, iterations) + + show_comparison("Large boolean arrays", simdjson_time, cjson_time) + end) + + -- Print summary after all tests + after_each(function() end) -- No-op to ensure we're in test context + + teardown(function() + print("\n" .. string.rep("=", 80)) + print("Using SIMD implementation: " .. 
simdjson.activeImplementation()) + print(string.format("Performance Summary: %d total tests", total_tests)) + print(string.rep("=", 80)) + print(string.format(" simdjson wins: %d (%.1f%%)", simdjson_wins, + (simdjson_wins / total_tests) * 100)) + print(string.format(" cjson wins: %d (%.1f%%)", cjson_wins, + (cjson_wins / total_tests) * 100)) + print(string.rep("=", 80)) + end) +end) diff --git a/src/luasimdjson.cpp b/src/luasimdjson.cpp index 4004b88..ceda063 100644 --- a/src/luasimdjson.cpp +++ b/src/luasimdjson.cpp @@ -1,9 +1,10 @@ -#include +#include #include +#include #ifdef _WIN32 -#include #include +#include #else #include #endif @@ -11,12 +12,21 @@ #define NDEBUG #define __OPTIMIZE__ 1 -#include "simdjson.h" #include "luasimdjson.h" +#include "simdjson.h" #define LUA_SIMDJSON_NAME "simdjson" #define LUA_SIMDJSON_VERSION "0.0.8" +// keys encode max depth configuration. +#define LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY "simdjson.max_encode_depth" +#define DEFAULT_MAX_ENCODE_DEPTH simdjson::DEFAULT_MAX_DEPTH + +// Encode buffer size reservation configuration +#define LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY "simdjson.encode_buffer_size" +#define DEFAULT_ENCODE_BUFFER_SIZE (16 * 1024) // 16KB +#define DEFAULT_MAX_ENCODE_BUFFER_SIZE simdjson::SIMDJSON_MAXSIZE_BYTES + using namespace simdjson; #if !defined(luaL_newlibtable) && (!defined LUA_VERSION_NUM || LUA_VERSION_NUM <= 501) @@ -25,342 +35,682 @@ using namespace simdjson; ** Stolen from: http://lua-users.org/wiki/CompatibilityWithLuaFive ** Adapted from Lua 5.2.0 */ -static void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup) -{ - luaL_checkstack(L, nup + 1, "too many upvalues"); - for (; l->name != NULL; l++) - { /* fill the table with given functions */ - int i; - lua_pushstring(L, l->name); - for (i = 0; i < nup; i++) /* copy upvalues to the top */ - lua_pushvalue(L, -(nup + 1)); - lua_pushcclosure(L, l->func, nup); /* closure with those upvalues */ - lua_settable(L, -(nup + 3)); - } - lua_pop(L, nup); /* 
remove upvalues */ +static void luaL_setfuncs(lua_State* L, const luaL_Reg* l, int nup) { + luaL_checkstack(L, nup + 1, "too many upvalues"); + for (; l->name != NULL; l++) { /* fill the table with given functions */ + int i; + lua_pushstring(L, l->name); + for (i = 0; i < nup; i++) /* copy upvalues to the top */ + lua_pushvalue(L, -(nup + 1)); + lua_pushcclosure(L, l->func, nup); /* closure with those upvalues */ + lua_settable(L, -(nup + 3)); + } + lua_pop(L, nup); /* remove upvalues */ } #endif ondemand::parser ondemand_parser; simdjson::padded_string jsonbuffer; - -template -void convert_ondemand_element_to_table(lua_State *L, T &element) -{ - static_assert(std::is_base_of::value || std::is_base_of::value, "type parameter must be document or value"); - - switch (element.type()) - { - - case ondemand::json_type::array: - { - int count = 1; - lua_newtable(L); - - for (ondemand::value child : element.get_array()) - { - lua_pushinteger(L, count); - convert_ondemand_element_to_table(L, child); - lua_settable(L, -3); - count = count + 1; - } - break; - } - - case ondemand::json_type::object: - lua_newtable(L); - for (ondemand::field field : element.get_object()) - { - std::string_view s = field.unescaped_key(); - lua_pushlstring(L, s.data(), s.size()); - convert_ondemand_element_to_table(L, field.value()); - lua_settable(L, -3); - } - break; - - case ondemand::json_type::number: - { - ondemand::number number = element.get_number(); - ondemand::number_type number_type = number.get_number_type(); - switch (number_type) - { - case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::floating_point_number: - lua_pushnumber(L, element.get_double()); - break; - - case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::signed_integer: - lua_pushinteger(L, element.get_int64()); - break; - - case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::unsigned_integer: - { -// a uint64 can be greater than an int64, so we must check how large and pass as a number -// if larger but LUA_MAXINTEGER 
(which is only defined in 5.3+) +thread_local simdjson::builder::string_builder* encode_buffer = + nullptr; // Reused across encode() calls +thread_local size_t encode_buffer_size = 0; // Track current buffer size + +template void convert_ondemand_element_to_table(lua_State* L, T& element) { + static_assert(std::is_base_of::value || + std::is_base_of::value, + "type parameter must be document or value"); + + switch (element.type()) { + case ondemand::json_type::array: { + int count = 1; + lua_newtable(L); + + for (ondemand::value child : element.get_array()) { + lua_pushinteger(L, count); + convert_ondemand_element_to_table(L, child); + lua_settable(L, -3); + count = count + 1; + } + break; + } + + case ondemand::json_type::object: + lua_newtable(L); + for (ondemand::field field : element.get_object()) { + std::string_view s = field.unescaped_key(); + lua_pushlstring(L, s.data(), s.size()); + convert_ondemand_element_to_table(L, field.value()); + lua_settable(L, -3); + } + break; + + case ondemand::json_type::number: { + ondemand::number number = element.get_number(); + ondemand::number_type number_type = number.get_number_type(); + switch (number_type) { + case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::floating_point_number: + lua_pushnumber(L, element.get_double()); + break; + + case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::signed_integer: + lua_pushinteger(L, element.get_int64()); + break; + + case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::unsigned_integer: { +// a uint64 can be greater than an int64, so we must check how large and pass as +// a number if larger but LUA_MAXINTEGER (which is only defined in 5.3+) #if defined(LUA_MAXINTEGER) - uint64_t actual_value = element.get_uint64(); - if (actual_value > LUA_MAXINTEGER) - { - lua_pushnumber(L, actual_value); - } - else - { - lua_pushinteger(L, actual_value); - } + uint64_t actual_value = element.get_uint64(); + if (actual_value > LUA_MAXINTEGER) { + lua_pushnumber(L, actual_value); + } else { + 
lua_pushinteger(L, actual_value); + } #else - lua_pushnumber(L, element.get_double()); + lua_pushnumber(L, element.get_double()); #endif - break; - } - - case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::big_integer: - lua_pushnumber(L, element.get_double()); - break; - } - break; - } - - case ondemand::json_type::string: - { - std::string_view s = element.get_string(); - lua_pushlstring(L, s.data(), s.size()); - break; - } - - case ondemand::json_type::boolean: - lua_pushboolean(L, element.get_bool()); - break; - - case ondemand::json_type::null: - // calling is_null().value() will trigger an exception if the value is invalid - if (element.is_null().value()) - { - lua_pushlightuserdata(L, NULL); - } - break; - - case ondemand::json_type::unknown: - default: - luaL_error(L, "simdjson::ondemand::json_type::unknown or unsupported type encountered"); - break; - } + break; + } + + case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::big_integer: + lua_pushnumber(L, element.get_double()); + break; + } + break; + } + + case ondemand::json_type::string: { + std::string_view s = element.get_string(); + lua_pushlstring(L, s.data(), s.size()); + break; + } + + case ondemand::json_type::boolean: + lua_pushboolean(L, element.get_bool()); + break; + + case ondemand::json_type::null: + // calling is_null().value() will trigger an exception if the value + // is invalid + if (element.is_null().value()) { + lua_pushlightuserdata(L, NULL); + } + break; + + case ondemand::json_type::unknown: + default: + luaL_error(L, "simdjson::ondemand::json_type::unknown or unsupported " + "type " + "encountered"); + break; + } } -// from https://github.com/simdjson/simdjson/blob/master/doc/performance.md#free-padding +// from +// https://github.com/simdjson/simdjson/blob/master/doc/performance.md#free-padding // Returns the default size of the page in bytes on this system. 
-long page_size() -{ +long page_size() { #ifdef _WIN32 - SYSTEM_INFO sysInfo; - GetSystemInfo(&sysInfo); - long pagesize = sysInfo.dwPageSize; + SYSTEM_INFO sysInfo; + GetSystemInfo(&sysInfo); + long pagesize = sysInfo.dwPageSize; #else - long pagesize = sysconf(_SC_PAGESIZE); + long pagesize = sysconf(_SC_PAGESIZE); #endif - return pagesize; + return pagesize; } // allows us to reuse a json buffer pretty safely // Returns true if the buffer + len + simdjson::SIMDJSON_PADDING crosses the // page boundary. -bool need_allocation(const char *buf, size_t len) -{ - return ((reinterpret_cast(buf + len - 1) % page_size()) < - simdjson::SIMDJSON_PADDING); +bool need_allocation(const char* buf, size_t len) { + return ((reinterpret_cast(buf + len - 1) % page_size()) < + simdjson::SIMDJSON_PADDING); +} + +simdjson::padded_string_view get_padded_string_view(const char* buf, size_t len, + simdjson::padded_string& jsonbuffer) { + if (need_allocation(buf, len)) { // unlikely case + jsonbuffer = simdjson::padded_string(buf, len); + return jsonbuffer; + } else { // no reallcation needed (very likely) + return simdjson::padded_string_view(buf, len, len + simdjson::SIMDJSON_PADDING); + } +} + +static int parse(lua_State* L) { + size_t json_str_len; + const char* json_str = luaL_checklstring(L, 1, &json_str_len); + + ondemand::document doc; + + try { + // makes a padded_string_view for a bit of quickness! 
+ doc = ondemand_parser.iterate(get_padded_string_view(json_str, json_str_len, jsonbuffer)); + convert_ondemand_element_to_table(L, doc); + } catch (simdjson::simdjson_error& error) { + luaL_error(L, error.what()); + } + + return 1; +} + +static int parse_file(lua_State* L) { + const char* json_file = luaL_checkstring(L, 1); + + padded_string json_string; + ondemand::document doc; + + try { + json_string = padded_string::load(json_file); + doc = ondemand_parser.iterate(json_string); + convert_ondemand_element_to_table(L, doc); + } catch (simdjson::simdjson_error& error) { + luaL_error(L, error.what()); + } + + return 1; +} + +static int active_implementation(lua_State* L) { + const auto& implementation = simdjson::get_active_implementation(); + std::string name = implementation->name(); + const std::string description = implementation->description(); + const std::string implementation_name = name + " (" + description + ")"; + + lua_pushlstring(L, implementation_name.data(), implementation_name.size()); + + return 1; +} + +// Add forward declaration near the top after includes +static void serialize_data(lua_State* L, int current_depth, int max_depth, + simdjson::builder::string_builder& builder); + +// Helper function to get max encode depth from registry +static int get_max_depth(lua_State* L) { + lua_pushstring(L, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY); + lua_gettable(L, LUA_REGISTRYINDEX); + + int max_depth = DEFAULT_MAX_ENCODE_DEPTH; + if (lua_isnumber(L, -1)) { + max_depth = lua_tointeger(L, -1); + } + lua_pop(L, 1); + + return max_depth; +} + +// Helper function to set max encode depth in registry +static void set_max_depth(lua_State* L, int max_depth) { + lua_pushstring(L, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY); + lua_pushinteger(L, max_depth); + lua_settable(L, LUA_REGISTRYINDEX); +} + +// Helper function to get encode buffer size from registry +static size_t get_encode_buffer_size(lua_State* L) { + lua_pushstring(L, LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY); + 
lua_gettable(L, LUA_REGISTRYINDEX); + + size_t buffer_size = DEFAULT_ENCODE_BUFFER_SIZE; + if (lua_isnumber(L, -1)) { + buffer_size = lua_tointeger(L, -1); + } + lua_pop(L, 1); + + return buffer_size; } -simdjson::padded_string_view get_padded_string_view(const char *buf, size_t len, - simdjson::padded_string &jsonbuffer) -{ - if (need_allocation(buf, len)) - { // unlikely case - jsonbuffer = simdjson::padded_string(buf, len); - return jsonbuffer; - } - else - { // no reallcation needed (very likely) - return simdjson::padded_string_view(buf, len, - len + simdjson::SIMDJSON_PADDING); - } +// Helper function to set encode buffer size in registry +static void set_encode_buffer_size(lua_State* L, size_t buffer_size) { + lua_pushstring(L, LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY); + lua_pushinteger(L, buffer_size); + lua_settable(L, LUA_REGISTRYINDEX); } -static int parse(lua_State *L) -{ - size_t json_str_len; - const char *json_str = luaL_checklstring(L, 1, &json_str_len); - - ondemand::document doc; - - try - { - // makes a padded_string_view for a bit of quickness! 
- doc = ondemand_parser.iterate(get_padded_string_view(json_str, json_str_len, jsonbuffer)); - convert_ondemand_element_to_table(L, doc); - } - catch (simdjson::simdjson_error &error) - { - luaL_error(L, error.what()); - } - - return 1; +// Check if table on stack top is a valid array and return its length +// Returns -1 if not an array, otherwise returns maximum index +static int get_table_array_size(lua_State* L) { + double key_num; + int max_index = 0; + int element_count = 0; + + lua_pushnil(L); + while (lua_next(L, -2) != 0) { + // Check if key is a number + if (lua_type(L, -2) == LUA_TNUMBER) { + key_num = lua_tonumber(L, -2); + // Check if it's a positive integer + if (floor(key_num) == key_num && key_num >= 1) { + if (static_cast(key_num) > max_index) { + max_index = static_cast(key_num); + } + element_count++; + lua_pop(L, 1); + continue; + } + } + + // Non-integer key found - not an array + lua_pop(L, 2); + return -1; + } + + // Check if array is contiguous (element count should equal max index) + if (element_count > 0 && element_count != max_index) { + return -1; + } + + return max_index; } -static int parse_file(lua_State *L) -{ - const char *json_file = luaL_checkstring(L, 1); - - padded_string json_string; - ondemand::document doc; - - try - { - json_string = padded_string::load(json_file); - doc = ondemand_parser.iterate(json_string); - convert_ondemand_element_to_table(L, doc); - } - catch (simdjson::simdjson_error &error) - { - luaL_error(L, error.what()); - } - - return 1; +// Helper function to format a number as a string +// Returns pointer to thread-local buffer and length +inline std::pair format_number_as_string(lua_State* L, int index) { + thread_local char buffer[32]; + size_t len; + + // JSON numbers are represented as doubles, which have limited precision + // for integers beyond 2^53. Check this first regardless of Lua version. 
+#if defined(LUA_MAXINTEGER) + const double max_safe_int = LUA_MAXINTEGER; +#else + const double max_safe_int = 9007199254740992.0; // 2^53 +#endif + +#if LUA_VERSION_NUM >= 503 + // Lua 5.3+ has native integer type + if (lua_isinteger(L, index)) { + lua_Integer num = lua_tointeger(L, index); + // Check if the integer fits safely in a JSON number (double) + if (num > -max_safe_int && num < max_safe_int) { + len = snprintf(buffer, sizeof(buffer), "%lld", (long long)num); + return {buffer, len}; + } + // Too large for safe integer representation, format as float + len = snprintf(buffer, sizeof(buffer), "%.14g", (double)num); + return {buffer, len}; + } +#else + // For Lua 5.1/5.2, check if the number is an integer value + { + double num = lua_tonumber(L, index); + if (std::floor(num) == num && num <= LLONG_MAX && num >= LLONG_MIN) { + if (num > -max_safe_int && num < max_safe_int) { + len = snprintf(buffer, sizeof(buffer), "%lld", static_cast(num)); + return {buffer, len}; + } + } + } +#endif + + // For floats or large numbers, convert to string with %.14g + lua_Number num = lua_tonumber(L, index); + len = snprintf(buffer, sizeof(buffer), "%.14g", (double)num); + return {buffer, len}; } -static int active_implementation(lua_State *L) -{ - const auto &implementation = simdjson::get_active_implementation(); - std::string name = implementation->name(); - const std::string description = implementation->description(); - const std::string implementation_name = name + " (" + description + ")"; +inline void serialize_append_bool(lua_State* L, + SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder& builder, + int lindex) { + // check if it is really a boolean + if (lua_isboolean(L, lindex)) { + if (lua_toboolean(L, lindex)) { +// Use append_raw with string_view for batched append (more efficient than multiple char appends) +#if __cplusplus >= 202002L + builder.append(true); +#else + builder.append_raw(std::string_view("true", 4)); +#endif + } else { +#if __cplusplus >= 
202002L + builder.append(false); +#else + builder.append_raw(std::string_view("false", 5)); +#endif + } + } else { + builder.append_null(); + } +}; - lua_pushlstring(L, implementation_name.data(), implementation_name.size()); +static void serialize_append_number( + lua_State* L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder& builder, int lindex) { + auto num_result = format_number_as_string(L, lindex); + const char* num_str = num_result.first; + size_t len = num_result.second; + // Use append_raw with string_view for numbers (no quotes) + builder.append_raw(std::string_view(num_str, len)); +}; + +static void serialize_append_string( + lua_State* L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder& builder, int lindex) { + size_t len; + const char* str = lua_tolstring(L, lindex, &len); + builder.escape_and_append_with_quotes(str); +}; - return 1; +static void +serialize_append_array(lua_State* L, + SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder& builder, + int table_index, int array_size, int current_depth, int max_depth) { + bool first = true; + // Get the actual stack index if using relative indexing + if (table_index < 0 && table_index > LUA_REGISTRYINDEX) { + table_index = lua_gettop(L) + table_index + 1; + } + + builder.start_array(); + + for (int i = 1; i <= array_size; i++) { + if (!first) { + builder.append_comma(); + } + first = false; + + // Push the value at index i onto the stack + lua_rawgeti(L, table_index, i); + + // Serialize the value + serialize_data(L, current_depth, max_depth, builder); + // Pop the value from the stack + lua_pop(L, 1); + } + + builder.end_array(); +} + +static void +serialize_append_object(lua_State* L, + SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder& builder, + int current_depth, int max_depth) { + builder.start_object(); + bool first = true; + + // Start iteration with nil key + lua_pushnil(L); + + while (lua_next(L, -2) != 0) { + if (!first) { + builder.append_comma(); + } + first = 
false; + + // Cache key type to avoid multiple lua_type calls + int key_type = lua_type(L, -2); + + // Serialize the key + if (key_type == LUA_TSTRING) { + size_t key_len; + const char* key = lua_tolstring(L, -2, &key_len); + // Always use the proper escape function for string keys + builder.escape_and_append_with_quotes(std::string_view(key, key_len)); + } else if (key_type == LUA_TNUMBER) { + auto key_result = format_number_as_string(L, -2); + const char* key_str = key_result.first; + size_t key_len = key_result.second; + // Numeric keys are formatted as strings with quotes + builder.append('"'); + for (size_t i = 0; i < key_len; i++) { + builder.append(key_str[i]); + } + builder.append('"'); + } else { + const char* type_name = lua_typename(L, key_type); + luaL_error(L, "unsupported key type in table for serialization: %s", type_name); + } + + builder.append_colon(); + + // Serialize the value (it's already on top of stack) + serialize_data(L, current_depth, max_depth, builder); + // Pop value, keep key for next iteration + lua_pop(L, 1); + } + + builder.end_object(); +} + +static void serialize_data(lua_State* L, int current_depth, int max_depth, + SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder& builder) { + // Check depth to prevent stack overflow + if (current_depth > max_depth) { + luaL_error(L, "maximum nesting depth exceeded (limit: %d)", max_depth); + } + + switch (lua_type(L, -1)) { + case LUA_TSTRING: { + serialize_append_string(L, builder, -1); + } break; + case LUA_TNUMBER: { + serialize_append_number(L, builder, -1); + } break; + case LUA_TBOOLEAN: { + serialize_append_bool(L, builder, -1); + } break; + case LUA_TTABLE: { + current_depth++; + int array_size = get_table_array_size(L); + if (array_size > 0) { + // Handle as array + serialize_append_array(L, builder, -1, array_size, current_depth, max_depth); + } else { + // Handle as object + serialize_append_object(L, builder, current_depth, max_depth); + } + } break; + case LUA_TNIL: { + // 
Treat Lua nil as JSON null + builder.append_null(); + } break; + case LUA_TLIGHTUSERDATA: { + // Treat lightuserdata NULL as JSON null + if (lua_touserdata(L, -1) == NULL) { + builder.append_null(); + } else { + luaL_error(L, "unsupported lightuserdata value for serialization"); + } + } break; + default: { + const char* type_name = lua_typename(L, lua_type(L, -1)); + luaL_error(L, "unsupported Lua data type for serialization: %s", type_name); + } + } +}; + +// encode Lua data types into JSON string +static int encode(lua_State* L) { + // the output string once the building is done. + std::string_view json; + + int num_args = lua_gettop(L); + luaL_argcheck(L, num_args >= 1 && num_args <= 2, num_args, "expected 1 or 2 arguments"); + + // Get max_depth: use second argument if provided, otherwise use global setting + int max_depth; + if (num_args == 2) { + max_depth = luaL_checkinteger(L, 2); + if (max_depth < 1) { + return luaL_error(L, "maximum depth must be at least 1"); + } + lua_pop(L, 1); // Remove max_depth argument, leaving table on top + } else { + max_depth = get_max_depth(L); + } + + // Get desired buffer size and recreate buffer if size changed + size_t desired_buffer_size = get_encode_buffer_size(L); + if (encode_buffer == nullptr || encode_buffer_size != desired_buffer_size) { + if (encode_buffer != nullptr) { + delete encode_buffer; + } + encode_buffer = + new SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder(desired_buffer_size); + encode_buffer_size = desired_buffer_size; + } + + // Reuse buffer - clear it but retain capacity, this should mean successive calls + // are efficient in most cases. 
+ encode_buffer->clear(); + + serialize_data(L, 0, max_depth, *encode_buffer); + auto v_err = encode_buffer->view().get(json); + if (v_err) { + return luaL_error(L, "failed to get JSON view from buffer: %s", + simdjson::error_message(v_err)); + } + + // validate utf-8 + if (!encode_buffer->validate_unicode()) { + return luaL_error(L, "encoded JSON contains invalid UTF-8 sequences"); + } + + lua_pushlstring(L, json.data(), json.size()); + return 1; +}; + +// Set maximum nesting depth for encoding +static int setMaxEncodeDepth(lua_State* L) { + int max_depth = luaL_checkinteger(L, 1); + if (max_depth < 1) { + return luaL_error(L, "Maximum encode depth must be at least 1"); + } + set_max_depth(L, max_depth); + return 0; +} + +// Get current maximum nesting depth for encoding +static int getMaxEncodeDepth(lua_State* L) { + lua_pushinteger(L, get_max_depth(L)); + return 1; +} + +// Set encode buffer initial capacity in bytes +static int setEncodeBufferSize(lua_State* L) { + int buffer_size = luaL_checkinteger(L, 1); + if (buffer_size < 1) { + return luaL_error(L, "Encode buffer size must be at least 1"); + } + if ((size_t)buffer_size > DEFAULT_MAX_ENCODE_BUFFER_SIZE) { + return luaL_error(L, "Encode buffer size must not exceed %zu", + (size_t)DEFAULT_MAX_ENCODE_BUFFER_SIZE); + } + set_encode_buffer_size(L, buffer_size); + return 0; +} + +// Get encode buffer initial capacity in bytes +static int getEncodeBufferSize(lua_State* L) { + lua_pushinteger(L, get_encode_buffer_size(L)); + return 1; } // ParsedObject as C++ class #define LUA_MYOBJECT "ParsedObject" -class ParsedObject -{ -private: - simdjson::padded_string json_string; - ondemand::document doc; - std::unique_ptr parser; - -public: - ParsedObject(const char *json_file) - : json_string(padded_string::load(json_file)), - parser(new ondemand::parser{}) - { - this->doc = this->parser.get()->iterate(json_string); - } - ParsedObject(const char *json_str, size_t json_str_len) - : json_string(json_str, json_str_len), - 
parser(new ondemand::parser{}) - { - this->doc = this->parser.get()->iterate(json_string); - } - ~ParsedObject() {} - ondemand::document *get_doc() { return &(this->doc); } +class ParsedObject { + private: + simdjson::padded_string json_string; + ondemand::document doc; + std::unique_ptr parser; + + public: + ParsedObject(const char* json_file) + : json_string(padded_string::load(json_file)), parser(new ondemand::parser{}) { + this->doc = this->parser.get()->iterate(json_string); + } + ParsedObject(const char* json_str, size_t json_str_len) + : json_string(json_str, json_str_len), parser(new ondemand::parser{}) { + this->doc = this->parser.get()->iterate(json_string); + } + ~ParsedObject() { + } + ondemand::document* get_doc() { + return &(this->doc); + } }; -static int ParsedObject_delete(lua_State *L) -{ - delete *reinterpret_cast(lua_touserdata(L, 1)); - return 0; +static int ParsedObject_delete(lua_State* L) { + delete *reinterpret_cast(lua_touserdata(L, 1)); + return 0; } -static int ParsedObject_open(lua_State *L) -{ - size_t json_str_len; - const char *json_str = luaL_checklstring(L, 1, &json_str_len); - - try - { - ParsedObject **parsedObject = - (ParsedObject **)(lua_newuserdata(L, sizeof(ParsedObject *))); - *parsedObject = new ParsedObject(json_str, json_str_len); - luaL_getmetatable(L, LUA_MYOBJECT); - lua_setmetatable(L, -2); - } - catch (simdjson::simdjson_error &error) - { - luaL_error(L, error.what()); - } - return 1; +static int ParsedObject_open(lua_State* L) { + size_t json_str_len; + const char* json_str = luaL_checklstring(L, 1, &json_str_len); + + try { + ParsedObject** parsedObject = (ParsedObject**)(lua_newuserdata(L, sizeof(ParsedObject*))); + *parsedObject = new ParsedObject(json_str, json_str_len); + luaL_getmetatable(L, LUA_MYOBJECT); + lua_setmetatable(L, -2); + } catch (simdjson::simdjson_error& error) { + luaL_error(L, error.what()); + } + return 1; } -static int ParsedObject_open_file(lua_State *L) -{ - const char *json_file = 
luaL_checkstring(L, 1); - - try - { - ParsedObject **parsedObject = - (ParsedObject **)(lua_newuserdata(L, sizeof(ParsedObject *))); - *parsedObject = new ParsedObject(json_file); - luaL_getmetatable(L, LUA_MYOBJECT); - lua_setmetatable(L, -2); - } - catch (simdjson::simdjson_error &error) - { - luaL_error(L, error.what()); - } - - return 1; +static int ParsedObject_open_file(lua_State* L) { + const char* json_file = luaL_checkstring(L, 1); + + try { + ParsedObject** parsedObject = (ParsedObject**)(lua_newuserdata(L, sizeof(ParsedObject*))); + *parsedObject = new ParsedObject(json_file); + luaL_getmetatable(L, LUA_MYOBJECT); + lua_setmetatable(L, -2); + } catch (simdjson::simdjson_error& error) { + luaL_error(L, error.what()); + } + + return 1; } -static int ParsedObject_atPointer(lua_State *L) -{ - ondemand::document *document = - (*reinterpret_cast(luaL_checkudata(L, 1, LUA_MYOBJECT))) - ->get_doc(); - const char *pointer = luaL_checkstring(L, 2); - - try - { - ondemand::value returned_element = document->at_pointer(pointer); - convert_ondemand_element_to_table(L, returned_element); - } - catch (simdjson::simdjson_error &error) - { - luaL_error(L, error.what()); - } - - return 1; +static int ParsedObject_atPointer(lua_State* L) { + ondemand::document* document = + (*reinterpret_cast(luaL_checkudata(L, 1, LUA_MYOBJECT)))->get_doc(); + const char* pointer = luaL_checkstring(L, 2); + + try { + ondemand::value returned_element = document->at_pointer(pointer); + convert_ondemand_element_to_table(L, returned_element); + } catch (simdjson::simdjson_error& error) { + luaL_error(L, error.what()); + } + + return 1; } -static int ParsedObject_newindex(lua_State *L) -{ - luaL_error(L, "This should be treated as a read-only table. We may one day add array access for the elements, and it'll likely not be modifiable."); - return 1; +static int ParsedObject_newindex(lua_State* L) { + luaL_error(L, "This should be treated as a read-only table. 
We may one day " + "add array " + "access for the elements, and it'll likely not be modifiable."); + return 1; } -static const struct luaL_Reg arraylib_m[] = { - {"at", ParsedObject_atPointer}, - {"atPointer", ParsedObject_atPointer}, - {"__newindex", ParsedObject_newindex}, - {"__gc", ParsedObject_delete}, - {NULL, NULL}}; +static const struct luaL_Reg arraylib_m[] = {{"at", ParsedObject_atPointer}, + {"atPointer", ParsedObject_atPointer}, + {"__newindex", ParsedObject_newindex}, + {"__gc", ParsedObject_delete}, + {NULL, NULL}}; -int luaopen_simdjson(lua_State *L) -{ - luaL_newmetatable(L, LUA_MYOBJECT); - lua_pushvalue(L, -1); /* duplicates the metatable */ - lua_setfield(L, -2, "__index"); - luaL_setfuncs(L, arraylib_m, 0); +int luaopen_simdjson(lua_State* L) { + luaL_newmetatable(L, LUA_MYOBJECT); + lua_pushvalue(L, -1); /* duplicates the metatable */ + lua_setfield(L, -2, "__index"); + luaL_setfuncs(L, arraylib_m, 0); - // luaL_newlib(L, luasimdjson); + // luaL_newlib(L, luasimdjson); - lua_newtable(L); - luaL_setfuncs(L, luasimdjson, 0); + lua_newtable(L); + luaL_setfuncs(L, luasimdjson, 0); - lua_pushlightuserdata(L, NULL); - lua_setfield(L, -2, "null"); + lua_pushlightuserdata(L, NULL); + lua_setfield(L, -2, "null"); - lua_pushliteral(L, LUA_SIMDJSON_NAME); - lua_setfield(L, -2, "_NAME"); - lua_pushliteral(L, LUA_SIMDJSON_VERSION); - lua_setfield(L, -2, "_VERSION"); + lua_pushliteral(L, LUA_SIMDJSON_NAME); + lua_setfield(L, -2, "_NAME"); + lua_pushliteral(L, LUA_SIMDJSON_VERSION); + lua_setfield(L, -2, "_VERSION"); - return 1; + return 1; } diff --git a/src/luasimdjson.h b/src/luasimdjson.h index 7f92718..1fee934 100644 --- a/src/luasimdjson.h +++ b/src/luasimdjson.h @@ -12,6 +12,11 @@ extern "C" { static int active_implementation(lua_State*); static int ParsedObject_open(lua_State*); static int ParsedObject_open_file(lua_State*); + static int encode(lua_State*); + static int setMaxEncodeDepth(lua_State*); + static int getMaxEncodeDepth(lua_State*); + 
static int setEncodeBufferSize(lua_State*); + static int getEncodeBufferSize(lua_State*); static const struct luaL_Reg luasimdjson[] = { {"parse", parse}, @@ -19,6 +24,11 @@ extern "C" { {"activeImplementation", active_implementation}, {"open", ParsedObject_open}, {"openFile", ParsedObject_open_file}, + {"encode", encode}, + {"setMaxEncodeDepth", setMaxEncodeDepth}, + {"getMaxEncodeDepth", getMaxEncodeDepth}, + {"setEncodeBufferSize", setEncodeBufferSize}, + {"getEncodeBufferSize", getEncodeBufferSize}, {NULL, NULL}, }; From b2e8e41b59cf7e50a98c4988c80cf49cfa164c37 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 2 Jan 2026 18:04:11 +1100 Subject: [PATCH 02/21] fix header include ordering that broke due to clang-format misconfiguration --- .clang-format | 2 ++ src/luasimdjson.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.clang-format b/.clang-format index 91c33a0..df8a2dd 100644 --- a/.clang-format +++ b/.clang-format @@ -3,6 +3,8 @@ Language: Cpp Standard: c++11 +SortIncludes: false + IndentWidth: 4 TabWidth: 4 UseTab: Always diff --git a/src/luasimdjson.cpp b/src/luasimdjson.cpp index ceda063..80a6b5b 100644 --- a/src/luasimdjson.cpp +++ b/src/luasimdjson.cpp @@ -1,6 +1,6 @@ #include -#include #include +#include #ifdef _WIN32 #include From b46fc24b7cbd19f5237d7aaad88c4c02ae31a021 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 2 Jan 2026 18:10:53 +1100 Subject: [PATCH 03/21] Update tests to use byte sequences so that they are compatible from Lua 5.1 to 5.4 and LuaJIT --- spec/encode_security_spec.lua | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/encode_security_spec.lua b/spec/encode_security_spec.lua index ebdc810..4920168 100644 --- a/spec/encode_security_spec.lua +++ b/spec/encode_security_spec.lua @@ -289,9 +289,9 @@ describe("encode() security and edge cases", function() it("should handle zero-width and special Unicode", function() local data = { - zero_width = "test\u{200B}here", -- 
Zero-width space - rtl_mark = "test\u{200F}mark", -- Right-to-left mark - combining = "e\u{0301}", -- e with acute accent (combining) + zero_width = "test\226\128\139here", -- Zero-width space (U+200B) + rtl_mark = "test\226\128\143mark", -- Right-to-left mark (U+200F) + combining = "e\204\129", -- e with acute accent combining (U+0301) } local encoded = simdjson.encode(data) local decoded = simdjson.parse(encoded) From aec7d12531ad6a52d1fd7e070a3039fda145289c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 2 Jan 2026 18:16:09 +1100 Subject: [PATCH 04/21] fix error on windows builds requiring architecture flags to be added to the compiler syntax --- Makefile.win | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Makefile.win b/Makefile.win index 4eafad4..00a13bf 100644 --- a/Makefile.win +++ b/Makefile.win @@ -7,6 +7,25 @@ LDFLAGS = $(LIBFLAG) LDLIBS = $(LUA_LIBDIR)/$(LUALIB) !endif +# Windows-specific settings +ifeq ($(OS),Windows_NT) + # Detect architecture + ifeq ($(PROCESSOR_ARCHITECTURE),AMD64) + ARCH_FLAG = /D_AMD64_ + else ifeq ($(PROCESSOR_ARCHITECTURE),x86) + ARCH_FLAG = /D_X86_ + else ifeq ($(PROCESSOR_ARCHITECTURE),ARM64) + ARCH_FLAG = /D_ARM64_ + else + # Default to AMD64 if detection fails + ARCH_FLAG = /D_AMD64_ + endif + + # Update CXXFLAGS to include architecture + CXXFLAGS = -I$(LUA_INCDIR) -EHsc -std:c++17 /nologo /MD /O2 $(ARCH_FLAG) +endif + + TARGET = simdjson.dll all: $(TARGET) From e719803e574aa36711e8e568f2a93225c3d6f845 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 2 Jan 2026 18:21:19 +1100 Subject: [PATCH 05/21] nmake not gnu make syntax --- .github/workflows/ci.yml | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 28873b7..a641c78 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,37 +10,43 @@ jobs: os: [linux, macos, macos-arm64] lua: [lua=5.1, lua=5.2, lua=5.3, lua=5.4,
luajit=2.0, luajit=2.1] include: - - os: linux - runner: ubuntu-latest - - os: macos - runner: macos-15-intel - - os: macos-arm64 - runner: macos-latest + - os: linux + runner: ubuntu-latest + - os: macos + runner: macos-15-intel + - os: macos-arm64 + runner: macos-latest exclude: - - os: macos-arm64 - lua: luajit=2.0 + - os: macos-arm64 + lua: luajit=2.0 name: ${{ matrix.os }} (${{ matrix.lua }}) runs-on: ${{ matrix.runner }} steps: # Checks-out the repository under $GITHUB_WORKSPACE. - uses: actions/checkout@v6 - - name: Install libreadline + - name: Install libreadline if: runner.os == 'Linux' run: | sudo apt-get install -y libreadline-dev - name: Install Lua (${{ matrix.lua }}) run: | - pip install git+https://github.com/luarocks/hererocks + pipx install git+https://github.com/luarocks/hererocks + pipx ensurepath + export PATH=$PATH:/root/.local/bin:$HOME/.local/bin hererocks lua_install -r^ --${{ matrix.lua }} env: MACOSX_DEPLOYMENT_TARGET: 11.0 - name: Build lua-simdjson + shell: bash run: | + set -e source lua_install/bin/activate luarocks make - name: Run tests + shell: bash run: | + set -e source lua_install/bin/activate luarocks install lua-cjson2 luarocks install busted @@ -51,7 +57,7 @@ jobs: fail-fast: false matrix: lua: [lua=5.1, lua=5.2, lua=5.3, lua=5.4, luajit=2.0, luajit=2.1] - target: [mingw,vs] + target: [mingw, vs] runs-on: windows-2022 steps: # Checks-out the repository under $GITHUB_WORKSPACE. 
From 4426e276113ea5a3efa878e38f0279af73883435 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 2 Jan 2026 18:22:30 +1100 Subject: [PATCH 06/21] correctly fix the syntax --- Makefile.win | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/Makefile.win b/Makefile.win index 00a13bf..6b80955 100644 --- a/Makefile.win +++ b/Makefile.win @@ -7,23 +7,19 @@ LDFLAGS = $(LIBFLAG) LDLIBS = $(LUA_LIBDIR)/$(LUALIB) !endif -# Windows-specific settings -ifeq ($(OS),Windows_NT) - # Detect architecture - ifeq ($(PROCESSOR_ARCHITECTURE),AMD64) - ARCH_FLAG = /D_AMD64_ - else ifeq ($(PROCESSOR_ARCHITECTURE),x86) - ARCH_FLAG = /D_X86_ - else ifeq ($(PROCESSOR_ARCHITECTURE),ARM64) - ARCH_FLAG = /D_ARM64_ - else - # Default to AMD64 if detection fails - ARCH_FLAG = /D_AMD64_ - endif - - # Update CXXFLAGS to include architecture - CXXFLAGS = -I$(LUA_INCDIR) -EHsc -std:c++17 /nologo /MD /O2 $(ARCH_FLAG) -endif +# Detect architecture for Windows +!ifndef ARCH_FLAG +!if "$(PROCESSOR_ARCHITECTURE)" == "AMD64" || "$(PROCESSOR_ARCHITEW6432)" == "AMD64" +ARCH_FLAG = /D_AMD64_ +!else if "$(PROCESSOR_ARCHITECTURE)" == "x86" +ARCH_FLAG = /D_X86_ +!else if "$(PROCESSOR_ARCHITECTURE)" == "ARM64" +ARCH_FLAG = /D_ARM64_ +!else +# Default to AMD64 if detection fails +ARCH_FLAG = /D_AMD64_ +!endif +!endif TARGET = simdjson.dll From 79064eb992077cc79112f5cbe54bd6eb05fd1a86 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 2 Jan 2026 18:26:33 +1100 Subject: [PATCH 07/21] add ARCH_FLAG to CXX line --- Makefile.win | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.win b/Makefile.win index 6b80955..d115812 100644 --- a/Makefile.win +++ b/Makefile.win @@ -30,7 +30,7 @@ src/luasimdjson.obj: src/luasimdjson.h src/simdjson.h src/simdjson.obj: src/simdjson.h .cpp.obj:: - $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -Fo:"src\\" + $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(ARCH_FLAG) -c $< -Fo:"src\\" $(TARGET): $(OBJ) $(LD) $(LDFLAGS) $** -out:$@ 
$(LDLIBS) From 46e73029ab562cb292df4bee61b3722c0284a615 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 2 Jan 2026 18:37:08 +1100 Subject: [PATCH 08/21] add encode documentation to README.md --- README.md | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 81 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 98e345a..9155efb 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # lua-simdjson + [![Build Status](https://github.com/FourierTransformer/lua-simdjson/actions/workflows/ci.yml/badge.svg?branch=master)](https://github.com/FourierTransformer/lua-simdjson/actions?query=branch%3Amaster) A basic Lua binding to [simdjson](https://simdjson.org). The simdjson library is an incredibly fast JSON parser that uses SIMD instructions and fancy algorithms to parse JSON very quickly. It's been tested with LuaJIT 2.0/2.1 and Lua 5.1, 5.2, 5.3, and 5.4 on linux/osx/windows. It has a general parsing mode and a lazy mode that uses a JSON pointer. @@ -6,26 +7,32 @@ A basic Lua binding to [simdjson](https://simdjson.org). The simdjson library is Current simdjson version: 4.2.3 ## Installation + If all the requirements are met, lua-simdjson can be install via luarocks with: -``` +```bash luarocks install lua-simdjson ``` + Otherwise it can be installed manually by pulling the repo and running luarocks make. ## Requirements - * lua-simdjson only works on 64bit systems. - * a Lua build environment with support for C++11 - * g++ version 7+ and clang++ version 6+ or newer should work! + +* lua-simdjson only works on 64bit systems. +* a Lua build environment with support for C++11 + * g++ version 7+ and clang++ version 6+ or newer should work! ## Parsing + There are two main ways to parse JSON in lua-simdjson: + 1. With `parse`: this parses JSON and returns a Lua table with the parsed values 2. With `open`: this reads in the JSON and keeps it in simdjson's internal format. 
The values can then be accessed using a JSON pointer (examples below) Both of these methods also have support to read files on disc with `parseFile` and `openFile` respectively. If handling JSON from disk, these methods should be used and are incredibly fast. ## Typing + * lua-simdjson uses `simdjson.null` to represent `null` values from parsed JSON. * Any application should use that for comparison as needed. * it uses `lua_pushnumber` and `lua_pushinteger` for JSON floats and ints respectively, so your Lua version may handle that slightly differently. @@ -33,7 +40,9 @@ Both of these methods also have support to read files on disc with `parseFile` a * All other types map as expected. ### Parse some JSON + The `parse` methods will return a normal Lua table that can be interacted with. + ```lua local simdjson = require("simdjson") local response = simdjson.parse([[ @@ -61,7 +70,9 @@ print(fileResponse["statuses"][1]["id"]) ``` ### Open some json + The `open` methods currently require the use of a JSON pointer, but are very quick. They are best used when you only need a part of a response. In the example below, it could be useful for just getting the `Thumnail` object with `:atPointer("/Image/Thumbnail")` which will then only create a Lua table with those specific values. + ```lua local simdjson = require("simdjson") local response = simdjson.open([[ @@ -93,10 +104,75 @@ The `open` and `parse` codeblocks should print out the same values. It's worth n This lazy style of using the simdjson data structure could also be used with array access in the future. +## Encoding + +The `encode` method converts Lua tables into JSON strings. It supports nested tables, arrays, and all standard JSON types. 
+ +```lua +local simdjson = require("simdjson") + +-- Encode a simple table +local data = { + name = "John Doe", + age = 30, + active = true, + score = 95.5 +} +local json = simdjson.encode(data) +print(json) -- {"name":"John Doe","age":30,"active":true,"score":95.5} + +-- Encode nested structures +local complex = { + user = { + id = 123, + tags = {"lua", "json", "fast"} + }, + metadata = { + created = "2024-01-01", + count = 42 + } +} +local json = simdjson.encode(complex) + +-- Use simdjson.null for JSON null values +local withNull = { + value = simdjson.null, + name = "test" +} +local json = simdjson.encode(withNull) -- {"value":null,"name":"test"} + +-- Optional: specify maximum nesting depth (default is 1024) +local deepData = { level1 = { level2 = { level3 = "value" } } } +local json = simdjson.encode(deepData, 10) -- max depth of 10 +``` + +You can also configure global encoding settings: + +```lua +-- Set maximum nesting depth globally (default: 1024) +simdjson.setMaxEncodeDepth(512) +local currentDepth = simdjson.getMaxEncodeDepth() + +-- Set encode buffer size in bytes (default: 16KB) +simdjson.setEncodeBufferSize(32 * 1024) -- 32KB +local currentSize = simdjson.getEncodeBufferSize() +``` + +**Encoding behavior:** + +* Tables with consecutive integer keys starting at 1 are encoded as JSON arrays +* All other tables are encoded as JSON objects +* Numbers are formatted as integers when possible, or floats with 14 digits of precision +* Integers larger than 2^53 are encoded in scientific notation for JSON compatibility +* Strings are automatically escaped according to JSON specifications +* `simdjson.null` represents JSON `null` + ## Error Handling + lua-simdjson will error out with any errors from simdjson encountered while parsing. They are very good at helping identify what has gone wrong during parsing. ## Benchmarks + I ran some benchmarks against lua-cjson, rapidjson, and dkjson. 
For each test, I loaded the JSON into memory, and then had the parsers go through each file 100 times and took the average time it took to parse to a Lua table. You can see all the results in the [benchmark](benchmark/) folder. I've included a sample output run via Lua (the LuaJIT graph looks very similar, also in the benchmark folder). The y-axis is logarithmic, so every half step down is twice as fast. ![Lua Performance Column Chart](benchmark/lua-perf.png) @@ -116,7 +192,7 @@ lua-simdjson, like the simdjson library performs better on more modern hardware. * since it's an external module, it's not quite as easy to just grab the file and go (dkjson has you covered here!) ## Philosophy -I plan to keep it fairly inline with what the original simdjson library is capable of doing, which really means not adding too many additional options. The big _thing_ that's missing so far is encoding a lua table to JSON. I may add in an encoder at some point. +I plan to keep it fairly inline with what the original simdjson library is capable of doing, which really means not adding too many additional options. ## Licenses * The jsonexamples, src/simdjson.cpp, src/simdjson.h are unmodified from the released version simdjson under the Apache License 2.0. 
From 233e04e06e874de3d5454d72445d69678b04615f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 2 Jan 2026 18:46:27 +1100 Subject: [PATCH 09/21] update clang-format to move pointer alignment right instead of left --- .clang-format | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.clang-format b/.clang-format index df8a2dd..8f793bf 100644 --- a/.clang-format +++ b/.clang-format @@ -15,4 +15,5 @@ AlignConsecutiveAssignments: false AlignConsecutiveDeclarations: false AllowShortFunctionsOnASingleLine: None IndentCaseLabels: true -PointerAlignment: Left +PointerAlignment: Right +SpaceBeforeParens: ControlStatements From 0c4e66b333ceecd07fb29c7f4b4e35c77c2a3855 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 2 Jan 2026 19:01:33 +1100 Subject: [PATCH 10/21] return formatting to what has been used upstream --- .clang-format | 8 +- src/luasimdjson.cpp | 1059 +++++++++++++++++++++---------------------- src/luasimdjson.h | 48 +- 3 files changed, 545 insertions(+), 570 deletions(-) diff --git a/.clang-format b/.clang-format index 8f793bf..958627b 100644 --- a/.clang-format +++ b/.clang-format @@ -5,10 +5,10 @@ Standard: c++11 SortIncludes: false -IndentWidth: 4 -TabWidth: 4 -UseTab: Always -ColumnLimit: 100 +IndentWidth: 2 +TabWidth: 2 +UseTab: Never +ColumnLimit: 160 BreakBeforeBraces: Attach IndentExternBlock: Indent AlignConsecutiveAssignments: false diff --git a/src/luasimdjson.cpp b/src/luasimdjson.cpp index 80a6b5b..078dbe3 100644 --- a/src/luasimdjson.cpp +++ b/src/luasimdjson.cpp @@ -35,115 +35,112 @@ using namespace simdjson; ** Stolen from: http://lua-users.org/wiki/CompatibilityWithLuaFive ** Adapted from Lua 5.2.0 */ -static void luaL_setfuncs(lua_State* L, const luaL_Reg* l, int nup) { - luaL_checkstack(L, nup + 1, "too many upvalues"); - for (; l->name != NULL; l++) { /* fill the table with given functions */ - int i; - lua_pushstring(L, l->name); - for (i = 0; i < nup; i++) /* copy upvalues to the top */ - lua_pushvalue(L, -(nup + 1));
- lua_pushcclosure(L, l->func, nup); /* closure with those upvalues */ - lua_settable(L, -(nup + 3)); - } - lua_pop(L, nup); /* remove upvalues */ +static void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup) { + luaL_checkstack(L, nup + 1, "too many upvalues"); + for (; l->name != NULL; l++) { /* fill the table with given functions */ + int i; + lua_pushstring(L, l->name); + for (i = 0; i < nup; i++) /* copy upvalues to the top */ + lua_pushvalue(L, -(nup + 1)); + lua_pushcclosure(L, l->func, nup); /* closure with those upvalues */ + lua_settable(L, -(nup + 3)); + } + lua_pop(L, nup); /* remove upvalues */ } #endif ondemand::parser ondemand_parser; simdjson::padded_string jsonbuffer; -thread_local simdjson::builder::string_builder* encode_buffer = - nullptr; // Reused across encode() calls -thread_local size_t encode_buffer_size = 0; // Track current buffer size - -template void convert_ondemand_element_to_table(lua_State* L, T& element) { - static_assert(std::is_base_of::value || - std::is_base_of::value, - "type parameter must be document or value"); - - switch (element.type()) { - case ondemand::json_type::array: { - int count = 1; - lua_newtable(L); - - for (ondemand::value child : element.get_array()) { - lua_pushinteger(L, count); - convert_ondemand_element_to_table(L, child); - lua_settable(L, -3); - count = count + 1; - } - break; - } - - case ondemand::json_type::object: - lua_newtable(L); - for (ondemand::field field : element.get_object()) { - std::string_view s = field.unescaped_key(); - lua_pushlstring(L, s.data(), s.size()); - convert_ondemand_element_to_table(L, field.value()); - lua_settable(L, -3); - } - break; - - case ondemand::json_type::number: { - ondemand::number number = element.get_number(); - ondemand::number_type number_type = number.get_number_type(); - switch (number_type) { - case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::floating_point_number: - lua_pushnumber(L, element.get_double()); - break; - - case 
SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::signed_integer: - lua_pushinteger(L, element.get_int64()); - break; - - case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::unsigned_integer: { +thread_local simdjson::builder::string_builder *encode_buffer = nullptr; // Reused across encode() calls +thread_local size_t encode_buffer_size = 0; // Track current buffer size + +template void convert_ondemand_element_to_table(lua_State *L, T &element) { + static_assert(std::is_base_of::value || std::is_base_of::value, "type parameter must be document or value"); + + switch (element.type()) { + case ondemand::json_type::array: { + int count = 1; + lua_newtable(L); + + for (ondemand::value child : element.get_array()) { + lua_pushinteger(L, count); + convert_ondemand_element_to_table(L, child); + lua_settable(L, -3); + count = count + 1; + } + break; + } + + case ondemand::json_type::object: + lua_newtable(L); + for (ondemand::field field : element.get_object()) { + std::string_view s = field.unescaped_key(); + lua_pushlstring(L, s.data(), s.size()); + convert_ondemand_element_to_table(L, field.value()); + lua_settable(L, -3); + } + break; + + case ondemand::json_type::number: { + ondemand::number number = element.get_number(); + ondemand::number_type number_type = number.get_number_type(); + switch (number_type) { + case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::floating_point_number: + lua_pushnumber(L, element.get_double()); + break; + + case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::signed_integer: + lua_pushinteger(L, element.get_int64()); + break; + + case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::unsigned_integer: { // a uint64 can be greater than an int64, so we must check how large and pass as // a number if larger but LUA_MAXINTEGER (which is only defined in 5.3+) #if defined(LUA_MAXINTEGER) - uint64_t actual_value = element.get_uint64(); - if (actual_value > LUA_MAXINTEGER) { - lua_pushnumber(L, actual_value); - } else { - lua_pushinteger(L, 
actual_value); - } + uint64_t actual_value = element.get_uint64(); + if (actual_value > LUA_MAXINTEGER) { + lua_pushnumber(L, actual_value); + } else { + lua_pushinteger(L, actual_value); + } #else - lua_pushnumber(L, element.get_double()); + lua_pushnumber(L, element.get_double()); #endif - break; - } - - case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::big_integer: - lua_pushnumber(L, element.get_double()); - break; - } - break; - } - - case ondemand::json_type::string: { - std::string_view s = element.get_string(); - lua_pushlstring(L, s.data(), s.size()); - break; - } - - case ondemand::json_type::boolean: - lua_pushboolean(L, element.get_bool()); - break; - - case ondemand::json_type::null: - // calling is_null().value() will trigger an exception if the value - // is invalid - if (element.is_null().value()) { - lua_pushlightuserdata(L, NULL); - } - break; - - case ondemand::json_type::unknown: - default: - luaL_error(L, "simdjson::ondemand::json_type::unknown or unsupported " - "type " - "encountered"); - break; - } + break; + } + + case SIMDJSON_BUILTIN_IMPLEMENTATION::number_type::big_integer: + lua_pushnumber(L, element.get_double()); + break; + } + break; + } + + case ondemand::json_type::string: { + std::string_view s = element.get_string(); + lua_pushlstring(L, s.data(), s.size()); + break; + } + + case ondemand::json_type::boolean: + lua_pushboolean(L, element.get_bool()); + break; + + case ondemand::json_type::null: + // calling is_null().value() will trigger an exception if the value + // is invalid + if (element.is_null().value()) { + lua_pushlightuserdata(L, NULL); + } + break; + + case ondemand::json_type::unknown: + default: + luaL_error(L, "simdjson::ondemand::json_type::unknown or unsupported " + "type " + "encountered"); + break; + } } // from @@ -151,566 +148,544 @@ template void convert_ondemand_element_to_table(lua_State* L, T& el // Returns the default size of the page in bytes on this system. 
long page_size() { #ifdef _WIN32 - SYSTEM_INFO sysInfo; - GetSystemInfo(&sysInfo); - long pagesize = sysInfo.dwPageSize; + SYSTEM_INFO sysInfo; + GetSystemInfo(&sysInfo); + long pagesize = sysInfo.dwPageSize; #else - long pagesize = sysconf(_SC_PAGESIZE); + long pagesize = sysconf(_SC_PAGESIZE); #endif - return pagesize; + return pagesize; } // allows us to reuse a json buffer pretty safely // Returns true if the buffer + len + simdjson::SIMDJSON_PADDING crosses the // page boundary. -bool need_allocation(const char* buf, size_t len) { - return ((reinterpret_cast(buf + len - 1) % page_size()) < - simdjson::SIMDJSON_PADDING); +bool need_allocation(const char *buf, size_t len) { + return ((reinterpret_cast(buf + len - 1) % page_size()) < simdjson::SIMDJSON_PADDING); } -simdjson::padded_string_view get_padded_string_view(const char* buf, size_t len, - simdjson::padded_string& jsonbuffer) { - if (need_allocation(buf, len)) { // unlikely case - jsonbuffer = simdjson::padded_string(buf, len); - return jsonbuffer; - } else { // no reallcation needed (very likely) - return simdjson::padded_string_view(buf, len, len + simdjson::SIMDJSON_PADDING); - } +simdjson::padded_string_view get_padded_string_view(const char *buf, size_t len, simdjson::padded_string &jsonbuffer) { + if (need_allocation(buf, len)) { // unlikely case + jsonbuffer = simdjson::padded_string(buf, len); + return jsonbuffer; + } else { // no reallcation needed (very likely) + return simdjson::padded_string_view(buf, len, len + simdjson::SIMDJSON_PADDING); + } } -static int parse(lua_State* L) { - size_t json_str_len; - const char* json_str = luaL_checklstring(L, 1, &json_str_len); +static int parse(lua_State *L) { + size_t json_str_len; + const char *json_str = luaL_checklstring(L, 1, &json_str_len); - ondemand::document doc; + ondemand::document doc; - try { - // makes a padded_string_view for a bit of quickness! 
- doc = ondemand_parser.iterate(get_padded_string_view(json_str, json_str_len, jsonbuffer)); - convert_ondemand_element_to_table(L, doc); - } catch (simdjson::simdjson_error& error) { - luaL_error(L, error.what()); - } + try { + // makes a padded_string_view for a bit of quickness! + doc = ondemand_parser.iterate(get_padded_string_view(json_str, json_str_len, jsonbuffer)); + convert_ondemand_element_to_table(L, doc); + } catch (simdjson::simdjson_error &error) { + luaL_error(L, error.what()); + } - return 1; + return 1; } -static int parse_file(lua_State* L) { - const char* json_file = luaL_checkstring(L, 1); +static int parse_file(lua_State *L) { + const char *json_file = luaL_checkstring(L, 1); - padded_string json_string; - ondemand::document doc; + padded_string json_string; + ondemand::document doc; - try { - json_string = padded_string::load(json_file); - doc = ondemand_parser.iterate(json_string); - convert_ondemand_element_to_table(L, doc); - } catch (simdjson::simdjson_error& error) { - luaL_error(L, error.what()); - } + try { + json_string = padded_string::load(json_file); + doc = ondemand_parser.iterate(json_string); + convert_ondemand_element_to_table(L, doc); + } catch (simdjson::simdjson_error &error) { + luaL_error(L, error.what()); + } - return 1; + return 1; } -static int active_implementation(lua_State* L) { - const auto& implementation = simdjson::get_active_implementation(); - std::string name = implementation->name(); - const std::string description = implementation->description(); - const std::string implementation_name = name + " (" + description + ")"; +static int active_implementation(lua_State *L) { + const auto &implementation = simdjson::get_active_implementation(); + std::string name = implementation->name(); + const std::string description = implementation->description(); + const std::string implementation_name = name + " (" + description + ")"; - lua_pushlstring(L, implementation_name.data(), implementation_name.size()); + 
lua_pushlstring(L, implementation_name.data(), implementation_name.size()); - return 1; + return 1; } // Add forward declaration near the top after includes -static void serialize_data(lua_State* L, int current_depth, int max_depth, - simdjson::builder::string_builder& builder); +static void serialize_data(lua_State *L, int current_depth, int max_depth, simdjson::builder::string_builder &builder); // Helper function to get max encode depth from registry -static int get_max_depth(lua_State* L) { - lua_pushstring(L, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY); - lua_gettable(L, LUA_REGISTRYINDEX); +static int get_max_depth(lua_State *L) { + lua_pushstring(L, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY); + lua_gettable(L, LUA_REGISTRYINDEX); - int max_depth = DEFAULT_MAX_ENCODE_DEPTH; - if (lua_isnumber(L, -1)) { - max_depth = lua_tointeger(L, -1); - } - lua_pop(L, 1); + int max_depth = DEFAULT_MAX_ENCODE_DEPTH; + if (lua_isnumber(L, -1)) { + max_depth = lua_tointeger(L, -1); + } + lua_pop(L, 1); - return max_depth; + return max_depth; } // Helper function to set max encode depth in registry -static void set_max_depth(lua_State* L, int max_depth) { - lua_pushstring(L, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY); - lua_pushinteger(L, max_depth); - lua_settable(L, LUA_REGISTRYINDEX); +static void set_max_depth(lua_State *L, int max_depth) { + lua_pushstring(L, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY); + lua_pushinteger(L, max_depth); + lua_settable(L, LUA_REGISTRYINDEX); } // Helper function to get encode buffer size from registry -static size_t get_encode_buffer_size(lua_State* L) { - lua_pushstring(L, LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY); - lua_gettable(L, LUA_REGISTRYINDEX); +static size_t get_encode_buffer_size(lua_State *L) { + lua_pushstring(L, LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY); + lua_gettable(L, LUA_REGISTRYINDEX); - size_t buffer_size = DEFAULT_ENCODE_BUFFER_SIZE; - if (lua_isnumber(L, -1)) { - buffer_size = lua_tointeger(L, -1); - } - lua_pop(L, 1); + size_t buffer_size = 
DEFAULT_ENCODE_BUFFER_SIZE; + if (lua_isnumber(L, -1)) { + buffer_size = lua_tointeger(L, -1); + } + lua_pop(L, 1); - return buffer_size; + return buffer_size; } // Helper function to set encode buffer size in registry -static void set_encode_buffer_size(lua_State* L, size_t buffer_size) { - lua_pushstring(L, LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY); - lua_pushinteger(L, buffer_size); - lua_settable(L, LUA_REGISTRYINDEX); +static void set_encode_buffer_size(lua_State *L, size_t buffer_size) { + lua_pushstring(L, LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY); + lua_pushinteger(L, buffer_size); + lua_settable(L, LUA_REGISTRYINDEX); } // Check if table on stack top is a valid array and return its length // Returns -1 if not an array, otherwise returns maximum index -static int get_table_array_size(lua_State* L) { - double key_num; - int max_index = 0; - int element_count = 0; - - lua_pushnil(L); - while (lua_next(L, -2) != 0) { - // Check if key is a number - if (lua_type(L, -2) == LUA_TNUMBER) { - key_num = lua_tonumber(L, -2); - // Check if it's a positive integer - if (floor(key_num) == key_num && key_num >= 1) { - if (static_cast(key_num) > max_index) { - max_index = static_cast(key_num); - } - element_count++; - lua_pop(L, 1); - continue; - } - } - - // Non-integer key found - not an array - lua_pop(L, 2); - return -1; - } - - // Check if array is contiguous (element count should equal max index) - if (element_count > 0 && element_count != max_index) { - return -1; - } - - return max_index; +static int get_table_array_size(lua_State *L) { + double key_num; + int max_index = 0; + int element_count = 0; + + lua_pushnil(L); + while (lua_next(L, -2) != 0) { + // Check if key is a number + if (lua_type(L, -2) == LUA_TNUMBER) { + key_num = lua_tonumber(L, -2); + // Check if it's a positive integer + if (floor(key_num) == key_num && key_num >= 1) { + if (static_cast(key_num) > max_index) { + max_index = static_cast(key_num); + } + element_count++; + lua_pop(L, 1); + continue; + } + 
} + + // Non-integer key found - not an array + lua_pop(L, 2); + return -1; + } + + // Check if array is contiguous (element count should equal max index) + if (element_count > 0 && element_count != max_index) { + return -1; + } + + return max_index; } // Helper function to format a number as a string // Returns pointer to thread-local buffer and length -inline std::pair format_number_as_string(lua_State* L, int index) { - thread_local char buffer[32]; - size_t len; +inline std::pair format_number_as_string(lua_State *L, int index) { + thread_local char buffer[32]; + size_t len; - // JSON numbers are represented as doubles, which have limited precision - // for integers beyond 2^53. Check this first regardless of Lua version. + // JSON numbers are represented as doubles, which have limited precision + // for integers beyond 2^53. Check this first regardless of Lua version. #if defined(LUA_MAXINTEGER) - const double max_safe_int = LUA_MAXINTEGER; + const double max_safe_int = LUA_MAXINTEGER; #else - const double max_safe_int = 9007199254740992.0; // 2^53 + const double max_safe_int = 9007199254740992.0; // 2^53 #endif #if LUA_VERSION_NUM >= 503 - // Lua 5.3+ has native integer type - if (lua_isinteger(L, index)) { - lua_Integer num = lua_tointeger(L, index); - // Check if the integer fits safely in a JSON number (double) - if (num > -max_safe_int && num < max_safe_int) { - len = snprintf(buffer, sizeof(buffer), "%lld", (long long)num); - return {buffer, len}; - } - // Too large for safe integer representation, format as float - len = snprintf(buffer, sizeof(buffer), "%.14g", (double)num); - return {buffer, len}; - } + // Lua 5.3+ has native integer type + if (lua_isinteger(L, index)) { + lua_Integer num = lua_tointeger(L, index); + // Check if the integer fits safely in a JSON number (double) + if (num > -max_safe_int && num < max_safe_int) { + len = snprintf(buffer, sizeof(buffer), "%lld", (long long)num); + return {buffer, len}; + } + // Too large for safe integer 
representation, format as float + len = snprintf(buffer, sizeof(buffer), "%.14g", (double)num); + return {buffer, len}; + } #else - // For Lua 5.1/5.2, check if the number is an integer value - { - double num = lua_tonumber(L, index); - if (std::floor(num) == num && num <= LLONG_MAX && num >= LLONG_MIN) { - if (num > -max_safe_int && num < max_safe_int) { - len = snprintf(buffer, sizeof(buffer), "%lld", static_cast(num)); - return {buffer, len}; - } - } - } + // For Lua 5.1/5.2, check if the number is an integer value + { + double num = lua_tonumber(L, index); + if (std::floor(num) == num && num <= LLONG_MAX && num >= LLONG_MIN) { + if (num > -max_safe_int && num < max_safe_int) { + len = snprintf(buffer, sizeof(buffer), "%lld", static_cast(num)); + return {buffer, len}; + } + } + } #endif - // For floats or large numbers, convert to string with %.14g - lua_Number num = lua_tonumber(L, index); - len = snprintf(buffer, sizeof(buffer), "%.14g", (double)num); - return {buffer, len}; + // For floats or large numbers, convert to string with %.14g + lua_Number num = lua_tonumber(L, index); + len = snprintf(buffer, sizeof(buffer), "%.14g", (double)num); + return {buffer, len}; } -inline void serialize_append_bool(lua_State* L, - SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder& builder, - int lindex) { - // check if it is really a boolean - if (lua_isboolean(L, lindex)) { - if (lua_toboolean(L, lindex)) { +inline void serialize_append_bool(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int lindex) { + // check if it is really a boolean + if (lua_isboolean(L, lindex)) { + if (lua_toboolean(L, lindex)) { // Use append_raw with string_view for batched append (more efficient than multiple char appends) #if __cplusplus >= 202002L - builder.append(true); + builder.append(true); #else - builder.append_raw(std::string_view("true", 4)); + builder.append_raw(std::string_view("true", 4)); #endif - } else { + } else { #if __cplusplus >= 
202002L - builder.append(false); + builder.append(false); #else - builder.append_raw(std::string_view("false", 5)); + builder.append_raw(std::string_view("false", 5)); #endif - } - } else { - builder.append_null(); - } + } + } else { + builder.append_null(); + } }; -static void serialize_append_number( - lua_State* L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder& builder, int lindex) { - auto num_result = format_number_as_string(L, lindex); - const char* num_str = num_result.first; - size_t len = num_result.second; - // Use append_raw with string_view for numbers (no quotes) - builder.append_raw(std::string_view(num_str, len)); +static void serialize_append_number(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int lindex) { + auto num_result = format_number_as_string(L, lindex); + const char *num_str = num_result.first; + size_t len = num_result.second; + // Use append_raw with string_view for numbers (no quotes) + builder.append_raw(std::string_view(num_str, len)); }; -static void serialize_append_string( - lua_State* L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder& builder, int lindex) { - size_t len; - const char* str = lua_tolstring(L, lindex, &len); - builder.escape_and_append_with_quotes(str); +static void serialize_append_string(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int lindex) { + size_t len; + const char *str = lua_tolstring(L, lindex, &len); + builder.escape_and_append_with_quotes(str); }; -static void -serialize_append_array(lua_State* L, - SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder& builder, - int table_index, int array_size, int current_depth, int max_depth) { - bool first = true; - // Get the actual stack index if using relative indexing - if (table_index < 0 && table_index > LUA_REGISTRYINDEX) { - table_index = lua_gettop(L) + table_index + 1; - } - - builder.start_array(); - - for (int i = 1; i <= array_size; i++) { - if (!first) { - 
builder.append_comma(); - } - first = false; - - // Push the value at index i onto the stack - lua_rawgeti(L, table_index, i); - - // Serialize the value - serialize_data(L, current_depth, max_depth, builder); - // Pop the value from the stack - lua_pop(L, 1); - } - - builder.end_array(); +static void serialize_append_array(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int table_index, int array_size, + int current_depth, int max_depth) { + bool first = true; + // Get the actual stack index if using relative indexing + if (table_index < 0 && table_index > LUA_REGISTRYINDEX) { + table_index = lua_gettop(L) + table_index + 1; + } + + builder.start_array(); + + for (int i = 1; i <= array_size; i++) { + if (!first) { + builder.append_comma(); + } + first = false; + + // Push the value at index i onto the stack + lua_rawgeti(L, table_index, i); + + // Serialize the value + serialize_data(L, current_depth, max_depth, builder); + // Pop the value from the stack + lua_pop(L, 1); + } + + builder.end_array(); } -static void -serialize_append_object(lua_State* L, - SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder& builder, - int current_depth, int max_depth) { - builder.start_object(); - bool first = true; - - // Start iteration with nil key - lua_pushnil(L); - - while (lua_next(L, -2) != 0) { - if (!first) { - builder.append_comma(); - } - first = false; - - // Cache key type to avoid multiple lua_type calls - int key_type = lua_type(L, -2); - - // Serialize the key - if (key_type == LUA_TSTRING) { - size_t key_len; - const char* key = lua_tolstring(L, -2, &key_len); - // Always use the proper escape function for string keys - builder.escape_and_append_with_quotes(std::string_view(key, key_len)); - } else if (key_type == LUA_TNUMBER) { - auto key_result = format_number_as_string(L, -2); - const char* key_str = key_result.first; - size_t key_len = key_result.second; - // Numeric keys are formatted as strings with quotes - 
builder.append('"'); - for (size_t i = 0; i < key_len; i++) { - builder.append(key_str[i]); - } - builder.append('"'); - } else { - const char* type_name = lua_typename(L, key_type); - luaL_error(L, "unsupported key type in table for serialization: %s", type_name); - } - - builder.append_colon(); - - // Serialize the value (it's already on top of stack) - serialize_data(L, current_depth, max_depth, builder); - // Pop value, keep key for next iteration - lua_pop(L, 1); - } - - builder.end_object(); +static void serialize_append_object(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int current_depth, int max_depth) { + builder.start_object(); + bool first = true; + + // Start iteration with nil key + lua_pushnil(L); + + while (lua_next(L, -2) != 0) { + if (!first) { + builder.append_comma(); + } + first = false; + + // Cache key type to avoid multiple lua_type calls + int key_type = lua_type(L, -2); + + // Serialize the key + if (key_type == LUA_TSTRING) { + size_t key_len; + const char *key = lua_tolstring(L, -2, &key_len); + // Always use the proper escape function for string keys + builder.escape_and_append_with_quotes(std::string_view(key, key_len)); + } else if (key_type == LUA_TNUMBER) { + auto key_result = format_number_as_string(L, -2); + const char *key_str = key_result.first; + size_t key_len = key_result.second; + // Numeric keys are formatted as strings with quotes + builder.append('"'); + for (size_t i = 0; i < key_len; i++) { + builder.append(key_str[i]); + } + builder.append('"'); + } else { + const char *type_name = lua_typename(L, key_type); + luaL_error(L, "unsupported key type in table for serialization: %s", type_name); + } + + builder.append_colon(); + + // Serialize the value (it's already on top of stack) + serialize_data(L, current_depth, max_depth, builder); + // Pop value, keep key for next iteration + lua_pop(L, 1); + } + + builder.end_object(); } -static void serialize_data(lua_State* L, int current_depth, 
int max_depth, - SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder& builder) { - // Check depth to prevent stack overflow - if (current_depth > max_depth) { - luaL_error(L, "maximum nesting depth exceeded (limit: %d)", max_depth); - } - - switch (lua_type(L, -1)) { - case LUA_TSTRING: { - serialize_append_string(L, builder, -1); - } break; - case LUA_TNUMBER: { - serialize_append_number(L, builder, -1); - } break; - case LUA_TBOOLEAN: { - serialize_append_bool(L, builder, -1); - } break; - case LUA_TTABLE: { - current_depth++; - int array_size = get_table_array_size(L); - if (array_size > 0) { - // Handle as array - serialize_append_array(L, builder, -1, array_size, current_depth, max_depth); - } else { - // Handle as object - serialize_append_object(L, builder, current_depth, max_depth); - } - } break; - case LUA_TNIL: { - // Treat Lua nil as JSON null - builder.append_null(); - } break; - case LUA_TLIGHTUSERDATA: { - // Treat lightuserdata NULL as JSON null - if (lua_touserdata(L, -1) == NULL) { - builder.append_null(); - } else { - luaL_error(L, "unsupported lightuserdata value for serialization"); - } - } break; - default: { - const char* type_name = lua_typename(L, lua_type(L, -1)); - luaL_error(L, "unsupported Lua data type for serialization: %s", type_name); - } - } +static void serialize_data(lua_State *L, int current_depth, int max_depth, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder) { + // Check depth to prevent stack overflow + if (current_depth > max_depth) { + luaL_error(L, "maximum nesting depth exceeded (limit: %d)", max_depth); + } + + switch (lua_type(L, -1)) { + case LUA_TSTRING: { + serialize_append_string(L, builder, -1); + } break; + case LUA_TNUMBER: { + serialize_append_number(L, builder, -1); + } break; + case LUA_TBOOLEAN: { + serialize_append_bool(L, builder, -1); + } break; + case LUA_TTABLE: { + current_depth++; + int array_size = get_table_array_size(L); + if (array_size > 0) { + // Handle as array + 
serialize_append_array(L, builder, -1, array_size, current_depth, max_depth); + } else { + // Handle as object + serialize_append_object(L, builder, current_depth, max_depth); + } + } break; + case LUA_TNIL: { + // Treat Lua nil as JSON null + builder.append_null(); + } break; + case LUA_TLIGHTUSERDATA: { + // Treat lightuserdata NULL as JSON null + if (lua_touserdata(L, -1) == NULL) { + builder.append_null(); + } else { + luaL_error(L, "unsupported lightuserdata value for serialization"); + } + } break; + default: { + const char *type_name = lua_typename(L, lua_type(L, -1)); + luaL_error(L, "unsupported Lua data type for serialization: %s", type_name); + } + } }; // encode Lua data types into JSON string -static int encode(lua_State* L) { - // the output string once the building is done. - std::string_view json; - - int num_args = lua_gettop(L); - luaL_argcheck(L, num_args >= 1 && num_args <= 2, num_args, "expected 1 or 2 arguments"); - - // Get max_depth: use second argument if provided, otherwise use global setting - int max_depth; - if (num_args == 2) { - max_depth = luaL_checkinteger(L, 2); - if (max_depth < 1) { - return luaL_error(L, "maximum depth must be at least 1"); - } - lua_pop(L, 1); // Remove max_depth argument, leaving table on top - } else { - max_depth = get_max_depth(L); - } - - // Get desired buffer size and recreate buffer if size changed - size_t desired_buffer_size = get_encode_buffer_size(L); - if (encode_buffer == nullptr || encode_buffer_size != desired_buffer_size) { - if (encode_buffer != nullptr) { - delete encode_buffer; - } - encode_buffer = - new SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder(desired_buffer_size); - encode_buffer_size = desired_buffer_size; - } - - // Reuse buffer - clear it but retain capacity, this should mean successive calls - // are efficient in most cases. 
- encode_buffer->clear(); - - serialize_data(L, 0, max_depth, *encode_buffer); - auto v_err = encode_buffer->view().get(json); - if (v_err) { - return luaL_error(L, "failed to get JSON view from buffer: %s", - simdjson::error_message(v_err)); - } - - // validate utf-8 - if (!encode_buffer->validate_unicode()) { - return luaL_error(L, "encoded JSON contains invalid UTF-8 sequences"); - } - - lua_pushlstring(L, json.data(), json.size()); - return 1; +static int encode(lua_State *L) { + // the output string once the building is done. + std::string_view json; + + int num_args = lua_gettop(L); + luaL_argcheck(L, num_args >= 1 && num_args <= 2, num_args, "expected 1 or 2 arguments"); + + // Get max_depth: use second argument if provided, otherwise use global setting + int max_depth; + if (num_args == 2) { + max_depth = luaL_checkinteger(L, 2); + if (max_depth < 1) { + return luaL_error(L, "maximum depth must be at least 1"); + } + lua_pop(L, 1); // Remove max_depth argument, leaving table on top + } else { + max_depth = get_max_depth(L); + } + + // Get desired buffer size and recreate buffer if size changed + size_t desired_buffer_size = get_encode_buffer_size(L); + if (encode_buffer == nullptr || encode_buffer_size != desired_buffer_size) { + if (encode_buffer != nullptr) { + delete encode_buffer; + } + encode_buffer = new SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder(desired_buffer_size); + encode_buffer_size = desired_buffer_size; + } + + // Reuse buffer - clear it but retain capacity, this should mean successive calls + // are efficient in most cases. 
+ encode_buffer->clear(); + + serialize_data(L, 0, max_depth, *encode_buffer); + auto v_err = encode_buffer->view().get(json); + if (v_err) { + return luaL_error(L, "failed to get JSON view from buffer: %s", simdjson::error_message(v_err)); + } + + // validate utf-8 + if (!encode_buffer->validate_unicode()) { + return luaL_error(L, "encoded JSON contains invalid UTF-8 sequences"); + } + + lua_pushlstring(L, json.data(), json.size()); + return 1; }; // Set maximum nesting depth for encoding -static int setMaxEncodeDepth(lua_State* L) { - int max_depth = luaL_checkinteger(L, 1); - if (max_depth < 1) { - return luaL_error(L, "Maximum encode depth must be at least 1"); - } - set_max_depth(L, max_depth); - return 0; +static int setMaxEncodeDepth(lua_State *L) { + int max_depth = luaL_checkinteger(L, 1); + if (max_depth < 1) { + return luaL_error(L, "Maximum encode depth must be at least 1"); + } + set_max_depth(L, max_depth); + return 0; } // Get current maximum nesting depth for encoding -static int getMaxEncodeDepth(lua_State* L) { - lua_pushinteger(L, get_max_depth(L)); - return 1; +static int getMaxEncodeDepth(lua_State *L) { + lua_pushinteger(L, get_max_depth(L)); + return 1; } // Set encode buffer initial capacity in bytes -static int setEncodeBufferSize(lua_State* L) { - int buffer_size = luaL_checkinteger(L, 1); - if (buffer_size < 1) { - return luaL_error(L, "Encode buffer size must be at least 1"); - } - if ((size_t)buffer_size > DEFAULT_MAX_ENCODE_BUFFER_SIZE) { - return luaL_error(L, "Encode buffer size must not exceed %zu", - (size_t)DEFAULT_MAX_ENCODE_BUFFER_SIZE); - } - set_encode_buffer_size(L, buffer_size); - return 0; +static int setEncodeBufferSize(lua_State *L) { + int buffer_size = luaL_checkinteger(L, 1); + if (buffer_size < 1) { + return luaL_error(L, "Encode buffer size must be at least 1"); + } + if ((size_t)buffer_size > DEFAULT_MAX_ENCODE_BUFFER_SIZE) { + return luaL_error(L, "Encode buffer size must not exceed %zu", 
(size_t)DEFAULT_MAX_ENCODE_BUFFER_SIZE); + } + set_encode_buffer_size(L, buffer_size); + return 0; } // Get encode buffer initial capacity in bytes -static int getEncodeBufferSize(lua_State* L) { - lua_pushinteger(L, get_encode_buffer_size(L)); - return 1; +static int getEncodeBufferSize(lua_State *L) { + lua_pushinteger(L, get_encode_buffer_size(L)); + return 1; } // ParsedObject as C++ class #define LUA_MYOBJECT "ParsedObject" class ParsedObject { - private: - simdjson::padded_string json_string; - ondemand::document doc; - std::unique_ptr parser; - - public: - ParsedObject(const char* json_file) - : json_string(padded_string::load(json_file)), parser(new ondemand::parser{}) { - this->doc = this->parser.get()->iterate(json_string); - } - ParsedObject(const char* json_str, size_t json_str_len) - : json_string(json_str, json_str_len), parser(new ondemand::parser{}) { - this->doc = this->parser.get()->iterate(json_string); - } - ~ParsedObject() { - } - ondemand::document* get_doc() { - return &(this->doc); - } +private: + simdjson::padded_string json_string; + ondemand::document doc; + std::unique_ptr parser; + +public: + ParsedObject(const char *json_file) : json_string(padded_string::load(json_file)), parser(new ondemand::parser{}) { + this->doc = this->parser.get()->iterate(json_string); + } + ParsedObject(const char *json_str, size_t json_str_len) : json_string(json_str, json_str_len), parser(new ondemand::parser{}) { + this->doc = this->parser.get()->iterate(json_string); + } + ~ParsedObject() { + } + ondemand::document *get_doc() { + return &(this->doc); + } }; -static int ParsedObject_delete(lua_State* L) { - delete *reinterpret_cast(lua_touserdata(L, 1)); - return 0; +static int ParsedObject_delete(lua_State *L) { + delete *reinterpret_cast(lua_touserdata(L, 1)); + return 0; } -static int ParsedObject_open(lua_State* L) { - size_t json_str_len; - const char* json_str = luaL_checklstring(L, 1, &json_str_len); - - try { - ParsedObject** parsedObject = 
(ParsedObject**)(lua_newuserdata(L, sizeof(ParsedObject*))); - *parsedObject = new ParsedObject(json_str, json_str_len); - luaL_getmetatable(L, LUA_MYOBJECT); - lua_setmetatable(L, -2); - } catch (simdjson::simdjson_error& error) { - luaL_error(L, error.what()); - } - return 1; +static int ParsedObject_open(lua_State *L) { + size_t json_str_len; + const char *json_str = luaL_checklstring(L, 1, &json_str_len); + + try { + ParsedObject **parsedObject = (ParsedObject **)(lua_newuserdata(L, sizeof(ParsedObject *))); + *parsedObject = new ParsedObject(json_str, json_str_len); + luaL_getmetatable(L, LUA_MYOBJECT); + lua_setmetatable(L, -2); + } catch (simdjson::simdjson_error &error) { + luaL_error(L, error.what()); + } + return 1; } -static int ParsedObject_open_file(lua_State* L) { - const char* json_file = luaL_checkstring(L, 1); +static int ParsedObject_open_file(lua_State *L) { + const char *json_file = luaL_checkstring(L, 1); - try { - ParsedObject** parsedObject = (ParsedObject**)(lua_newuserdata(L, sizeof(ParsedObject*))); - *parsedObject = new ParsedObject(json_file); - luaL_getmetatable(L, LUA_MYOBJECT); - lua_setmetatable(L, -2); - } catch (simdjson::simdjson_error& error) { - luaL_error(L, error.what()); - } + try { + ParsedObject **parsedObject = (ParsedObject **)(lua_newuserdata(L, sizeof(ParsedObject *))); + *parsedObject = new ParsedObject(json_file); + luaL_getmetatable(L, LUA_MYOBJECT); + lua_setmetatable(L, -2); + } catch (simdjson::simdjson_error &error) { + luaL_error(L, error.what()); + } - return 1; + return 1; } -static int ParsedObject_atPointer(lua_State* L) { - ondemand::document* document = - (*reinterpret_cast(luaL_checkudata(L, 1, LUA_MYOBJECT)))->get_doc(); - const char* pointer = luaL_checkstring(L, 2); +static int ParsedObject_atPointer(lua_State *L) { + ondemand::document *document = (*reinterpret_cast(luaL_checkudata(L, 1, LUA_MYOBJECT)))->get_doc(); + const char *pointer = luaL_checkstring(L, 2); - try { - ondemand::value 
returned_element = document->at_pointer(pointer); - convert_ondemand_element_to_table(L, returned_element); - } catch (simdjson::simdjson_error& error) { - luaL_error(L, error.what()); - } + try { + ondemand::value returned_element = document->at_pointer(pointer); + convert_ondemand_element_to_table(L, returned_element); + } catch (simdjson::simdjson_error &error) { + luaL_error(L, error.what()); + } - return 1; + return 1; } -static int ParsedObject_newindex(lua_State* L) { - luaL_error(L, "This should be treated as a read-only table. We may one day " - "add array " - "access for the elements, and it'll likely not be modifiable."); - return 1; +static int ParsedObject_newindex(lua_State *L) { + luaL_error(L, "This should be treated as a read-only table. We may one day " + "add array " + "access for the elements, and it'll likely not be modifiable."); + return 1; } -static const struct luaL_Reg arraylib_m[] = {{"at", ParsedObject_atPointer}, - {"atPointer", ParsedObject_atPointer}, - {"__newindex", ParsedObject_newindex}, - {"__gc", ParsedObject_delete}, - {NULL, NULL}}; +static const struct luaL_Reg arraylib_m[] = { + {"at", ParsedObject_atPointer}, {"atPointer", ParsedObject_atPointer}, {"__newindex", ParsedObject_newindex}, {"__gc", ParsedObject_delete}, {NULL, NULL}}; -int luaopen_simdjson(lua_State* L) { - luaL_newmetatable(L, LUA_MYOBJECT); - lua_pushvalue(L, -1); /* duplicates the metatable */ - lua_setfield(L, -2, "__index"); - luaL_setfuncs(L, arraylib_m, 0); +int luaopen_simdjson(lua_State *L) { + luaL_newmetatable(L, LUA_MYOBJECT); + lua_pushvalue(L, -1); /* duplicates the metatable */ + lua_setfield(L, -2, "__index"); + luaL_setfuncs(L, arraylib_m, 0); - // luaL_newlib(L, luasimdjson); + // luaL_newlib(L, luasimdjson); - lua_newtable(L); - luaL_setfuncs(L, luasimdjson, 0); + lua_newtable(L); + luaL_setfuncs(L, luasimdjson, 0); - lua_pushlightuserdata(L, NULL); - lua_setfield(L, -2, "null"); + lua_pushlightuserdata(L, NULL); + lua_setfield(L, -2, 
"null"); - lua_pushliteral(L, LUA_SIMDJSON_NAME); - lua_setfield(L, -2, "_NAME"); - lua_pushliteral(L, LUA_SIMDJSON_VERSION); - lua_setfield(L, -2, "_VERSION"); + lua_pushliteral(L, LUA_SIMDJSON_NAME); + lua_setfield(L, -2, "_NAME"); + lua_pushliteral(L, LUA_SIMDJSON_VERSION); + lua_setfield(L, -2, "_VERSION"); - return 1; + return 1; } diff --git a/src/luasimdjson.h b/src/luasimdjson.h index 1fee934..85d91af 100644 --- a/src/luasimdjson.h +++ b/src/luasimdjson.h @@ -7,30 +7,30 @@ #endif extern "C" { - static int parse(lua_State*); - static int parse_file(lua_State*); - static int active_implementation(lua_State*); - static int ParsedObject_open(lua_State*); - static int ParsedObject_open_file(lua_State*); - static int encode(lua_State*); - static int setMaxEncodeDepth(lua_State*); - static int getMaxEncodeDepth(lua_State*); - static int setEncodeBufferSize(lua_State*); - static int getEncodeBufferSize(lua_State*); + static int parse(lua_State *); + static int parse_file(lua_State *); + static int active_implementation(lua_State *); + static int ParsedObject_open(lua_State *); + static int ParsedObject_open_file(lua_State *); + static int encode(lua_State *); + static int setMaxEncodeDepth(lua_State *); + static int getMaxEncodeDepth(lua_State *); + static int setEncodeBufferSize(lua_State *); + static int getEncodeBufferSize(lua_State *); - static const struct luaL_Reg luasimdjson[] = { - {"parse", parse}, - {"parseFile", parse_file}, - {"activeImplementation", active_implementation}, - {"open", ParsedObject_open}, - {"openFile", ParsedObject_open_file}, - {"encode", encode}, - {"setMaxEncodeDepth", setMaxEncodeDepth}, - {"getMaxEncodeDepth", getMaxEncodeDepth}, - {"setEncodeBufferSize", setEncodeBufferSize}, - {"getEncodeBufferSize", getEncodeBufferSize}, + static const struct luaL_Reg luasimdjson[] = { + {"parse", parse}, + {"parseFile", parse_file}, + {"activeImplementation", active_implementation}, + {"open", ParsedObject_open}, + {"openFile", 
ParsedObject_open_file}, + {"encode", encode}, + {"setMaxEncodeDepth", setMaxEncodeDepth}, + {"getMaxEncodeDepth", getMaxEncodeDepth}, + {"setEncodeBufferSize", setEncodeBufferSize}, + {"getEncodeBufferSize", getEncodeBufferSize}, - {NULL, NULL}, - }; - LUASIMDJSON_EXPORT int luaopen_simdjson(lua_State*); + {NULL, NULL}, + }; + LUASIMDJSON_EXPORT int luaopen_simdjson(lua_State *); } From ffe2c5b9bc03549ae40a47a26a78954035f5446b Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 4 Jan 2026 14:34:38 +1100 Subject: [PATCH 11/21] move encode configuration to a Lua table with snakeCase keys --- src/luasimdjson.cpp | 45 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/src/luasimdjson.cpp b/src/luasimdjson.cpp index 078dbe3..3d9f90d 100644 --- a/src/luasimdjson.cpp +++ b/src/luasimdjson.cpp @@ -511,20 +511,47 @@ static int encode(lua_State *L) { int num_args = lua_gettop(L); luaL_argcheck(L, num_args >= 1 && num_args <= 2, num_args, "expected 1 or 2 arguments"); - // Get max_depth: use second argument if provided, otherwise use global setting - int max_depth; + // Get max_depth and buffer_size from options table if provided, otherwise use global settings + int max_depth = get_max_depth(L); + size_t desired_buffer_size = get_encode_buffer_size(L); + if (num_args == 2) { - max_depth = luaL_checkinteger(L, 2); - if (max_depth < 1) { - return luaL_error(L, "maximum depth must be at least 1"); + luaL_checktype(L, 2, LUA_TTABLE); + + // Check for maxDepth in options table + lua_getfield(L, 2, "maxDepth"); + if (!lua_isnil(L, -1)) { + if (!lua_isnumber(L, -1)) { + return luaL_error(L, "maxDepth option must be a number"); + } + max_depth = lua_tointeger(L, -1); + if (max_depth < 1) { + return luaL_error(L, "maxDepth must be at least 1"); + } } - lua_pop(L, 1); // Remove max_depth argument, leaving table on top - } else { - max_depth = get_max_depth(L); + lua_pop(L, 1); + + // Check for buffer_size in options table + 
lua_getfield(L, 2, "buffer_size"); + if (!lua_isnil(L, -1)) { + if (!lua_isnumber(L, -1)) { + return luaL_error(L, "buffer_size option must be a number"); + } + int buffer_size = lua_tointeger(L, -1); + if (buffer_size < 1) { + return luaL_error(L, "buffer_size must be at least 1"); + } + if ((size_t)buffer_size > DEFAULT_MAX_ENCODE_BUFFER_SIZE) { + return luaL_error(L, "buffer_size must not exceed %zu", (size_t)DEFAULT_MAX_ENCODE_BUFFER_SIZE); + } + desired_buffer_size = buffer_size; + } + lua_pop(L, 1); + + lua_pop(L, 1); // Remove options table, leaving data on top } // Get desired buffer size and recreate buffer if size changed - size_t desired_buffer_size = get_encode_buffer_size(L); if (encode_buffer == nullptr || encode_buffer_size != desired_buffer_size) { if (encode_buffer != nullptr) { delete encode_buffer; From 5cb5bca420413c3e52a05ae28ac6adeeebd1df57 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 4 Jan 2026 14:35:56 +1100 Subject: [PATCH 12/21] update the ordering of the windows includes because they got re-ordered when formatting and broke the builds --- src/luasimdjson.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/luasimdjson.cpp b/src/luasimdjson.cpp index 3d9f90d..8658f6d 100644 --- a/src/luasimdjson.cpp +++ b/src/luasimdjson.cpp @@ -3,8 +3,8 @@ #include #ifdef _WIN32 -#include #include +#include #else #include #endif @@ -532,17 +532,17 @@ static int encode(lua_State *L) { lua_pop(L, 1); // Check for buffer_size in options table - lua_getfield(L, 2, "buffer_size"); + lua_getfield(L, 2, "bufferSize"); if (!lua_isnil(L, -1)) { if (!lua_isnumber(L, -1)) { - return luaL_error(L, "buffer_size option must be a number"); + return luaL_error(L, "bufferSize option must be a number"); } int buffer_size = lua_tointeger(L, -1); if (buffer_size < 1) { - return luaL_error(L, "buffer_size must be at least 1"); + return luaL_error(L, "bufferSize must be at least 1"); } if ((size_t)buffer_size > 
DEFAULT_MAX_ENCODE_BUFFER_SIZE) { - return luaL_error(L, "buffer_size must not exceed %zu", (size_t)DEFAULT_MAX_ENCODE_BUFFER_SIZE); + return luaL_error(L, "bufferSize must not exceed %zu", (size_t)DEFAULT_MAX_ENCODE_BUFFER_SIZE); } desired_buffer_size = buffer_size; } From add11637d519dd58fa55c30852ca46b36e9f2a42 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 4 Jan 2026 14:40:55 +1100 Subject: [PATCH 13/21] Update readme to reflect changes in how configuration values are being passed into encode() --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9155efb..5ebd3bf 100644 --- a/README.md +++ b/README.md @@ -141,9 +141,15 @@ local withNull = { } local json = simdjson.encode(withNull) -- {"value":null,"name":"test"} --- Optional: specify maximum nesting depth (default is 1024) +-- Optional: specify encoding options with a configuration table local deepData = { level1 = { level2 = { level3 = "value" } } } -local json = simdjson.encode(deepData, 10) -- max depth of 10 +local json = simdjson.encode(deepData, {maxDepth = 10}) -- max depth of 10 + +-- You can also specify bufferSize per-call (default: 16KB) +local json = simdjson.encode(data, {bufferSize = 32 * 1024}) -- 32KB buffer + +-- Or combine both options +local json = simdjson.encode(deepData, {maxDepth = 10, bufferSize = 8192}) ``` You can also configure global encoding settings: From 0dbbe318f7f98c50c9c0f3e9b136c331f4bca005 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 4 Jan 2026 14:41:30 +1100 Subject: [PATCH 14/21] refactor the configuration table handling into it's own function to make the code easier to read --- src/luasimdjson.cpp | 65 ++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/src/luasimdjson.cpp b/src/luasimdjson.cpp index 8658f6d..edde796 100644 --- a/src/luasimdjson.cpp +++ b/src/luasimdjson.cpp @@ -221,6 +221,39 @@ static int active_implementation(lua_State 
*L) { // Add forward declaration near the top after includes static void serialize_data(lua_State *L, int current_depth, int max_depth, simdjson::builder::string_builder &builder); +// Helper function to parse encode options from Lua table +static void parse_encode_options(lua_State *L, int table_index, int &max_depth, size_t &desired_buffer_size) { + // Check for maxDepth in options table + lua_getfield(L, table_index, "maxDepth"); + if (!lua_isnil(L, -1)) { + if (!lua_isnumber(L, -1)) { + luaL_error(L, "maxDepth option must be a number"); + } + max_depth = lua_tointeger(L, -1); + if (max_depth < 1) { + luaL_error(L, "maxDepth must be at least 1"); + } + } + lua_pop(L, 1); + + // Check for bufferSize in options table + lua_getfield(L, table_index, "bufferSize"); + if (!lua_isnil(L, -1)) { + if (!lua_isnumber(L, -1)) { + luaL_error(L, "bufferSize option must be a number"); + } + int buffer_size = lua_tointeger(L, -1); + if (buffer_size < 1) { + luaL_error(L, "bufferSize must be at least 1"); + } + if ((size_t)buffer_size > DEFAULT_MAX_ENCODE_BUFFER_SIZE) { + luaL_error(L, "bufferSize must not exceed %zu", (size_t)DEFAULT_MAX_ENCODE_BUFFER_SIZE); + } + desired_buffer_size = buffer_size; + } + lua_pop(L, 1); +} + // Helper function to get max encode depth from registry static int get_max_depth(lua_State *L) { lua_pushstring(L, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY); @@ -517,37 +550,7 @@ static int encode(lua_State *L) { if (num_args == 2) { luaL_checktype(L, 2, LUA_TTABLE); - - // Check for maxDepth in options table - lua_getfield(L, 2, "maxDepth"); - if (!lua_isnil(L, -1)) { - if (!lua_isnumber(L, -1)) { - return luaL_error(L, "maxDepth option must be a number"); - } - max_depth = lua_tointeger(L, -1); - if (max_depth < 1) { - return luaL_error(L, "maxDepth must be at least 1"); - } - } - lua_pop(L, 1); - - // Check for buffer_size in options table - lua_getfield(L, 2, "bufferSize"); - if (!lua_isnil(L, -1)) { - if (!lua_isnumber(L, -1)) { - return luaL_error(L, 
"bufferSize option must be a number"); - } - int buffer_size = lua_tointeger(L, -1); - if (buffer_size < 1) { - return luaL_error(L, "bufferSize must be at least 1"); - } - if ((size_t)buffer_size > DEFAULT_MAX_ENCODE_BUFFER_SIZE) { - return luaL_error(L, "bufferSize must not exceed %zu", (size_t)DEFAULT_MAX_ENCODE_BUFFER_SIZE); - } - desired_buffer_size = buffer_size; - } - lua_pop(L, 1); - + parse_encode_options(L, 2, max_depth, desired_buffer_size); lua_pop(L, 1); // Remove options table, leaving data on top } From f101d794b9a82dec91fbdb00bf53b71de51a6937 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 4 Jan 2026 14:57:48 +1100 Subject: [PATCH 15/21] fix tests to correctly use the table input for configuration --- spec/encode_security_spec.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/encode_security_spec.lua b/spec/encode_security_spec.lua index 4920168..83e7853 100644 --- a/spec/encode_security_spec.lua +++ b/spec/encode_security_spec.lua @@ -189,13 +189,13 @@ describe("encode() security and edge cases", function() -- Should succeed with high limit local success1 = pcall(function() - simdjson.encode(deep, 100) + simdjson.encode(deep, { maxDepth = 100 }) end) assert.is_true(success1) -- Should fail with low limit local success2 = pcall(function() - simdjson.encode(deep, 10) + simdjson.encode(deep, { maxDepth = 10 }) end) assert.is_false(success2) end) From f5d95a704e07d1b79a05f044bf6ac293368277de Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 4 Jan 2026 15:36:29 +1100 Subject: [PATCH 16/21] revert makefile after reordering windows includes --- Makefile.win | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/Makefile.win b/Makefile.win index d115812..83c6592 100644 --- a/Makefile.win +++ b/Makefile.win @@ -7,21 +7,6 @@ LDFLAGS = $(LIBFLAG) LDLIBS = $(LUA_LIBDIR)/$(LUALIB) !endif -# Detect architecture for Windows -!ifndef ARCH_FLAG -!if "$(PROCESSOR_ARCHITECTURE)" == "AMD64" || 
"$(PROCESSOR_ARCHITEW6432)" == "AMD64" -ARCH_FLAG = /D_AMD64_ -!else if "$(PROCESSOR_ARCHITECTURE)" == "x86" -ARCH_FLAG = /D_X86_ -!else if "$(PROCESSOR_ARCHITECTURE)" == "ARM64" -ARCH_FLAG = /D_ARM64_ -!else -# Default to AMD64 if detection fails -ARCH_FLAG = /D_AMD64_ -!endif -!endif - - TARGET = simdjson.dll all: $(TARGET) @@ -30,7 +15,7 @@ src/luasimdjson.obj: src/luasimdjson.h src/simdjson.h src/simdjson.obj: src/simdjson.h .cpp.obj:: - $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(ARCH_FLAG) -c $< -Fo:"src\\" + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -Fo:"src\\" $(TARGET): $(OBJ) $(LD) $(LDFLAGS) $** -out:$@ $(LDLIBS) @@ -39,4 +24,4 @@ clean: del *.dll src\*.obj *.lib *.exp 2>nul install: $(TARGET) - copy $(TARGET) $(INST_LIBDIR) + copy $(TARGET) $(INST_LIBDIR) \ No newline at end of file From f58513e926e3a603d5adcfbb9947ebffb8d45a28 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 4 Jan 2026 15:53:48 +1100 Subject: [PATCH 17/21] simplify serialize_append_object() numerical key handling --- src/luasimdjson.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/luasimdjson.cpp b/src/luasimdjson.cpp index edde796..f75b622 100644 --- a/src/luasimdjson.cpp +++ b/src/luasimdjson.cpp @@ -466,14 +466,8 @@ static void serialize_append_object(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATIO builder.escape_and_append_with_quotes(std::string_view(key, key_len)); } else if (key_type == LUA_TNUMBER) { auto key_result = format_number_as_string(L, -2); - const char *key_str = key_result.first; - size_t key_len = key_result.second; // Numeric keys are formatted as strings with quotes - builder.append('"'); - for (size_t i = 0; i < key_len; i++) { - builder.append(key_str[i]); - } - builder.append('"'); + builder.escape_and_append_with_quotes(std::string_view(key_result.first, key_result.second)); } else { const char *type_name = lua_typename(L, key_type); luaL_error(L, "unsupported key type in table for serialization: %s", type_name); From 
a247816e0761df07c0f35bd3fce4bb5b01c47e99 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 4 Jan 2026 19:35:51 +1100 Subject: [PATCH 18/21] fix sparse array encoding --- src/luasimdjson.cpp | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/luasimdjson.cpp b/src/luasimdjson.cpp index f75b622..cb45526 100644 --- a/src/luasimdjson.cpp +++ b/src/luasimdjson.cpp @@ -19,13 +19,15 @@ #define LUA_SIMDJSON_VERSION "0.0.8" // keys encode max depth configuration. -#define LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY "simdjson.max_encode_depth" +#define LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY "simdjson.maxEncodeDepth" #define DEFAULT_MAX_ENCODE_DEPTH simdjson::DEFAULT_MAX_DEPTH // Encode buffer size reservation configuration -#define LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY "simdjson.encode_buffer_size" +#define LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY "simdjson.encodeBufferSize" #define DEFAULT_ENCODE_BUFFER_SIZE (16 * 1024) // 16KB #define DEFAULT_MAX_ENCODE_BUFFER_SIZE simdjson::SIMDJSON_MAXSIZE_BYTES +// Max size for number to string conversion buffer +#define ENCODE_NUMBER_BUFFER_SIZE 32 using namespace simdjson; @@ -301,7 +303,6 @@ static void set_encode_buffer_size(lua_State *L, size_t buffer_size) { static int get_table_array_size(lua_State *L) { double key_num; int max_index = 0; - int element_count = 0; lua_pushnil(L); while (lua_next(L, -2) != 0) { @@ -309,11 +310,10 @@ static int get_table_array_size(lua_State *L) { if (lua_type(L, -2) == LUA_TNUMBER) { key_num = lua_tonumber(L, -2); // Check if it's a positive integer - if (floor(key_num) == key_num && key_num >= 1) { + if (std::floor(key_num) == key_num && key_num >= 1) { if (static_cast(key_num) > max_index) { max_index = static_cast(key_num); } - element_count++; lua_pop(L, 1); continue; } @@ -324,18 +324,14 @@ static int get_table_array_size(lua_State *L) { return -1; } - // Check if array is contiguous (element count should equal max index) - if (element_count > 0 && 
element_count != max_index) { - return -1; - } - + // Return max_index if we found any valid integer keys (allows sparse arrays) return max_index; } // Helper function to format a number as a string // Returns pointer to thread-local buffer and length inline std::pair format_number_as_string(lua_State *L, int index) { - thread_local char buffer[32]; + thread_local char buffer[ENCODE_NUMBER_BUFFER_SIZE]; size_t len; // JSON numbers are represented as doubles, which have limited precision @@ -378,6 +374,7 @@ inline std::pair format_number_as_string(lua_State *L, int return {buffer, len}; } +// Serialize a Lua boolean as a JSON boolean inline void serialize_append_bool(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int lindex) { // check if it is really a boolean if (lua_isboolean(L, lindex)) { @@ -400,6 +397,7 @@ inline void serialize_append_bool(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION: } }; +// Serialize a Lua number as a JSON number static void serialize_append_number(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int lindex) { auto num_result = format_number_as_string(L, lindex); const char *num_str = num_result.first; @@ -408,17 +406,19 @@ static void serialize_append_number(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATIO builder.append_raw(std::string_view(num_str, len)); }; +// Serialize a Lua string with proper JSON escaping static void serialize_append_string(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int lindex) { size_t len; const char *str = lua_tolstring(L, lindex, &len); builder.escape_and_append_with_quotes(str); }; +// Serialize a Lua table with integer indices as a JSON array. 
static void serialize_append_array(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int table_index, int array_size, int current_depth, int max_depth) { bool first = true; - // Get the actual stack index if using relative indexing - if (table_index < 0 && table_index > LUA_REGISTRYINDEX) { + // Get the actual stack index if using relative indexing (but not registry) + if (table_index < 0 && table_index != LUA_REGISTRYINDEX) { table_index = lua_gettop(L) + table_index + 1; } @@ -430,11 +430,16 @@ static void serialize_append_array(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION } first = false; - // Push the value at index i onto the stack + // Push the value at index i onto the stack (or nil if missing) lua_rawgeti(L, table_index, i); - // Serialize the value - serialize_data(L, current_depth, max_depth, builder); + // If the value is nil, encode as null; otherwise, serialize normally + if (lua_isnil(L, -1)) { + builder.append_null(); + } else { + serialize_data(L, current_depth, max_depth, builder); + } + // Pop the value from the stack lua_pop(L, 1); } @@ -442,6 +447,7 @@ static void serialize_append_array(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION builder.end_array(); } +// Serialize a Lua table as a JSON object. 
static void serialize_append_object(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int current_depth, int max_depth) { builder.start_object(); bool first = true; From 8c9f0f0289554a1ae52dbb337e5b827b8a31d67c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 4 Jan 2026 19:49:15 +1100 Subject: [PATCH 19/21] add cache to encoding settings to avoid lua registry lookups for every encode() call, potentially many times over --- src/luasimdjson.cpp | 50 +++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/src/luasimdjson.cpp b/src/luasimdjson.cpp index cb45526..63e4ccb 100644 --- a/src/luasimdjson.cpp +++ b/src/luasimdjson.cpp @@ -256,43 +256,49 @@ static void parse_encode_options(lua_State *L, int table_index, int &max_depth, lua_pop(L, 1); } -// Helper function to get max encode depth from registry +// Helper function to get max encode depth from registry (with caching) +static int max_encode_depth_cache = -1; // -1 means not cached static int get_max_depth(lua_State *L) { - lua_pushstring(L, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY); - lua_gettable(L, LUA_REGISTRYINDEX); - - int max_depth = DEFAULT_MAX_ENCODE_DEPTH; - if (lua_isnumber(L, -1)) { - max_depth = lua_tointeger(L, -1); + if (max_encode_depth_cache == -1) { + lua_pushstring(L, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY); + lua_gettable(L, LUA_REGISTRYINDEX); + if (lua_isnumber(L, -1)) { + max_encode_depth_cache = lua_tointeger(L, -1); + } else { + max_encode_depth_cache = DEFAULT_MAX_ENCODE_DEPTH; + } + lua_pop(L, 1); } - lua_pop(L, 1); - - return max_depth; + return max_encode_depth_cache; } -// Helper function to set max encode depth in registry +// Helper function to set max encode depth in registry (and update cache) static void set_max_depth(lua_State *L, int max_depth) { + max_encode_depth_cache = max_depth; lua_pushstring(L, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY); lua_pushinteger(L, max_depth); lua_settable(L, LUA_REGISTRYINDEX); } 
-// Helper function to get encode buffer size from registry +// Helper function to get encode buffer size from registry (with caching) +static size_t encode_buffer_size_cache = 0; // 0 means not cached static size_t get_encode_buffer_size(lua_State *L) { - lua_pushstring(L, LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY); - lua_gettable(L, LUA_REGISTRYINDEX); - - size_t buffer_size = DEFAULT_ENCODE_BUFFER_SIZE; - if (lua_isnumber(L, -1)) { - buffer_size = lua_tointeger(L, -1); + if (encode_buffer_size_cache == 0) { + lua_pushstring(L, LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY); + lua_gettable(L, LUA_REGISTRYINDEX); + if (lua_isnumber(L, -1)) { + encode_buffer_size_cache = lua_tointeger(L, -1); + } else { + encode_buffer_size_cache = DEFAULT_ENCODE_BUFFER_SIZE; + } + lua_pop(L, 1); } - lua_pop(L, 1); - - return buffer_size; + return encode_buffer_size_cache; } -// Helper function to set encode buffer size in registry +// Helper function to set encode buffer size in registry (and update cache) static void set_encode_buffer_size(lua_State *L, size_t buffer_size) { + encode_buffer_size_cache = buffer_size; lua_pushstring(L, LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY); lua_pushinteger(L, buffer_size); lua_settable(L, LUA_REGISTRYINDEX); From d1a57e7218a3320ad70fc457a5404049f60acaef Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 4 Jan 2026 20:27:57 +1100 Subject: [PATCH 20/21] fix sparse array handling as arrays not objects --- src/luasimdjson.cpp | 88 +++++++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 34 deletions(-) diff --git a/src/luasimdjson.cpp b/src/luasimdjson.cpp index 63e4ccb..15805a7 100644 --- a/src/luasimdjson.cpp +++ b/src/luasimdjson.cpp @@ -256,49 +256,43 @@ static void parse_encode_options(lua_State *L, int table_index, int &max_depth, lua_pop(L, 1); } -// Helper function to get max encode depth from registry (with caching) -static int max_encode_depth_cache = -1; // -1 means not cached +// Helper function to get max encode depth from registry 
(with caching for performance) static int get_max_depth(lua_State *L) { - if (max_encode_depth_cache == -1) { - lua_pushstring(L, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY); - lua_gettable(L, LUA_REGISTRYINDEX); - if (lua_isnumber(L, -1)) { - max_encode_depth_cache = lua_tointeger(L, -1); - } else { - max_encode_depth_cache = DEFAULT_MAX_ENCODE_DEPTH; - } - lua_pop(L, 1); + lua_pushstring(L, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY); + lua_gettable(L, LUA_REGISTRYINDEX); + + int max_depth = DEFAULT_MAX_ENCODE_DEPTH; + if (lua_isnumber(L, -1)) { + max_depth = lua_tointeger(L, -1); } - return max_encode_depth_cache; + lua_pop(L, 1); + + return max_depth; } // Helper function to set max encode depth in registry (and update cache) static void set_max_depth(lua_State *L, int max_depth) { - max_encode_depth_cache = max_depth; lua_pushstring(L, LUA_SIMDJSON_MAX_ENCODE_DEPTH_KEY); lua_pushinteger(L, max_depth); lua_settable(L, LUA_REGISTRYINDEX); } -// Helper function to get encode buffer size from registry (with caching) -static size_t encode_buffer_size_cache = 0; // 0 means not cached +// Helper function to get encode buffer size from registry (with caching for performance) static size_t get_encode_buffer_size(lua_State *L) { - if (encode_buffer_size_cache == 0) { - lua_pushstring(L, LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY); - lua_gettable(L, LUA_REGISTRYINDEX); - if (lua_isnumber(L, -1)) { - encode_buffer_size_cache = lua_tointeger(L, -1); - } else { - encode_buffer_size_cache = DEFAULT_ENCODE_BUFFER_SIZE; - } - lua_pop(L, 1); + lua_pushstring(L, LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY); + lua_gettable(L, LUA_REGISTRYINDEX); + + size_t buffer_size = DEFAULT_ENCODE_BUFFER_SIZE; + if (lua_isnumber(L, -1)) { + buffer_size = lua_tointeger(L, -1); } - return encode_buffer_size_cache; + lua_pop(L, 1); + + return buffer_size; } // Helper function to set encode buffer size in registry (and update cache) static void set_encode_buffer_size(lua_State *L, size_t buffer_size) { - encode_buffer_size_cache 
= buffer_size; lua_pushstring(L, LUA_SIMDJSON_ENCODE_BUFFER_SIZE_KEY); lua_pushinteger(L, buffer_size); lua_settable(L, LUA_REGISTRYINDEX); @@ -354,8 +348,17 @@ inline std::pair format_number_as_string(lua_State *L, int lua_Integer num = lua_tointeger(L, index); // Check if the integer fits safely in a JSON number (double) if (num > -max_safe_int && num < max_safe_int) { - len = snprintf(buffer, sizeof(buffer), "%lld", (long long)num); - return {buffer, len}; + // Optimized: Use std::to_string for faster integer conversion + std::string str = std::to_string(num); + len = str.size(); + if (len < sizeof(buffer)) { + memcpy(buffer, str.c_str(), len + 1); // Include null terminator for safety + return {buffer, len}; + } else { + // Fallback for very large numbers (rare) + len = snprintf(buffer, sizeof(buffer), "%lld", (long long)num); + return {buffer, len}; + } } // Too large for safe integer representation, format as float len = snprintf(buffer, sizeof(buffer), "%.14g", (double)num); @@ -367,17 +370,33 @@ inline std::pair format_number_as_string(lua_State *L, int double num = lua_tonumber(L, index); if (std::floor(num) == num && num <= LLONG_MAX && num >= LLONG_MIN) { if (num > -max_safe_int && num < max_safe_int) { - len = snprintf(buffer, sizeof(buffer), "%lld", static_cast(num)); - return {buffer, len}; + // Optimized: Use std::to_string for integers + std::string str = std::to_string(static_cast(num)); + len = str.size(); + if (len < sizeof(buffer)) { + memcpy(buffer, str.c_str(), len + 1); + return {buffer, len}; + } else { + len = snprintf(buffer, sizeof(buffer), "%lld", static_cast(num)); + return {buffer, len}; + } } } } #endif - // For floats or large numbers, convert to string with %.14g + // For floats: Use std::to_string for speed, but ensure JSON-compatible format lua_Number num = lua_tonumber(L, index); - len = snprintf(buffer, sizeof(buffer), "%.14g", (double)num); - return {buffer, len}; + std::string str = std::to_string(num); + len = str.size(); + 
if (len < sizeof(buffer)) { + memcpy(buffer, str.c_str(), len + 1); + return {buffer, len}; + } else { + // Fallback if too long (rare) + len = snprintf(buffer, sizeof(buffer), "%.14g", num); + return {buffer, len}; + } } // Serialize a Lua boolean as a JSON boolean @@ -419,7 +438,7 @@ static void serialize_append_string(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATIO builder.escape_and_append_with_quotes(str); }; -// Serialize a Lua table with integer indices as a JSON array. +// Serialize a Lua table with integer indices as a JSON array, handling sparse arrays with nulls for missing indices. static void serialize_append_array(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder, int table_index, int array_size, int current_depth, int max_depth) { bool first = true; @@ -496,6 +515,7 @@ static void serialize_append_object(lua_State *L, SIMDJSON_BUILTIN_IMPLEMENTATIO builder.end_object(); } +// Main serialization dispatcher: converts Lua values to JSON based on their type static void serialize_data(lua_State *L, int current_depth, int max_depth, SIMDJSON_BUILTIN_IMPLEMENTATION::builder::string_builder &builder) { // Check depth to prevent stack overflow if (current_depth > max_depth) { From 9567aa41308a829941ad7437fecf33eed643dff3 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 4 Jan 2026 20:37:36 +1100 Subject: [PATCH 21/21] revert string format experiment --- src/luasimdjson.cpp | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/luasimdjson.cpp b/src/luasimdjson.cpp index 15805a7..1fd6bef 100644 --- a/src/luasimdjson.cpp +++ b/src/luasimdjson.cpp @@ -385,18 +385,10 @@ inline std::pair format_number_as_string(lua_State *L, int } #endif - // For floats: Use std::to_string for speed, but ensure JSON-compatible format + // For floats: Use snprintf to maintain original formatting (e.g., preserve trailing zeros) lua_Number num = lua_tonumber(L, index); - std::string str = std::to_string(num); - len = 
str.size(); - if (len < sizeof(buffer)) { - memcpy(buffer, str.c_str(), len + 1); - return {buffer, len}; - } else { - // Fallback if too long (rare) - len = snprintf(buffer, sizeof(buffer), "%.14g", num); - return {buffer, len}; - } + len = snprintf(buffer, sizeof(buffer), "%.14g", num); + return {buffer, len}; } // Serialize a Lua boolean as a JSON boolean