Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
0bc4c37
initial commit for adding encode() using the simdjson::string_builder…
jlambatl Jan 2, 2026
b2e8e41
fix header include ordering that broke due to clang-format misconfigu…
jlambatl Jan 2, 2026
b46fc24
Update tests to use byte sequences so that they are compatible from L…
jlambatl Jan 2, 2026
aec7d12
fix error on windows builds requring architecture flags to be added t…
jlambatl Jan 2, 2026
e719803
nmake not gnu make syntax
jlambatl Jan 2, 2026
4426e27
correctly fix the syntax
jlambatl Jan 2, 2026
79064eb
add ARCH_FLAG to CXX line
jlambatl Jan 2, 2026
46e7302
add encode documentation to README.md
jlambatl Jan 2, 2026
233e04e
update clang-format to move pointer alignemnt right instead of left
jlambatl Jan 2, 2026
0c4e66b
return formatting to what has been used upstream
jlambatl Jan 2, 2026
ffe2c5b
move encode configuration to a Lua table with snakeCase keys
jlambatl Jan 4, 2026
5cb5bca
update the ordering of the windows includes because they got re-order…
jlambatl Jan 4, 2026
add1163
Update readme to reflect changes in how configuration values are bein…
jlambatl Jan 4, 2026
0dbbe31
refactor the configuration table handling into it's own function to m…
jlambatl Jan 4, 2026
f101d79
fix tests to correctly use the table input for configuration
jlambatl Jan 4, 2026
f5d95a7
revert makefile after reordering windows includes
jlambatl Jan 4, 2026
f58513e
simplify serialize_append_object() numerical key handling
jlambatl Jan 4, 2026
a247816
fix sparse array encoding
jlambatl Jan 4, 2026
8c9f0f0
add cache to encoding settings to avoid lua registry lookups for ever…
jlambatl Jan 4, 2026
d1a57e7
fix sparse array handling as arrays not objects
jlambatl Jan 4, 2026
9567aa4
revert string format experiment
jlambatl Jan 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# clang-format configuration for the C++ sources of this binding.
# see https://clang.llvm.org/docs/ClangFormatStyleOptions.html
---
Language: Cpp
# The project requires a build environment with C++11 support.
Standard: c++11

# Leave #include lines in their hand-written order; automatic re-sorting
# has previously broken the build (notably the Windows includes).
SortIncludes: false

# Indentation: two columns, spaces only.
IndentWidth: 2
TabWidth: 2
UseTab: Never
# Wide column limit so long call chains are not wrapped aggressively.
ColumnLimit: 160
# Opening braces stay on the same line as the statement that owns them.
BreakBeforeBraces: Attach
# Declarations inside extern "C" { ... } blocks are indented.
IndentExternBlock: Indent
# No column alignment of consecutive assignments or declarations.
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
# Function bodies are always expanded over multiple lines.
AllowShortFunctionsOnASingleLine: None
IndentCaseLabels: true
# Pointer/reference markers bind to the name: `char *p`, not `char* p`.
PointerAlignment: Right
# Space before the parenthesis of control statements (if/for/while) only,
# not before function-call parentheses.
SpaceBeforeParens: ControlStatements
28 changes: 17 additions & 11 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,37 +10,43 @@ jobs:
os: [linux, macos, macos-arm64]
lua: [lua=5.1, lua=5.2, lua=5.3, lua=5.4, luajit=2.0, luajit=2.1]
include:
- os: linux
runner: ubuntu-latest
- os: macos
runner: macos-15-intel
- os: macos-arm64
runner: macos-latest
- os: linux
runner: ubuntu-latest
- os: macos
runner: macos-15-intel
- os: macos-arm64
runner: macos-latest
exclude:
- os: macos-arm64
lua: luajit=2.0
- os: macos-arm64
lua: luajit=2.0
name: ${{ matrix.os }} (${{ matrix.lua }})
runs-on: ${{ matrix.runner }}
steps:
# Checks-out the repository under $GITHUB_WORKSPACE.
- uses: actions/checkout@v6
- name: Install libreadline
- name: Install libreadline
if: runner.os == 'Linux'
run: |
sudo apt-get install -y libreadline-dev

- name: Install Lua (${{ matrix.lua }})
run: |
pip install git+https://github.com/luarocks/hererocks
pipx install git+https://github.com/luarocks/hererocks
pipx ensurepath
export PATH=$PATH:/root/.local/bin:$HOME/.local/bin
hererocks lua_install -r^ --${{ matrix.lua }}
env:
MACOSX_DEPLOYMENT_TARGET: 11.0
- name: Build lua-simdjson
shell: bash
run: |
set -e
source lua_install/bin/activate
luarocks make
- name: Run tests
shell: bash
run: |
set -e
source lua_install/bin/activate
luarocks install lua-cjson2
luarocks install busted
Expand All @@ -51,7 +57,7 @@ jobs:
fail-fast: false
matrix:
lua: [lua=5.1, lua=5.2, lua=5.3, lua=5.4, luajit=2.0, luajit=2.1]
target: [mingw,vs]
target: [mingw, vs]
runs-on: windows-2022
steps:
# Checks-out the repository under $GITHUB_WORKSPACE.
Expand Down
2 changes: 1 addition & 1 deletion Makefile.win
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ clean:
del *.dll src\*.obj *.lib *.exp 2>nul

install: $(TARGET)
copy $(TARGET) $(INST_LIBDIR)
copy $(TARGET) $(INST_LIBDIR)
92 changes: 87 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,39 +1,48 @@
# lua-simdjson

[![Build Status](https://github.com/FourierTransformer/lua-simdjson/actions/workflows/ci.yml/badge.svg?branch=master)](https://github.com/FourierTransformer/lua-simdjson/actions?query=branch%3Amaster)

A basic Lua binding to [simdjson](https://simdjson.org). The simdjson library is an incredibly fast JSON parser that uses SIMD instructions and fancy algorithms to parse JSON very quickly. It's been tested with LuaJIT 2.0/2.1 and Lua 5.1, 5.2, 5.3, and 5.4 on linux/osx/windows. It has a general parsing mode and a lazy mode that uses a JSON pointer.

Current simdjson version: 4.2.3

## Installation

If all the requirements are met, lua-simdjson can be installed via luarocks with:

```
```bash
luarocks install lua-simdjson
```

Otherwise it can be installed manually by pulling the repo and running `luarocks make`.

## Requirements
* lua-simdjson only works on 64bit systems.
* a Lua build environment with support for C++11
* g++ version 7+ and clang++ version 6+ or newer should work!

* lua-simdjson only works on 64bit systems.
* a Lua build environment with support for C++11
* g++ version 7+ and clang++ version 6+ or newer should work!

## Parsing

There are two main ways to parse JSON in lua-simdjson:

1. With `parse`: this parses JSON and returns a Lua table with the parsed values
2. With `open`: this reads in the JSON and keeps it in simdjson's internal format. The values can then be accessed using a JSON pointer (examples below)

Both of these methods also have support to read files on disk with `parseFile` and `openFile` respectively. If handling JSON from disk, these methods should be used and are incredibly fast.

## Typing

* lua-simdjson uses `simdjson.null` to represent `null` values from parsed JSON.
* Any application should use that for comparison as needed.
* it uses `lua_pushnumber` and `lua_pushinteger` for JSON floats and ints respectively, so your Lua version may handle that slightly differently.
* `lua_pushinteger` uses signed ints. A number from JSON larger than `LUA_MAXINTEGER` will be represented as a float/number
* All other types map as expected.

### Parse some JSON

The `parse` methods will return a normal Lua table that can be interacted with.

```lua
local simdjson = require("simdjson")
local response = simdjson.parse([[
Expand Down Expand Up @@ -61,7 +70,9 @@ print(fileResponse["statuses"][1]["id"])
```

### Open some json

The `open` methods currently require the use of a JSON pointer, but are very quick. They are best used when you only need a part of a response. In the example below, it could be useful for just getting the `Thumbnail` object with `:atPointer("/Image/Thumbnail")` which will then only create a Lua table with those specific values.

```lua
local simdjson = require("simdjson")
local response = simdjson.open([[
Expand Down Expand Up @@ -93,10 +104,81 @@ The `open` and `parse` codeblocks should print out the same values. It's worth n

This lazy style of using the simdjson data structure could also be used with array access in the future.

## Encoding

The `encode` method converts Lua tables into JSON strings. It supports nested tables, arrays, and all standard JSON types.

```lua
local simdjson = require("simdjson")

-- Encode a simple table
local data = {
name = "John Doe",
age = 30,
active = true,
score = 95.5
}
local json = simdjson.encode(data)
print(json) -- {"name":"John Doe","age":30,"active":true,"score":95.5}

-- Encode nested structures
local complex = {
user = {
id = 123,
tags = {"lua", "json", "fast"}
},
metadata = {
created = "2024-01-01",
count = 42
}
}
local json = simdjson.encode(complex)

-- Use simdjson.null for JSON null values
local withNull = {
value = simdjson.null,
name = "test"
}
local json = simdjson.encode(withNull) -- {"value":null,"name":"test"}

-- Optional: specify encoding options with a configuration table
local deepData = { level1 = { level2 = { level3 = "value" } } }
local json = simdjson.encode(deepData, {maxDepth = 10}) -- max depth of 10

-- You can also specify bufferSize per-call (default: 16KB)
local json = simdjson.encode(data, {bufferSize = 32 * 1024}) -- 32KB buffer

-- Or combine both options
local json = simdjson.encode(deepData, {maxDepth = 10, bufferSize = 8192})
```

You can also configure global encoding settings:

```lua
-- Set maximum nesting depth globally (default: 1024)
simdjson.setMaxEncodeDepth(512)
local currentDepth = simdjson.getMaxEncodeDepth()

-- Set encode buffer size in bytes (default: 16KB)
simdjson.setEncodeBufferSize(32 * 1024) -- 32KB
local currentSize = simdjson.getEncodeBufferSize()
```

**Encoding behavior:**

* Tables with consecutive integer keys starting at 1 are encoded as JSON arrays
* All other tables are encoded as JSON objects
* Numbers are formatted as integers when possible, or floats with 14 digits of precision
* Integers larger than 2^53 are encoded in scientific notation for JSON compatibility
* Strings are automatically escaped according to JSON specifications
* `simdjson.null` represents JSON `null`

## Error Handling

lua-simdjson will error out with any errors that simdjson encounters while parsing. These error messages are very good at helping identify what has gone wrong during parsing.

## Benchmarks

I ran some benchmarks against lua-cjson, rapidjson, and dkjson. For each test, I loaded the JSON into memory, and then had the parsers go through each file 100 times and took the average time it took to parse to a Lua table. You can see all the results in the [benchmark](benchmark/) folder. I've included a sample output run via Lua (the LuaJIT graph looks very similar, also in the benchmark folder). The y-axis is logarithmic, so every half step down is twice as fast.

![Lua Performance Column Chart](benchmark/lua-perf.png)
Expand All @@ -116,7 +198,7 @@ lua-simdjson, like the simdjson library performs better on more modern hardware.
* since it's an external module, it's not quite as easy to just grab the file and go (dkjson has you covered here!)

## Philosophy
I plan to keep it fairly inline with what the original simdjson library is capable of doing, which really means not adding too many additional options. The big _thing_ that's missing so far is encoding a lua table to JSON. I may add in an encoder at some point.
I plan to keep it fairly in line with what the original simdjson library is capable of doing, which really means not adding too many additional options.

## Licenses
* The jsonexamples, src/simdjson.cpp, src/simdjson.h are unmodified from the released version simdjson under the Apache License 2.0.
Expand Down
20 changes: 17 additions & 3 deletions spec/compile_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,8 @@ end)

local major, minor = _VERSION:match('([%d]+)%.(%d+)')
if tonumber(major) >= 5 and tonumber(minor) >= 3 then
describe("Make sure ints and floats parse correctly", function ()
describe("Make sure ints and floats parse correctly", function()
it("should handle decoding numbers appropriately", function()

local numberCheck = simdjson.parse([[
{
"float": 1.2,
Expand All @@ -101,7 +100,6 @@ if tonumber(major) >= 5 and tonumber(minor) >= 3 then
assert.are.same("float", math.type(numberCheck["one_above_max_signed_integer"]))
assert.are.same("integer", math.type(numberCheck["min_unsigned_integer"]))
assert.are.same("float", math.type(numberCheck["max_unsigned_integer"]))

end)
end)
end
Expand Down Expand Up @@ -129,3 +127,19 @@ describe("Make sure invalid files are not accepted", function()
end)
end
end)

-- Sanity checks for simdjson.activeImplementation(), which reports the
-- simdjson backend in use as a human-readable string.
describe("Active implementation function", function()
  it("should return a valid implementation name", function()
    local active = simdjson.activeImplementation()

    -- It must be a real string value before its contents are inspected.
    assert.is_not_nil(active)
    assert.is_string(active)

    -- At least one alphanumeric run must be present somewhere in the name.
    local alnum_run = active:match("%w+")
    assert.is_truthy(alnum_run)

    -- The string itself must not be empty.
    local length = #active
    assert.is_true(length > 0)
  end)

  it("should contain implementation details", function()
    local active = simdjson.activeImplementation()

    -- Expected shape is "<name> (<description>)", for example
    -- "arm64 (ARM NEON)" or "haswell (Intel AVX2)"; only the presence of a
    -- parenthesised section is checked here.
    local parenthesised = active:match("%(.*%)")
    assert.is_truthy(parenthesised)
  end)
end)
Loading