From 8acba4f9939259e5a140f599876c550c55a6dceb Mon Sep 17 00:00:00 2001 From: Vandern Rodrigues Date: Sun, 10 Aug 2025 20:52:51 +0100 Subject: [PATCH 1/6] Make map encoding deterministic by default --- lib/msgpack/encoder.ex | 14 ++++++++++++-- test/msgpack_test.exs | 42 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/lib/msgpack/encoder.ex b/lib/msgpack/encoder.ex index 4d7a1a8..7830019 100644 --- a/lib/msgpack/encoder.ex +++ b/lib/msgpack/encoder.ex @@ -15,7 +15,8 @@ defmodule Msgpack.Encoder do def default_opts() do [ atoms: :string, - string_validation: true + string_validation: true, + deterministic: true ] end @@ -160,6 +161,15 @@ defmodule Msgpack.Encoder do # ==== Maps ==== defp do_encode(map, opts) when is_map(map) do + enumerable = + if Keyword.get(opts, :deterministic, true) == false do + map + else + map + |> Map.to_list() + |> Enum.sort_by(fn {key, _value} -> key end) + end + acc = {:ok, []} reducer = fn {key, value}, {:ok, acc_list} -> @@ -172,7 +182,7 @@ defmodule Msgpack.Encoder do end end - case Enum.reduce(map, acc, reducer) do + case Enum.reduce(enumerable, acc, reducer) do {:ok, encoded_pairs} -> size = map_size(map) {:ok, [encode_map_header(size), Enum.reverse(encoded_pairs)]} diff --git a/test/msgpack_test.exs b/test/msgpack_test.exs index 59dbd70..0c6a851 100644 --- a/test/msgpack_test.exs +++ b/test/msgpack_test.exs @@ -76,6 +76,44 @@ defmodule MsgpackTest do string_32 = String.duplicate("a", 32) assert_encode(string_32, <<0xD9, 32, string_32::binary>>) end + + test "produces identical output for maps with different key orders, by default" do + map1 = %{c: 3, b: 2, a: 1} + map2 = %{a: 1, c: 3, b: 2} + + expected_binary = <<0x83, 0xA1, "a", 1, 0xA1, "b", 2, 0xA1, "c", 3>> + + assert_encode(map1, expected_binary) + assert_encode(map2, expected_binary) + end + + test "correctly sorts maps with mixed key types, by default" do + map = %{"a" => 1, 100 => 2, :z => 3, nil => 4} + 
expected_binary = <<0x84, 100, 2, 0xC0, 4, 0xA1, "z", 3, 0xA1, "a", 1>> + + assert_encode(map, expected_binary) + end + + test "applies sorting to nested maps, by default" do + map1 = %{b: %{y: 2, x: 1}, a: 10} + map2 = %{a: 10, b: %{x: 1, y: 2}} + + {:ok, expected_binary} = Msgpack.encode(map2) + + assert_encode(map1, expected_binary) + end + + test "with `deterministic: false` opts out of sorted key encoding" do + map = %{c: 1, a: 2} + + {:ok, sorted_binary} = Msgpack.encode(map) + + assert sorted_binary == <<0x82, 0xA1, "a", 2, 0xA1, "c", 1>> + + {:ok, unsorted_binary} = Msgpack.encode(map, deterministic: false) + + refute unsorted_binary == sorted_binary + end end describe "decode/2" do @@ -357,8 +395,8 @@ defmodule MsgpackTest do # ==== Helpers ==== - defp assert_encode(input, expected_binary) do - assert Msgpack.encode(input) == {:ok, expected_binary} + defp assert_encode(input, expected_binary, opts \\ []) do + assert Msgpack.encode(input, opts) == {:ok, expected_binary} end defp assert_encode_error(input, expected_reason, opts \\ []) do From 38bc7e245708041a85bdfe83df010c310c558743 Mon Sep 17 00:00:00 2001 From: Vandern Rodrigues Date: Sun, 10 Aug 2025 20:59:13 +0100 Subject: [PATCH 2/6] Update @doc for encode/2 --- lib/msgpack.ex | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/msgpack.ex b/lib/msgpack.ex index 1898d3a..ddd0e8a 100644 --- a/lib/msgpack.ex +++ b/lib/msgpack.ex @@ -87,6 +87,12 @@ defmodule Msgpack do payload with non-UTF-8 strings, which may be incompatible with other MessagePack decoders. + * `:deterministic` - Controls whether map keys are sorted before encoding. + * `true` (default) - Enables key sorting, which ensures that encoding the + same map always produces the same binary. + * `false` - Disables key sorting, which can provide a performance gain in + cases where determinism is not required. 
+ ## Examples ### Standard Encoding From d7a6e34d271faec525a04c7c387a15670b0dc112 Mon Sep 17 00:00:00 2001 From: Vandern Rodrigues Date: Sun, 10 Aug 2025 21:10:48 +0100 Subject: [PATCH 3/6] Update README --- README.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/README.md b/README.md index ff314ea..892d3e3 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,36 @@ iex> encoded_stream |> Stream.take(3) |> Enum.to_list() ] ``` +### Map Encoding + +By default, `Msgpack.encode/2` serializes Elixir maps in a **deterministic** +manner. + +It achieves this by sorting the map keys according to Elixir's standard term +ordering before encoding. This ensures that encoding the same map will always +produce the exact same binary output, which is critical for tasks like +generating signatures or comparing hashes. + +```elixir +iex> map1 = %{a: 1, b: 2} +iex> map2 = %{b: 2, a: 1} + +# Both produce the same output because their keys are sorted [:a, :b] +iex> Msgpack.encode!(map1) == Msgpack.encode!(map2) +true +``` + +#### Performance Opt-Out + +Sorting keys has a performance cost (O(N log N)). 
+ +If you are working in a performance-critical context where byte-for-byte +determinism is not required, you can disable key sorting: + +```elixir +Msgpack.encode(map, deterministic: false) +``` + ## Full Documentation For detailed information on all features, options, and functions, see the [full From 01669c0beb8f94514dea332e500ad2b56f47c8d8 Mon Sep 17 00:00:00 2001 From: Vandern Rodrigues Date: Sun, 10 Aug 2025 21:11:15 +0100 Subject: [PATCH 4/6] Update library version to v2.0.0 --- mix.exs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mix.exs b/mix.exs index f5699db..9386637 100644 --- a/mix.exs +++ b/mix.exs @@ -1,7 +1,7 @@ defmodule MsgpackElixir.MixProject do use Mix.Project - @version "1.1.1" + @version "2.0.0" @source_url "https://github.com/nrednav/msgpack_elixir" def project do From 9a389aed2fbf381490b8678a366a6ee6c97ffa2b Mon Sep 17 00:00:00 2001 From: Vandern Rodrigues Date: Sun, 10 Aug 2025 21:16:22 +0100 Subject: [PATCH 5/6] Update changelog --- CHANGELOG.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eebde94..f92cbcb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [v2.0.0] - 2025-08-10 + +### Changed + +- **BREAKING:** Map encoding is now deterministic by default + - `Msgpack.encode/2` sorts map keys according to Elixir's standard term + ordering before serialization + - This guarantees that identical maps produce identical binary output, but it + alters the output compared to previous versions of this library + +### Added + +- Added a `:deterministic` option to `Msgpack.encode/2` + - You can set this to `false` to disable key sorting for higher performance in + contexts where deterministic output is not required. 
+ ## [v1.1.1] - 2025-08-09 ### Fixed From 121f4fb2cad95a5e05c203d2a663a69ed6f495e5 Mon Sep 17 00:00:00 2001 From: Vandern Rodrigues Date: Sun, 10 Aug 2025 21:34:00 +0100 Subject: [PATCH 6/6] Update test for `deterministic: false` to be more robust --- test/msgpack_test.exs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/test/msgpack_test.exs b/test/msgpack_test.exs index 0c6a851..b65cdf7 100644 --- a/test/msgpack_test.exs +++ b/test/msgpack_test.exs @@ -104,15 +104,26 @@ defmodule MsgpackTest do end test "with `deterministic: false` opts out of sorted key encoding" do - map = %{c: 1, a: 2} - - {:ok, sorted_binary} = Msgpack.encode(map) + # Per the Erlang docs: + # https://www.erlang.org/doc/system/maps.html#how-large-maps-are-implemented, + # maps with 32 or fewer elements are internally stored with sorted keys. + # To reliably test the non-deterministic path, a large map (33+ elements) + # must be used, which uses a HAMT implementation and does not iterate in + # key-sorted order. + large_map = + Enum.into(1..33, %{}, fn i -> + key = String.to_atom(<<123 - i>> <> "_#{i}") + {key, i} + end) - assert sorted_binary == <<0x82, 0xA1, "a", 2, 0xA1, "c", 1>> + assert map_size(large_map) == 33 - {:ok, unsorted_binary} = Msgpack.encode(map, deterministic: false) + {:ok, sorted_binary} = Msgpack.encode(large_map) + {:ok, unsorted_binary} = Msgpack.encode(large_map, deterministic: false) - refute unsorted_binary == sorted_binary + refute unsorted_binary == sorted_binary, + "Expected binaries to be different, but both were identical. The + non-deterministic path may be producing sorted output." end end