From aae5db4c403875ade5b60117366add4730b32af7 Mon Sep 17 00:00:00 2001 From: Pawel Zmarzly Date: Fri, 13 Feb 2026 18:25:47 +0000 Subject: [PATCH 1/5] lib: add bf_vector Dynamically-sized array of fixed-size elements. Elements are stored inline in a single contiguous allocation. The vector doubles in capacity on overflow. --- .clang-format | 2 +- src/libbpfilter/CMakeLists.txt | 2 + src/libbpfilter/include/bpfilter/vector.h | 167 ++++++++++++++++++ src/libbpfilter/vector.c | 148 ++++++++++++++++ tests/unit/CMakeLists.txt | 3 +- tests/unit/libbpfilter/vector.c | 199 ++++++++++++++++++++++ 6 files changed, 519 insertions(+), 2 deletions(-) create mode 100644 src/libbpfilter/include/bpfilter/vector.h create mode 100644 src/libbpfilter/vector.c create mode 100644 tests/unit/libbpfilter/vector.c diff --git a/.clang-format b/.clang-format index b5cbbd0ad..00d83fdc8 100644 --- a/.clang-format +++ b/.clang-format @@ -59,7 +59,7 @@ EmptyLineAfterAccessModifier: Never EmptyLineBeforeAccessModifier: Always ExperimentalAutoDetectBinPacking: false FixNamespaceComments: true -ForEachMacros: ['bf_list_foreach', 'bf_list_foreach_rev', 'bf_rpack_array_foreach'] +ForEachMacros: ['bf_list_foreach', 'bf_list_foreach_rev', 'bf_rpack_array_foreach', 'bf_vector_foreach'] IncludeBlocks: Regroup IncludeCategories: # net/if.h needs to be included BEFORE linux/if.h to avoid conflicts diff --git a/src/libbpfilter/CMakeLists.txt b/src/libbpfilter/CMakeLists.txt index be6a5205c..d490bdbe8 100644 --- a/src/libbpfilter/CMakeLists.txt +++ b/src/libbpfilter/CMakeLists.txt @@ -29,6 +29,7 @@ set(libbpfilter_srcs ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/rule.h ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/runtime.h ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/set.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/vector.h ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/verdict.h # Private sources and headers @@ -53,6 +54,7 @@ set(libbpfilter_srcs ${CMAKE_CURRENT_SOURCE_DIR}/response.c 
${CMAKE_CURRENT_SOURCE_DIR}/rule.c ${CMAKE_CURRENT_SOURCE_DIR}/set.c + ${CMAKE_CURRENT_SOURCE_DIR}/vector.c ${CMAKE_CURRENT_SOURCE_DIR}/verdict.c ${CMAKE_CURRENT_SOURCE_DIR}/version.c diff --git a/src/libbpfilter/include/bpfilter/vector.h b/src/libbpfilter/include/bpfilter/vector.h new file mode 100644 index 000000000..7768a6c2c --- /dev/null +++ b/src/libbpfilter/include/bpfilter/vector.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2026 Meta Platforms, Inc. and affiliates. + */ + +#pragma once + +#include <stddef.h> + +/** + * @file vector.h + * + * Dynamically-sized array of fixed-size elements, backed by a single + * contiguous allocation. Elements are stored inline (not as pointers), + * so the caller decides the element type and size at initialization. + */ + +struct bf_vector; + +#define _free_bf_vector_ __attribute__((cleanup(bf_vector_free))) +#define _clean_bf_vector_ __attribute__((cleanup(bf_vector_clean))) + +/** + * @struct bf_vector + * + * @var bf_vector::data + * Backing buffer. NULL when no buffer is currently allocated. + * @var bf_vector::len + * Number of elements currently stored. + * @var bf_vector::cap + * Number of elements that can be stored before a reallocation is needed. + * @var bf_vector::elem_size + * Size of a single element in bytes. + */ +struct bf_vector +{ + void *data; + size_t len; + size_t cap; + size_t elem_size; +}; + +/** + * @brief Returns an empty @ref bf_vector for elements of size @p esz. + * + * @param esz Size of a single element in bytes. + * @return An empty @ref bf_vector (no buffer, zero length and capacity). + */ +#define bf_vector_default(esz) \ + (struct bf_vector) \ + { \ + .data = NULL, .len = 0, .cap = 0, .elem_size = (esz) \ + } + +/** + * @brief Iterate over every element of a @ref bf_vector. + * + * @p elem is declared as a `void *` and will point to each + * element in turn. Safe to break out of but not to remove elements during + * iteration. 
+ * + * @param vec Pointer to the vector. Must be non-NULL. + * @param elem Name of the iteration variable. Will be declared as a + * `void *` and cast by the caller. + */ +#define bf_vector_foreach(vec, elem) \ + for (void *(elem) = (vec)->data; \ + (elem) && (elem) < (void *)((char *)(vec)->data + \ + (vec)->len * (vec)->elem_size); \ + (elem) = (char *)(elem) + (vec)->elem_size) + +/** + * @brief Allocate and initialise a new vector on the heap. + * + * @param vec Pointer to the vector pointer. Must be non-NULL. On failure, + * `*vec` is unchanged. + * @param elem_size Size of a single element in bytes. Must be > 0. + * @return 0 on success, or a negative errno value on failure. + */ +int bf_vector_new(struct bf_vector **vec, size_t elem_size); + +/** + * @brief Free a heap-allocated vector. + * + * @param vec Pointer to the vector pointer. Must be non-NULL. + */ +void bf_vector_free(struct bf_vector **vec); + +/** + * @brief Clean up a vector, freeing its backing buffer. + * + * After this call the vector can be reused (e.g. by re-assigning via + * @ref bf_vector_default) or discarded. + * + * @param vec Pointer to the vector. Must be non-NULL. + */ +void bf_vector_clean(struct bf_vector *vec); + +/** + * @brief Get the number of elements in the vector. + * + * @param vec Initialised vector. Must be non-NULL. + * @return Number of elements stored. + */ +size_t bf_vector_len(const struct bf_vector *vec); + +/** + * @brief Get the current capacity. + * + * @param vec Initialised vector. Must be non-NULL. + * @return Number of elements that fit without reallocation. + */ +size_t bf_vector_cap(const struct bf_vector *vec); + +/** + * @brief Get a pointer to the n-th element. + * + * @param vec Initialised vector. Must be non-NULL. + * @param index Index of the element. Must be < @ref bf_vector_len. + * @return Pointer to the element, or NULL if @p index is out of bounds. 
+ */ +void *bf_vector_get(const struct bf_vector *vec, size_t index); + +/** + * @brief Append an element to the end of the vector, growing it if necessary. + * + * The element is copied from @p elem into the vector's backing buffer. + * + * @param vec Initialised vector. Must be non-NULL. + * @param elem Pointer to the element to copy in. Must be non-NULL and point + * to at least @c vec->elem_size bytes. + * @return 0 on success, or a negative errno value on failure. + */ +int bf_vector_add(struct bf_vector *vec, const void *elem); + +/** + * @brief Resize the vector's backing buffer to hold exactly @p new_cap elements. + * + * @p new_cap must be >= the current length. If @p new_cap is 0 the backing + * buffer is freed. + * + * @param vec Initialised vector. Must be non-NULL. + * @param new_cap New capacity (in number of elements). + * @return 0 on success, or a negative errno value on failure. + */ +int bf_vector_resize(struct bf_vector *vec, size_t new_cap); + +/** + * @brief Get a pointer to the backing buffer. + * + * @param vec Initialised vector. Must be non-NULL. + * @return Pointer to the first byte, or NULL if no backing buffer is + * currently allocated. + */ +void *bf_vector_data(const struct bf_vector *vec); + +/** + * @brief Set the number of live elements. + * + * @p len must be <= the current capacity. No initialization of the new + * elements is performed. + * + * @param vec Initialised vector. Must be non-NULL. + * @param len New element count. + * @return 0 on success, or -EINVAL if @p len exceeds the capacity. + */ +int bf_vector_set_len(struct bf_vector *vec, size_t len); diff --git a/src/libbpfilter/vector.c b/src/libbpfilter/vector.c new file mode 100644 index 000000000..7328652d8 --- /dev/null +++ b/src/libbpfilter/vector.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
+ */ + +#include "bpfilter/vector.h" + +#include +#include +#include +#include + +#include "bpfilter/helper.h" + +#define _BF_VECTOR_INIT_CAP 8 + +int bf_vector_new(struct bf_vector **vec, size_t elem_size) +{ + _free_bf_vector_ struct bf_vector *_vec = NULL; + + assert(vec); + assert(elem_size); + + _vec = calloc(1, sizeof(*_vec)); + if (!_vec) + return -ENOMEM; + + _vec->elem_size = elem_size; + + *vec = TAKE_PTR(_vec); + + return 0; +} + +void bf_vector_free(struct bf_vector **vec) +{ + assert(vec); + + if (!*vec) + return; + + bf_vector_clean(*vec); + free(*vec); + *vec = NULL; +} + +void bf_vector_clean(struct bf_vector *vec) +{ + assert(vec); + + freep((void *)&vec->data); + vec->len = 0; + vec->cap = 0; +} + +size_t bf_vector_len(const struct bf_vector *vec) +{ + assert(vec); + return vec->len; +} + +size_t bf_vector_cap(const struct bf_vector *vec) +{ + assert(vec); + return vec->cap; +} + +void *bf_vector_get(const struct bf_vector *vec, size_t index) +{ + assert(vec); + + if (index >= vec->len) + return NULL; + + return (char *)vec->data + (index * vec->elem_size); +} + +int bf_vector_add(struct bf_vector *vec, const void *elem) +{ + int r; + + assert(vec); + assert(elem); + + if (vec->len == vec->cap) { + if (vec->cap > SIZE_MAX / 2) + return -ENOMEM; + + size_t new_cap = vec->cap ? 
vec->cap * 2 : _BF_VECTOR_INIT_CAP; + + r = bf_vector_resize(vec, new_cap); + if (r) + return r; + } + + memcpy((char *)vec->data + (vec->len * vec->elem_size), elem, + vec->elem_size); + ++vec->len; + + return 0; +} + +int bf_vector_resize(struct bf_vector *vec, size_t new_cap) +{ + int r; + + assert(vec); + + if (new_cap < vec->len) + return -EINVAL; + + if (new_cap == 0) { + freep((void *)&vec->data); + vec->cap = 0; + return 0; + } + + size_t alloc_size; + + if (__builtin_mul_overflow(new_cap, vec->elem_size, &alloc_size)) + return -ENOMEM; + + r = bf_realloc(&vec->data, alloc_size); + if (r) + return r; + + vec->cap = new_cap; + + return 0; +} + +void *bf_vector_data(const struct bf_vector *vec) +{ + assert(vec); + return vec->data; +} + +int bf_vector_set_len(struct bf_vector *vec, size_t len) +{ + assert(vec); + + if (len > vec->cap) + return -EINVAL; + + vec->len = len; + + return 0; +} diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 3bb77535f..6c6c2b212 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -92,5 +92,6 @@ bf_add_c_test(unit libbpfilter/request.c) bf_add_c_test(unit libbpfilter/response.c) bf_add_c_test(unit libbpfilter/rule.c) bf_add_c_test(unit libbpfilter/set.c) +bf_add_c_test(unit libbpfilter/vector.c) bf_add_c_test(unit libbpfilter/verdict.c) -bf_add_c_test(unit libbpfilter/version.c) \ No newline at end of file +bf_add_c_test(unit libbpfilter/version.c) diff --git a/tests/unit/libbpfilter/vector.c b/tests/unit/libbpfilter/vector.c new file mode 100644 index 000000000..79ddd23a3 --- /dev/null +++ b/tests/unit/libbpfilter/vector.c @@ -0,0 +1,199 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 
+ */ + +#include +#include + +#include "test.h" + +static void new_and_free(void **state) +{ + (void)state; + + { + // Allocate and free, empty vector + struct bf_vector *vec; + + assert_ok(bf_vector_new(&vec, sizeof(int))); + assert_int_equal(bf_vector_len(vec), 0); + assert_int_equal(bf_vector_cap(vec), 0); + bf_vector_free(&vec); + assert_null(vec); + } + + { + // Auto-free via cleanup attribute + _free_bf_vector_ struct bf_vector *vec = NULL; + + assert_ok(bf_vector_new(&vec, sizeof(int))); + assert_int_equal(bf_vector_len(vec), 0); + } +} + +static void init_and_clean(void **state) +{ + _clean_bf_vector_ struct bf_vector vec = bf_vector_default(sizeof(int)); + + (void)state; + + assert_int_equal(bf_vector_len(&vec), 0); + assert_int_equal(bf_vector_cap(&vec), 0); + assert_null(vec.data); + + int val = 42; + assert_ok(bf_vector_add(&vec, &val)); + assert_int_equal(bf_vector_len(&vec), 1); + bf_vector_clean(&vec); + assert_int_equal(bf_vector_len(&vec), 0); + assert_int_equal(bf_vector_cap(&vec), 0); + assert_null(vec.data); +} + +static void default_macro(void **state) +{ + _clean_bf_vector_ struct bf_vector vec = bf_vector_default(sizeof(int)); + + (void)state; + + assert_int_equal(vec.len, 0); + assert_int_equal(vec.cap, 0); + assert_int_equal(vec.elem_size, sizeof(int)); + assert_null(vec.data); +} + +static void add_and_get(void **state) +{ + _clean_bf_vector_ struct bf_vector vec = bf_vector_default(sizeof(int)); + + (void)state; + + for (int i = 0; i < 100; ++i) + assert_ok(bf_vector_add(&vec, &i)); + + assert_int_equal(bf_vector_len(&vec), 100); + assert_int_gte(bf_vector_cap(&vec), 100); + + for (int i = 0; i < 100; ++i) { + int *p = bf_vector_get(&vec, i); + assert_non_null(p); + assert_int_equal(*p, i); + } + + // Out of bounds returns NULL + assert_null(bf_vector_get(&vec, 100)); + assert_null(bf_vector_get(&vec, 9999)); +} + +static void foreach(void **state) +{ + _clean_bf_vector_ struct bf_vector vec = bf_vector_default(sizeof(int)); + int 
expected = 0; + + (void)state; + + for (int i = 0; i < 50; ++i) + assert_ok(bf_vector_add(&vec, &i)); + + bf_vector_foreach (&vec, elem) { + assert_int_equal(*(int *)elem, expected); + ++expected; + } + + assert_int_equal(expected, 50); +} + +static void foreach_empty(void **state) +{ + _clean_bf_vector_ struct bf_vector vec = bf_vector_default(sizeof(int)); + int count = 0; + + (void)state; + + bf_vector_foreach (&vec, elem) { + (void)elem; + ++count; + } + + assert_int_equal(count, 0); +} + +static void resize(void **state) +{ + _clean_bf_vector_ struct bf_vector vec = bf_vector_default(sizeof(int)); + + (void)state; + + // Resize up from empty + assert_ok(bf_vector_resize(&vec, 32)); + assert_int_equal(bf_vector_cap(&vec), 32); + assert_int_equal(bf_vector_len(&vec), 0); + + // Add some elements + for (int i = 0; i < 10; ++i) + assert_ok(bf_vector_add(&vec, &i)); + + assert_int_equal(bf_vector_len(&vec), 10); + + // Shrink to fit + assert_ok(bf_vector_resize(&vec, 10)); + assert_int_equal(bf_vector_cap(&vec), 10); + assert_int_equal(bf_vector_len(&vec), 10); + + // Data is preserved after resize + for (int i = 0; i < 10; ++i) + assert_int_equal(*(int *)bf_vector_get(&vec, i), i); + + // Can't shrink below current length + assert_err(bf_vector_resize(&vec, 5)); + assert_int_equal(bf_vector_cap(&vec), 10); + + // Resize to 0 when empty + bf_vector_clean(&vec); + vec = bf_vector_default(sizeof(int)); + assert_ok(bf_vector_resize(&vec, 0)); + assert_int_equal(bf_vector_cap(&vec), 0); + assert_null(vec.data); +} + +static void large_elements(void **state) +{ + struct big + { + char buf[256]; + }; + + _clean_bf_vector_ struct bf_vector vec = + bf_vector_default(sizeof(struct big)); + + (void)state; + + for (int i = 0; i < 20; ++i) { + struct big b; + memset(b.buf, i, sizeof(b.buf)); + assert_ok(bf_vector_add(&vec, &b)); + } + + assert_int_equal(bf_vector_len(&vec), 20); + + for (int i = 0; i < 20; ++i) { + struct big *p = bf_vector_get(&vec, i); + assert_non_null(p); 
+ + for (size_t j = 0; j < sizeof(p->buf); ++j) + assert_int_equal((unsigned char)p->buf[j], (unsigned char)i); + } +} + +int main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(new_and_free), cmocka_unit_test(init_and_clean), + cmocka_unit_test(default_macro), cmocka_unit_test(add_and_get), + cmocka_unit_test(foreach), cmocka_unit_test(foreach_empty), + cmocka_unit_test(resize), cmocka_unit_test(large_elements), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} From 48e020970db25aa65f4373ef65c687ae4d9e4dbd Mon Sep 17 00:00:00 2001 From: yaakov-stein Date: Tue, 10 Feb 2026 14:10:20 -0800 Subject: [PATCH 2/5] lib: helper: add FNV-1a hash function A simple data hashing function. --- src/libbpfilter/helper.c | 14 +++++++++++++ src/libbpfilter/include/bpfilter/helper.h | 20 +++++++++++++++++++ tests/unit/libbpfilter/helper.c | 24 +++++++++++++++++++++++ 3 files changed, 58 insertions(+) diff --git a/src/libbpfilter/helper.c b/src/libbpfilter/helper.c index 64ae9d230..cae1adf13 100644 --- a/src/libbpfilter/helper.c +++ b/src/libbpfilter/helper.c @@ -195,3 +195,17 @@ char *bf_trim(char *str) return bf_rtrim(bf_ltrim(str)); } + +uint64_t bf_fnv1a(const void *data, size_t len, uint64_t hash) +{ + assert(data); + + const uint8_t *bytes = data; + + for (size_t i = 0; i < len; ++i) { + hash ^= bytes[i]; + hash *= BF_FNV1A_PRIME; + } + + return hash; +} diff --git a/src/libbpfilter/include/bpfilter/helper.h b/src/libbpfilter/include/bpfilter/helper.h index fa2ef7de5..ca0c315b0 100644 --- a/src/libbpfilter/include/bpfilter/helper.h +++ b/src/libbpfilter/include/bpfilter/helper.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -394,3 +395,22 @@ int bf_read_file(const char *path, void **buf, size_t *len); * @return 0 on success, negative errno value on error. */ int bf_write_file(const char *path, const void *buf, size_t len); + +/// FNV-1a 64-bit offset basis. 
+/// @see https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function +#define BF_FNV1A_INIT 0xcbf29ce484222325ULL +/// FNV-1a 64-bit prime. +#define BF_FNV1A_PRIME 0x100000001b3ULL + +/** + * @brief Compute a FNV-1a 64-bit hash. + * + * Pass @ref BF_FNV1A_INIT as @p hash for the initial call. To hash + * multiple fields, chain calls by passing the previous return value. + * + * @param data Data to hash. Can't be NULL. + * @param len Number of bytes to hash. + * @param hash Initial or chained hash value. + * @return Updated hash value. + */ +uint64_t bf_fnv1a(const void *data, size_t len, uint64_t hash); diff --git a/tests/unit/libbpfilter/helper.c b/tests/unit/libbpfilter/helper.c index 8897e9269..6d5720d69 100644 --- a/tests/unit/libbpfilter/helper.c +++ b/tests/unit/libbpfilter/helper.c @@ -356,12 +356,36 @@ static void overwrite_existing_file(void **state) assert_memory_equal(read_buf, second_data, strlen(second_data)); } +static void fnv1a_hash(void **state) +{ + uint32_t val_a = 42; + uint32_t val_b = 99; + uint64_t hash_a; + uint64_t hash_b; + uint64_t hash_ab; + uint64_t hash_ba; + + (void)state; + + hash_a = bf_fnv1a(&val_a, sizeof(val_a), BF_FNV1A_INIT); + hash_b = bf_fnv1a(&val_b, sizeof(val_b), BF_FNV1A_INIT); + + assert_int_equal(hash_a, bf_fnv1a(&val_a, sizeof(val_a), BF_FNV1A_INIT)); + assert_int_not_equal(hash_a, hash_b); + + // Chaining: order matters for sequential hashing + hash_ab = bf_fnv1a(&val_b, sizeof(val_b), hash_a); + hash_ba = bf_fnv1a(&val_a, sizeof(val_a), hash_b); + assert_int_not_equal(hash_ab, hash_ba); +} + int main(void) { const struct CMUnitTest tests[] = { cmocka_unit_test(close_fd), cmocka_unit_test(string_copy), cmocka_unit_test(realloc_mem), + cmocka_unit_test(fnv1a_hash), cmocka_unit_test(trim_left), cmocka_unit_test(trim_right), cmocka_unit_test(trim_both), From 55257e00e7698d7dd83a184629db3a7d058c74ff Mon Sep 17 00:00:00 2001 From: Pawel Zmarzly Date: Tue, 17 Feb 2026 01:31:37 +0000 Subject: [PATCH 3/5] lib: add 
bf_set_foreach and bf_set_size Callers previously accessed the inner bf_list directly. Provide proper iteration and size accessors so the set implementation can be swapped without touching every call site. --- .clang-format | 2 +- src/bfcli/print.c | 43 +++++++++++++------------- src/bpfilter/cgen/prog/map.c | 2 +- src/bpfilter/cgen/program.c | 6 ++-- src/libbpfilter/include/bpfilter/set.h | 31 +++++++++++++++++++ src/libbpfilter/set.c | 43 ++++++++++++++++++++------ tests/harness/test.c | 2 +- tests/unit/libbpfilter/set.c | 16 +++++----- 8 files changed, 100 insertions(+), 45 deletions(-) diff --git a/.clang-format b/.clang-format index 00d83fdc8..af6437447 100644 --- a/.clang-format +++ b/.clang-format @@ -59,7 +59,7 @@ EmptyLineAfterAccessModifier: Never EmptyLineBeforeAccessModifier: Always ExperimentalAutoDetectBinPacking: false FixNamespaceComments: true -ForEachMacros: ['bf_list_foreach', 'bf_list_foreach_rev', 'bf_rpack_array_foreach', 'bf_vector_foreach'] +ForEachMacros: ['bf_list_foreach', 'bf_list_foreach_rev', 'bf_rpack_array_foreach', 'bf_set_foreach', 'bf_vector_foreach'] IncludeBlocks: Regroup IncludeCategories: # net/if.h needs to be included BEFORE linux/if.h to avoid conflicts diff --git a/src/bfcli/print.c b/src/bfcli/print.c index 9e07ed52b..9a9d2ce76 100644 --- a/src/bfcli/print.c +++ b/src/bfcli/print.c @@ -148,31 +148,31 @@ void bfc_chain_dump(struct bf_chain *chain, struct bf_hookopts *hookopts, bf_list_foreach (&chain->sets, set_node) { struct bf_set *set = bf_list_node_get_data(set_node); - if (!set->name) + if (!bf_set_get_name(set)) continue; - (void)fprintf(stdout, " set %s (", set->name); - for (size_t i = 0; i < set->n_comps; ++i) { - (void)fprintf(stdout, "%s", bf_matcher_type_to_str(set->key[i])); + (void)fprintf(stdout, " set %s (", bf_set_get_name(set)); + for (size_t i = 0; i < bf_set_get_n_comps(set); ++i) { + (void)fprintf(stdout, "%s", + bf_matcher_type_to_str(bf_set_get_key_comp(set, i))); - if (i != set->n_comps - 1) + if (i 
!= bf_set_get_n_comps(set) - 1) (void)fprintf(stdout, ", "); } (void)fprintf(stdout, ") in {\n"); - bf_list_foreach (&set->elems, elem_node) { + bf_set_foreach (set, payload) { uint32_t payload_idx = 0; - void *payload = bf_list_node_get_data(elem_node); (void)fprintf(stdout, " "); - for (size_t i = 0; i < set->n_comps; ++i) { + for (size_t i = 0; i < bf_set_get_n_comps(set); ++i) { const struct bf_matcher_meta *meta = - bf_matcher_get_meta(set->key[i]); + bf_matcher_get_meta(bf_set_get_key_comp(set, i)); meta->ops[BF_MATCHER_IN].print(payload + payload_idx); payload_idx += meta->ops[BF_MATCHER_IN].ref_payload_size; - if (i != set->n_comps - 1) + if (i != bf_set_get_n_comps(set) - 1) (void)fprintf(stdout, ", "); } (void)fprintf(stdout, "\n"); @@ -196,34 +196,35 @@ void bfc_chain_dump(struct bf_chain *chain, struct bf_hookopts *hookopts, bf_chain_get_set_for_matcher(chain, matcher); (void)fprintf(stdout, " ("); - for (size_t i = 0; i < set->n_comps; ++i) { - (void)fprintf(stdout, "%s", - bf_matcher_type_to_str(set->key[i])); + for (size_t i = 0; i < bf_set_get_n_comps(set); ++i) { + (void)fprintf( + stdout, "%s", + bf_matcher_type_to_str(bf_set_get_key_comp(set, i))); - if (i != set->n_comps - 1) + if (i != bf_set_get_n_comps(set) - 1) (void)fprintf(stdout, ", "); } - if (set->name) { - (void)fprintf(stdout, ") in %s", set->name); + if (bf_set_get_name(set)) { + (void)fprintf(stdout, ") in %s", bf_set_get_name(set)); } else { (void)fprintf(stdout, ") in {\n"); - bf_list_foreach (&set->elems, elem_node) { + bf_set_foreach (set, payload) { uint32_t payload_idx = 0; - void *payload = bf_list_node_get_data(elem_node); (void)fprintf(stdout, " "); - for (size_t i = 0; i < set->n_comps; ++i) { + for (size_t i = 0; i < bf_set_get_n_comps(set); ++i) { const struct bf_matcher_meta *meta = - bf_matcher_get_meta(set->key[i]); + bf_matcher_get_meta( + bf_set_get_key_comp(set, i)); meta->ops[BF_MATCHER_IN].print(payload + payload_idx); payload_idx += 
meta->ops[BF_MATCHER_IN].ref_payload_size; - if (i != set->n_comps - 1) + if (i != bf_set_get_n_comps(set) - 1) (void)fprintf(stdout, ", "); } (void)fprintf(stdout, "\n"); diff --git a/src/bpfilter/cgen/prog/map.c b/src/bpfilter/cgen/prog/map.c index f52aa3d89..405e093cc 100644 --- a/src/bpfilter/cgen/prog/map.c +++ b/src/bpfilter/cgen/prog/map.c @@ -221,7 +221,7 @@ int bf_map_new_from_set(struct bf_map **map, const char *name, return _bf_map_new(map, name, BF_MAP_TYPE_SET, set->use_trie ? BF_BPF_MAP_TYPE_LPM_TRIE : BF_BPF_MAP_TYPE_HASH, - set->elem_size, 1, bf_list_size(&set->elems)); + set->elem_size, 1, bf_set_size(set)); } int bf_map_new_from_pack(struct bf_map **map, int dir_fd, bf_rpack_node_t node) diff --git a/src/bpfilter/cgen/program.c b/src/bpfilter/cgen/program.c index 9f98f64e0..acdbf7110 100644 --- a/src/bpfilter/cgen/program.c +++ b/src/bpfilter/cgen/program.c @@ -697,7 +697,7 @@ static int _bf_program_load_sets_maps(struct bf_program *new_prog) _free_bf_map_ struct bf_map *map = NULL; _cleanup_free_ uint8_t *values = NULL; _cleanup_free_ uint8_t *keys = NULL; - size_t nelems = bf_list_size(&set->elems); + size_t nelems = bf_set_size(set); size_t idx = 0; if (!nelems) { @@ -721,9 +721,7 @@ static int _bf_program_load_sets_maps(struct bf_program *new_prog) if (!keys) return bf_err_r(errno, "failed to allocate map keys"); - bf_list_foreach (&set->elems, elem_node) { - void *elem = bf_list_node_get_data(elem_node); - + bf_set_foreach (set, elem) { memcpy(keys + (idx * set->elem_size), elem, set->elem_size); values[idx] = 1; ++idx; diff --git a/src/libbpfilter/include/bpfilter/set.h b/src/libbpfilter/include/bpfilter/set.h index dbc57daa9..24dcc0c45 100644 --- a/src/libbpfilter/include/bpfilter/set.h +++ b/src/libbpfilter/include/bpfilter/set.h @@ -65,6 +65,24 @@ struct bf_set bool use_trie; }; +/** + * @brief Iterate over the elements of a set. + * + * @param set Pointer to the set to iterate over. Must be non-NULL. 
+ * @param elem_var Name of the variable containing the current element data + * (as `void *`). This variable will be created automatically. + */ +#define bf_set_foreach(set, elem_var) \ + for (bf_list_node *_bf_set_node = (set)->elems.head, \ + *_bf_set_next = _bf_set_node ? _bf_set_node->next : \ + NULL, \ + *_bf_set_brk = NULL; \ + _bf_set_node; _bf_set_node = _bf_set_brk ? NULL : _bf_set_next, \ + _bf_set_next = _bf_set_node ? _bf_set_node->next : NULL) \ + for (void *(elem_var) = (_bf_set_brk = (void *)1, \ + bf_list_node_get_data(_bf_set_node)); \ + _bf_set_brk; _bf_set_brk = NULL) + /** * @brief Allocate and initialise a new set. * @@ -126,6 +144,19 @@ void bf_set_dump(const struct bf_set *set, prefix_t *prefix); */ bool bf_set_is_empty(const struct bf_set *set); +/** + * @brief Get the number of elements in a set. + * + * @param set Initialised set. Can't be NULL. + * @return Number of elements in the set. + */ +size_t bf_set_size(const struct bf_set *set); + +const char *bf_set_get_name(const struct bf_set *set); +size_t bf_set_get_n_comps(const struct bf_set *set); +enum bf_matcher_type bf_set_get_key_comp(const struct bf_set *set, + size_t index); + int bf_set_add_elem(struct bf_set *set, const void *elem); /** diff --git a/src/libbpfilter/set.c b/src/libbpfilter/set.c index d576c6d68..5a99098f0 100644 --- a/src/libbpfilter/set.c +++ b/src/libbpfilter/set.c @@ -346,8 +346,8 @@ int bf_set_pack(const struct bf_set *set, bf_wpack_t *pack) bf_wpack_close_array(pack); bf_wpack_open_array(pack, "elements"); - bf_list_foreach (&set->elems, elem_node) - bf_wpack_bin(pack, bf_list_node_get_data(elem_node), set->elem_size); + bf_set_foreach (set, elem) + bf_wpack_bin(pack, elem, set->elem_size); bf_wpack_close_array(pack); return bf_wpack_is_valid(pack) ? 
0 : -EINVAL; @@ -374,7 +374,7 @@ void bf_set_dump(const struct bf_set *set, prefix_t *prefix) DUMP(prefix, "elem_size: %lu", set->elem_size); DUMP(bf_dump_prefix_last(prefix), "elems: bf_list[%lu]", - bf_list_size(&set->elems)); + bf_set_size(set)); bf_dump_prefix_push(prefix); bf_list_foreach (&set->elems, elem_node) { @@ -421,6 +421,35 @@ bool bf_set_is_empty(const struct bf_set *set) return bf_list_is_empty(&set->elems); } +size_t bf_set_size(const struct bf_set *set) +{ + assert(set); + + return bf_list_size(&set->elems); +} + +const char *bf_set_get_name(const struct bf_set *set) +{ + assert(set); + + return set->name; +} + +size_t bf_set_get_n_comps(const struct bf_set *set) +{ + assert(set); + + return set->n_comps; +} + +enum bf_matcher_type bf_set_get_key_comp(const struct bf_set *set, size_t index) +{ + assert(set); + assert(index < set->n_comps); + + return set->key[index]; +} + /** * @brief Check if two sets have the same key format. * @@ -466,9 +495,7 @@ int bf_set_add_many(struct bf_set *dest, struct bf_set **to_add) void *elem_to_add = bf_list_node_get_data(elem_node); bool found = false; - bf_list_foreach (&dest->elems, dest_elem_node) { - const void *dest_elem = bf_list_node_get_data(dest_elem_node); - + bf_set_foreach (dest, dest_elem) { if (memcmp(dest_elem, elem_to_add, dest->elem_size) == 0) { found = true; break; @@ -503,9 +530,7 @@ int bf_set_remove_many(struct bf_set *dest, struct bf_set **to_remove) return r; // @todo This has O(n * m) complexity. Could be O(m) if we used hashsets. 
- bf_list_foreach (&(*to_remove)->elems, elem_node) { - const void *elem_to_remove = bf_list_node_get_data(elem_node); - + bf_set_foreach (*to_remove, elem_to_remove) { bf_list_foreach (&dest->elems, dest_elem_node) { const void *dest_elem = bf_list_node_get_data(dest_elem_node); diff --git a/tests/harness/test.c b/tests/harness/test.c index af438fa66..b0929619f 100644 --- a/tests/harness/test.c +++ b/tests/harness/test.c @@ -232,7 +232,7 @@ bool bft_set_eq(const struct bf_set *lhs, const struct bf_set *rhs) { const struct bf_list_node *n0, *n1; - if (bf_list_size(&lhs->elems) != bf_list_size(&rhs->elems)) + if (bf_set_size(lhs) != bf_set_size(rhs)) return false; if (lhs->elem_size != rhs->elem_size) diff --git a/tests/unit/libbpfilter/set.c b/tests/unit/libbpfilter/set.c index d582b01ce..3fadfa132 100644 --- a/tests/unit/libbpfilter/set.c +++ b/tests/unit/libbpfilter/set.c @@ -102,7 +102,7 @@ static void add_elem(void **state) assert_ok(bf_set_new(&set, "test", key, ARRAY_SIZE(key))); assert_ok(bf_set_add_elem(set, &elem)); - assert_int_equal(bf_list_size(&set->elems), 1); + assert_int_equal(bf_set_size(set), 1); } static void add_multiple_elems(void **state) @@ -122,7 +122,7 @@ static void add_multiple_elems(void **state) assert_ok(bf_set_add_elem(set, elem)); } - assert_int_equal(bf_list_size(&set->elems), 5); + assert_int_equal(bf_set_size(set), 5); } static void pack_and_unpack(void **state) @@ -181,7 +181,7 @@ static void pack_and_unpack_empty(void **state) assert_ok(bf_set_new_from_pack(&destination, node)); assert_true(bft_set_eq(source, destination)); - assert_int_equal(bf_list_size(&destination->elems), 0); + assert_int_equal(bf_set_size(destination), 0); } static void dump(void **state) @@ -224,7 +224,7 @@ static void new_from_raw(void **state) assert_string_equal(set->name, "test_raw"); assert_int_equal(set->n_comps, 1); assert_int_equal(set->key[0], BF_MATCHER_IP4_SADDR); - assert_int_equal(bf_list_size(&set->elems), 2); + 
assert_int_equal(bf_set_size(set), 2); } static void new_from_raw_multiple_keys(void **state) @@ -240,7 +240,7 @@ static void new_from_raw_multiple_keys(void **state) assert_int_equal(set->n_comps, 2); assert_int_equal(set->key[0], BF_MATCHER_IP4_DADDR); assert_int_equal(set->key[1], BF_MATCHER_TCP_SPORT); - assert_int_equal(bf_list_size(&set->elems), 2); + assert_int_equal(bf_set_size(set), 2); } static void new_from_raw_invalid(void **state) @@ -280,7 +280,7 @@ static void add_many_basic(void **state) assert_ok(bf_set_add_many(dest, &to_add)); - assert_int_equal(bf_list_size(&dest->elems), 3); + assert_int_equal(bf_set_size(dest), 3); assert_int_equal(*(uint32_t *)bf_list_get_at(&dest->elems, 0), elem1); assert_int_equal(*(uint32_t *)bf_list_get_at(&dest->elems, 1), elem2); assert_int_equal(*(uint32_t *)bf_list_get_at(&dest->elems, 2), elem3); @@ -347,7 +347,7 @@ static void remove_many_basic(void **state) assert_ok(bf_set_remove_many(dest, &to_remove)); - assert_int_equal(bf_list_size(&dest->elems), 2); + assert_int_equal(bf_set_size(dest), 2); assert_int_equal(*(uint32_t *)bf_list_get_at(&dest->elems, 0), elem1); assert_int_equal(*(uint32_t *)bf_list_get_at(&dest->elems, 1), elem3); assert_null(to_remove); @@ -377,7 +377,7 @@ static void remove_many_disjoint_sets(void **state) assert_ok(bf_set_add_elem(to_remove, &elem4)); assert_ok(bf_set_remove_many(dest, &to_remove)); - assert_int_equal(bf_list_size(&dest->elems), 2); + assert_int_equal(bf_set_size(dest), 2); assert_int_equal(*(uint32_t *)bf_list_get_at(&dest->elems, 0), elem1); assert_int_equal(*(uint32_t *)bf_list_get_at(&dest->elems, 1), elem2); assert_null(to_remove); From 29490bc29d379e0296b02864bcbf04a39c33682c Mon Sep 17 00:00:00 2001 From: Pawel Zmarzly Date: Tue, 17 Feb 2026 01:33:24 +0000 Subject: [PATCH 4/5] lib: add bf_hashset Open-addressing hashset with linear probing backed by bf_vector. Elements are fixed-size blobs hashed with FNV-1a and compared with memcmp. 
Removed elements leave tombstones; the table grows at 70% load but never shrinks. This provides an O(1) average-case alternative to the linked-list-based bf_set for large element counts. --- .clang-format | 2 +- src/libbpfilter/CMakeLists.txt | 2 + src/libbpfilter/hashset.c | 729 +++++++++++++++++++++ src/libbpfilter/include/bpfilter/hashset.h | 250 +++++++ tests/harness/fake.c | 26 + tests/harness/fake.h | 2 + tests/harness/test.c | 24 + tests/harness/test.h | 2 + tests/unit/CMakeLists.txt | 1 + tests/unit/libbpfilter/hashset.c | 665 +++++++++++++++++++ 10 files changed, 1702 insertions(+), 1 deletion(-) create mode 100644 src/libbpfilter/hashset.c create mode 100644 src/libbpfilter/include/bpfilter/hashset.h create mode 100644 tests/unit/libbpfilter/hashset.c diff --git a/.clang-format b/.clang-format index af6437447..e5c1b1b8e 100644 --- a/.clang-format +++ b/.clang-format @@ -59,7 +59,7 @@ EmptyLineAfterAccessModifier: Never EmptyLineBeforeAccessModifier: Always ExperimentalAutoDetectBinPacking: false FixNamespaceComments: true -ForEachMacros: ['bf_list_foreach', 'bf_list_foreach_rev', 'bf_rpack_array_foreach', 'bf_set_foreach', 'bf_vector_foreach'] +ForEachMacros: ['bf_hashset_foreach', 'bf_list_foreach', 'bf_list_foreach_rev', 'bf_rpack_array_foreach', 'bf_set_foreach', 'bf_vector_foreach'] IncludeBlocks: Regroup IncludeCategories: # net/if.h needs to be included BEFORE linux/if.h to avoid conflicts diff --git a/src/libbpfilter/CMakeLists.txt b/src/libbpfilter/CMakeLists.txt index d490bdbe8..64d6a6dc1 100644 --- a/src/libbpfilter/CMakeLists.txt +++ b/src/libbpfilter/CMakeLists.txt @@ -15,6 +15,7 @@ set(libbpfilter_srcs ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/dump.h ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/dynbuf.h ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/flavor.h + ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/hashset.h ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/helper.h ${CMAKE_CURRENT_SOURCE_DIR}/include/bpfilter/hook.h 
/**
 * @brief Compute the size of one slot in the backing vector.
 *
 * A slot is a single status byte immediately followed by the element payload.
 *
 * @param elem_size Size of the element payload, in bytes.
 * @return Size of a full slot (status byte + payload), in bytes.
 */
static size_t _bf_slot_size(size_t elem_size)
{
    const size_t status_len = sizeof(uint8_t);

    return elem_size + status_len;
}
_bf_slot_at(set, index); + + *slot = status; + if (elem) + memcpy(slot + sizeof(uint8_t), elem, set->elem_size); +} + +static size_t _bf_hashset_hash(const struct bf_hashset *set, const void *elem) +{ + return bf_fnv1a(elem, set->elem_size, BF_FNV1A_INIT) % _bf_n_slots(set); +} + +/** + * @brief Insert an element without duplicate or load-factor checks. + * + * The caller must guarantee that @p elem is not already present and that the + * table has room. Used during rehash where both invariants hold by + * construction. + */ +static void _bf_hashset_insert_unchecked(struct bf_hashset *set, + const void *elem) +{ + size_t n = _bf_n_slots(set); + size_t idx = _bf_hashset_hash(set, elem); + + while (_bf_slot_status(set, idx) == _BF_SLOT_OCCUPIED) + idx = (idx + 1) % n; + + _bf_slot_set(set, idx, _BF_SLOT_OCCUPIED, elem); + ++set->len; + ++set->n_used; +} + +static int _bf_hashset_grow(struct bf_hashset *set) +{ + size_t old_n_slots = _bf_n_slots(set); + size_t slot_size = _bf_slot_size(set->elem_size); + struct bf_vector old_slots; + size_t new_n_slots; + int r; + + if (old_n_slots > SIZE_MAX / 2) + return -ENOMEM; + + new_n_slots = old_n_slots ? 
old_n_slots * 2 : _BF_HASHSET_INIT_CAP; + + old_slots = TAKE_STRUCT(set->slots); + + set->slots = bf_vector_default(slot_size); + + r = bf_vector_resize(&set->slots, new_n_slots); + if (r) { + bf_vector_clean(&set->slots); + set->slots = old_slots; + return r; + } + + memset(bf_vector_data(&set->slots), 0, new_n_slots * slot_size); + (void)bf_vector_set_len(&set->slots, new_n_slots); + + set->len = 0; + set->n_used = 0; + + for (size_t i = 0; i < old_n_slots; ++i) { + uint8_t *old_slot = bf_vector_get(&old_slots, i); + + if (*old_slot != _BF_SLOT_OCCUPIED) + continue; + + _bf_hashset_insert_unchecked(set, old_slot + sizeof(uint8_t)); + } + + bf_vector_clean(&old_slots); + + return 0; +} + +static bool _bf_hashset_needs_grow(const struct bf_hashset *set) +{ + size_t n = _bf_n_slots(set); + + if (n == 0) + return true; + + return set->n_used * _BF_HASHSET_MAX_LOAD_DEN >= + n * _BF_HASHSET_MAX_LOAD_NUM; +} + +static bool _bf_hashset_find(const struct bf_hashset *set, const void *elem, + size_t *index) +{ + size_t n; + size_t idx; + + assert(set); + assert(elem); + + n = _bf_n_slots(set); + if (n == 0) + return false; + + idx = _bf_hashset_hash(set, elem); + + for (size_t i = 0; i < n; ++i) { + uint8_t status = _bf_slot_status(set, idx); + + if (status == _BF_SLOT_EMPTY) + return false; + + if (status == _BF_SLOT_OCCUPIED && + memcmp(_bf_slot_data(set, idx), elem, set->elem_size) == 0) { + if (index) + *index = idx; + return true; + } + + idx = (idx + 1) % n; + } + + return false; +} + +void bf_hashset_free(struct bf_hashset **set) +{ + assert(set); + + if (!*set) + return; + + bf_vector_clean(&(*set)->slots); + freep((void *)&(*set)->name); + free(*set); + *set = NULL; +} + +size_t bf_hashset_size(const struct bf_hashset *set) +{ + assert(set); + return set->len; +} + +size_t bf_hashset_cap(const struct bf_hashset *set) +{ + assert(set); + return _bf_n_slots(set); +} + +bool bf_hashset_is_empty(const struct bf_hashset *set) +{ + assert(set); + return set->len == 0; +} 
+ +const char *bf_hashset_get_name(const struct bf_hashset *set) +{ + assert(set); + return set->name; +} + +size_t bf_hashset_get_n_comps(const struct bf_hashset *set) +{ + assert(set); + return set->n_comps; +} + +enum bf_matcher_type bf_hashset_get_key_comp(const struct bf_hashset *set, + size_t index) +{ + assert(set); + assert(index < set->n_comps); + return set->key[index]; +} + +int bf_hashset_add_elem(struct bf_hashset *set, const void *elem) +{ + size_t idx; + bool was_tombstone; + int r; + + assert(set); + assert(elem); + + if (_bf_hashset_find(set, elem, NULL)) + return 0; + + if (_bf_hashset_needs_grow(set)) { + r = _bf_hashset_grow(set); + if (r) + return r; + } + + idx = _bf_hashset_hash(set, elem); + + for (size_t i = 0; + i < _bf_n_slots(set) && _bf_slot_status(set, idx) == _BF_SLOT_OCCUPIED; + ++i) + idx = (idx + 1) % _bf_n_slots(set); + + was_tombstone = _bf_slot_status(set, idx) == _BF_SLOT_TOMBSTONE; + _bf_slot_set(set, idx, _BF_SLOT_OCCUPIED, elem); + ++set->len; + + if (!was_tombstone) + ++set->n_used; + + return 0; +} + +bool bf_hashset_contains(const struct bf_hashset *set, const void *elem) +{ + assert(set); + assert(elem); + + return _bf_hashset_find(set, elem, NULL); +} + +int bf_hashset_remove(struct bf_hashset *set, const void *elem) +{ + size_t idx; + + assert(set); + assert(elem); + + if (!_bf_hashset_find(set, elem, &idx)) + return 0; + + _bf_slot_set(set, idx, _BF_SLOT_TOMBSTONE, NULL); + --set->len; + + return 0; +} + +#define _BF_HASHSET_USE_TRIE_MASK \ + (BF_FLAGS(BF_MATCHER_IP4_SNET, BF_MATCHER_IP4_DNET, BF_MATCHER_IP6_SNET, \ + BF_MATCHER_IP6_DNET)) + +int bf_hashset_new(struct bf_hashset **set, const char *name, + enum bf_matcher_type *key, size_t n_comps) +{ + _free_bf_hashset_ struct bf_hashset *_set = NULL; + uint32_t mask = 0; + size_t elem_size = 0; + + assert(set); + assert(key); + + if (n_comps == 0) + return bf_err_r(-EINVAL, "at least 1 key component is required"); + + if (n_comps > BF_HASHSET_MAX_N_COMPS) { + return 
bf_err_r(-E2BIG, + "a set key can't contain more than %d components", + BF_HASHSET_MAX_N_COMPS); + } + + for (size_t i = 0; i < n_comps; ++i) { + const struct bf_matcher_ops *ops; + + ops = bf_matcher_get_ops(key[i], BF_MATCHER_IN); + if (!ops) { + return bf_err_r(-ENOTSUP, + "matcher '%s' (%d) is not supported as a set key", + bf_matcher_type_to_str(key[i]), key[i]); + } + elem_size += ops->ref_payload_size; + mask |= BF_FLAG(key[i]); + } + + if (n_comps > 1 && mask & _BF_HASHSET_USE_TRIE_MASK) { + return bf_err_r( + -EINVAL, + "network matchers can't be used in combination with other matchers in a set"); + } + + _set = calloc(1, sizeof(*_set)); + if (!_set) + return -ENOMEM; + + _set->slots = bf_vector_default(_bf_slot_size(elem_size)); + _set->elem_size = elem_size; + + _set->name = NULL; + if (name) { + _set->name = strdup(name); + if (!_set->name) + return bf_err_r(-ENOMEM, "failed to allocate memory for set name"); + } + + memcpy(_set->key, key, n_comps * sizeof(enum bf_matcher_type)); + _set->n_comps = n_comps; + _set->use_trie = n_comps == 1 && mask & _BF_HASHSET_USE_TRIE_MASK; + + *set = TAKE_PTR(_set); + + return 0; +} + +/** + * @brief Parse a hashset's raw key into an array of @c bf_matcher_type. + * + * @param raw_key Raw set key, as a string of comma-separated matcher types + * enclosed in parentheses. Can't be NULL. + * @param key Parsed key components. Can't be NULL. + * @param n_comps Number of components written to @p key. Can't be NULL. + * @return 0 on success, or a negative errno value on failure. 
+ */ +static int _bf_hashset_parse_key(const char *raw_key, enum bf_matcher_type *key, + size_t *n_comps) +{ + _cleanup_free_ char *_raw_key = NULL; + char *tmp, *saveptr, *token; + + assert(raw_key); + assert(key); + assert(n_comps); + + _raw_key = strdup(raw_key); + if (!_raw_key) { + return bf_err_r(-ENOMEM, "failed to duplicate set raw key '%s'", + raw_key); + } + + *n_comps = 0; + + tmp = _raw_key; + while ((token = strtok_r(tmp, "(),", &saveptr))) { + int r; + + if (*n_comps == BF_HASHSET_MAX_N_COMPS) { + return bf_err_r(-E2BIG, "set keys are limited to %d components", + BF_HASHSET_MAX_N_COMPS); + } + + token = bf_trim(token); + + r = bf_matcher_type_from_str(token, &key[*n_comps]); + if (r) + return bf_err_r(r, "failed to parse set key component '%s'", token); + + tmp = NULL; + ++*n_comps; + } + + if (!*n_comps) + return bf_err_r(-EINVAL, "set key can't have no component"); + + return 0; +} + +int bf_hashset_add_elem_raw(struct bf_hashset *set, const char *raw_elem) +{ + _cleanup_free_ void *elem = NULL; + _cleanup_free_ char *_raw_elem = NULL; + char *tmp, *saveptr, *token; + size_t elem_offset = 0; + size_t comp_idx = 0; + int r; + + assert(set); + assert(raw_elem); + + _raw_elem = strdup(raw_elem); + if (!_raw_elem) { + return bf_err_r(-ENOMEM, + "failed to create a copy of the raw element '%s'", + raw_elem); + } + + elem = malloc(set->elem_size); + if (!elem) + return bf_err_r(-ENOMEM, "failed to allocate a new set element"); + + tmp = _raw_elem; + while ((token = strtok_r(tmp, ",", &saveptr))) { + const struct bf_matcher_ops *ops; + + if (comp_idx >= set->n_comps) { + return bf_err_r( + -EINVAL, + "set element has more components than defined in the key '%s'", + token); + } + + token = bf_trim(token); + + ops = bf_matcher_get_ops(set->key[comp_idx], BF_MATCHER_IN); + if (!ops) { + return bf_err_r(-EINVAL, "matcher type '%s' has no matcher_ops", + bf_matcher_type_to_str(set->key[comp_idx])); + } + + r = ops->parse(set->key[comp_idx], BF_MATCHER_IN, elem 
+ elem_offset, + token); + if (r) { + return bf_err_r(r, "failed to parse set element component '%s'", + token); + } + + elem_offset += ops->ref_payload_size; + tmp = NULL; + ++comp_idx; + } + + if (comp_idx != set->n_comps) { + return bf_err_r(-EINVAL, "missing component in set element '%s'", + raw_elem); + } + + r = bf_hashset_add_elem(set, elem); + if (r) + return bf_err_r(r, "failed to insert element into set"); + + return 0; +} + +int bf_hashset_new_from_raw(struct bf_hashset **set, const char *name, + const char *raw_key, const char *raw_payload) +{ + _free_bf_hashset_ struct bf_hashset *_set = NULL; + _cleanup_free_ char *_raw_payload = NULL; + enum bf_matcher_type key[BF_HASHSET_MAX_N_COMPS]; + char *raw_elem, *tmp, *saveptr; + size_t n_comps; + int r; + + assert(set); + assert(raw_key); + assert(raw_payload); + + r = _bf_hashset_parse_key(raw_key, key, &n_comps); + if (r) + return bf_err_r(r, "failed to parse set key '%s'", raw_key); + + r = bf_hashset_new(&_set, name, key, n_comps); + if (r) + return r; + + _raw_payload = strdup(raw_payload); + if (!_raw_payload) + return bf_err_r(-ENOMEM, "failed to copy set raw payload '%s'", + raw_payload); + + tmp = _raw_payload; + while ((raw_elem = strtok_r(tmp, "{};\n", &saveptr))) { + raw_elem = bf_trim(raw_elem); + + if (raw_elem[0] == '\0') + continue; + + r = bf_hashset_add_elem_raw(_set, raw_elem); + if (r) + return bf_err_r(r, "failed to parse set element '%s'", raw_elem); + + tmp = NULL; + } + + *set = TAKE_PTR(_set); + + return 0; +} + +int bf_hashset_new_from_pack(struct bf_hashset **set, bf_rpack_node_t node) +{ + _free_bf_hashset_ struct bf_hashset *_set = NULL; + _cleanup_free_ char *name = NULL; + bf_rpack_node_t child, comp_node, elem_node; + size_t n_comps = 0; + enum bf_matcher_type key[BF_HASHSET_MAX_N_COMPS]; + int r; + + assert(set); + + r = bf_rpack_kv_node(node, "name", &child); + if (r) + return bf_rpack_key_err(r, "bf_hashset.name"); + if (!bf_rpack_is_nil(child)) { + r = bf_rpack_str(child, 
&name); + if (r) + return bf_err_r( + r, "failed to read set name from bf_hashset.name pack"); + } + + r = bf_rpack_kv_array(node, "key", &child); + if (r) + return bf_rpack_key_err(r, "bf_hashset.key"); + bf_rpack_array_foreach (child, comp_node) { + ++n_comps; + if (n_comps > BF_HASHSET_MAX_N_COMPS) { + return bf_err_r( + -E2BIG, + "bf_hashset.key in pack contains %lu key components, only %d allowed", + n_comps, BF_HASHSET_MAX_N_COMPS); + } + + r = bf_rpack_enum(comp_node, &key[n_comps - 1], 0, + _BF_MATCHER_TYPE_MAX); + if (r) + return bf_rpack_key_err(r, "bf_hashset.key"); + } + + r = bf_hashset_new(&_set, name, key, n_comps); + if (r) + return bf_err_r(r, "failed to create bf_hashset from pack"); + + r = bf_rpack_kv_array(node, "elements", &child); + if (r) + return bf_rpack_key_err(r, "bf_hashset.elements"); + bf_rpack_array_foreach (child, elem_node) { + const void *elem; + size_t elem_len; + + r = bf_rpack_bin(elem_node, &elem, &elem_len); + if (r) + return bf_rpack_key_err(r, "bf_hashset.elements"); + + if (elem_len != _set->elem_size) { + return bf_err_r( + -EINVAL, "bf_hashset pack element is %lu bytes, it must be %lu", + elem_len, _set->elem_size); + } + + r = bf_hashset_add_elem(_set, elem); + if (r) + return bf_err_r(r, "failed to insert element to bf_hashset"); + } + + *set = TAKE_PTR(_set); + + return 0; +} + +int bf_hashset_pack(const struct bf_hashset *set, bf_wpack_t *pack) +{ + assert(set); + assert(pack); + + if (set->name) + bf_wpack_kv_str(pack, "name", set->name); + else + bf_wpack_kv_nil(pack, "name"); + + bf_wpack_open_array(pack, "key"); + for (size_t i = 0; i < set->n_comps; ++i) + bf_wpack_enum(pack, set->key[i]); + bf_wpack_close_array(pack); + + bf_wpack_open_array(pack, "elements"); + bf_hashset_foreach (set, elem) + bf_wpack_bin(pack, elem, set->elem_size); + bf_wpack_close_array(pack); + + return bf_wpack_is_valid(pack) ? 
0 : -EINVAL; +} + +void bf_hashset_dump(const struct bf_hashset *set, prefix_t *prefix) +{ + assert(set); + assert(prefix); + + DUMP(prefix, "struct bf_hashset at %p", set); + bf_dump_prefix_push(prefix); + + DUMP(prefix, "name: %s", set->name ?: ""); + DUMP(prefix, "key: bf_matcher_type[%zu]", set->n_comps); + bf_dump_prefix_push(prefix); + for (size_t i = 0; i < set->n_comps; ++i) { + if (i == set->n_comps - 1) + bf_dump_prefix_last(prefix); + + DUMP(prefix, "%s", bf_matcher_type_to_str(set->key[i])); + } + bf_dump_prefix_pop(prefix); + + DUMP(prefix, "elem_size: %lu", set->elem_size); + DUMP(bf_dump_prefix_last(prefix), "elems: bf_hashset[%lu]", + bf_hashset_size(set)); + + bf_dump_prefix_push(prefix); + size_t n = 0; + size_t total = bf_hashset_size(set); + bf_hashset_foreach (set, elem) { + ++n; + if (n == total) + bf_dump_prefix_last(prefix); + DUMP(prefix, "void * @ %p", elem); + bf_dump_prefix_push(prefix); + bf_dump_hex(prefix, elem, set->elem_size); + bf_dump_prefix_pop(prefix); + } + bf_dump_prefix_pop(prefix); + + bf_dump_prefix_pop(prefix); +} + +/** + * @brief Check if two hashsets have the same key format. + * + * @param first First hashset. Can't be NULL. + * @param second Second hashset. Can't be NULL. + * @return 0 if hashsets have matching format, or -EINVAL on mismatch. 
+ */ +static int _bf_hashset_cmp_key_format(const struct bf_hashset *first, + const struct bf_hashset *second) +{ + assert(first); + assert(second); + + if (first->n_comps != second->n_comps) + return bf_err_r( + -EINVAL, + "set key format mismatch: first set has %lu components, second has %lu", + first->n_comps, second->n_comps); + + if (memcmp(first->key, second->key, + first->n_comps * sizeof(enum bf_matcher_type)) != 0) + return bf_err_r(-EINVAL, "set key component type mismatch"); + + return 0; +} + +int bf_hashset_add_many(struct bf_hashset *dest, struct bf_hashset **to_add) +{ + int r; + + assert(dest); + assert(to_add); + assert(*to_add); + + r = _bf_hashset_cmp_key_format(dest, *to_add); + if (r) + return r; + + bf_hashset_foreach (*to_add, elem) { + r = bf_hashset_add_elem(dest, elem); + if (r) + return r; + } + + bf_hashset_free(to_add); + + return 0; +} + +int bf_hashset_remove_many(struct bf_hashset *dest, + struct bf_hashset **to_remove) +{ + int r; + + assert(dest); + assert(to_remove); + assert(*to_remove); + + r = _bf_hashset_cmp_key_format(dest, *to_remove); + if (r) + return r; + + bf_hashset_foreach (*to_remove, elem) { + r = bf_hashset_remove(dest, elem); + if (r) + return r; + } + + bf_hashset_free(to_remove); + + return 0; +} diff --git a/src/libbpfilter/include/bpfilter/hashset.h b/src/libbpfilter/include/bpfilter/hashset.h new file mode 100644 index 000000000..7d3e0586d --- /dev/null +++ b/src/libbpfilter/include/bpfilter/hashset.h @@ -0,0 +1,250 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2026 Meta Platforms, Inc. and affiliates. + */ + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +/** + * @file hashset.h + * + * Open-addressing hashset with linear probing, backed by @ref bf_vector. + * Elements are fixed-size blobs compared with @c memcmp. Uses FNV-1a for + * hashing. Removed elements leave tombstones; no compaction is performed. 
+ */ + +struct bf_hashset; + +#define _free_bf_hashset_ __attribute__((cleanup(bf_hashset_free))) + +/// Maximum number of components (matchers) allowed in a hashset key/element. +#define BF_HASHSET_MAX_N_COMPS 8 + +/** + * @struct bf_hashset + * + * @var bf_hashset::slots + * Backing vector. Each element in the vector is a status byte followed + * by @c elem_size bytes of element data. + * @var bf_hashset::elem_size + * Size of a single element in bytes (not including the status byte). + * @var bf_hashset::len + * Number of occupied slots (not counting tombstones). + * @var bf_hashset::n_used + * Number of occupied + tombstone slots (used for load factor). + * @var bf_hashset::name + * Name of the set. If NULL, the set is anonymous. + * @var bf_hashset::key + * Key defining how elements are structured, using @c bf_matcher_type values. + * @var bf_hashset::n_comps + * Number of components (types) present in the key. + * @var bf_hashset::use_trie + * If the key has a single network address component, use LPM trie. + */ +struct bf_hashset +{ + struct bf_vector slots; + size_t elem_size; + size_t len; + size_t n_used; + + const char *name; + enum bf_matcher_type key[BF_HASHSET_MAX_N_COMPS]; + size_t n_comps; + bool use_trie; +}; + +/** + * @brief Allocate and initialise a new hashset. + * + * @param set Set to allocate and initialise. Can't be NULL. + * @param name Name of the set, can be used to identify it. If NULL, the set + * is anonymous. + * @param key Key of the set, as an array of `bf_matcher_type`. Not all the + * matcher types can be used as set key components. Can't be NULL. + * @param n_comps Number of components in `key`. + * @return 0 on success, or a negative errno value on failure. + */ +int bf_hashset_new(struct bf_hashset **set, const char *name, + enum bf_matcher_type *key, size_t n_comps); + +/** + * @brief Free a hashset. + * + * @param set Pointer to the hashset pointer. Must be non-NULL. 
+ */ +void bf_hashset_free(struct bf_hashset **set); + +/** + * @brief Get the number of elements in the hashset. + * + * @param set Initialised hashset. Must be non-NULL. + * @return Number of elements stored. + */ +size_t bf_hashset_size(const struct bf_hashset *set); + +/** + * @brief Get the current number of slots. + * + * @param set Initialised hashset. Must be non-NULL. + * @return Current slot count. + */ +size_t bf_hashset_cap(const struct bf_hashset *set); + +/** + * @brief Check if the hashset is empty. + * + * @param set Initialised hashset. Must be non-NULL. + * @return True if the hashset has no elements. + */ +bool bf_hashset_is_empty(const struct bf_hashset *set); + +const char *bf_hashset_get_name(const struct bf_hashset *set); +size_t bf_hashset_get_n_comps(const struct bf_hashset *set); +enum bf_matcher_type bf_hashset_get_key_comp(const struct bf_hashset *set, + size_t index); + +/** + * @brief Check whether an element exists in the hashset. + * + * @param set Initialised hashset. Must be non-NULL. + * @param elem Element to look up. Must be non-NULL. + * @return True if @p elem is present. + */ +bool bf_hashset_contains(const struct bf_hashset *set, const void *elem); + +/** + * @brief Remove an element from the hashset. + * + * The slot is marked as a tombstone; no memory is reclaimed. + * Removing an element that doesn't exist is a no-op (returns 0). + * + * @param set Initialised hashset. Must be non-NULL. + * @param elem Element to remove. Must be non-NULL. + * @return 0 on success, or a negative errno value on failure. + */ +int bf_hashset_remove(struct bf_hashset *set, const void *elem); + +/** + * @brief Add all elements from @p to_add into @p dest, then free @p to_add. + * + * Duplicate elements are skipped. Both hashsets must have the same + * key format. On success, @p *to_add is freed and set to NULL. + * + * @param dest Destination hashset. Can't be NULL. + * @param to_add Source hashset. Can't be NULL. Freed on success. 
+ * @return 0 on success, or a negative errno value on failure. + */ +int bf_hashset_add_many(struct bf_hashset *dest, struct bf_hashset **to_add); + +/** + * @brief Remove all elements in @p to_remove from @p dest, then free + * @p to_remove. + * + * Elements in @p to_remove that aren't in @p dest are ignored. Both hashsets + * must have the same key format. On success, @p *to_remove is freed and + * set to NULL. + * + * @param dest Destination hashset. Can't be NULL. + * @param to_remove Source hashset. Can't be NULL. Freed on success. + * @return 0 on success, or a negative errno value on failure. + */ +int bf_hashset_remove_many(struct bf_hashset *dest, + struct bf_hashset **to_remove); + +/** + * @brief Allocate and initialise a new hashset from raw key and payload. + * + * @param set Set to allocate and initialise. Can't be NULL. + * @param name Name of the set. If NULL, the set is anonymous. + * @param raw_key Set key as comma-separated matcher types in parentheses. + * Can't be NULL. + * @param raw_payload Set payload to parse according to @p raw_key. Can't + * be NULL. + * @return 0 on success, or a negative errno value on failure. + */ +int bf_hashset_new_from_raw(struct bf_hashset **set, const char *name, + const char *raw_key, const char *raw_payload); + +/** + * @brief Allocate and initialise a new hashset from serialized data. + * + * @param set Set to allocate and initialise. Can't be NULL. On failure, + * @p *set is unchanged. + * @param node Node containing the serialized set. Can't be NULL. + * @return 0 on success, or a negative errno value on failure. + */ +int bf_hashset_new_from_pack(struct bf_hashset **set, bf_rpack_node_t node); + +/** + * @brief Serialize a hashset. + * + * @param set Set to serialize. Can't be NULL. + * @param pack @c bf_wpack_t object to serialize into. Can't be NULL. + * @return 0 on success, or a negative errno value on failure. 
/**
 * @brief Iterate over all occupied elements in a hashset.
 *
 * Slots whose status byte is not @c _BF_HASHSET_SLOT_OCCUPIED are skipped.
 * Both @c break and @c continue behave as in a regular loop. @p set is
 * evaluated several times, so it must not have side effects.
 *
 * @param set Pointer to the hashset to iterate over. Must be non-NULL.
 * @param elem_var Name of the void* variable to hold each element.
 */
#define bf_hashset_foreach(set, elem_var)                                      \
    for (size_t _bf_hset_idx = 0, _bf_hset_brk = 0;                            \
         _bf_hset_idx < bf_hashset_cap(set) && !_bf_hset_brk; ++_bf_hset_idx)  \
        for (void *(elem_var) =                                                \
                 *(uint8_t *)bf_vector_get(&(set)->slots, _bf_hset_idx) ==     \
                         _BF_HASHSET_SLOT_OCCUPIED ?                           \
                     (_bf_hset_brk = 1,                                        \
                      (void *)((uint8_t *)bf_vector_get(&(set)->slots,         \
                                                        _bf_hset_idx) +        \
                               sizeof(uint8_t))) :                             \
                     NULL;                                                     \
             _bf_hset_brk; _bf_hset_brk = 0)
-10,6 +10,7 @@ #include #include +#include #include #include #include @@ -252,6 +253,29 @@ bool bft_set_eq(const struct bf_set *lhs, const struct bf_set *rhs) lhs->use_trie == rhs->use_trie; } +bool bft_hashset_eq(const struct bf_hashset *lhs, const struct bf_hashset *rhs) +{ + if (bf_hashset_size(lhs) != bf_hashset_size(rhs)) + return false; + + if (lhs->elem_size != rhs->elem_size) + return false; + + bf_hashset_foreach (lhs, elem) { + if (!bf_hashset_contains(rhs, elem)) + return false; + } + + if (lhs->name != rhs->name && + (!lhs->name || !rhs->name || strcmp(lhs->name, rhs->name) != 0)) + return false; + + return lhs->n_comps == rhs->n_comps && + memcmp(lhs->key, rhs->key, + sizeof(enum bf_matcher_type) * lhs->n_comps) == 0 && + lhs->use_trie == rhs->use_trie; +} + bool bft_chain_equal(const struct bf_chain *chain0, const struct bf_chain *chain1) { diff --git a/tests/harness/test.h b/tests/harness/test.h index 29d01da99..772c8d751 100644 --- a/tests/harness/test.h +++ b/tests/harness/test.h @@ -21,6 +21,7 @@ #include "fake.h" struct bf_set; +struct bf_hashset; struct bf_counter; #define assert_ok(expr) assert_true((expr) == 0) @@ -80,6 +81,7 @@ struct bf_counter; bool bft_list_eq(const bf_list *lhs, const bf_list *rhs, bft_list_eq_cb cb); bool bft_set_eq(const struct bf_set *lhs, const struct bf_set *rhs); +bool bft_hashset_eq(const struct bf_hashset *lhs, const struct bf_hashset *rhs); bool bft_counter_eq(const struct bf_counter *lhs, const struct bf_counter *rhs); bool bft_chain_equal(const struct bf_chain *chain0, const struct bf_chain *chain1); diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 6c6c2b212..85f445cd2 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -79,6 +79,7 @@ bf_add_c_test(unit libbpfilter/counter.c) bf_add_c_test(unit libbpfilter/dump.c) bf_add_c_test(unit libbpfilter/dynbuf.c) bf_add_c_test(unit libbpfilter/flavor.c) +bf_add_c_test(unit libbpfilter/hashset.c) bf_add_c_test(unit 
libbpfilter/helper.c) bf_add_c_test(unit libbpfilter/hook.c) bf_add_c_test(unit libbpfilter/if.c) diff --git a/tests/unit/libbpfilter/hashset.c b/tests/unit/libbpfilter/hashset.c new file mode 100644 index 000000000..4363217c6 --- /dev/null +++ b/tests/unit/libbpfilter/hashset.c @@ -0,0 +1,665 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2026 Meta Platforms, Inc. and affiliates. + */ + +#include + +#include + +#include "bpfilter/dump.h" +#include "bpfilter/pack.h" +#include "fake.h" +#include "test.h" + +static void new_and_free(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + (void)state; + + // Free set manually + assert_ok(bf_hashset_new(&set, "test_set", key, ARRAY_SIZE(key))); + assert_non_null(set); + assert_string_equal(set->name, "test_set"); + assert_int_equal(set->n_comps, 1); + assert_int_equal(set->key[0], BF_MATCHER_IP4_SADDR); + bf_hashset_free(&set); + assert_null(set); + + // Free set using the cleanup attribute + assert_ok(bf_hashset_new(&set, NULL, key, ARRAY_SIZE(key))); + assert_null(set->name); +} + +static void new_with_multiple_keys(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_DADDR, BF_MATCHER_TCP_SPORT}; + + (void)state; + + assert_ok(bf_hashset_new(&set, "multi_key_set", key, ARRAY_SIZE(key))); + assert_non_null(set); + assert_int_equal(set->n_comps, 2); + assert_int_equal(set->key[0], BF_MATCHER_IP4_DADDR); + assert_int_equal(set->key[1], BF_MATCHER_TCP_SPORT); +} + +static void new_with_invalid_params(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + (void)state; + + // Test with 0 components + assert_err(bf_hashset_new(&set, "test", key, 0)); + + // Test with too many components + enum bf_matcher_type large_key[BF_HASHSET_MAX_N_COMPS + 1]; + for (size_t i = 0; i <= 
BF_HASHSET_MAX_N_COMPS; ++i) + large_key[i] = BF_MATCHER_IP4_SADDR; + assert_err( + bf_hashset_new(&set, "test", large_key, BF_HASHSET_MAX_N_COMPS + 1)); +} + +static void new_with_trie_key(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SNET}; + + (void)state; + + // Network matchers should enable trie + assert_ok(bf_hashset_new(&set, "trie_set", key, ARRAY_SIZE(key))); + assert_true(set->use_trie); +} + +static void new_with_invalid_network_combination(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SNET, BF_MATCHER_TCP_SPORT}; + + (void)state; + + // Network matchers can't be combined with other matchers + assert_err(bf_hashset_new(&set, "invalid_set", key, ARRAY_SIZE(key))); +} + +static void add_elem(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + uint32_t elem = 0x01020304; // 1.2.3.4 + + (void)state; + + assert_ok(bf_hashset_new(&set, "test", key, ARRAY_SIZE(key))); + assert_ok(bf_hashset_add_elem(set, &elem)); + assert_int_equal(bf_hashset_size(set), 1); +} + +static void add_multiple_elems(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_DADDR, BF_MATCHER_TCP_SPORT}; + + (void)state; + + assert_ok(bf_hashset_new(&set, "test", key, ARRAY_SIZE(key))); + + // Add 5 elements + for (size_t i = 0; i < 5; ++i) { + uint8_t elem[set->elem_size]; + memset(elem, (uint8_t)i, set->elem_size); + assert_ok(bf_hashset_add_elem(set, elem)); + } + + assert_int_equal(bf_hashset_size(set), 5); +} + +static void pack_and_unpack(void **state) +{ + _free_bf_hashset_ struct bf_hashset *source = NULL; + _free_bf_hashset_ struct bf_hashset *destination = NULL; + _free_bf_wpack_ bf_wpack_t *wpack = NULL; + _free_bf_rpack_ bf_rpack_t *rpack = NULL; + bf_rpack_node_t node; + const void *data; + size_t 
data_len; + + (void)state; + + // Create and pack the source set + assert_non_null(source = bft_hashset_dummy(4)); + assert_ok(bf_wpack_new(&wpack)); + bf_wpack_open_object(wpack, "set"); + assert_ok(bf_hashset_pack(source, wpack)); + bf_wpack_close_object(wpack); + assert_ok(bf_wpack_get_data(wpack, &data, &data_len)); + + // Unpack into destination set + assert_ok(bf_rpack_new(&rpack, data, data_len)); + assert_ok(bf_rpack_kv_obj(bf_rpack_root(rpack), "set", &node)); + assert_ok(bf_hashset_new_from_pack(&destination, node)); + + assert_true(bft_hashset_eq(source, destination)); +} + +static void pack_and_unpack_empty(void **state) +{ + _free_bf_hashset_ struct bf_hashset *source = NULL; + _free_bf_hashset_ struct bf_hashset *destination = NULL; + _free_bf_wpack_ bf_wpack_t *wpack = NULL; + _free_bf_rpack_ bf_rpack_t *rpack = NULL; + bf_rpack_node_t node; + const void *data; + size_t data_len; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + (void)state; + + // Create empty set and pack it + assert_ok(bf_hashset_new(&source, "empty_set", key, ARRAY_SIZE(key))); + assert_ok(bf_wpack_new(&wpack)); + bf_wpack_open_object(wpack, "set"); + assert_ok(bf_hashset_pack(source, wpack)); + bf_wpack_close_object(wpack); + assert_ok(bf_wpack_get_data(wpack, &data, &data_len)); + + // Unpack into destination + assert_ok(bf_rpack_new(&rpack, data, data_len)); + assert_ok(bf_rpack_kv_obj(bf_rpack_root(rpack), "set", &node)); + assert_ok(bf_hashset_new_from_pack(&destination, node)); + + assert_true(bft_hashset_eq(source, destination)); + assert_int_equal(bf_hashset_size(destination), 0); +} + +static void dump(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + prefix_t prefix = {}; + + (void)state; + + // Dump a set with elements + assert_non_null(set = bft_hashset_dummy(4)); + bf_hashset_dump(set, &prefix); +} + +static void dump_empty(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + enum bf_matcher_type key[] = 
{BF_MATCHER_IP4_SADDR}; + + prefix_t prefix = {}; + + (void)state; + + // Dump an empty set + assert_ok(bf_hashset_new(&set, "empty", key, ARRAY_SIZE(key))); + bf_hashset_dump(set, &prefix); +} + +static void new_from_raw(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + (void)state; + + // Test creating set from raw key and payload + assert_ok(bf_hashset_new_from_raw(&set, "test_raw", "(ip4.saddr)", + "{1.2.3.4; 5.6.7.8}")); + assert_non_null(set); + assert_string_equal(set->name, "test_raw"); + assert_int_equal(set->n_comps, 1); + assert_int_equal(set->key[0], BF_MATCHER_IP4_SADDR); + assert_int_equal(bf_hashset_size(set), 2); +} + +static void new_from_raw_multiple_keys(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + (void)state; + + // Test creating set with multiple key components + assert_ok(bf_hashset_new_from_raw(&set, "test_multi", + "(ip4.daddr, tcp.sport)", + "{1.2.3.4, 80; 5.6.7.8, 443}")); + assert_non_null(set); + assert_int_equal(set->n_comps, 2); + assert_int_equal(set->key[0], BF_MATCHER_IP4_DADDR); + assert_int_equal(set->key[1], BF_MATCHER_TCP_SPORT); + assert_int_equal(bf_hashset_size(set), 2); +} + +static void new_from_raw_invalid(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + (void)state; + + // Test with invalid key format + assert_err(bf_hashset_new_from_raw(&set, "test", "INVALID", "{1.2.3.4}")); + + // Test with empty key + assert_err(bf_hashset_new_from_raw(&set, "test", "()", "{1.2.3.4}")); +} + +static void add_many_basic(void **state) +{ + _free_bf_hashset_ struct bf_hashset *dest = NULL; + _free_bf_hashset_ struct bf_hashset *to_add = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + uint32_t elem1 = 0x01010101; + uint32_t elem2 = 0x02020202; + uint32_t elem3 = 0x03030303; + + (void)state; + + assert_ok(bf_hashset_new(&dest, "dest", key, ARRAY_SIZE(key))); + assert_ok(bf_hashset_new(&to_add, "to_add", key, ARRAY_SIZE(key))); + + 
assert_ok(bf_hashset_add_elem(dest, &elem1)); + assert_ok(bf_hashset_add_elem(dest, &elem2)); + + assert_ok(bf_hashset_add_elem(to_add, &elem2)); + assert_ok(bf_hashset_add_elem(to_add, &elem3)); + + assert_ok(bf_hashset_add_many(dest, &to_add)); + + assert_int_equal(bf_hashset_size(dest), 3); + assert_true(bf_hashset_contains(dest, &elem1)); + assert_true(bf_hashset_contains(dest, &elem2)); + assert_true(bf_hashset_contains(dest, &elem3)); + assert_null(to_add); +} + +static void add_many_mismatched_key_count(void **state) +{ + _free_bf_hashset_ struct bf_hashset *dest = NULL; + _free_bf_hashset_ struct bf_hashset *to_add = NULL; + + enum bf_matcher_type key1[] = {BF_MATCHER_IP4_SADDR}; + + enum bf_matcher_type key2[] = {BF_MATCHER_IP4_SADDR, BF_MATCHER_TCP_SPORT}; + + (void)state; + + assert_ok(bf_hashset_new(&dest, "dest", key1, ARRAY_SIZE(key1))); + assert_ok(bf_hashset_new(&to_add, "to_add", key2, ARRAY_SIZE(key2))); + + assert_err(bf_hashset_add_many(dest, &to_add)); + assert_non_null(to_add); +} + +static void add_many_mismatched_key_type(void **state) +{ + _free_bf_hashset_ struct bf_hashset *dest = NULL; + _free_bf_hashset_ struct bf_hashset *to_add = NULL; + + enum bf_matcher_type key1[] = {BF_MATCHER_IP4_SADDR}; + + enum bf_matcher_type key2[] = {BF_MATCHER_IP4_DADDR}; + + (void)state; + + assert_ok(bf_hashset_new(&dest, "dest", key1, ARRAY_SIZE(key1))); + assert_ok(bf_hashset_new(&to_add, "to_add", key2, ARRAY_SIZE(key2))); + + assert_err(bf_hashset_add_many(dest, &to_add)); + assert_non_null(to_add); +} + +static void remove_many_basic(void **state) +{ + _free_bf_hashset_ struct bf_hashset *dest = NULL; + _free_bf_hashset_ struct bf_hashset *to_remove = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + uint32_t elem1 = 0x01010101; + uint32_t elem2 = 0x02020202; + uint32_t elem3 = 0x03030303; + + (void)state; + + assert_ok(bf_hashset_new(&dest, "dest", key, ARRAY_SIZE(key))); + assert_ok(bf_hashset_new(&to_remove, "to_remove", key, 
ARRAY_SIZE(key))); + + assert_ok(bf_hashset_add_elem(dest, &elem1)); + assert_ok(bf_hashset_add_elem(dest, &elem2)); + assert_ok(bf_hashset_add_elem(dest, &elem3)); + + assert_ok(bf_hashset_add_elem(to_remove, &elem2)); + + assert_ok(bf_hashset_remove_many(dest, &to_remove)); + + assert_int_equal(bf_hashset_size(dest), 2); + assert_true(bf_hashset_contains(dest, &elem1)); + assert_false(bf_hashset_contains(dest, &elem2)); + assert_true(bf_hashset_contains(dest, &elem3)); + assert_null(to_remove); +} + +static void remove_many_disjoint_sets(void **state) +{ + _free_bf_hashset_ struct bf_hashset *dest = NULL; + _free_bf_hashset_ struct bf_hashset *to_remove = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + uint32_t elem1 = 0x01010101; + uint32_t elem2 = 0x02020202; + uint32_t elem3 = 0x03030303; + uint32_t elem4 = 0x04040404; + + (void)state; + + assert_ok(bf_hashset_new(&dest, "dest", key, ARRAY_SIZE(key))); + assert_ok(bf_hashset_new(&to_remove, "to_remove", key, ARRAY_SIZE(key))); + + assert_ok(bf_hashset_add_elem(dest, &elem1)); + assert_ok(bf_hashset_add_elem(dest, &elem2)); + + assert_ok(bf_hashset_add_elem(to_remove, &elem3)); + assert_ok(bf_hashset_add_elem(to_remove, &elem4)); + + assert_ok(bf_hashset_remove_many(dest, &to_remove)); + assert_int_equal(bf_hashset_size(dest), 2); + assert_true(bf_hashset_contains(dest, &elem1)); + assert_true(bf_hashset_contains(dest, &elem2)); + assert_null(to_remove); +} + +static void contains(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + uint32_t elem1 = 0x01010101; + uint32_t elem2 = 0x02020202; + uint32_t missing = 0x09090909; + + (void)state; + + assert_ok(bf_hashset_new(&set, "test", key, ARRAY_SIZE(key))); + + assert_false(bf_hashset_contains(set, &elem1)); + + assert_ok(bf_hashset_add_elem(set, &elem1)); + assert_ok(bf_hashset_add_elem(set, &elem2)); + + assert_true(bf_hashset_contains(set, &elem1)); + 
assert_true(bf_hashset_contains(set, &elem2)); + assert_false(bf_hashset_contains(set, &missing)); +} + +static void remove_elem(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + uint32_t elem1 = 0x01010101; + uint32_t elem2 = 0x02020202; + uint32_t missing = 0x09090909; + + (void)state; + + assert_ok(bf_hashset_new(&set, "test", key, ARRAY_SIZE(key))); + assert_ok(bf_hashset_add_elem(set, &elem1)); + assert_ok(bf_hashset_add_elem(set, &elem2)); + assert_int_equal(bf_hashset_size(set), 2); + + // Remove existing element + assert_ok(bf_hashset_remove(set, &elem1)); + assert_int_equal(bf_hashset_size(set), 1); + assert_false(bf_hashset_contains(set, &elem1)); + assert_true(bf_hashset_contains(set, &elem2)); + + // Remove nonexistent element is a no-op + assert_ok(bf_hashset_remove(set, &missing)); + assert_int_equal(bf_hashset_size(set), 1); + + // Re-add after removal (tombstone reuse) + assert_ok(bf_hashset_add_elem(set, &elem1)); + assert_int_equal(bf_hashset_size(set), 2); + assert_true(bf_hashset_contains(set, &elem1)); +} + +static void is_empty_and_cap(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + uint32_t elem = 0x01010101; + + (void)state; + + assert_ok(bf_hashset_new(&set, "test", key, ARRAY_SIZE(key))); + + assert_true(bf_hashset_is_empty(set)); + assert_int_equal(bf_hashset_cap(set), 0); + + assert_ok(bf_hashset_add_elem(set, &elem)); + + assert_false(bf_hashset_is_empty(set)); + assert_true(bf_hashset_cap(set) > 0); +} + +static void foreach_basic(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + uint32_t elems[] = {0x01010101, 0x02020202, 0x03030303}; + size_t count; + + (void)state; + + assert_ok(bf_hashset_new(&set, "test", key, ARRAY_SIZE(key))); + + // foreach on empty set does nothing + count = 0; + 
bf_hashset_foreach (set, elem) { + (void)elem; + ++count; + } + assert_int_equal(count, 0); + + for (size_t i = 0; i < ARRAY_SIZE(elems); ++i) + assert_ok(bf_hashset_add_elem(set, &elems[i])); + + // foreach visits every element + count = 0; + bf_hashset_foreach (set, elem) { + (void)elem; + ++count; + } + assert_int_equal(count, 3); +} + +static void foreach_after_removal(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + uint32_t elems[] = {0x01010101, 0x02020202, 0x03030303}; + size_t count; + + (void)state; + + assert_ok(bf_hashset_new(&set, "test", key, ARRAY_SIZE(key))); + + for (size_t i = 0; i < ARRAY_SIZE(elems); ++i) + assert_ok(bf_hashset_add_elem(set, &elems[i])); + + assert_ok(bf_hashset_remove(set, &elems[1])); + + // Tombstoned slot must be skipped + count = 0; + bf_hashset_foreach (set, elem) { + (void)elem; + ++count; + } + assert_int_equal(count, 2); +} + +static void add_triggers_grow(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + (void)state; + + assert_ok(bf_hashset_new(&set, "test", key, ARRAY_SIZE(key))); + + for (uint32_t i = 0; i < 20; ++i) { + uint32_t addr = htonl(0x0a000001 + i); + assert_ok(bf_hashset_add_elem(set, &addr)); + } + + assert_int_equal(bf_hashset_size(set), 20); + assert_true(bf_hashset_cap(set) > 16); + + for (uint32_t i = 0; i < 20; ++i) { + uint32_t addr = htonl(0x0a000001 + i); + assert_true(bf_hashset_contains(set, &addr)); + } +} + +static void foreach_break(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + uint32_t elems[] = {0x01010101, 0x02020202, 0x03030303}; + size_t count; + + (void)state; + + assert_ok(bf_hashset_new(&set, "test", key, ARRAY_SIZE(key))); + + for (size_t i = 0; i < ARRAY_SIZE(elems); ++i) + assert_ok(bf_hashset_add_elem(set, &elems[i])); + + count = 0; + 
bf_hashset_foreach (set, elem) { + (void)elem; + ++count; + break; + } + assert_int_equal(count, 1); +} + +static void add_duplicate(void **state) +{ + _free_bf_hashset_ struct bf_hashset *set = NULL; + + enum bf_matcher_type key[] = {BF_MATCHER_IP4_SADDR}; + + uint32_t elem = 0x01010101; + + (void)state; + + assert_ok(bf_hashset_new(&set, "test", key, ARRAY_SIZE(key))); + assert_ok(bf_hashset_add_elem(set, &elem)); + assert_ok(bf_hashset_add_elem(set, &elem)); + assert_int_equal(bf_hashset_size(set), 1); +} + +static void remove_many_mismatched_key_count(void **state) +{ + _free_bf_hashset_ struct bf_hashset *dest = NULL; + _free_bf_hashset_ struct bf_hashset *to_remove = NULL; + + enum bf_matcher_type key1[] = {BF_MATCHER_IP4_SADDR}; + + enum bf_matcher_type key2[] = {BF_MATCHER_IP4_SADDR, BF_MATCHER_TCP_SPORT}; + + (void)state; + + assert_ok(bf_hashset_new(&dest, "dest", key1, ARRAY_SIZE(key1))); + assert_ok(bf_hashset_new(&to_remove, "to_remove", key2, ARRAY_SIZE(key2))); + + assert_err(bf_hashset_remove_many(dest, &to_remove)); + assert_non_null(to_remove); +} + +static void remove_many_mismatched_key_type(void **state) +{ + _free_bf_hashset_ struct bf_hashset *dest = NULL; + _free_bf_hashset_ struct bf_hashset *to_remove = NULL; + + enum bf_matcher_type key1[] = {BF_MATCHER_IP4_SADDR}; + + enum bf_matcher_type key2[] = {BF_MATCHER_IP4_DADDR}; + + (void)state; + + assert_ok(bf_hashset_new(&dest, "dest", key1, ARRAY_SIZE(key1))); + assert_ok(bf_hashset_new(&to_remove, "to_remove", key2, ARRAY_SIZE(key2))); + + assert_err(bf_hashset_remove_many(dest, &to_remove)); + assert_non_null(to_remove); +} + +int main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(new_and_free), + cmocka_unit_test(new_with_multiple_keys), + cmocka_unit_test(new_with_invalid_params), + cmocka_unit_test(new_with_trie_key), + cmocka_unit_test(new_with_invalid_network_combination), + cmocka_unit_test(add_elem), + cmocka_unit_test(add_multiple_elems), + 
cmocka_unit_test(pack_and_unpack), + cmocka_unit_test(pack_and_unpack_empty), + cmocka_unit_test(dump), + cmocka_unit_test(dump_empty), + cmocka_unit_test(new_from_raw), + cmocka_unit_test(new_from_raw_multiple_keys), + cmocka_unit_test(new_from_raw_invalid), + cmocka_unit_test(contains), + cmocka_unit_test(remove_elem), + cmocka_unit_test(is_empty_and_cap), + cmocka_unit_test(foreach_basic), + cmocka_unit_test(foreach_after_removal), + cmocka_unit_test(add_triggers_grow), + cmocka_unit_test(foreach_break), + cmocka_unit_test(add_duplicate), + cmocka_unit_test(add_many_basic), + cmocka_unit_test(add_many_mismatched_key_count), + cmocka_unit_test(add_many_mismatched_key_type), + cmocka_unit_test(remove_many_basic), + cmocka_unit_test(remove_many_disjoint_sets), + cmocka_unit_test(remove_many_mismatched_key_count), + cmocka_unit_test(remove_many_mismatched_key_type), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} From 61d3779991b0b9c72e9a8179782ababf149c11c5 Mon Sep 17 00:00:00 2001 From: Pawel Zmarzly Date: Tue, 17 Feb 2026 02:01:12 +0000 Subject: [PATCH 5/5] build: add USE_HASHSET option to switch to hashset implementation When cmake -DUSE_HASHSET=1 is passed, set.h redirects all bf_set_* symbols to their bf_hashset_* counterparts via preprocessor macros. The CI workflow is extended to test both paths. USE_HASHSET defaults to off for now. 
--- .github/workflows/ci.yaml | 10 +++--- CMakeLists.txt | 1 + src/libbpfilter/CMakeLists.txt | 12 ++++++- src/libbpfilter/bpfilter.pc.in | 2 +- src/libbpfilter/include/bpfilter/bpfilter.h | 2 +- src/libbpfilter/include/bpfilter/chain.h | 2 +- src/libbpfilter/include/bpfilter/set.h | 38 +++++++++++++++++++++ src/libbpfilter/set.c | 13 +++++++ tests/harness/fake.c | 2 ++ tests/harness/fake.h | 4 +++ tests/harness/test.c | 2 ++ tests/harness/test.h | 7 +++- tests/unit/CMakeLists.txt | 4 ++- tests/unit/libbpfilter/chain.c | 4 +++ 14 files changed, 93 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ec188eb0d..81bb1eb25 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -94,24 +94,25 @@ jobs: host: - { name: 8-core-ubuntu, arch: x64 } - { name: 4-core-ubuntu-arm, arch: arm64 } + use_hashset: [ 0, 1 ] runs-on: [ "${{ matrix.host.name }}" ] container: image: ghcr.io/facebook/bpfilter:fedora-43-${{ matrix.host.arch }} options: --privileged - name: "Test: ${{ matrix.host.arch }}" + name: "Test: ${{ matrix.host.arch }}${{ matrix.use_hashset == 1 && ' (hashset)' || '' }}" steps: - name: Checkout bpfilter uses: actions/checkout@v2 - name: Restore the cached test results uses: actions/cache@v4 - if: matrix.host.arch == 'x64' + if: matrix.host.arch == 'x64' && matrix.use_hashset == 0 with: path: build/coverage key: tests-results-${{ github.run_id }} - name: Mount bpffs run: mount bpffs /sys/fs/bpf -t bpf - name: Configure the build - run: cmake -S $GITHUB_WORKSPACE -B $GITHUB_WORKSPACE/build -DWITH_COVERAGE=1 + run: cmake -S $GITHUB_WORKSPACE -B $GITHUB_WORKSPACE/build -DWITH_COVERAGE=1 -DUSE_HASHSET=${{ matrix.use_hashset }} - name: Build tests run: make -C $GITHUB_WORKSPACE/build -j `nproc` test_bin @@ -125,7 +126,7 @@ jobs: run: ctest --test-dir $GITHUB_WORKSPACE/build -L fuzzing --verbose - name: Upload fuzzer findings uses: actions/upload-artifact@v4 - if: always() + if: always() && 
matrix.use_hashset == 0 with: name: fuzzer-findings-${{ matrix.host.arch }} path: ${{ github.workspace }}/build/findings @@ -136,6 +137,7 @@ jobs: - name: Run checks run: ctest --test-dir $GITHUB_WORKSPACE/build -L check --verbose - name: Generate the coverage report + if: matrix.use_hashset == 0 run: make -C $GITHUB_WORKSPACE/build coverage benchmark: diff --git a/CMakeLists.txt b/CMakeLists.txt index abc62d704..010aaf94e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,6 +34,7 @@ option(NO_TESTS "Disable unit, end-to-end, and integration tests" 0) option(NO_CHECKS "Disable the check target (clang-tidy and clang-format" 0) option(NO_BENCHMARKS "Disable the benchmark" 0) option(WITH_COVERAGE "Build with code coverage support. Disabled by default" 0) +option(USE_HASHSET "Use bf_hashset instead of bf_set for set implementation. Disabled by default" 0) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) diff --git a/src/libbpfilter/CMakeLists.txt b/src/libbpfilter/CMakeLists.txt index 64d6a6dc1..6059ada3e 100644 --- a/src/libbpfilter/CMakeLists.txt +++ b/src/libbpfilter/CMakeLists.txt @@ -55,7 +55,6 @@ set(libbpfilter_srcs ${CMAKE_CURRENT_SOURCE_DIR}/request.c ${CMAKE_CURRENT_SOURCE_DIR}/response.c ${CMAKE_CURRENT_SOURCE_DIR}/rule.c - ${CMAKE_CURRENT_SOURCE_DIR}/set.c ${CMAKE_CURRENT_SOURCE_DIR}/vector.c ${CMAKE_CURRENT_SOURCE_DIR}/verdict.c ${CMAKE_CURRENT_SOURCE_DIR}/version.c @@ -65,6 +64,15 @@ set(libbpfilter_srcs ${CMAKE_SOURCE_DIR}/src/external/mpack.c ) +if (NOT USE_HASHSET) + list(APPEND libbpfilter_srcs ${CMAKE_CURRENT_SOURCE_DIR}/set.c) +endif () + +set(BF_PC_EXTRA_CFLAGS "") +if (USE_HASHSET) + string(APPEND BF_PC_EXTRA_CFLAGS " -DBF_USE_HASHSET") +endif () + configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/bpfilter.pc.in ${CMAKE_BINARY_DIR}/output/lib/pkgconfig/bpfilter.pc @@ -86,6 +94,8 @@ target_compile_definitions(libbpfilter PRIVATE # MPack should use the C standard library API MPACK_STDLIB + PUBLIC + $<$<BOOL:${USE_HASHSET}>:BF_USE_HASHSET> ) target_include_directories(libbpfilter
diff --git a/src/libbpfilter/bpfilter.pc.in b/src/libbpfilter/bpfilter.pc.in index 746c98289..670bb5308 100644 --- a/src/libbpfilter/bpfilter.pc.in +++ b/src/libbpfilter/bpfilter.pc.in @@ -6,5 +6,5 @@ Name: bpfilter Description: BPF-based packet filtering framework URL: https://github.com/facebook/bpfilter Version: @PROJECT_VERSION@@PROJECT_VERSION_SUFFIX@ -Cflags: -I${includedir} +Cflags: -I${includedir}@BF_PC_EXTRA_CFLAGS@ Libs: -L${libdir} -lbpfilter diff --git a/src/libbpfilter/include/bpfilter/bpfilter.h b/src/libbpfilter/include/bpfilter/bpfilter.h index b27a38d0b..c4ce3feb6 100644 --- a/src/libbpfilter/include/bpfilter/bpfilter.h +++ b/src/libbpfilter/include/bpfilter/bpfilter.h @@ -9,10 +9,10 @@ #include #include +#include struct bf_response; struct bf_chain; -struct bf_set; struct bf_hookopts; /** diff --git a/src/libbpfilter/include/bpfilter/chain.h b/src/libbpfilter/include/bpfilter/chain.h index 82add0afa..f402f9095 100644 --- a/src/libbpfilter/include/bpfilter/chain.h +++ b/src/libbpfilter/include/bpfilter/chain.h @@ -9,12 +9,12 @@ #include #include #include +#include #include struct bf_hookopts; struct bf_matcher; struct bf_rule; -struct bf_set; #define _free_bf_chain_ __attribute__((cleanup(bf_chain_free))) diff --git a/src/libbpfilter/include/bpfilter/set.h b/src/libbpfilter/include/bpfilter/set.h index 24dcc0c45..1681eeceb 100644 --- a/src/libbpfilter/include/bpfilter/set.h +++ b/src/libbpfilter/include/bpfilter/set.h @@ -5,6 +5,33 @@ #pragma once +#ifdef BF_USE_HASHSET + +#include + +#define bf_set bf_hashset +#define _free_bf_set_ _free_bf_hashset_ +#define BF_SET_MAX_N_COMPS BF_HASHSET_MAX_N_COMPS +#define bf_set_new bf_hashset_new +#define bf_set_new_from_raw bf_hashset_new_from_raw +#define bf_set_new_from_pack bf_hashset_new_from_pack +#define bf_set_free bf_hashset_free +#define bf_set_pack bf_hashset_pack +#define bf_set_dump bf_hashset_dump +#define bf_set_is_empty bf_hashset_is_empty +#define bf_set_add_elem bf_hashset_add_elem +#define 
bf_set_add_elem_raw bf_hashset_add_elem_raw +#define bf_set_add_many bf_hashset_add_many +#define bf_set_remove_many bf_hashset_remove_many +#define bf_set_foreach bf_hashset_foreach +#define bf_set_size bf_hashset_size +#define bf_set_contains bf_hashset_contains +#define bf_set_get_name bf_hashset_get_name +#define bf_set_get_n_comps bf_hashset_get_n_comps +#define bf_set_get_key_comp bf_hashset_get_key_comp + +#else + #include #include @@ -198,3 +225,14 @@ int bf_set_add_many(struct bf_set *dest, struct bf_set **to_add); * - `-EINVAL`: set key format doesn't match between dest and to_remove. */ int bf_set_remove_many(struct bf_set *dest, struct bf_set **to_remove); + +/** + * @brief Check whether an element exists in a set. + * + * @param set Initialised set. Can't be NULL. + * @param elem Element to look up. Can't be NULL. + * @return True if @p elem is present. + */ +bool bf_set_contains(const struct bf_set *set, const void *elem); + +#endif diff --git a/src/libbpfilter/set.c b/src/libbpfilter/set.c index 5a99098f0..f1fd9a783 100644 --- a/src/libbpfilter/set.c +++ b/src/libbpfilter/set.c @@ -545,3 +545,16 @@ int bf_set_remove_many(struct bf_set *dest, struct bf_set **to_remove) return 0; } + +bool bf_set_contains(const struct bf_set *set, const void *elem) +{ + assert(set); + assert(elem); + + bf_list_foreach (&set->elems, node) { + if (memcmp(bf_list_node_get_data(node), elem, set->elem_size) == 0) + return true; + } + + return false; +} diff --git a/tests/harness/fake.c b/tests/harness/fake.c index cdc0db007..46da13277 100644 --- a/tests/harness/fake.c +++ b/tests/harness/fake.c @@ -221,6 +221,7 @@ struct bf_matcher *bft_matcher_dummy(const void *data, size_t data_len) return TAKE_PTR(matcher); } +#ifndef BF_USE_HASHSET struct bf_set *bft_set_dummy(size_t n_elems) { _free_bf_set_ struct bf_set *set = NULL; @@ -245,6 +246,7 @@ struct bf_set *bft_set_dummy(size_t n_elems) return TAKE_PTR(set); } +#endif struct bf_hashset *bft_hashset_dummy(size_t n_elems) { 
diff --git a/tests/harness/fake.h b/tests/harness/fake.h index e17ad8502..8a969d46d 100644 --- a/tests/harness/fake.h +++ b/tests/harness/fake.h @@ -53,5 +53,9 @@ const void *bft_get_randomly_filled_buffer(size_t len); struct bf_chain *bft_chain_dummy(bool with_rules); struct bf_rule *bft_rule_dummy(size_t n_matchers); struct bf_matcher *bft_matcher_dummy(const void *data, size_t data_len); +#ifdef BF_USE_HASHSET +#define bft_set_dummy bft_hashset_dummy +#else struct bf_set *bft_set_dummy(size_t n_elems); +#endif struct bf_hashset *bft_hashset_dummy(size_t n_elems); diff --git a/tests/harness/test.c b/tests/harness/test.c index 3e1488299..df7d0b76f 100644 --- a/tests/harness/test.c +++ b/tests/harness/test.c @@ -229,6 +229,7 @@ bool bft_counter_eq(const struct bf_counter *lhs, const struct bf_counter *rhs) return lhs->count == rhs->count && lhs->size == rhs->size; } +#ifndef BF_USE_HASHSET bool bft_set_eq(const struct bf_set *lhs, const struct bf_set *rhs) { const struct bf_list_node *n0, *n1; @@ -252,6 +253,7 @@ bool bft_set_eq(const struct bf_set *lhs, const struct bf_set *rhs) sizeof(enum bf_matcher_type) * lhs->n_comps) && lhs->use_trie == rhs->use_trie; } +#endif bool bft_hashset_eq(const struct bf_hashset *lhs, const struct bf_hashset *rhs) { diff --git a/tests/harness/test.h b/tests/harness/test.h index 772c8d751..a21599294 100644 --- a/tests/harness/test.h +++ b/tests/harness/test.h @@ -18,9 +18,10 @@ #include #include +#include + #include "fake.h" -struct bf_set; struct bf_hashset; struct bf_counter; @@ -80,7 +81,11 @@ struct bf_counter; */ bool bft_list_eq(const bf_list *lhs, const bf_list *rhs, bft_list_eq_cb cb); +#ifdef BF_USE_HASHSET +#define bft_set_eq bft_hashset_eq +#else bool bft_set_eq(const struct bf_set *lhs, const struct bf_set *rhs); +#endif bool bft_hashset_eq(const struct bf_hashset *lhs, const struct bf_hashset *rhs); bool bft_counter_eq(const struct bf_counter *lhs, const struct bf_counter *rhs); bool bft_chain_equal(const struct bf_chain 
*chain0, diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 85f445cd2..90000d413 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -92,7 +92,9 @@ bf_add_c_test(unit libbpfilter/pack.c) bf_add_c_test(unit libbpfilter/request.c) bf_add_c_test(unit libbpfilter/response.c) bf_add_c_test(unit libbpfilter/rule.c) -bf_add_c_test(unit libbpfilter/set.c) +if (NOT USE_HASHSET) + bf_add_c_test(unit libbpfilter/set.c) +endif () bf_add_c_test(unit libbpfilter/vector.c) bf_add_c_test(unit libbpfilter/verdict.c) bf_add_c_test(unit libbpfilter/version.c) diff --git a/tests/unit/libbpfilter/chain.c b/tests/unit/libbpfilter/chain.c index 5a05000e4..9fca13326 100644 --- a/tests/unit/libbpfilter/chain.c +++ b/tests/unit/libbpfilter/chain.c @@ -147,7 +147,11 @@ static void get_set_by_name(void **state) (void)state; +#ifdef BF_USE_HASHSET + assert_non_null(bf_chain_get_set_by_name(chain, "bft_hashset_dummy")); +#else assert_non_null(bf_chain_get_set_by_name(chain, "bft_set_dummy")); +#endif assert_null(bf_chain_get_set_by_name(chain, "bft_set_missing")); }