From 91e54ab19537f611285ebe5057178735c27269e9 Mon Sep 17 00:00:00 2001 From: Eisenwave Date: Sat, 7 Jun 2025 09:47:10 +0200 Subject: [PATCH 01/10] ignore .vscode --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c795b05..5acb669 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -build \ No newline at end of file +build +.vscode From 9d0cd24e3434c85082c5d978138b55668710b11b Mon Sep 17 00:00:00 2001 From: Eisenwave Date: Sat, 7 Jun 2025 09:47:31 +0200 Subject: [PATCH 02/10] remove tbb --- tools/names.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/names.cpp b/tools/names.cpp index 8d27079..7f83c99 100644 --- a/tools/names.cpp +++ b/tools/names.cpp @@ -330,7 +330,7 @@ int main(int argc, char** argv) { fmt::print("Used Substrings : {}\n", weighted_substrings.size()); const auto count = std::size_t(1 + 0.01 * double(weighted_substrings.size())); fmt::print("{}", count); - std::partial_sort(std::execution::par_unseq, + std::partial_sort( std::begin(weighted_substrings), std::begin(weighted_substrings) + count + 1, std::end(weighted_substrings), [](const auto & a, const auto &b ) { @@ -341,7 +341,7 @@ int main(int argc, char** argv) { weighted_substrings | ranges::views::take(count) | ranges::views::transform([](const auto& p) { return p.first; }) | ranges::to>; - std::partial_sort(std::execution::par_unseq, + std::partial_sort( std::begin(filtered), std::begin(filtered) + 11, std::end(filtered), [](const auto & a, const auto &b ) { From a8a70491d22964f1a9a6c23419440f5f59ecbd87 Mon Sep 17 00:00:00 2001 From: Eisenwave Date: Sat, 7 Jun 2025 10:27:20 +0200 Subject: [PATCH 03/10] add .cache to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5acb669..69b610c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ build .vscode +.cache From dfbea573cb2f27695a09754e803590204c013e4d Mon Sep 17 00:00:00 2001 From: Eisenwave 
Date: Sat, 7 Jun 2025 10:27:41 +0200 Subject: [PATCH 04/10] fix duplicate key BreakConstructorInitializers --- .clang-format | 1 - 1 file changed, 1 deletion(-) diff --git a/.clang-format b/.clang-format index 9154648..f02e62c 100644 --- a/.clang-format +++ b/.clang-format @@ -39,7 +39,6 @@ IndentWidth: 4 UseTab: Never ColumnLimit: 100 IndentPPDirectives: AfterHash -BreakConstructorInitializers: AfterColon ConstructorInitializerIndentWidth: 4 IncludeBlocks: Regroup BraceWrapping: From d88d9ab0f5ac08d6bc1f8326467e3a102dd94600 Mon Sep 17 00:00:00 2001 From: Eisenwave Date: Sat, 7 Jun 2025 10:28:17 +0200 Subject: [PATCH 05/10] upgrade project to C++23 and remove dependency bloat --- CMakeLists.txt | 13 +++---------- tests/CMakeLists.txt | 2 +- vcpkg.json | 5 +---- 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 654a257..284035f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,22 +11,18 @@ message("-- Found Python ${Python3_EXECUTABLE}") include(cmake/unicode_data.cmake) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 23) +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_CXX_STANDARD_REQUIRED ON) add_executable(namesgen tools/names.cpp) find_package(Threads REQUIRED) -find_package(TBB CONFIG REQUIRED) -find_package(range-v3 CONFIG REQUIRED) -find_package(fmt CONFIG REQUIRED) find_package(pugixml CONFIG REQUIRED) target_link_libraries(namesgen PRIVATE Threads::Threads pugixml - fmt::fmt - TBB::tbb - range-v3-concepts ) SET(HEADERS_DIR ${PROJECT_BINARY_DIR}/include/cedilla/) @@ -37,9 +33,6 @@ add_executable(namesreversegen target_link_libraries(namesreversegen PRIVATE pugixml - fmt::fmt - TBB::tbb - range-v3-concepts Threads::Threads ) target_compile_options(namesreversegen PRIVATE "-fopenmp-simd" "-march=native") diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 42c1a23..094e869 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -12,4 +12,4 @@ endmacro() create_test(tst_prop_script 
prop_script.cpp) create_test(tst_name tst_names.cpp) -target_link_libraries(tst_name fmt::fmt) +target_link_libraries(tst_name) diff --git a/vcpkg.json b/vcpkg.json index 15cad50..9b37b12 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,11 +1,8 @@ { - "$schema": "https://raw.githubusercontent.com/microsoft/vcpkg/master/scripts/vcpkg.schema.json", + "$schema": "https://raw.githubusercontent.com/microsoft/vcpkg-tool/main/docs/vcpkg.schema.json", "name": "pacman", "version": "0.1", "dependencies": [ - "tbb", - "range-v3", - "fmt", "catch2", "pugixml" ] From c6cf97b0078dcfeb4f73568557d4f1a0127b92d1 Mon Sep 17 00:00:00 2001 From: Eisenwave Date: Sat, 7 Jun 2025 10:28:40 +0200 Subject: [PATCH 06/10] use std::format, std::ranges, remove --- tools/names.cpp | 125 ++++++++++++++++++----------------------- tools/namesreverse.cpp | 42 +++++--------- 2 files changed, 70 insertions(+), 97 deletions(-) diff --git a/tools/names.cpp b/tools/names.cpp index 7f83c99..3a4d127 100644 --- a/tools/names.cpp +++ b/tools/names.cpp @@ -1,34 +1,16 @@ #include "pugixml.hpp" +#include +#include #include #include #include -#include -#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "range/v3/view/span.hpp" - #include +#include + +namespace ranges = std::ranges; bool generated(char32_t c) { const std::array ranges = { @@ -121,7 +103,7 @@ template auto substrings(R&& r) { std::mutex m; std::unordered_set> set; - std::for_each(std::execution::par_unseq, ranges::begin(r), ranges::end(r), + std::for_each(ranges::begin(r), ranges::end(r), [&](const character_name& c) { for(const auto& b : c.bits()) { for(auto i : ranges::views::iota(std::size_t(1), b.second.size() + 1)) { @@ -148,7 +130,7 @@ void inc(Map& map, K&& k) { template auto sorted_by_occurences(Map& map) { - auto v = map | ranges::to>>; + auto v = map | 
ranges::to>>(); ranges::sort(v, std::greater<>{}, &std::pair::second); return v; } @@ -172,26 +154,26 @@ void print_dict(std::FILE* f, const std::vector & blocks) { int idx = 1; std::unordered_map table; - fmt::print(f, "constexpr const char* __name_dict = \""); + std::print(f, "constexpr const char* __name_dict = \""); for(const auto& b: blocks) { for(const auto& str : b.data) { for(auto c : str) { - fmt::print(f, "{}", c); + std::print(f, "{}", c); } } table[idx++] = data{start, b.elem_size, b.data.size()}; start += b.elem_size * b.data.size(); } - fmt::print(f, "\";"); - fmt::print(f, + std::print(f, "\";"); + std::print(f, "constexpr std::string_view __get_name_segment(std::size_t b, std::size_t idx) {{"); - fmt::print(f, "switch(b) {{"); + std::print(f, "switch(b) {{"); for(const auto& [k, v] : table) { - fmt::print(f, "case {}:", k); - fmt::print(f, "return std::string_view{{__name_dict + {0} + idx * {1}, {1} }};", v.start, + std::print(f, "case {}:", k); + std::print(f, "return std::string_view{{__name_dict + {0} + idx * {1}, {1} }};", v.start, v.elem_size); } - fmt::print(f, "}} return {{}};}}"); + std::print(f, "}} return {{}};}}"); } void print_indexes(std::FILE* f, const std::unordered_map>& mapping) { @@ -202,12 +184,12 @@ void print_indexes(std::FILE* f, std::vector data; auto sorted_mapping = - mapping | ranges::to>>>; + mapping | ranges::to>>>(); ranges::sort(sorted_mapping, {}, &std::pair>::first); for(auto& [k, v] : sorted_mapping) { - auto arr = v | ranges::to>>; + auto arr = v | ranges::to>>(); ranges::sort(arr, {}, &std::pair::first); for(auto& d : arr) { data.push_back(d.second); @@ -216,40 +198,40 @@ void print_indexes(std::FILE* f, start = data.size(); } - fmt::print(f, "constexpr uint64_t __name_indexes[] = {{"); + std::print(f, "constexpr uint64_t __name_indexes[] = {{"); for(auto& elem : data) { - fmt::print(f, "{:#018x},", elem); + std::print(f, "{:#018x},", elem); } - fmt::print(f, "0xFFFF'FFFF'FFFF'FFFF}};"); + std::print(f, 
"0xFFFF'FFFF'FFFF'FFFF}};"); for(const auto& [index, data] : ranges::views::enumerate(sorted_jump_table)) { - fmt::print(f, "constexpr uint64_t __name_indexes_{}[] = {{", index); + std::print(f, "constexpr uint64_t __name_indexes_{}[] = {{", index); bool first = true; char32_t prev = 0; size_t next_start = data.first; for(auto& c : data.second) { if(first || c.first != prev + 1) { - fmt::print(f, "{:#018x},", (uint64_t(prev + 1) << 32) | uint32_t(0xFFFFFFFF)); - fmt::print(f, "{:#018x},", (uint64_t(c.first) << 32) | uint32_t(next_start)); + std::print(f, "{:#018x},", (uint64_t(prev + 1) << 32) | uint32_t(0xFFFFFFFF)); + std::print(f, "{:#018x},", (uint64_t(c.first) << 32) | uint32_t(next_start)); } first = false; prev = c.first; next_start++; } - fmt::print(f, "{:#018x}}};", (uint64_t(prev + 1) << 32) | uint32_t(0xFFFFFFFF)); + std::print(f, "{:#018x}}};", (uint64_t(prev + 1) << 32) | uint32_t(0xFFFFFFFF)); } - fmt::print(f, "constexpr std::pair " + std::print(f, "constexpr std::pair " "__get_table_index(std::size_t index) {{"); - fmt::print(f, "switch(index) {{"); + std::print(f, "switch(index) {{"); for(const auto& [index, data] : ranges::views::enumerate(sorted_jump_table)) { - fmt::print(f, "case {}:", index); - fmt::print(f, + std::print(f, "case {}:", index); + std::print(f, "return {{ __name_indexes_{0}, __name_indexes_{0} + " "sizeof(__name_indexes_{0})/sizeof(uint64_t) }};", index); } - fmt::print(f, "}} return {{nullptr, nullptr}}; }}"); + std::print(f, "}} return {{nullptr, nullptr}}; }}"); } int main(int argc, char** argv) { @@ -258,7 +240,7 @@ int main(int argc, char** argv) { auto names = data | ranges::views::transform([](const auto& p) { return character_name{p.first, p.second, {}, 0}; }) | - ranges::to; + ranges::to(); std::mutex m; std::unordered_map all_used; @@ -267,21 +249,21 @@ int main(int argc, char** argv) { auto end = names.end(); while(true) { - end = std::partition(std::execution::par_unseq, names.begin(), end, + end = 
std::partition(names.begin(), end, [](const auto & a) { return !a.complete(); }); auto incomplete = ranges::views::counted(names.begin(), std::size_t(ranges::distance(names.begin(), end))); - fmt::print("Count : {}\n", ranges::distance(incomplete)); + std::print("Count : {}\n", ranges::distance(incomplete)); if(ranges::empty(incomplete)) { break; } const auto subs = [&incomplete] { - auto tmp = substrings(incomplete) | ranges::to; - std::sort(std::execution::par_unseq, tmp.begin(), tmp.end(), + auto tmp = substrings(incomplete) | ranges::to(); + std::sort(tmp.begin(), tmp.end(), [](const auto & a, const auto & b) { return a.size() > b.size(); }); @@ -297,10 +279,10 @@ int main(int argc, char** argv) { // Compute a list of all possible substrings for each char // the value is the distance - std::for_each(std::execution::par_unseq, incomplete.begin(), incomplete.end(), + std::for_each(incomplete.begin(), incomplete.end(), [&used_substrings](const character_name& c) { const auto bits = c.bits(); - std::for_each(std::execution::par_unseq, + std::for_each( ranges::begin(bits), ranges::end(bits), [&used_substrings, &c](const auto & b) { for(auto i : ranges::views::iota(std::size_t(0), b.second.size() + 1)) { @@ -316,20 +298,20 @@ int main(int argc, char** argv) { }); }); - fmt::print("Substrings : {}\n", subs.size()); + std::print("Substrings : {}\n", subs.size()); std::vector> weighted_substrings = - used_substrings | ranges::views::remove_if([](const auto& p) { - return p.second == 0; + used_substrings | ranges::views::filter([](const auto& p) { + return p.second != 0; }) | ranges::views::transform([](const auto& p) { const double d = p.first.size() < 5 ? 
1.0 : double(p.second) * p.first.size(); return std::pair{p.first, d}; - }) | ranges::to; - fmt::print("Used Substrings : {}\n", weighted_substrings.size()); + }) | ranges::to(); + std::print("Used Substrings : {}\n", weighted_substrings.size()); const auto count = std::size_t(1 + 0.01 * double(weighted_substrings.size())); - fmt::print("{}", count); + std::print("{}", count); std::partial_sort( std::begin(weighted_substrings), std::begin(weighted_substrings) + count + 1, @@ -339,7 +321,7 @@ int main(int argc, char** argv) { ); auto filtered = weighted_substrings | ranges::views::take(count) | - ranges::views::transform([](const auto& p) { return p.first; }) | ranges::to>; + ranges::views::transform([](const auto& p) { return p.first; }) | ranges::to>(); std::partial_sort( std::begin(filtered), @@ -351,7 +333,7 @@ int main(int argc, char** argv) { std::mutex mutex; for(const auto& s : filtered | ranges::views::take(10)) { - std::for_each(std::execution::par_unseq, + std::for_each( ranges::begin(incomplete), ranges::end(incomplete), [&] (auto & c){ @@ -366,7 +348,7 @@ int main(int argc, char** argv) { } }); } - fmt::print("------\n"); + std::print("------\n"); } auto strings = sorted_by_occurences(all_used); @@ -395,11 +377,11 @@ int main(int argc, char** argv) { std::size_t dict_size = 0; for(const auto& b : blocks_by_size) { - fmt::print("--- BLOCK : string size : {}, elements {} -- total {} ({})\n", + std::print("--- BLOCK : string size : {}, elements {} -- total {} ({})\n", b.elem_size, b.size(), b.elem_size * b.data.size(), dict_size += (b.elem_size * b.data.size()) ); } - fmt::print("Total blocks: {}\n", blocks_by_size.size()); + std::print("Total blocks: {}\n", blocks_by_size.size()); std::size_t index_bytes = 0; std::unordered_map lengths; @@ -409,13 +391,18 @@ int main(int argc, char** argv) { } auto sorted_lengths = sorted_by_occurences(lengths); - // fmt::print("DICT : \n{}\n ----", strings); - fmt::print("LENGTHS : \n{}\n ----", sorted_lengths); - 
fmt::print("KBytes: {} ( dict {} + index : {} )\n", (index_bytes + dict_size) / 1024.0, + // std::print("DICT : \n{}\n ----", strings); + std::print("LENGTHS : \n"); + for (const auto& [key, occurrences] : sorted_lengths) { + std::print("({}, {})", key, occurrences); + } + std::print("\n ----"); + + std::print("KBytes: {} ( dict {} + index : {} )\n", (index_bytes + dict_size) / 1024.0, dict_size / 1024.0, index_bytes / 1024.0); auto f = fopen(argv[2], "w"); - fmt::print(f, "#pragma once\n#include \n#include \n\n"); + std::print(f, "#pragma once\n#include \n#include \n\n"); print_dict(f, blocks_by_size); diff --git a/tools/namesreverse.cpp b/tools/namesreverse.cpp index 01bbb39..97ae5a1 100644 --- a/tools/namesreverse.cpp +++ b/tools/namesreverse.cpp @@ -1,32 +1,18 @@ #include "pugixml.hpp" +#include +#include #include -#include -#include #include -#include #include -#include #include #include #include #include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "range/v3/view/span.hpp" +#include +#include -#include +namespace ranges = std::ranges; bool generated(char32_t c) { const std::array ranges = { @@ -141,7 +127,7 @@ class trie { bytes.reserve(250'000); auto add_children = [&sibling_nodes, &nodes](auto && container) { - for(auto && [idx, c] : ranges::view::enumerate(container)) { + for(auto && [idx, c] : ranges::views::enumerate(container)) { nodes.push_back(c.get()); if(idx != container.size() - 1) sibling_nodes[c.get()] = true; @@ -294,16 +280,16 @@ int main(int argc, char** argv) { } t.compact(); auto [dict, bytes] = t.dump(); - fmt::print("//dict : {} / tree : {} \n", dict.size()/1024, bytes.size()/1024); + std::print("//dict : {} / tree : {} \n", dict.size()/1024, bytes.size()/1024); - fmt::print("#pragma once\n"); - fmt::print("#include \n"); - fmt::print("namespace uni::details {{\n"); - fmt::print("constexpr const char* dict = \"{}\";\n", dict); - 
fmt::print("constexpr const uint8_t index[] = {{\n"); + std::print("#pragma once\n"); + std::print("#include \n"); + std::print("namespace uni::details {{\n"); + std::print("constexpr const char* dict = \"{}\";\n", dict); + std::print("constexpr const uint8_t index[] = {{\n"); for(auto b : bytes) { - fmt::print("0x{:02x},", b); + std::print("0x{:02x},", b); } - fmt::print("0}};\n}}\n"); + std::print("0}};\n}}\n"); } From abdf0fb848ebfafe75f336a6196d8652fb1f96b6 Mon Sep 17 00:00:00 2001 From: Eisenwave Date: Sat, 7 Jun 2025 11:08:08 +0200 Subject: [PATCH 07/10] fix segfault in names.cpp --- tools/names.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/names.cpp b/tools/names.cpp index 3a4d127..1c39320 100644 --- a/tools/names.cpp +++ b/tools/names.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace ranges = std::ranges; @@ -323,9 +324,10 @@ int main(int argc, char** argv) { weighted_substrings | ranges::views::take(count) | ranges::views::transform([](const auto& p) { return p.first; }) | ranges::to>(); + auto filtered_middle = std::min(std::ptrdiff_t(11), std::ptrdiff_t(filtered.size())); std::partial_sort( std::begin(filtered), - std::begin(filtered) + 11, + std::begin(filtered) + filtered_middle, std::end(filtered), [](const auto & a, const auto &b ) { return a.size() > b.size(); } From 22fd7024b4383fe9956155867c1b5b5ec73e2253 Mon Sep 17 00:00:00 2001 From: Eisenwave Date: Sat, 7 Jun 2025 11:14:48 +0200 Subject: [PATCH 08/10] fix intN_t with no namespace --- src/cedilla/base.h | 17 +++++++++-------- src/cedilla/unicode.h | 4 ++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/cedilla/base.h b/src/cedilla/base.h index 72cdc9b..384929f 100644 --- a/src/cedilla/base.h +++ b/src/cedilla/base.h @@ -2,6 +2,7 @@ #include #include #include +#include namespace uni::detail { @@ -72,7 +73,7 @@ struct compact_range { std::uint32_t _data[N]; constexpr T value(char32_t cp, T default_value) const { const auto end 
= std::end(_data); - auto it = detail::upper_bound(std::begin(_data), end, cp, [](char32_t local_cp, uint32_t v) { + auto it = detail::upper_bound(std::begin(_data), end, cp, [](char32_t local_cp, std::uint32_t v) { char32_t c = (v >> 8); return local_cp < c; }); @@ -91,7 +92,7 @@ struct compact_list { std::uint32_t _data[N]; constexpr T value(char32_t cp, T default_value) const { const auto end = std::end(_data); - auto it = detail::lower_bound(std::begin(_data), end, cp, [](uint32_t v, char32_t local_cp) { + auto it = detail::lower_bound(std::begin(_data), end, cp, [](std::uint32_t v, char32_t local_cp) { char32_t c = (v >> 8); return c < local_cp; }); @@ -119,9 +120,9 @@ using array_t = typename array::type; -template +template struct bool_trie { // not tries, just bitmaps for all code points 0..0x7FF (UTF-8 1- and 2-byte sequences) @@ -206,7 +207,7 @@ struct range_array { std::uint32_t _data[N]; constexpr bool lookup(char32_t cp) const { const auto end = std::end(_data); - auto it = detail::upper_bound(std::begin(_data), end, cp, [](char32_t local_cp, uint32_t v) { + auto it = detail::upper_bound(std::begin(_data), end, cp, [](char32_t local_cp, std::uint32_t v) { char32_t c = (v >> 8); return local_cp < c; }); @@ -269,7 +270,7 @@ struct pair template pair(A, B) -> pair; -struct string_with_idx { const char* name; uint32_t value; }; +struct string_with_idx { const char* name; std::uint32_t value; }; } // namespace uni::detail @@ -292,7 +293,7 @@ constexpr bool numeric_value::is_valid() const { return _d != 0; } -constexpr numeric_value::numeric_value(long long n, int16_t d) : _n(n), _d(d) {} +constexpr numeric_value::numeric_value(long long n, std::int16_t d) : _n(n), _d(d) {} } // namespace uni diff --git a/src/cedilla/unicode.h b/src/cedilla/unicode.h index a796156..88a1a23 100644 --- a/src/cedilla/unicode.h +++ b/src/cedilla/unicode.h @@ -127,7 +127,7 @@ constexpr version cp_age(char32_t cp) { constexpr block cp_block(char32_t cp) { const auto end = 
std::end(detail::tables::block_data._data); - auto it = detail::upper_bound(std::begin(detail::tables::block_data._data), end, cp, [](char32_t cp_, uint32_t v) { + auto it = detail::upper_bound(std::begin(detail::tables::block_data._data), end, cp, [](char32_t cp_, std::uint32_t v) { char32_t c = (v >> 8); return cp_ < c; }); @@ -263,7 +263,7 @@ constexpr numeric_value cp_numeric_value(char32_t cp) { }())) { return {}; } - int16_t d = 1; + std::int16_t d = 1; detail::get_numeric_value(cp, detail::tables::numeric_data_d, d); return numeric_value(res, d); } From d0e48ee39041108271c9e25374c43f3491d1f681 Mon Sep 17 00:00:00 2001 From: Eisenwave Date: Sat, 7 Jun 2025 11:40:44 +0200 Subject: [PATCH 09/10] fix use of fmt in tests --- tests/tst_names.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/tst_names.cpp b/tests/tst_names.cpp index bb2c51a..4733898 100644 --- a/tests/tst_names.cpp +++ b/tests/tst_names.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include "common.h" #include @@ -36,9 +35,12 @@ TEST_CASE("Verify that all code point have the same name as in the DB") { continue; const auto & name = it->second.name; const auto res = uni::cp_name(c).to_string(); - fmt::print("{:0x} : expected {} found {}\n", uint32_t(c) - , name, res ); + // We do not use std::print here because these tests are compiled in C++17 + // to verify compatibility, unlike the rest of the project. 
+ std::cout << std::hex << std::uint32_t(c) + << " : expected " << name + << " found " << res << '\n'; CHECK(res == name); } } -} \ No newline at end of file +} From e7331b589c5ef6e8b7c3cd502eaf4695d3646785 Mon Sep 17 00:00:00 2001 From: Eisenwave Date: Sat, 7 Jun 2025 11:41:26 +0200 Subject: [PATCH 10/10] fix missing include, -Wsign-conversion, and integers with no namespaces --- src/cedilla/synopsys.h | 5 ++-- src/name_to_cp.hpp | 60 ++++++++++++++++++++++-------------------- 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/src/cedilla/synopsys.h b/src/cedilla/synopsys.h index ed9c75e..98ea9c8 100644 --- a/src/cedilla/synopsys.h +++ b/src/cedilla/synopsys.h @@ -1,5 +1,6 @@ #pragma once #include +#include #ifndef CTRE_UNICODE_SYNOPSYS_WAS_INCLUDED namespace uni @@ -48,10 +49,10 @@ namespace uni protected: constexpr numeric_value() = default; - constexpr numeric_value(long long n, int16_t d); + constexpr numeric_value(long long n, std::int16_t d); long long _n = 0; - int16_t _d = 0; + std::int16_t _d = 0; friend constexpr numeric_value cp_numeric_value(char32_t cp); }; diff --git a/src/name_to_cp.hpp b/src/name_to_cp.hpp index e076aeb..0f08b1b 100644 --- a/src/name_to_cp.hpp +++ b/src/name_to_cp.hpp @@ -1,13 +1,15 @@ #include #include #include +#include + namespace uni { namespace details { struct node { char32_t value = 0xFFFFFF; - uint32_t children_offset = 0; + std::uint32_t children_offset = 0; bool has_sibling = false; - uint32_t size = 0; + std::uint32_t size = 0; std::string_view name; @@ -18,20 +20,20 @@ namespace uni { return children_offset != 0; } }; - constexpr node read_node(uint32_t offset) { + constexpr node read_node(std::uint32_t offset) { using namespace uni::details; const uint32_t origin = offset; node n; - uint8_t name = index[offset++]; + std::uint8_t name = index[offset++]; if(offset + 6 >= sizeof(index)) return n; const bool long_name = name & 0x40; const bool has_value = name & 0x80; - name &= ~0xC0; + name = 
std::uint8_t(name & ~0xC0); if(long_name) { - uint32_t name_offset = (index[offset++] << 8u); + std::uint32_t name_offset = (index[offset++] << 8u); name_offset |= index[offset++]; n.name = std::string_view(dict + name_offset, name); } @@ -39,28 +41,28 @@ namespace uni { n.name = std::string_view(dict + name, 1); } if(has_value) { - uint8_t h = index[offset++]; - uint8_t m = index[offset++]; - uint8_t l = index[offset++]; + std::uint8_t h = index[offset++]; + std::uint8_t m = index[offset++]; + std::uint8_t l = index[offset++]; n.value = uint32_t((h << 16u) | (m << 8u) | l) >> 3u; bool has_children = l & 0x02; n.has_sibling = l & 0x01; if(has_children) { - n.children_offset = uint32_t(index[offset++] << 16u); - n.children_offset |= uint32_t(index[offset++] << 8u); + n.children_offset = std::uint32_t(index[offset++] << 16u); + n.children_offset |= std::uint32_t(index[offset++] << 8u); n.children_offset |= index[offset++]; } } else { - uint8_t h = index[offset++]; + std::uint8_t h = index[offset++]; n.has_sibling = h & 0x80; bool has_children = h & 0x40; - h &= ~0xC0; + h = std::uint8_t(h & ~0xC0); if(has_children) { n.children_offset = (h << 16u); - n.children_offset |= (uint32_t(index[offset++]) << 8u); + n.children_offset |= (std::uint32_t(index[offset++]) << 8u); n.children_offset |= index[offset++]; } } @@ -69,7 +71,7 @@ namespace uni { } - constexpr int compare(std::string_view str, std::string_view needle, uint32_t start) { + constexpr int compare(std::string_view str, std::string_view needle, std::uint32_t start) { std::size_t str_i = start; std::size_t needle_i = 0; if(needle.size() == 0) @@ -101,14 +103,14 @@ namespace uni { return -1; } - constexpr std::tuple - compare_node(uint32_t offset, std::string_view name, uint32_t start = 0) { + constexpr std::tuple + compare_node(std::uint32_t offset, std::string_view name, std::uint32_t start = 0) { auto n = details::read_node(offset); auto cmp = details::compare(name, n.name, start); if(cmp == -1) { return 
{n, false, 0}; } - start = uint32_t(cmp); + start = std::uint32_t(cmp); if(name.size() == start) return {n, true, n.value}; if(n.has_children()) { @@ -159,8 +161,8 @@ namespace uni { struct generated_name_data { std::string_view prefix; - uint32_t start; - uint32_t end; + std::uint32_t start; + std::uint32_t end; }; constexpr const generated_name_data generated_name_data_table[] = { @@ -183,7 +185,7 @@ namespace uni { return str.size() >= needle.size() && str.compare(0, needle.size(), needle) == 0; } - constexpr uint32_t find_syllable(std::string_view str, int & pos, int count, int column) { + constexpr std::uint32_t find_syllable(std::string_view str, int & pos, int count, int column) { int len = -1; for (int i = 0; i < count; i++) { std::string_view s(hangul_syllables[i][column]); @@ -196,18 +198,18 @@ namespace uni { } if (len == -1) len = 0; - return uint32_t(len); + return std::uint32_t(len); } constexpr const char32_t SBase = 0xAC00; constexpr const char32_t LBase = 0x1100; constexpr const char32_t VBase = 0x1161; constexpr const char32_t TBase = 0x11A7; - constexpr const uint32_t LCount = 19; - constexpr const uint32_t VCount = 21; - constexpr const uint32_t TCount = 28; - constexpr const uint32_t NCount = (VCount * TCount); - constexpr const uint32_t SCount = (LCount * NCount); + constexpr const std::uint32_t LCount = 19; + constexpr const std::uint32_t VCount = 21; + constexpr const std::uint32_t TCount = 28; + constexpr const std::uint32_t NCount = (VCount * TCount); + constexpr const std::uint32_t SCount = (LCount * NCount); } @@ -230,7 +232,7 @@ namespace uni { if (starts_with(name, item.prefix)) { auto gn = name; gn.remove_prefix(item.prefix.size()); - uint32_t v = 0; + std::uint32_t v = 0; const auto end = gn.data() + gn.size(); auto [p, ec] = std::from_chars(gn.data(), end , v, 16); if(ec != std::errc() || p != end || v < item.start || v > item.end) @@ -239,7 +241,7 @@ namespace uni { } } - uint32_t offset = 0; + std::uint32_t offset = 0; for(;;) { 
auto [n, res, value] = details::compare_node(offset, name); if(!n.is_valid())